From 51199fd0b936e9979e510a29eb7ee20d621b2f29 Mon Sep 17 00:00:00 2001
From: Ann Almgren <asalmgren@lbl.gov>
Date: Tue, 1 Oct 2024 18:20:57 -0700
Subject: [PATCH 1/2] fix anelastic and add heffte as option (still a WIP)

---
 Exec/Make.ERF                                 |  12 +-
 Exec/RegTests/DensityCurrent/inputs_anelastic |  23 +-
 Source/DataStructs/ERF_DataStruct.H           |  75 +++---
 Source/ERF.H                                  |   3 +
 Source/ERF.cpp                                |   2 +
 Source/SourceTerms/ERF_Src_headers.H          |   5 +-
 Source/SourceTerms/ERF_make_buoyancy.cpp      | 123 ++++++---
 Source/SourceTerms/ERF_make_mom_sources.cpp   |   7 +-
 Source/TimeIntegration/ERF_TI_slow_rhs_fun.H  |   6 +-
 Source/Utils/ERF_EOS.H                        |  14 +
 Source/Utils/ERF_PoissonSolve.cpp             |  71 ++---
 Source/Utils/ERF_solve_with_heffte.cpp        | 245 ++++++++++++++++++
 Source/Utils/Make.package                     |   1 +
 13 files changed, 461 insertions(+), 126 deletions(-)
 create mode 100644 Source/Utils/ERF_solve_with_heffte.cpp

diff --git a/Exec/Make.ERF b/Exec/Make.ERF
index 5956e35c1..5d129369a 100644
--- a/Exec/Make.ERF
+++ b/Exec/Make.ERF
@@ -181,10 +181,20 @@ VPATH_LOCATIONS   += $(HEFFTE_HOME)/include
 INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include
 LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib
 LIBRARIES += -lheffte
+ifeq ($(USE_CUDA),TRUE)
+  libraries += -lcufft
+else ifeq ($(USE_HIP),TRUE)
+  # Use rocFFT.  ROC_PATH is defined in amrex
+  INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include
+  LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
+  LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
+else
+  libraries += -lfftw3_mpi -lfftw3f -lfftw3
+endif
 endif
 
 ifeq ($(USE_WW3_COUPLING), TRUE)
-	DEFINES += -DERF_USE_WW3_COUPLING
+DEFINES += -DERF_USE_WW3_COUPLING
 endif
 
 ERF_LSM_DIR = $(ERF_SOURCE_DIR)/LandSurfaceModel
diff --git a/Exec/RegTests/DensityCurrent/inputs_anelastic b/Exec/RegTests/DensityCurrent/inputs_anelastic
index 77fcd810b..b57a49996 100644
--- a/Exec/RegTests/DensityCurrent/inputs_anelastic
+++ b/Exec/RegTests/DensityCurrent/inputs_anelastic
@@ -1,12 +1,8 @@
 # ------------------  INPUTS TO MAIN PROGRAM  -------------------
-max_step = 999999
 stop_time = 900.0
 
 erf.anelastic      = 1
-erf.no_substepping = 1
-erf.buoyancy_type  = 2
 
-erf.use_terrain = true
 erf.use_terrain = false
 
 amrex.fpe_trap_invalid = 1
@@ -15,32 +11,27 @@ fabarray.mfiter_tile_size = 1024 1024 1024
 
 # PROBLEM SIZE & GEOMETRY
 
-#geometry.prob_lo     = -12800.   0.    0.
-#geometry.prob_hi     =  12800. 1600. 6400.
-#xlo.type = "Outflow"
-
 geometry.prob_lo     =      0.   0.    0.
-geometry.prob_hi     =  25600. 1600. 6400.
-xlo.type = "Symmetry"
-
-xhi.type = "HO_Outflow"
+geometry.prob_hi     =  25600.  100. 6400.
 
 geometry.is_periodic = 0 1 0
 
-amr.n_cell           =   256     16    64   # dx=dy=dz=100 m, Straka et al 1993 / Xue et al 2000
+amr.n_cell           =    256     1    64   # dx=dy=dz=100 m, Straka et al 1993 / Xue et al 2000
+
+xlo.type = "Symmetry"
+xhi.type = "HO_Outflow"
 
 zlo.type = "SlipWall"
 zhi.type = "SlipWall"
 
 # TIME STEP CONTROL
 erf.fixed_dt       = 1.0      # fixed time step [s] -- Straka et al 1993
-erf.fixed_fast_dt  = 0.25     # fixed time step [s] -- Straka et al 1993
 
 # DIAGNOSTICS & VERBOSITY
 erf.sum_interval   = 1       # timesteps between computing mass
-erf.v              = 0       # verbosity in ERF.cpp
-erf.mg_v           = 1       # verbosity in ERF.cpp
+erf.v              = 1       # verbosity in ERF.cpp
 amr.v              = 1       # verbosity in Amr.cpp
+erf.mg_v           = 1       # verbosity in ERF.cpp
 
 # REFINEMENT / REGRIDDING
 amr.max_level       = 0       # maximum level number allowed
diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H
index a4e4616f9..3c245b8bc 100644
--- a/Source/DataStructs/ERF_DataStruct.H
+++ b/Source/DataStructs/ERF_DataStruct.H
@@ -139,9 +139,6 @@ struct SolverChoice {
 
         // Which expression (1,2 or 3) to use for buoyancy
         pp.query("buoyancy_type", buoyancy_type);
-        if (buoyancy_type != 1 && buoyancy_type != 2 && buoyancy_type != 3 && buoyancy_type != 4) {
-            amrex::Abort("buoyancy_type must be 1, 2, 3 or 4");
-        }
 
         // What type of land surface model to use
         static std::string lsm_model_string = "None";
@@ -168,10 +165,6 @@ struct SolverChoice {
         // Use lagged_delta_rt in the fast integrator?
         pp.query("use_lagged_delta_rt", use_lagged_delta_rt);
 
-        if (!use_lagged_delta_rt && !(terrain_type == TerrainType::Moving)) {
-            amrex::Error("Can't turn off lagged_delta_rt when terrain not moving");
-        }
-
         // These default to true but are used for unit testing
         pp.query("use_gravity", use_gravity);
         gravity = use_gravity? CONST_GRAV: 0.0;
@@ -179,6 +172,7 @@ struct SolverChoice {
         pp.query("c_p", c_p);
         rdOcp = R_d / c_p;
 
+        read_int_string(max_level, "no_substepping", no_substepping, 0);
 
 #if defined(ERF_USE_POISSON_SOLVE)
         // Should we project the initial velocity field to make it divergence-free?
@@ -194,6 +188,7 @@ struct SolverChoice {
         // If anelastic at all, we do not advect rho -- it is always == rho0
         if (any_anelastic == 1) {
             constant_density = true;
+            buoyancy_type = 2; // uses Tprime
         } else {
             constant_density = false;  // We default to false but allow the user to set it
             pp.query("constant_density", constant_density);
@@ -203,24 +198,20 @@ struct SolverChoice {
         pp.query("ncorr", ncorr);
         pp.query("poisson_abstol", poisson_abstol);
         pp.query("poisson_reltol", poisson_reltol);
-#else
-        anelastic.resize(max_level+1);
-        for (int i = 0; i <= max_level; ++i) anelastic[i] = 0;
-#endif
 
-        read_int_string(max_level, "no_substepping", no_substepping, 0);
-
-        pp.query("force_stage1_single_substep", force_stage1_single_substep);
-
-#if defined(ERF_USE_POISSON_SOLVE)
         for (int lev = 0; lev <= max_level; lev++) {
-            if (anelastic[lev] != 0 && no_substepping[lev] == 0)
+            if (anelastic[lev] != 0)
             {
                 no_substepping[lev] = 1;
             }
         }
+#else
+        anelastic.resize(max_level+1);
+        for (int i = 0; i <= max_level; ++i) anelastic[i] = 0;
 #endif
 
+        pp.query("force_stage1_single_substep", force_stage1_single_substep);
+
         // Include Coriolis forcing?
         pp.query("use_coriolis", use_coriolis);
 
@@ -344,12 +335,6 @@ struct SolverChoice {
             }
         }
 
-        // Warn for PBL models and moisture - these may not yet be compatible
-        for (int lev = 0; lev <= max_level; lev++) {
-            if ((moisture_type != MoistureType::None) && (turbChoice[lev].pbl_type != PBLType::None)) {
-                amrex::Warning("\n*** WARNING: Moisture may not yet be compatible with PBL models, \n    proceed with caution ***");
-            }
-        }
 
         // Which type of refinement
         static std::string coupling_type_string = "TwoWay";
@@ -401,30 +386,56 @@ struct SolverChoice {
         // turbine. ie. the sampling distance will be this number multiplied
         // by the diameter of the turbine
         pp.query("sampling_distance_by_D", sampling_distance_by_D);
-        if(windfarm_type==WindFarmType::SimpleAD and sampling_distance_by_D < 0.0) {
+        pp.query("turb_disk_angle_from_x", turb_disk_angle);
+
+        pp.query("windfarm_x_shift",windfarm_x_shift);
+        pp.query("windfarm_y_shift",windfarm_y_shift);
+        // Test if time averaged data is to be output
+        pp.query("time_avg_vel",time_avg_vel);
+
+        check_params(max_level);
+    }
+
+    void check_params(int max_level)
+    {
+        // Warn for PBL models and moisture - these may not yet be compatible
+        for (int lev = 0; lev <= max_level; lev++) {
+            if ((moisture_type != MoistureType::None) && (turbChoice[lev].pbl_type != PBLType::None)) {
+                amrex::Warning("\n*** WARNING: Moisture may not yet be compatible with PBL models, \n    proceed with caution ***");
+            }
+        }
+        //
+        // Buoyancy type check
+        //
+        if (buoyancy_type != 1 && buoyancy_type != 2 && buoyancy_type != 3 && buoyancy_type != 4) {
+            amrex::Abort("buoyancy_type must be 1, 2, 3 or 4");
+        }
+
+        if (!use_lagged_delta_rt && !(terrain_type == TerrainType::Moving)) {
+            amrex::Error("Can't turn off lagged_delta_rt when terrain not moving");
+        }
+
+        //
+        // Wind farm checks
+        //
+        if (windfarm_type==WindFarmType::SimpleAD and sampling_distance_by_D < 0.0) {
              amrex::Abort("To use simplified actuator disks, you need to provide a variable"
                            " erf.sampling_distance_by_D in the inputs which specifies the upstream"
                            " distance as a factor of the turbine diameter at which the incoming free stream"
                            " velocity will be computed at.");
         }
-        pp.query("turb_disk_angle_from_x", turb_disk_angle);
-        if(windfarm_type==WindFarmType::SimpleAD and turb_disk_angle < 0.0) {
+        if (windfarm_type==WindFarmType::SimpleAD and turb_disk_angle < 0.0) {
             amrex::Abort("To use simplified actuator disks, you need to provide a variable"
                           " erf.turb_disk_angle_from_x in the inputs which is the angle of the face of the"
                           " turbine disk from the x-axis. A turbine facing an oncoming flow in the x-direction"
                           " will have turb_disk_angle value of 90 deg.");
         }
-
-        pp.query("windfarm_x_shift",windfarm_x_shift);
-        pp.query("windfarm_y_shift",windfarm_y_shift);
-        if(windfarm_loc_type == WindFarmLocType::lat_lon and (windfarm_x_shift < 0.0 or windfarm_y_shift < 0.0)) {
+        if (windfarm_loc_type == WindFarmLocType::lat_lon and (windfarm_x_shift < 0.0 or windfarm_y_shift < 0.0)) {
             amrex::Abort("You are using windfarms with latitude-logitude option to position the turbines."
                          " For this you should provide the inputs erf.windfarm_x_shift and"
                          " erf.windfarm_y_shift which are the values by which the bounding box of the"
                          " windfarm is shifted from the x and the y axes.");
         }
-        // Test if time averaged data is to be output
-        pp.query("time_avg_vel",time_avg_vel);
     }
 
     void display(int max_level)
diff --git a/Source/ERF.H b/Source/ERF.H
index 386649e5f..0d950437f 100644
--- a/Source/ERF.H
+++ b/Source/ERF.H
@@ -137,6 +137,8 @@ public:
 #ifdef ERF_USE_POISSON_SOLVE
     // Project the velocities to be divergence-free
     void project_velocities (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab >& vars, amrex::MultiFab& p);
+    void solve_with_heffte  (int lev, amrex::MultiFab& rhs, amrex::MultiFab& soln,
+                             amrex::Array<amrex::MultiFab,AMREX_SPACEDIM>& fluxes);
 
     // Project the velocities to be divergence-free with a thin body
     void project_velocities_tb (int lev, amrex::Real dt, amrex::Vector<amrex::MultiFab >& vars, amrex::MultiFab& p);
@@ -940,6 +942,7 @@ private:
     static int verbose;
 #ifdef ERF_USE_POISSON_SOLVE
     static int mg_verbose;
+    static bool use_heffte;
 #endif
 
     // Diagnostic output interval
diff --git a/Source/ERF.cpp b/Source/ERF.cpp
index bab3aac42..f3611ab74 100644
--- a/Source/ERF.cpp
+++ b/Source/ERF.cpp
@@ -42,6 +42,7 @@ int  ERF::fixed_mri_dt_ratio = 0;
 int ERF::verbose       = 0;
 #ifdef ERF_USE_POISSON_SOLVE
 int ERF::mg_verbose    = 0;
+int ERF::use_heffte    = false;
 #endif
 
 // Frequency of diagnostic output
@@ -1380,6 +1381,7 @@ ERF::ReadParameters ()
         pp.query("v", verbose);
 #ifdef ERF_USE_POISSON_SOLVE
         pp.query("mg_v", mg_verbose);
+        pp.query("use_heffte", use_heffte);
 #endif
 
         // Frequency of diagnostic output
diff --git a/Source/SourceTerms/ERF_Src_headers.H b/Source/SourceTerms/ERF_Src_headers.H
index 9fad2d582..a54dd256a 100644
--- a/Source/SourceTerms/ERF_Src_headers.H
+++ b/Source/SourceTerms/ERF_Src_headers.H
@@ -21,7 +21,9 @@ void make_buoyancy (amrex::Vector<  amrex::MultiFab>& S_data,
                     const amrex::Geometry geom,
                     const SolverChoice& solverChoice,
                     const amrex::MultiFab* r0,
-                    const int n_qstate);
+                    const amrex::MultiFab* p0,
+                    const int n_qstate,
+                    const int anelastic);
 
 void make_sources (int level, int nrk,
                    amrex::Real dt, amrex::Real time,
@@ -55,6 +57,7 @@ void make_mom_sources (int level, int nrk,
                              amrex::MultiFab& ymom_source,
                              amrex::MultiFab& zmom_source,
                        amrex::MultiFab* r0,
+                       amrex::MultiFab* p0,
                        const amrex::Geometry geom,
                        const SolverChoice& solverChoice,
                        std::unique_ptr<amrex::MultiFab>& mapfac_m,
diff --git a/Source/SourceTerms/ERF_make_buoyancy.cpp b/Source/SourceTerms/ERF_make_buoyancy.cpp
index 1488daf80..7f7dd2cdc 100644
--- a/Source/SourceTerms/ERF_make_buoyancy.cpp
+++ b/Source/SourceTerms/ERF_make_buoyancy.cpp
@@ -35,21 +35,63 @@ void make_buoyancy (Vector<MultiFab>& S_data,
                     const amrex::Geometry geom,
                     const SolverChoice& solverChoice,
                     const MultiFab* r0,
-                    const int n_qstate)
+                    const MultiFab* p0,
+                    const int n_qstate,
+                    const int anelastic)
 {
     BL_PROFILE("make_buoyancy()");
 
     const    Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -solverChoice.gravity};
     const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
 
-    const int klo = 0;
+    const int klo = geom.Domain().smallEnd()[2];
     const int khi = geom.Domain().bigEnd()[2] + 1;
 
-    // ******************************************************************************************
-    // Dry versions of buoyancy expressions (type 1 and type 2/3 -- types 2 and 3 are equivalent)
-    // ******************************************************************************************
-    if (solverChoice.moisture_type == MoistureType::None) {
-        if (solverChoice.buoyancy_type == 1) {
+    if (anelastic == 1) {
+#ifdef _OPENMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+        for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi)
+        {
+            Box tbz = mfi.tilebox();
+
+            // We don't compute a source term for z-momentum on the bottom or top boundary
+            if (tbz.smallEnd(2) == klo) tbz.growLo(2,-1);
+            if (tbz.bigEnd(2)   == khi) tbz.growHi(2,-1);
+
+            const Array4<const Real> & cell_data  = S_data[IntVars::cons].array(mfi);
+            const Array4<      Real> & buoyancy_fab = buoyancy.array(mfi);
+
+            // Base state density, pressure
+            const Array4<const Real>& r0_arr = r0->const_array(mfi);
+            const Array4<const Real>& p0_arr = p0->const_array(mfi);
+
+            ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
+            {
+                Real rt0_hi = getRhoThetagivenP(p0_arr(i,j,k));
+                Real  t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), solverChoice.rdOcp);
+                Real   t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), solverChoice.rdOcp);
+                Real qplus   = (t_hi-t0_hi)/t0_hi;
+
+                Real rt0_lo = getRhoThetagivenP(p0_arr(i,j,k-1));
+                Real  t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), solverChoice.rdOcp);
+                Real   t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), solverChoice.rdOcp);
+                Real qminus  = (t_lo-t0_lo)/t0_lo;
+
+                Real r0_q_avg = Real(0.5) * (r0_arr(i,j,k) * qplus + r0_arr(i,j,k-1) * qminus);
+                buoyancy_fab(i, j, k) = -r0_q_avg * grav_gpu[2];
+            });
+        } // mfi
+    }
+    else
+    {
+        // ******************************************************************************************
+        // Dry versions of buoyancy expressions (type 1 and type 2/3 -- types 2 and 3 are equivalent)
+        // ******************************************************************************************
+        if (solverChoice.moisture_type == MoistureType::None)
+        {
+
+          if (solverChoice.buoyancy_type == 1) {
             for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi)
             {
                 Box tbz = mfi.tilebox();
@@ -71,7 +113,9 @@ void make_buoyancy (Vector<MultiFab>& S_data,
                 });
             } // mfi
 
-        } else if (solverChoice.buoyancy_type == 2 || solverChoice.buoyancy_type == 3) {
+          }
+          else if (solverChoice.buoyancy_type == 2 || solverChoice.buoyancy_type == 3)
+          {
             PlaneAverage state_ave(&(S_data[IntVars::cons]), geom, solverChoice.ave_plane);
             PlaneAverage prim_ave(&S_prim, geom, solverChoice.ave_plane);
 
@@ -110,37 +154,34 @@ void make_buoyancy (Vector<MultiFab>& S_data,
                 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                 {
                     Real tempp1d = getTgivenRandRTh(rho_d_ptr[k  ], rho_d_ptr[k  ]*theta_d_ptr[k  ]);
-                    Real tempm1d = getTgivenRandRTh(rho_d_ptr[k-1], rho_d_ptr[k-1]*theta_d_ptr[k-1]);
-
-                    Real tempp3d  = getTgivenRandRTh(cell_data(i,j,k  ,Rho_comp), cell_data(i,j,k  ,RhoTheta_comp));
-                    Real tempm3d  = getTgivenRandRTh(cell_data(i,j,k-1,Rho_comp), cell_data(i,j,k-1,RhoTheta_comp));
+                    Real tempp3d = getTgivenRandRTh(cell_data(i,j,k  ,Rho_comp), cell_data(i,j,k  ,RhoTheta_comp));
+                    Real qplus   = (tempp3d-tempp1d)/tempp1d;
 
-                    Real qplus  = (tempp3d-tempp1d)/tempp1d;
-                    Real qminus = (tempm3d-tempm1d)/tempm1d;
+                    Real tempm1d = getTgivenRandRTh(rho_d_ptr[k-1], rho_d_ptr[k-1]*theta_d_ptr[k-1]);
+                    Real tempm3d = getTgivenRandRTh(cell_data(i,j,k-1,Rho_comp), cell_data(i,j,k-1,RhoTheta_comp));
+                    Real qminus  = (tempm3d-tempm1d)/tempm1d;
 
-                    Real qavg  = Real(0.5) * (qplus + qminus);
-                    Real r0avg = Real(0.5) * (rho_d_ptr[k] + rho_d_ptr[k-1]);
+                    Real r0_q_avg = Real(0.5) * (rho_d_ptr[k]*qplus + rho_d_ptr[k-1]*qminus);
 
-                    buoyancy_fab(i, j, k) = -qavg * r0avg * grav_gpu[2];
+                    buoyancy_fab(i, j, k) = -r0_q_avg * grav_gpu[2];
                 });
             } // mfi
-        } // buoyancy_type
-    } // no moisture
-
-    // ******************************************************************************************
-    // Moist versions of buoyancy expressions
-    // ******************************************************************************************
-    if (solverChoice.moisture_type != MoistureType::None) {
-
-        if (solverChoice.moisture_type == MoistureType::Kessler_NoRain) {
-            AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1);
-        }
-
-        if (solverChoice.moisture_type == MoistureType::SAM or solverChoice.moisture_type == MoistureType::SAM_NoPrecip_NoIce) {
-            AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1);
-        }
-
-        if (solverChoice.buoyancy_type == 1) {
+          } // buoyancy_type
+        } // moisture type
+        else
+        {
+        // ******************************************************************************************
+        // Moist versions of buoyancy expressions
+        // ******************************************************************************************
+
+          if ( (solverChoice.moisture_type == MoistureType::Kessler_NoRain) ||
+               (solverChoice.moisture_type == MoistureType::SAM)            ||
+               (solverChoice.moisture_type == MoistureType::SAM_NoPrecip_NoIce) )
+           {
+              AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1);
+          }
+
+          if (solverChoice.buoyancy_type == 1) {
 
             for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi)
             {
@@ -158,8 +199,11 @@ void make_buoyancy (Vector<MultiFab>& S_data,
 
                 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
                 {
-                    Real rhop_hi = cell_data(i,j,k  ,Rho_comp) + cell_data(i,j,k  ,RhoQ1_comp) + cell_data(i,j,k  ,RhoQ2_comp) - r0_arr(i,j,k  );
-                    Real rhop_lo = cell_data(i,j,k-1,Rho_comp) + cell_data(i,j,k-1,RhoQ1_comp) + cell_data(i,j,k-1,RhoQ2_comp) - r0_arr(i,j,k-1);
+                    Real rhop_hi = cell_data(i,j,k  ,Rho_comp) + cell_data(i,j,k  ,RhoQ1_comp)
+                                 + cell_data(i,j,k  ,RhoQ2_comp) - r0_arr(i,j,k  );
+                    Real rhop_lo = cell_data(i,j,k-1,Rho_comp) + cell_data(i,j,k-1,RhoQ1_comp)
+                                 + cell_data(i,j,k-1,RhoQ2_comp) - r0_arr(i,j,k-1);
+
                     for (int q_offset(2); q_offset<n_qstate; ++q_offset) {
                         rhop_hi += cell_data(i,j,k  ,RhoQ1_comp+q_offset);
                         rhop_lo += cell_data(i,j,k-1,RhoQ1_comp+q_offset);
@@ -168,7 +212,7 @@ void make_buoyancy (Vector<MultiFab>& S_data,
                 });
             } // mfi
 
-        } else {
+          } else {
 
             PlaneAverage state_ave(&(S_data[IntVars::cons]), geom, solverChoice.ave_plane);
             PlaneAverage  prim_ave(&S_prim                , geom, solverChoice.ave_plane);
@@ -332,6 +376,7 @@ void make_buoyancy (Vector<MultiFab>& S_data,
                     });
                 } // mfi
             }  // buoyancy_type
-        } // not buoyancy_type == 1
-    } // has moisture
+          } // not buoyancy_type == 1
+        } // has moisture
+    } // anelastic?
 }
diff --git a/Source/SourceTerms/ERF_make_mom_sources.cpp b/Source/SourceTerms/ERF_make_mom_sources.cpp
index 330f9c089..b60190bfe 100644
--- a/Source/SourceTerms/ERF_make_mom_sources.cpp
+++ b/Source/SourceTerms/ERF_make_mom_sources.cpp
@@ -37,7 +37,7 @@ using namespace amrex;
  * @param[in] n_qstate number of moisture components
  */
 
-void make_mom_sources (int /*level*/,
+void make_mom_sources (int level,
                        int /*nrk*/, Real dt, Real time,
                        Vector<MultiFab>& S_data,
                        const  MultiFab & S_prim,
@@ -48,7 +48,7 @@ void make_mom_sources (int /*level*/,
                               MultiFab & xmom_src,
                               MultiFab & ymom_src,
                               MultiFab & zmom_src,
-                       MultiFab* r0,
+                       MultiFab* r0, MultiFab* p0,
                        const Geometry geom,
                        const SolverChoice& solverChoice,
                        std::unique_ptr<MultiFab>& mapfac_m,
@@ -190,7 +190,8 @@ void make_mom_sources (int /*level*/,
     // *****************************************************************************
     // 1. Create the BUOYANCY forcing term in the z-direction
     // *****************************************************************************
-    make_buoyancy(S_data, S_prim, zmom_src, geom, solverChoice, r0, n_qstate);
+    make_buoyancy(S_data, S_prim, zmom_src, geom, solverChoice, r0, p0,
+                  n_qstate, solverChoice.anelastic[level]);
 
     // *****************************************************************************
     // Add all the other forcings
diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H
index 784024334..c4f15752c 100644
--- a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H
+++ b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H
@@ -113,7 +113,7 @@
                              z_phys_nd[level], z_phys_cc[level],
                              xvel_new, yvel_new,
                              xmom_src, ymom_src, zmom_src,
-                             r0_new, fine_geom, solverChoice,
+                             r0_new, p0_new, fine_geom, solverChoice,
                              mapfac_m[level], mapfac_u[level], mapfac_v[level],
                              dptr_u_geos, dptr_v_geos, dptr_wbar_sub,
                              d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev,
@@ -217,7 +217,7 @@
                              z_phys_nd[level], z_phys_cc[level],
                              xvel_new, yvel_new,
                              xmom_src, ymom_src, zmom_src,
-                             r0, fine_geom, solverChoice,
+                             r0, p0, fine_geom, solverChoice,
                              mapfac_m[level], mapfac_u[level], mapfac_v[level],
                              dptr_u_geos, dptr_v_geos, dptr_wbar_sub,
                              d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev,
@@ -428,7 +428,7 @@
                          z_phys_nd[level], z_phys_cc[level],
                          xvel_new, yvel_new,
                          xmom_src, ymom_src, zmom_src,
-                         r0, fine_geom, solverChoice,
+                         r0, p0, fine_geom, solverChoice,
                          mapfac_m[level], mapfac_u[level], mapfac_v[level],
                          dptr_u_geos, dptr_v_geos, dptr_wbar_sub,
                          d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev,
diff --git a/Source/Utils/ERF_EOS.H b/Source/Utils/ERF_EOS.H
index 133f45598..3d47639b8 100644
--- a/Source/Utils/ERF_EOS.H
+++ b/Source/Utils/ERF_EOS.H
@@ -19,6 +19,19 @@ amrex::Real getThgivenPandT(const amrex::Real T, const amrex::Real P, const amre
     return T*std::pow(p_0/P, rdOcp);
 }
 
+/**
+ * Function to return temperature given pressure and potential temperature
+ *
+ * @params[in] pressure
+ * @params[in] potential temperature
+ * @params[in] rdOcp ratio of R_d to c_p
+*/
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real getTgivenPandTh(const amrex::Real P, const amrex::Real Th, const amrex::Real rdOcp)
+{
+    return Th / std::pow(p_0/P, rdOcp);
+}
+
 /**
  * Function to return temperature given density and potential temperature
  *
@@ -31,6 +44,7 @@ amrex::Real getTgivenRandRTh(const amrex::Real rho, const amrex::Real rhotheta,
     // rho and rhotheta are dry values. We should be using moist value of theta when using moisture
     // theta_m = theta * (1 + R_v/R_d*qv)
     amrex::Real p_loc = p_0 * std::pow(R_d * rhotheta * (1.0 + R_v/R_d*qv) * ip_0, Gamma);
+
     // p = rho_d * R_d * T_v (not T)
     // T_v = T * (1 + R_v/R_d*qv)
     return p_loc / (R_d * rho * (1.0 + R_v/R_d*qv) );
diff --git a/Source/Utils/ERF_PoissonSolve.cpp b/Source/Utils/ERF_PoissonSolve.cpp
index 7b87d1f17..069502179 100644
--- a/Source/Utils/ERF_PoissonSolve.cpp
+++ b/Source/Utils/ERF_PoissonSolve.cpp
@@ -31,8 +31,6 @@ ERF::get_projection_bc (Orientation::Side side) const noexcept
             }
         }
     }
-    // r[2] = LinOpBCType::Neumann;
-    // amrex::Print() << "BCs for Poisson solve " << r[0] << " " << r[1] << " " << r[2] << std::endl;
     return r;
 }
 bool ERF::projection_has_dirichlet (Array<LinOpBCType,AMREX_SPACEDIM> bcs) const
@@ -58,14 +56,12 @@ void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, Mult
     auto const dom_hi = ubound(geom[lev].Domain());
 
     LPInfo info;
-#if 0
     // Allow a hidden direction if the domain is one cell wide in any lateral direction
     if (dom_lo.x == dom_hi.x) {
         info.setHiddenDirection(0);
     } else if (dom_lo.y == dom_hi.y) {
         info.setHiddenDirection(1);
     }
-#endif
 
     // Make sure the solver only sees the levels over which we are solving
     Vector<BoxArray>            ba_tmp;   ba_tmp.push_back(mom_mf[Vars::cons].boxArray());
@@ -90,15 +86,10 @@ void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, Mult
     Vector<MultiFab> phi;
     Vector<Array<MultiFab,AMREX_SPACEDIM> > fluxes;
 
-    rhs.resize(1);
-    phi.resize(1);
-    fluxes.resize(1);
-
-    rhs[0].define(ba_tmp[0], dm_tmp[0], 1, 0);
-    phi[0].define(ba_tmp[0], dm_tmp[0], 1, 0);
-    rhs[0].setVal(0.0);
-    phi[0].setVal(0.0);
+    rhs.resize(1); rhs[0].define(ba_tmp[0], dm_tmp[0], 1, 0);
+    phi.resize(1); phi[0].define(ba_tmp[0], dm_tmp[0], 1, 1);
 
+    fluxes.resize(1);
     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
         fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0);
     }
@@ -109,34 +100,52 @@ void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, Mult
     rho0_u_const[2] = &mom_mf[IntVars::zmom];
 
     computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]);
-    Print() << "Max norm of divergence before at level " << lev << " : " << rhs[0].norm0() << std::endl;
+
+    if (mg_verbose > 0) {
+        Print() << "Max norm of divergence before at level " << lev << " : " << rhs[0].norm0() << std::endl;
+    }
 
     // If all Neumann BCs, adjust RHS to make sure we can converge
     if (need_adjust_rhs)
     {
         Real offset = volWgtSumMF(lev, rhs[0], 0, *mapfac_m[lev], false, false);
-        // amrex::Print() << "Poisson solvability offset = " << offset << std::endl;
+        if (mg_verbose > 1) {
+            Print() << "Poisson solvability offset = " << offset << std::endl;
+        }
         rhs[0].plus(-offset, 0, 1);
     }
 
-    // Initialize phi to 0
-    phi[0].setVal(0.0);
+    Real start_step = static_cast<Real>(ParallelDescriptor::second());
 
-    MLMG mlmg(mlpoisson);
-    int max_iter = 100;
-    mlmg.setMaxIter(max_iter);
+#ifdef ERF_USE_HEFFTE
+    if (use_heffte) {
+        solve_with_heffte(lev, rhs[0], phi[0], fluxes[0]);
+    } else
+#endif
+    {
+        // Initialize phi to 0
+        phi[0].setVal(0.0);
+
+        MLMG mlmg(mlpoisson);
+        int max_iter = 100;
+        mlmg.setMaxIter(max_iter);
 
-    mlmg.setVerbose(mg_verbose);
-    mlmg.setBottomVerbose(0);
+        mlmg.setVerbose(mg_verbose);
+        mlmg.setBottomVerbose(0);
 
-    mlmg.solve(GetVecOfPtrs(phi),
-               GetVecOfConstPtrs(rhs),
-               solverChoice.poisson_reltol,
-               solverChoice.poisson_abstol);
+        mlmg.solve(GetVecOfPtrs(phi),
+                   GetVecOfConstPtrs(rhs),
+                   solverChoice.poisson_reltol,
+                   solverChoice.poisson_abstol);
+        mlmg.getFluxes(GetVecOfArrOfPtrs(fluxes));
+    }
 
-    mlmg.getFluxes(GetVecOfArrOfPtrs(fluxes));
+    Real end_step = static_cast<Real>(ParallelDescriptor::second());
+    if (mg_verbose > 0) {
+        amrex::Print() << "Time in solve " << end_step - start_step << std::endl;
+    }
 
-    // Subtract (dt rho0/rho) grad(phi) from the rho0-weighted velocity components
+    // Subtract dt grad(phi) from the momenta
     MultiFab::Add(mom_mf[IntVars::xmom],fluxes[0][0],0,0,1,0);
     MultiFab::Add(mom_mf[IntVars::ymom],fluxes[0][1],0,0,1,0);
     MultiFab::Add(mom_mf[IntVars::zmom],fluxes[0][2],0,0,1,0);
@@ -223,12 +232,12 @@ void ERF::project_velocities (int lev, Real l_dt, Vector<MultiFab>& mom_mf, Mult
     // Now overwrite with periodic fill outside domain and fine-fine fill inside
     pmf.FillBoundary(geom[lev].periodicity());
 
-#if 0
     //
     // BELOW IS SIMPLY VERIFYING THE DIVERGENCE AFTER THE SOLVE
     //
-    computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]);
-    Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl;
-#endif
+    if (mg_verbose > 0) {
+        computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]);
+        Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl;
+    }
 }
 #endif
diff --git a/Source/Utils/ERF_solve_with_heffte.cpp b/Source/Utils/ERF_solve_with_heffte.cpp
new file mode 100644
index 000000000..4236d0d25
--- /dev/null
+++ b/Source/Utils/ERF_solve_with_heffte.cpp
@@ -0,0 +1,245 @@
+#include "ERF.H"
+#include <AMReX_MLMG.H>
+#include <AMReX_MLPoisson.H>
+#include "ERF_Utils.H"
+#ifdef ERF_USE_HEFFTE
+#include "heffte.h"
+#endif
+
+#ifdef ERF_USE_POISSON_SOLVE
+#ifdef ERF_USE_HEFFTE
+
+using namespace amrex;
+
+void ERF::solve_with_heffte (int lev, MultiFab& rhs, MultiFab& phi,
+                             Array<MultiFab,AMREX_SPACEDIM>& fluxes)
+{
+    BoxArray ba(rhs.boxArray());
+    DistributionMapping dm(rhs.DistributionMap());
+
+    // The heffte solve uses a solution array with no ghost cells
+    MultiFab soln(ba,dm,1,0);
+
+    // Determine the domain length in each direction
+    Real L_x = geom[lev].ProbHi(0) - geom[lev].ProbLo(0);
+    Real L_y = geom[lev].ProbHi(1) - geom[lev].ProbLo(1);
+    Real L_z = geom[lev].ProbHi(2) - geom[lev].ProbLo(2);
+
+    Box domain = geom[lev].Domain();
+    auto const& domlo = lbound(domain);
+    auto const& domhi = ubound(domain);
+
+    int n_cell_x = domain.length(0);
+    int n_cell_y = domain.length(1);
+    int n_cell_z = domain.length(2);
+
+    auto dx    = geom[lev].CellSize();
+    auto dxinv = geom[lev].InvCellSize();
+
+    // Since there is 1 MPI rank per box, here each MPI rank obtains its local box and the associated boxid
+    Box local_box;
+    int local_boxid;
+    {
+        for (int i = 0; i < ba.size(); ++i) {
+            Box b = ba[i];
+            // each MPI rank has its own local_box Box and local_boxid ID
+            if (ParallelDescriptor::MyProc() == dm[i]) {
+                local_box = b;
+                local_boxid = i;
+            }
+        }
+    }
+
+    // Now each MPI rank works on its own box
+    // Ror real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset
+
+    // start by coarsening each box by 2 in the x-direction
+    Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1)));
+
+    // If the coarsened box's high-x index is even, we shrink the size in 1 in x
+    // this avoids overlap between coarsened boxes
+    if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) {
+        c_local_box.setBig(0,c_local_box.bigEnd(0)-1);
+    }
+    // For any boxes that touch the hi-x domain we increase the size of boxes by 1 in x
+    // This makes the overall fft dataset have size (Nx/2+1 x Ny x Nz)
+    if (local_box.bigEnd(0) == geom[lev].Domain().bigEnd(0)) {
+        c_local_box.growHi(0,1);
+    }
+
+    // Each MPI rank gets storage for its piece of the fft
+    BaseFab<GpuComplex<Real> > spectral_field(c_local_box, 1, The_Device_Arena());
+
+    // Create real->complex fft objects with the appropriate backend and data about
+    // the domain size and its local box size
+
+    bool do_2d_solves = false;
+
+    // ********************************************************************************************
+    // ********************************************************************************************
+    // ********************************************************************************************
+
+    // ********************************************************************************************
+    // NOTE: THIS IS A WIP - IT DOES NOT WORK YET
+    // ********************************************************************************************
+    if (do_2d_solves) {
+
+#ifdef AMREX_USE_CUDA
+        heffte::fft2d_r2c<heffte::backend::cufft> fft
+#elif AMREX_USE_HIP
+        heffte::fft2d_r2c<heffte::backend::rocfft> fft
+#else
+        heffte::fft2d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),0},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,0}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),0},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,0}},
+         0, ParallelDescriptor::Communicator());
+
+        using heffte_complex = typename heffte::fft_output<Real>::type;
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr();
+
+        // ********************************************************************************************
+
+        for (int k = domlo.z; k <= domhi.z; k++) {
+            int offset = k * (n_cell_x*n_cell_y);
+            fft.forward(rhs[local_boxid].dataPtr(offset), spectral_data);
+        }
+
+        // ********************************************************************************************
+
+        // Now we take the standard FFT and scale it by 1/k^2
+        Array4< GpuComplex<Real> > spectral = spectral_field.array();
+
+        ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+        {
+            Real a = 2.*M_PI*i / L_x;
+            Real b = 2.*M_PI*j / L_y;
+            Real c = 2.*M_PI*k / L_z;
+
+            // the values in the upper-half of the spectral array in y and z are here interpreted as negative wavenumbers
+            if (j >= n_cell_y/2) b = 2.*M_PI*(n_cell_y-j) / L_y;
+            if (k >= n_cell_z/2) c = 2.*M_PI*(n_cell_z-k) / L_z;
+
+            Real k2 =  2.0*(std::cos(a*dx[0])-1.)*(dxinv[0]*dxinv[0]) +
+                       2.0*(std::cos(b*dx[1])-1.)*(dxinv[1]*dxinv[1]) ;
+            if (k2 != 0.) {
+                spectral(i,j,k) /= k2;
+            } else {
+                spectral(i,j,k) *= 0.; // interpretation here is that the average value of the solution is zero
+            }
+        });
+
+        // ********************************************************************************************
+
+        for (int k = domlo.z; k <= domhi.z; k++) {
+            int offset = k * (n_cell_x*n_cell_y);
+            fft.backward(spectral_data, soln[local_boxid].dataPtr(offset));
+        }
+
+        // ********************************************************************************************
+
+    } else {
+
+#ifdef AMREX_USE_CUDA
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif AMREX_USE_HIP
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#else
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+         0, ParallelDescriptor::Communicator());
+
+        using heffte_complex = typename heffte::fft_output<Real>::type;
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr();
+
+        // ********************************************************************************************
+
+        fft.forward(rhs[local_boxid].dataPtr(), spectral_data);
+
+        // ********************************************************************************************
+
+        // Now we take the standard FFT and scale it by 1/k^2
+        Array4< GpuComplex<Real> > spectral = spectral_field.array();
+
+        ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+        {
+            Real a = 2.*M_PI*i / L_x;
+            Real b = 2.*M_PI*j / L_y;
+            Real c = 2.*M_PI*k / L_z;
+
+            // the values in the upper-half of the spectral array in y and z are here interpreted as negative wavenumbers
+            if (j >= n_cell_y/2) b = 2.*M_PI*(n_cell_y-j) / L_y;
+            if (k >= n_cell_z/2) c = 2.*M_PI*(n_cell_z-k) / L_z;
+
+            Real k2 =  2.0*(std::cos(a*dx[0])-1.)*(dxinv[0]*dxinv[0]) +
+                       2.0*(std::cos(b*dx[1])-1.)*(dxinv[1]*dxinv[1]) +
+                       2.0*(std::cos(c*dx[2])-1.)*(dxinv[2]*dxinv[2]);
+            if (k2 != 0.) {
+                spectral(i,j,k) /= k2;
+            } else {
+                spectral(i,j,k) *= 0.; // interpretation here is that the average value of the solution is zero
+            }
+        });
+
+        // ********************************************************************************************
+
+        fft.backward(spectral_data, soln[local_boxid].dataPtr());
+
+        // ********************************************************************************************
+
+    } // 3d solve
+
+    // ********************************************************************************************
+    // ********************************************************************************************
+    // ********************************************************************************************
+
+    // Scale by 1/npts (both forward and inverse need sqrt(npts) scaling so I am doing it all here)
+    Real npts = static_cast<Real>(ba.numPts());
+    soln.mult(1./npts);
+
+    // ********************************************************************************************
+
+    phi.copy(soln);
+    phi.FillBoundary(geom[lev].periodicity());
+
+#ifdef _OPENMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (MFIter mfi(soln, TilingIfNotGPU()); mfi.isValid(); ++mfi)
+    {
+        Array4<Real const> const&  p_arr  = phi.array(mfi);
+
+        Box const& xbx = mfi.nodaltilebox(0);
+        const Real dx_inv = dxinv[0];
+        Array4<Real      > const& fx_arr  = fluxes[0].array(mfi);
+        ParallelFor(xbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            fx_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i-1,j,k)) * dx_inv;
+        });
+
+        Box const& ybx = mfi.nodaltilebox(1);
+        const Real dy_inv = dxinv[1];
+        Array4<Real      > const& fy_arr  = fluxes[1].array(mfi);
+        ParallelFor(ybx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            fy_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i,j-1,k)) * dy_inv;
+        });
+
+        Box const& zbx = mfi.nodaltilebox(2);
+        const Real dz_inv = dxinv[2];
+        Array4<Real      > const& fz_arr  = fluxes[2].array(mfi);
+        ParallelFor(zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            fz_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i,j-1,k)) * dz_inv;
+        });
+    } // mfi
+}
+
+#endif
+#endif
diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package
index e3d62008f..82d3e8e99 100644
--- a/Source/Utils/Make.package
+++ b/Source/Utils/Make.package
@@ -26,4 +26,5 @@ CEXE_sources += ERF_Time_Avg_Vel.cpp
 ifeq ($(USE_POISSON_SOLVE),TRUE)
 CEXE_sources += ERF_PoissonSolve.cpp
 CEXE_sources += ERF_PoissonSolve_tb.cpp
+CEXE_sources += ERF_solve_with_heffte.cpp
 endif

From f3cb7f4bfa4237aa3ca903cb27f6a91598387408 Mon Sep 17 00:00:00 2001
From: Ann Almgren <asalmgren@lbl.gov>
Date: Tue, 1 Oct 2024 18:58:26 -0700
Subject: [PATCH 2/2] fix for gpu

---
 Source/ERF.cpp                           |  4 ++--
 Source/SourceTerms/ERF_make_buoyancy.cpp | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Source/ERF.cpp b/Source/ERF.cpp
index f3611ab74..96f389eba 100644
--- a/Source/ERF.cpp
+++ b/Source/ERF.cpp
@@ -41,8 +41,8 @@ int  ERF::fixed_mri_dt_ratio = 0;
 // Dictate verbosity in screen output
 int ERF::verbose       = 0;
 #ifdef ERF_USE_POISSON_SOLVE
-int ERF::mg_verbose    = 0;
-int ERF::use_heffte    = false;
+int  ERF::mg_verbose    = 0;
+bool ERF::use_heffte    = false;
 #endif
 
 // Frequency of diagnostic output
diff --git a/Source/SourceTerms/ERF_make_buoyancy.cpp b/Source/SourceTerms/ERF_make_buoyancy.cpp
index 7f7dd2cdc..caec66a57 100644
--- a/Source/SourceTerms/ERF_make_buoyancy.cpp
+++ b/Source/SourceTerms/ERF_make_buoyancy.cpp
@@ -47,6 +47,8 @@ void make_buoyancy (Vector<MultiFab>& S_data,
     const int klo = geom.Domain().smallEnd()[2];
     const int khi = geom.Domain().bigEnd()[2] + 1;
 
+    Real rd_over_cp = solverChoice.rdOcp;
+
     if (anelastic == 1) {
 #ifdef _OPENMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
@@ -69,13 +71,13 @@ void make_buoyancy (Vector<MultiFab>& S_data,
             ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k)
             {
                 Real rt0_hi = getRhoThetagivenP(p0_arr(i,j,k));
-                Real  t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), solverChoice.rdOcp);
-                Real   t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), solverChoice.rdOcp);
+                Real  t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), rd_over_cp);
+                Real   t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), rd_over_cp);
                 Real qplus   = (t_hi-t0_hi)/t0_hi;
 
                 Real rt0_lo = getRhoThetagivenP(p0_arr(i,j,k-1));
-                Real  t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), solverChoice.rdOcp);
-                Real   t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), solverChoice.rdOcp);
+                Real  t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), rd_over_cp);
+                Real   t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), rd_over_cp);
                 Real qminus  = (t_lo-t0_lo)/t0_lo;
 
                 Real r0_q_avg = Real(0.5) * (r0_arr(i,j,k) * qplus + r0_arr(i,j,k-1) * qminus);