From 51199fd0b936e9979e510a29eb7ee20d621b2f29 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Tue, 1 Oct 2024 18:20:57 -0700 Subject: [PATCH 1/2] fix anelastic and add heffte as option (still a WIP) --- Exec/Make.ERF | 12 +- Exec/RegTests/DensityCurrent/inputs_anelastic | 23 +- Source/DataStructs/ERF_DataStruct.H | 75 +++--- Source/ERF.H | 3 + Source/ERF.cpp | 2 + Source/SourceTerms/ERF_Src_headers.H | 5 +- Source/SourceTerms/ERF_make_buoyancy.cpp | 123 ++++++--- Source/SourceTerms/ERF_make_mom_sources.cpp | 7 +- Source/TimeIntegration/ERF_TI_slow_rhs_fun.H | 6 +- Source/Utils/ERF_EOS.H | 14 + Source/Utils/ERF_PoissonSolve.cpp | 71 ++--- Source/Utils/ERF_solve_with_heffte.cpp | 245 ++++++++++++++++++ Source/Utils/Make.package | 1 + 13 files changed, 461 insertions(+), 126 deletions(-) create mode 100644 Source/Utils/ERF_solve_with_heffte.cpp diff --git a/Exec/Make.ERF b/Exec/Make.ERF index 5956e35c1..5d129369a 100644 --- a/Exec/Make.ERF +++ b/Exec/Make.ERF @@ -181,10 +181,20 @@ VPATH_LOCATIONS += $(HEFFTE_HOME)/include INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib LIBRARIES += -lheffte +ifeq ($(USE_CUDA),TRUE) + libraries += -lcufft +else ifeq ($(USE_HIP),TRUE) + # Use rocFFT. ROC_PATH is defined in amrex + INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include + LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib + LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft +else + libraries += -lfftw3_mpi -lfftw3f -lfftw3 +endif endif ifeq ($(USE_WW3_COUPLING), TRUE) - DEFINES += -DERF_USE_WW3_COUPLING +DEFINES += -DERF_USE_WW3_COUPLING endif ERF_LSM_DIR = $(ERF_SOURCE_DIR)/LandSurfaceModel diff --git a/Exec/RegTests/DensityCurrent/inputs_anelastic b/Exec/RegTests/DensityCurrent/inputs_anelastic index 77fcd810b..b57a49996 100644 --- a/Exec/RegTests/DensityCurrent/inputs_anelastic +++ b/Exec/RegTests/DensityCurrent/inputs_anelastic @@ -1,12 +1,8 @@ # ------------------ INPUTS TO MAIN PROGRAM ------------------- -max_step = 999999 stop_time = 900.0 erf.anelastic = 1 -erf.no_substepping = 1 -erf.buoyancy_type = 2 -erf.use_terrain = true erf.use_terrain = false amrex.fpe_trap_invalid = 1 @@ -15,32 +11,27 @@ fabarray.mfiter_tile_size = 1024 1024 1024 # PROBLEM SIZE & GEOMETRY -#geometry.prob_lo = -12800. 0. 0. -#geometry.prob_hi = 12800. 1600. 6400. -#xlo.type = "Outflow" - geometry.prob_lo = 0. 0. 0. -geometry.prob_hi = 25600. 1600. 6400. -xlo.type = "Symmetry" - -xhi.type = "HO_Outflow" +geometry.prob_hi = 25600. 100. 6400. geometry.is_periodic = 0 1 0 -amr.n_cell = 256 16 64 # dx=dy=dz=100 m, Straka et al 1993 / Xue et al 2000 +amr.n_cell = 256 1 64 # dx=dy=dz=100 m, Straka et al 1993 / Xue et al 2000 + +xlo.type = "Symmetry" +xhi.type = "HO_Outflow" zlo.type = "SlipWall" zhi.type = "SlipWall" # TIME STEP CONTROL erf.fixed_dt = 1.0 # fixed time step [s] -- Straka et al 1993 -erf.fixed_fast_dt = 0.25 # fixed time step [s] -- Straka et al 1993 # DIAGNOSTICS & VERBOSITY erf.sum_interval = 1 # timesteps between computing mass -erf.v = 0 # verbosity in ERF.cpp -erf.mg_v = 1 # verbosity in ERF.cpp +erf.v = 1 # verbosity in ERF.cpp amr.v = 1 # verbosity in Amr.cpp +erf.mg_v = 1 # verbosity in ERF.cpp # REFINEMENT / REGRIDDING amr.max_level = 0 # maximum level number allowed diff --git a/Source/DataStructs/ERF_DataStruct.H b/Source/DataStructs/ERF_DataStruct.H index a4e4616f9..3c245b8bc 100644 --- a/Source/DataStructs/ERF_DataStruct.H +++ b/Source/DataStructs/ERF_DataStruct.H @@ -139,9 +139,6 @@ struct SolverChoice { // Which expression (1,2 or 3) to use for buoyancy pp.query("buoyancy_type", buoyancy_type); - if (buoyancy_type != 1 && buoyancy_type != 2 && buoyancy_type != 3 && buoyancy_type != 4) { - amrex::Abort("buoyancy_type must be 1, 2, 3 or 4"); - } // What type of land surface model to use static std::string lsm_model_string = "None"; @@ -168,10 +165,6 @@ struct SolverChoice { // Use lagged_delta_rt in the fast integrator? pp.query("use_lagged_delta_rt", use_lagged_delta_rt); - if (!use_lagged_delta_rt && !(terrain_type == TerrainType::Moving)) { - amrex::Error("Can't turn off lagged_delta_rt when terrain not moving"); - } - // These default to true but are used for unit testing pp.query("use_gravity", use_gravity); gravity = use_gravity? CONST_GRAV: 0.0; @@ -179,6 +172,7 @@ struct SolverChoice { pp.query("c_p", c_p); rdOcp = R_d / c_p; + read_int_string(max_level, "no_substepping", no_substepping, 0); #if defined(ERF_USE_POISSON_SOLVE) // Should we project the initial velocity field to make it divergence-free? @@ -194,6 +188,7 @@ struct SolverChoice { // If anelastic at all, we do not advect rho -- it is always == rho0 if (any_anelastic == 1) { constant_density = true; + buoyancy_type = 2; // uses Tprime } else { constant_density = false; // We default to false but allow the user to set it pp.query("constant_density", constant_density); @@ -203,24 +198,20 @@ struct SolverChoice { pp.query("ncorr", ncorr); pp.query("poisson_abstol", poisson_abstol); pp.query("poisson_reltol", poisson_reltol); -#else - anelastic.resize(max_level+1); - for (int i = 0; i <= max_level; ++i) anelastic[i] = 0; -#endif - read_int_string(max_level, "no_substepping", no_substepping, 0); - - pp.query("force_stage1_single_substep", force_stage1_single_substep); - -#if defined(ERF_USE_POISSON_SOLVE) for (int lev = 0; lev <= max_level; lev++) { - if (anelastic[lev] != 0 && no_substepping[lev] == 0) + if (anelastic[lev] != 0) { no_substepping[lev] = 1; } } +#else + anelastic.resize(max_level+1); + for (int i = 0; i <= max_level; ++i) anelastic[i] = 0; #endif + pp.query("force_stage1_single_substep", force_stage1_single_substep); + // Include Coriolis forcing? pp.query("use_coriolis", use_coriolis); @@ -344,12 +335,6 @@ struct SolverChoice { } } - // Warn for PBL models and moisture - these may not yet be compatible - for (int lev = 0; lev <= max_level; lev++) { - if ((moisture_type != MoistureType::None) && (turbChoice[lev].pbl_type != PBLType::None)) { - amrex::Warning("\n*** WARNING: Moisture may not yet be compatible with PBL models, \n proceed with caution ***"); - } - } // Which type of refinement static std::string coupling_type_string = "TwoWay"; @@ -401,30 +386,56 @@ struct SolverChoice { // turbine. ie. the sampling distance will be this number multiplied // by the diameter of the turbine pp.query("sampling_distance_by_D", sampling_distance_by_D); - if(windfarm_type==WindFarmType::SimpleAD and sampling_distance_by_D < 0.0) { + pp.query("turb_disk_angle_from_x", turb_disk_angle); + + pp.query("windfarm_x_shift",windfarm_x_shift); + pp.query("windfarm_y_shift",windfarm_y_shift); + // Test if time averaged data is to be output + pp.query("time_avg_vel",time_avg_vel); + + check_params(max_level); + } + + void check_params(int max_level) + { + // Warn for PBL models and moisture - these may not yet be compatible + for (int lev = 0; lev <= max_level; lev++) { + if ((moisture_type != MoistureType::None) && (turbChoice[lev].pbl_type != PBLType::None)) { + amrex::Warning("\n*** WARNING: Moisture may not yet be compatible with PBL models, \n proceed with caution ***"); + } + } + // + // Buoyancy type check + // + if (buoyancy_type != 1 && buoyancy_type != 2 && buoyancy_type != 3 && buoyancy_type != 4) { + amrex::Abort("buoyancy_type must be 1, 2, 3 or 4"); + } + + if (!use_lagged_delta_rt && !(terrain_type == TerrainType::Moving)) { + amrex::Error("Can't turn off lagged_delta_rt when terrain not moving"); + } + + // + // Wind farm checks + // + if (windfarm_type==WindFarmType::SimpleAD and sampling_distance_by_D < 0.0) { amrex::Abort("To use simplified actuator disks, you need to provide a variable" " erf.sampling_distance_by_D in the inputs which specifies the upstream" " distance as a factor of the turbine diameter at which the incoming free stream" " velocity will be computed at."); } - pp.query("turb_disk_angle_from_x", turb_disk_angle); - if(windfarm_type==WindFarmType::SimpleAD and turb_disk_angle < 0.0) { + if (windfarm_type==WindFarmType::SimpleAD and turb_disk_angle < 0.0) { amrex::Abort("To use simplified actuator disks, you need to provide a variable" " erf.turb_disk_angle_from_x in the inputs which is the angle of the face of the" " turbine disk from the x-axis. A turbine facing an oncoming flow in the x-direction" " will have turb_disk_angle value of 90 deg."); } - - pp.query("windfarm_x_shift",windfarm_x_shift); - pp.query("windfarm_y_shift",windfarm_y_shift); - if(windfarm_loc_type == WindFarmLocType::lat_lon and (windfarm_x_shift < 0.0 or windfarm_y_shift < 0.0)) { + if (windfarm_loc_type == WindFarmLocType::lat_lon and (windfarm_x_shift < 0.0 or windfarm_y_shift < 0.0)) { amrex::Abort("You are using windfarms with latitude-logitude option to position the turbines." " For this you should provide the inputs erf.windfarm_x_shift and" " erf.windfarm_y_shift which are the values by which the bounding box of the" " windfarm is shifted from the x and the y axes."); } - // Test if time averaged data is to be output - pp.query("time_avg_vel",time_avg_vel); } void display(int max_level) diff --git a/Source/ERF.H b/Source/ERF.H index 386649e5f..0d950437f 100644 --- a/Source/ERF.H +++ b/Source/ERF.H @@ -137,6 +137,8 @@ public: #ifdef ERF_USE_POISSON_SOLVE // Project the velocities to be divergence-free void project_velocities (int lev, amrex::Real dt, amrex::Vector& vars, amrex::MultiFab& p); + void solve_with_heffte (int lev, amrex::MultiFab& rhs, amrex::MultiFab& soln, + amrex::Array& fluxes); // Project the velocities to be divergence-free with a thin body void project_velocities_tb (int lev, amrex::Real dt, amrex::Vector& vars, amrex::MultiFab& p); @@ -940,6 +942,7 @@ private: static int verbose; #ifdef ERF_USE_POISSON_SOLVE static int mg_verbose; + static bool use_heffte; #endif // Diagnostic output interval diff --git a/Source/ERF.cpp b/Source/ERF.cpp index bab3aac42..f3611ab74 100644 --- a/Source/ERF.cpp +++ b/Source/ERF.cpp @@ -42,6 +42,7 @@ int ERF::fixed_mri_dt_ratio = 0; int ERF::verbose = 0; #ifdef ERF_USE_POISSON_SOLVE int ERF::mg_verbose = 0; +int ERF::use_heffte = false; #endif // Frequency of diagnostic output @@ -1380,6 +1381,7 @@ ERF::ReadParameters () pp.query("v", verbose); #ifdef ERF_USE_POISSON_SOLVE pp.query("mg_v", mg_verbose); + pp.query("use_heffte", use_heffte); #endif // Frequency of diagnostic output diff --git a/Source/SourceTerms/ERF_Src_headers.H b/Source/SourceTerms/ERF_Src_headers.H index 9fad2d582..a54dd256a 100644 --- a/Source/SourceTerms/ERF_Src_headers.H +++ b/Source/SourceTerms/ERF_Src_headers.H @@ -21,7 +21,9 @@ void make_buoyancy (amrex::Vector< amrex::MultiFab>& S_data, const amrex::Geometry geom, const SolverChoice& solverChoice, const amrex::MultiFab* r0, - const int n_qstate); + const amrex::MultiFab* p0, + const int n_qstate, + const int anelastic); void make_sources (int level, int nrk, amrex::Real dt, amrex::Real time, @@ -55,6 +57,7 @@ void make_mom_sources (int level, int nrk, amrex::MultiFab& ymom_source, amrex::MultiFab& zmom_source, amrex::MultiFab* r0, + amrex::MultiFab* p0, const amrex::Geometry geom, const SolverChoice& solverChoice, std::unique_ptr& mapfac_m, diff --git a/Source/SourceTerms/ERF_make_buoyancy.cpp b/Source/SourceTerms/ERF_make_buoyancy.cpp index 1488daf80..7f7dd2cdc 100644 --- a/Source/SourceTerms/ERF_make_buoyancy.cpp +++ b/Source/SourceTerms/ERF_make_buoyancy.cpp @@ -35,21 +35,63 @@ void make_buoyancy (Vector& S_data, const amrex::Geometry geom, const SolverChoice& solverChoice, const MultiFab* r0, - const int n_qstate) + const MultiFab* p0, + const int n_qstate, + const int anelastic) { BL_PROFILE("make_buoyancy()"); const Array grav{0.0, 0.0, -solverChoice.gravity}; const GpuArray grav_gpu{grav[0], grav[1], grav[2]}; - const int klo = 0; + const int klo = geom.Domain().smallEnd()[2]; const int khi = geom.Domain().bigEnd()[2] + 1; - // ****************************************************************************************** - // Dry versions of buoyancy expressions (type 1 and type 2/3 -- types 2 and 3 are equivalent) - // ****************************************************************************************** - if (solverChoice.moisture_type == MoistureType::None) { - if (solverChoice.buoyancy_type == 1) { + if (anelastic == 1) { +#ifdef _OPENMP +#pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box tbz = mfi.tilebox(); + + // We don't compute a source term for z-momentum on the bottom or top boundary + if (tbz.smallEnd(2) == klo) tbz.growLo(2,-1); + if (tbz.bigEnd(2) == khi) tbz.growHi(2,-1); + + const Array4 & cell_data = S_data[IntVars::cons].array(mfi); + const Array4< Real> & buoyancy_fab = buoyancy.array(mfi); + + // Base state density, pressure + const Array4& r0_arr = r0->const_array(mfi); + const Array4& p0_arr = p0->const_array(mfi); + + ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + Real rt0_hi = getRhoThetagivenP(p0_arr(i,j,k)); + Real t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), solverChoice.rdOcp); + Real t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), solverChoice.rdOcp); + Real qplus = (t_hi-t0_hi)/t0_hi; + + Real rt0_lo = getRhoThetagivenP(p0_arr(i,j,k-1)); + Real t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), solverChoice.rdOcp); + Real t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), solverChoice.rdOcp); + Real qminus = (t_lo-t0_lo)/t0_lo; + + Real r0_q_avg = Real(0.5) * (r0_arr(i,j,k) * qplus + r0_arr(i,j,k-1) * qminus); + buoyancy_fab(i, j, k) = -r0_q_avg * grav_gpu[2]; + }); + } // mfi + } + else + { + // ****************************************************************************************** + // Dry versions of buoyancy expressions (type 1 and type 2/3 -- types 2 and 3 are equivalent) + // ****************************************************************************************** + if (solverChoice.moisture_type == MoistureType::None) + { + + if (solverChoice.buoyancy_type == 1) { for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi) { Box tbz = mfi.tilebox(); @@ -71,7 +113,9 @@ void make_buoyancy (Vector& S_data, }); } // mfi - } else if (solverChoice.buoyancy_type == 2 || solverChoice.buoyancy_type == 3) { + } + else if (solverChoice.buoyancy_type == 2 || solverChoice.buoyancy_type == 3) + { PlaneAverage state_ave(&(S_data[IntVars::cons]), geom, solverChoice.ave_plane); PlaneAverage prim_ave(&S_prim, geom, solverChoice.ave_plane); @@ -110,37 +154,34 @@ void make_buoyancy (Vector& S_data, ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) { Real tempp1d = getTgivenRandRTh(rho_d_ptr[k ], rho_d_ptr[k ]*theta_d_ptr[k ]); - Real tempm1d = getTgivenRandRTh(rho_d_ptr[k-1], rho_d_ptr[k-1]*theta_d_ptr[k-1]); - - Real tempp3d = getTgivenRandRTh(cell_data(i,j,k ,Rho_comp), cell_data(i,j,k ,RhoTheta_comp)); - Real tempm3d = getTgivenRandRTh(cell_data(i,j,k-1,Rho_comp), cell_data(i,j,k-1,RhoTheta_comp)); + Real tempp3d = getTgivenRandRTh(cell_data(i,j,k ,Rho_comp), cell_data(i,j,k ,RhoTheta_comp)); + Real qplus = (tempp3d-tempp1d)/tempp1d; - Real qplus = (tempp3d-tempp1d)/tempp1d; - Real qminus = (tempm3d-tempm1d)/tempm1d; + Real tempm1d = getTgivenRandRTh(rho_d_ptr[k-1], rho_d_ptr[k-1]*theta_d_ptr[k-1]); + Real tempm3d = getTgivenRandRTh(cell_data(i,j,k-1,Rho_comp), cell_data(i,j,k-1,RhoTheta_comp)); + Real qminus = (tempm3d-tempm1d)/tempm1d; - Real qavg = Real(0.5) * (qplus + qminus); - Real r0avg = Real(0.5) * (rho_d_ptr[k] + rho_d_ptr[k-1]); + Real r0_q_avg = Real(0.5) * (rho_d_ptr[k]*qplus + rho_d_ptr[k-1]*qminus); - buoyancy_fab(i, j, k) = -qavg * r0avg * grav_gpu[2]; + buoyancy_fab(i, j, k) = -r0_q_avg * grav_gpu[2]; }); } // mfi - } // buoyancy_type - } // no moisture - - // ****************************************************************************************** - // Moist versions of buoyancy expressions - // ****************************************************************************************** - if (solverChoice.moisture_type != MoistureType::None) { - - if (solverChoice.moisture_type == MoistureType::Kessler_NoRain) { - AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1); - } - - if (solverChoice.moisture_type == MoistureType::SAM or solverChoice.moisture_type == MoistureType::SAM_NoPrecip_NoIce) { - AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1); - } - - if (solverChoice.buoyancy_type == 1) { + } // buoyancy_type + } // moisture type + else + { + // ****************************************************************************************** + // Moist versions of buoyancy expressions + // ****************************************************************************************** + + if ( (solverChoice.moisture_type == MoistureType::Kessler_NoRain) || + (solverChoice.moisture_type == MoistureType::SAM) || + (solverChoice.moisture_type == MoistureType::SAM_NoPrecip_NoIce) ) + { + AMREX_ALWAYS_ASSERT(solverChoice.buoyancy_type == 1); + } + + if (solverChoice.buoyancy_type == 1) { for ( MFIter mfi(buoyancy,TilingIfNotGPU()); mfi.isValid(); ++mfi) { @@ -158,8 +199,11 @@ void make_buoyancy (Vector& S_data, ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) { - Real rhop_hi = cell_data(i,j,k ,Rho_comp) + cell_data(i,j,k ,RhoQ1_comp) + cell_data(i,j,k ,RhoQ2_comp) - r0_arr(i,j,k ); - Real rhop_lo = cell_data(i,j,k-1,Rho_comp) + cell_data(i,j,k-1,RhoQ1_comp) + cell_data(i,j,k-1,RhoQ2_comp) - r0_arr(i,j,k-1); + Real rhop_hi = cell_data(i,j,k ,Rho_comp) + cell_data(i,j,k ,RhoQ1_comp) + + cell_data(i,j,k ,RhoQ2_comp) - r0_arr(i,j,k ); + Real rhop_lo = cell_data(i,j,k-1,Rho_comp) + cell_data(i,j,k-1,RhoQ1_comp) + + cell_data(i,j,k-1,RhoQ2_comp) - r0_arr(i,j,k-1); + for (int q_offset(2); q_offset& S_data, }); } // mfi - } else { + } else { PlaneAverage state_ave(&(S_data[IntVars::cons]), geom, solverChoice.ave_plane); PlaneAverage prim_ave(&S_prim , geom, solverChoice.ave_plane); @@ -332,6 +376,7 @@ void make_buoyancy (Vector& S_data, }); } // mfi } // buoyancy_type - } // not buoyancy_type == 1 - } // has moisture + } // not buoyancy_type == 1 + } // has moisture + } // anelastic? } diff --git a/Source/SourceTerms/ERF_make_mom_sources.cpp b/Source/SourceTerms/ERF_make_mom_sources.cpp index 330f9c089..b60190bfe 100644 --- a/Source/SourceTerms/ERF_make_mom_sources.cpp +++ b/Source/SourceTerms/ERF_make_mom_sources.cpp @@ -37,7 +37,7 @@ using namespace amrex; * @param[in] n_qstate number of moisture components */ -void make_mom_sources (int /*level*/, +void make_mom_sources (int level, int /*nrk*/, Real dt, Real time, Vector& S_data, const MultiFab & S_prim, @@ -48,7 +48,7 @@ void make_mom_sources (int /*level*/, MultiFab & xmom_src, MultiFab & ymom_src, MultiFab & zmom_src, - MultiFab* r0, + MultiFab* r0, MultiFab* p0, const Geometry geom, const SolverChoice& solverChoice, std::unique_ptr& mapfac_m, @@ -190,7 +190,8 @@ void make_mom_sources (int /*level*/, // ***************************************************************************** // 1. Create the BUOYANCY forcing term in the z-direction // ***************************************************************************** - make_buoyancy(S_data, S_prim, zmom_src, geom, solverChoice, r0, n_qstate); + make_buoyancy(S_data, S_prim, zmom_src, geom, solverChoice, r0, p0, + n_qstate, solverChoice.anelastic[level]); // ***************************************************************************** // Add all the other forcings diff --git a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H index 784024334..c4f15752c 100644 --- a/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H +++ b/Source/TimeIntegration/ERF_TI_slow_rhs_fun.H @@ -113,7 +113,7 @@ z_phys_nd[level], z_phys_cc[level], xvel_new, yvel_new, xmom_src, ymom_src, zmom_src, - r0_new, fine_geom, solverChoice, + r0_new, p0_new, fine_geom, solverChoice, mapfac_m[level], mapfac_u[level], mapfac_v[level], dptr_u_geos, dptr_v_geos, dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev, @@ -217,7 +217,7 @@ z_phys_nd[level], z_phys_cc[level], xvel_new, yvel_new, xmom_src, ymom_src, zmom_src, - r0, fine_geom, solverChoice, + r0, p0, fine_geom, solverChoice, mapfac_m[level], mapfac_u[level], mapfac_v[level], dptr_u_geos, dptr_v_geos, dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev, @@ -428,7 +428,7 @@ z_phys_nd[level], z_phys_cc[level], xvel_new, yvel_new, xmom_src, ymom_src, zmom_src, - r0, fine_geom, solverChoice, + r0, p0, fine_geom, solverChoice, mapfac_m[level], mapfac_u[level], mapfac_v[level], dptr_u_geos, dptr_v_geos, dptr_wbar_sub, d_rayleigh_ptrs_at_lev, d_sponge_ptrs_at_lev, diff --git a/Source/Utils/ERF_EOS.H b/Source/Utils/ERF_EOS.H index 133f45598..3d47639b8 100644 --- a/Source/Utils/ERF_EOS.H +++ b/Source/Utils/ERF_EOS.H @@ -19,6 +19,19 @@ amrex::Real getThgivenPandT(const amrex::Real T, const amrex::Real P, const amre return T*std::pow(p_0/P, rdOcp); } +/** + * Function to return temperature given pressure and potential temperature + * + * @params[in] pressure + * @params[in] potential temperature + * @params[in] rdOcp ratio of R_d to c_p +*/ +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +amrex::Real getTgivenPandTh(const amrex::Real P, const amrex::Real Th, const amrex::Real rdOcp) +{ + return Th / std::pow(p_0/P, rdOcp); +} + /** * Function to return temperature given density and potential temperature * @@ -31,6 +44,7 @@ amrex::Real getTgivenRandRTh(const amrex::Real rho, const amrex::Real rhotheta, // rho and rhotheta are dry values. We should be using moist value of theta when using moisture // theta_m = theta * (1 + R_v/R_d*qv) amrex::Real p_loc = p_0 * std::pow(R_d * rhotheta * (1.0 + R_v/R_d*qv) * ip_0, Gamma); + // p = rho_d * R_d * T_v (not T) // T_v = T * (1 + R_v/R_d*qv) return p_loc / (R_d * rho * (1.0 + R_v/R_d*qv) ); diff --git a/Source/Utils/ERF_PoissonSolve.cpp b/Source/Utils/ERF_PoissonSolve.cpp index 7b87d1f17..069502179 100644 --- a/Source/Utils/ERF_PoissonSolve.cpp +++ b/Source/Utils/ERF_PoissonSolve.cpp @@ -31,8 +31,6 @@ ERF::get_projection_bc (Orientation::Side side) const noexcept } } } - // r[2] = LinOpBCType::Neumann; - // amrex::Print() << "BCs for Poisson solve " << r[0] << " " << r[1] << " " << r[2] << std::endl; return r; } bool ERF::projection_has_dirichlet (Array bcs) const @@ -58,14 +56,12 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult auto const dom_hi = ubound(geom[lev].Domain()); LPInfo info; -#if 0 // Allow a hidden direction if the domain is one cell wide in any lateral direction if (dom_lo.x == dom_hi.x) { info.setHiddenDirection(0); } else if (dom_lo.y == dom_hi.y) { info.setHiddenDirection(1); } -#endif // Make sure the solver only sees the levels over which we are solving Vector ba_tmp; ba_tmp.push_back(mom_mf[Vars::cons].boxArray()); @@ -90,15 +86,10 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult Vector phi; Vector > fluxes; - rhs.resize(1); - phi.resize(1); - fluxes.resize(1); - - rhs[0].define(ba_tmp[0], dm_tmp[0], 1, 0); - phi[0].define(ba_tmp[0], dm_tmp[0], 1, 0); - rhs[0].setVal(0.0); - phi[0].setVal(0.0); + rhs.resize(1); rhs[0].define(ba_tmp[0], dm_tmp[0], 1, 0); + phi.resize(1); phi[0].define(ba_tmp[0], dm_tmp[0], 1, 1); + fluxes.resize(1); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { fluxes[0][idim].define(convert(ba_tmp[0], IntVect::TheDimensionVector(idim)), dm_tmp[0], 1, 0); } @@ -109,34 +100,52 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult rho0_u_const[2] = &mom_mf[IntVars::zmom]; computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]); - Print() << "Max norm of divergence before at level " << lev << " : " << rhs[0].norm0() << std::endl; + + if (mg_verbose > 0) { + Print() << "Max norm of divergence before at level " << lev << " : " << rhs[0].norm0() << std::endl; + } // If all Neumann BCs, adjust RHS to make sure we can converge if (need_adjust_rhs) { Real offset = volWgtSumMF(lev, rhs[0], 0, *mapfac_m[lev], false, false); - // amrex::Print() << "Poisson solvability offset = " << offset << std::endl; + if (mg_verbose > 1) { + Print() << "Poisson solvability offset = " << offset << std::endl; + } rhs[0].plus(-offset, 0, 1); } - // Initialize phi to 0 - phi[0].setVal(0.0); + Real start_step = static_cast(ParallelDescriptor::second()); - MLMG mlmg(mlpoisson); - int max_iter = 100; - mlmg.setMaxIter(max_iter); +#ifdef ERF_USE_HEFFTE + if (use_heffte) { + solve_with_heffte(lev, rhs[0], phi[0], fluxes[0]); + } else +#endif + { + // Initialize phi to 0 + phi[0].setVal(0.0); + + MLMG mlmg(mlpoisson); + int max_iter = 100; + mlmg.setMaxIter(max_iter); - mlmg.setVerbose(mg_verbose); - mlmg.setBottomVerbose(0); + mlmg.setVerbose(mg_verbose); + mlmg.setBottomVerbose(0); - mlmg.solve(GetVecOfPtrs(phi), - GetVecOfConstPtrs(rhs), - solverChoice.poisson_reltol, - solverChoice.poisson_abstol); + mlmg.solve(GetVecOfPtrs(phi), + GetVecOfConstPtrs(rhs), + solverChoice.poisson_reltol, + solverChoice.poisson_abstol); + mlmg.getFluxes(GetVecOfArrOfPtrs(fluxes)); + } - mlmg.getFluxes(GetVecOfArrOfPtrs(fluxes)); + Real end_step = static_cast(ParallelDescriptor::second()); + if (mg_verbose > 0) { + amrex::Print() << "Time in solve " << end_step - start_step << std::endl; + } - // Subtract (dt rho0/rho) grad(phi) from the rho0-weighted velocity components + // Subtract dt grad(phi) from the momenta MultiFab::Add(mom_mf[IntVars::xmom],fluxes[0][0],0,0,1,0); MultiFab::Add(mom_mf[IntVars::ymom],fluxes[0][1],0,0,1,0); MultiFab::Add(mom_mf[IntVars::zmom],fluxes[0][2],0,0,1,0); @@ -223,12 +232,12 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult // Now overwrite with periodic fill outside domain and fine-fine fill inside pmf.FillBoundary(geom[lev].periodicity()); -#if 0 // // BELOW IS SIMPLY VERIFYING THE DIVERGENCE AFTER THE SOLVE // - computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]); - Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl; -#endif + if (mg_verbose > 0) { + computeDivergence(rhs[0], rho0_u_const, geom_tmp[0]); + Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl; + } } #endif diff --git a/Source/Utils/ERF_solve_with_heffte.cpp b/Source/Utils/ERF_solve_with_heffte.cpp new file mode 100644 index 000000000..4236d0d25 --- /dev/null +++ b/Source/Utils/ERF_solve_with_heffte.cpp @@ -0,0 +1,245 @@ +#include "ERF.H" +#include +#include +#include "ERF_Utils.H" +#ifdef ERF_USE_HEFFTE +#include "heffte.h" +#endif + +#ifdef ERF_USE_POISSON_SOLVE +#ifdef ERF_USE_HEFFTE + +using namespace amrex; + +void ERF::solve_with_heffte (int lev, MultiFab& rhs, MultiFab& phi, + Array& fluxes) +{ + BoxArray ba(rhs.boxArray()); + DistributionMapping dm(rhs.DistributionMap()); + + // The heffte solve uses a solution array with no ghost cells + MultiFab soln(ba,dm,1,0); + + // Determine the domain length in each direction + Real L_x = geom[lev].ProbHi(0) - geom[lev].ProbLo(0); + Real L_y = geom[lev].ProbHi(1) - geom[lev].ProbLo(1); + Real L_z = geom[lev].ProbHi(2) - geom[lev].ProbLo(2); + + Box domain = geom[lev].Domain(); + auto const& domlo = lbound(domain); + auto const& domhi = ubound(domain); + + int n_cell_x = domain.length(0); + int n_cell_y = domain.length(1); + int n_cell_z = domain.length(2); + + auto dx = geom[lev].CellSize(); + auto dxinv = geom[lev].InvCellSize(); + + // Since there is 1 MPI rank per box, here each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // Now each MPI rank works on its own box + // Ror real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // If the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // For any boxes that touch the hi-x domain we increase the size of boxes by 1 in x + // This makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom[lev].Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // Each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); + + // Create real->complex fft objects with the appropriate backend and data about + // the domain size and its local box size + + bool do_2d_solves = false; + + // ******************************************************************************************** + // ******************************************************************************************** + // ******************************************************************************************** + + // ******************************************************************************************** + // NOTE: THIS IS A WIP - IT DOES NOT WORK YET + // ******************************************************************************************** + if (do_2d_solves) { + +#ifdef AMREX_USE_CUDA + heffte::fft2d_r2c fft +#elif AMREX_USE_HIP + heffte::fft2d_r2c fft +#else + heffte::fft2d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),0}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,0}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),0}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,0}}, + 0, ParallelDescriptor::Communicator()); + + using heffte_complex = typename heffte::fft_output::type; + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + + // ******************************************************************************************** + + for (int k = domlo.z; k <= domhi.z; k++) { + int offset = k * (n_cell_x*n_cell_y); + fft.forward(rhs[local_boxid].dataPtr(offset), spectral_data); + } + + // ******************************************************************************************** + + // Now we take the standard FFT and scale it by 1/k^2 + Array4< GpuComplex > spectral = spectral_field.array(); + + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + Real a = 2.*M_PI*i / L_x; + Real b = 2.*M_PI*j / L_y; + Real c = 2.*M_PI*k / L_z; + + // the values in the upper-half of the spectral array in y and z are here interpreted as negative wavenumbers + if (j >= n_cell_y/2) b = 2.*M_PI*(n_cell_y-j) / L_y; + if (k >= n_cell_z/2) c = 2.*M_PI*(n_cell_z-k) / L_z; + + Real k2 = 2.0*(std::cos(a*dx[0])-1.)*(dxinv[0]*dxinv[0]) + + 2.0*(std::cos(b*dx[1])-1.)*(dxinv[1]*dxinv[1]) ; + if (k2 != 0.) { + spectral(i,j,k) /= k2; + } else { + spectral(i,j,k) *= 0.; // interpretation here is that the average value of the solution is zero + } + }); + + // ******************************************************************************************** + + for (int k = domlo.z; k <= domhi.z; k++) { + int offset = k * (n_cell_x*n_cell_y); + fft.backward(spectral_data, soln[local_boxid].dataPtr(offset)); + } + + // ******************************************************************************************** + + } else { + +#ifdef AMREX_USE_CUDA + heffte::fft3d_r2c fft +#elif AMREX_USE_HIP + heffte::fft3d_r2c fft +#else + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + 0, ParallelDescriptor::Communicator()); + + using heffte_complex = typename heffte::fft_output::type; + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + + // ******************************************************************************************** + + fft.forward(rhs[local_boxid].dataPtr(), spectral_data); + + // ******************************************************************************************** + + // Now we take the standard FFT and scale it by 1/k^2 + Array4< GpuComplex > spectral = spectral_field.array(); + + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + Real a = 2.*M_PI*i / L_x; + Real b = 2.*M_PI*j / L_y; + Real c = 2.*M_PI*k / L_z; + + // the values in the upper-half of the spectral array in y and z are here interpreted as negative wavenumbers + if (j >= n_cell_y/2) b = 2.*M_PI*(n_cell_y-j) / L_y; + if (k >= n_cell_z/2) c = 2.*M_PI*(n_cell_z-k) / L_z; + + Real k2 = 2.0*(std::cos(a*dx[0])-1.)*(dxinv[0]*dxinv[0]) + + 2.0*(std::cos(b*dx[1])-1.)*(dxinv[1]*dxinv[1]) + + 2.0*(std::cos(c*dx[2])-1.)*(dxinv[2]*dxinv[2]); + if (k2 != 0.) { + spectral(i,j,k) /= k2; + } else { + spectral(i,j,k) *= 0.; // interpretation here is that the average value of the solution is zero + } + }); + + // ******************************************************************************************** + + fft.backward(spectral_data, soln[local_boxid].dataPtr()); + + // ******************************************************************************************** + + } // 3d solve + + // ******************************************************************************************** + // ******************************************************************************************** + // ******************************************************************************************** + + // Scale by 1/npts (both forward and inverse need sqrt(npts) scaling so I am doing it all here) + Real npts = static_cast(ba.numPts()); + soln.mult(1./npts); + + // ******************************************************************************************** + + phi.copy(soln); + phi.FillBoundary(geom[lev].periodicity()); + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(soln, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Array4 const& p_arr = phi.array(mfi); + + Box const& xbx = mfi.nodaltilebox(0); + const Real dx_inv = dxinv[0]; + Array4 const& fx_arr = fluxes[0].array(mfi); + ParallelFor(xbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + fx_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i-1,j,k)) * dx_inv; + }); + + Box const& ybx = mfi.nodaltilebox(1); + const Real dy_inv = dxinv[1]; + Array4 const& fy_arr = fluxes[1].array(mfi); + ParallelFor(ybx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + fy_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i,j-1,k)) * dy_inv; + }); + + Box const& zbx = mfi.nodaltilebox(2); + const Real dz_inv = dxinv[2]; + Array4 const& fz_arr = fluxes[2].array(mfi); + ParallelFor(zbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + fz_arr(i,j,k) = -(p_arr(i,j,k) - p_arr(i,j-1,k)) * dz_inv; + }); + } // mfi +} + +#endif +#endif diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package index e3d62008f..82d3e8e99 100644 --- a/Source/Utils/Make.package +++ b/Source/Utils/Make.package @@ -26,4 +26,5 @@ CEXE_sources += ERF_Time_Avg_Vel.cpp ifeq ($(USE_POISSON_SOLVE),TRUE) CEXE_sources += ERF_PoissonSolve.cpp CEXE_sources += ERF_PoissonSolve_tb.cpp +CEXE_sources += ERF_solve_with_heffte.cpp endif From f3cb7f4bfa4237aa3ca903cb27f6a91598387408 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Tue, 1 Oct 2024 18:58:26 -0700 Subject: [PATCH 2/2] fix for gpu --- Source/ERF.cpp | 4 ++-- Source/SourceTerms/ERF_make_buoyancy.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Source/ERF.cpp b/Source/ERF.cpp index f3611ab74..96f389eba 100644 --- a/Source/ERF.cpp +++ b/Source/ERF.cpp @@ -41,8 +41,8 @@ int ERF::fixed_mri_dt_ratio = 0; // Dictate verbosity in screen output int ERF::verbose = 0; #ifdef ERF_USE_POISSON_SOLVE -int ERF::mg_verbose = 0; -int ERF::use_heffte = false; +int ERF::mg_verbose = 0; +bool ERF::use_heffte = false; #endif // Frequency of diagnostic output diff --git a/Source/SourceTerms/ERF_make_buoyancy.cpp b/Source/SourceTerms/ERF_make_buoyancy.cpp index 7f7dd2cdc..caec66a57 100644 --- a/Source/SourceTerms/ERF_make_buoyancy.cpp +++ b/Source/SourceTerms/ERF_make_buoyancy.cpp @@ -47,6 +47,8 @@ void make_buoyancy (Vector& S_data, const int klo = geom.Domain().smallEnd()[2]; const int khi = geom.Domain().bigEnd()[2] + 1; + Real rd_over_cp = solverChoice.rdOcp; + if (anelastic == 1) { #ifdef _OPENMP #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) @@ -69,13 +71,13 @@ void make_buoyancy (Vector& S_data, ParallelFor(tbz, [=] AMREX_GPU_DEVICE (int i, int j, int k) { Real rt0_hi = getRhoThetagivenP(p0_arr(i,j,k)); - Real t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), solverChoice.rdOcp); - Real t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), solverChoice.rdOcp); + Real t0_hi = getTgivenPandTh(p0_arr(i,j,k), rt0_hi/r0_arr(i,j,k), rd_over_cp); + Real t_hi = getTgivenPandTh(p0_arr(i,j,k), cell_data(i,j,k,RhoTheta_comp)/r0_arr(i,j,k), rd_over_cp); Real qplus = (t_hi-t0_hi)/t0_hi; Real rt0_lo = getRhoThetagivenP(p0_arr(i,j,k-1)); - Real t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), solverChoice.rdOcp); - Real t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), solverChoice.rdOcp); + Real t0_lo = getTgivenPandTh(p0_arr(i,j,k-1), rt0_lo/r0_arr(i,j,k-1), rd_over_cp); + Real t_lo = getTgivenPandTh(p0_arr(i,j,k-1), cell_data(i,j,k-1,RhoTheta_comp)/r0_arr(i,j,k-1), rd_over_cp); Real qminus = (t_lo-t0_lo)/t0_lo; Real r0_q_avg = Real(0.5) * (r0_arr(i,j,k) * qplus + r0_arr(i,j,k-1) * qminus);