Fix various dt_diff issues recently introduced and bump Parthenon (for dn outputs) #129

Merged: 5 commits, Nov 30, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -13,8 +13,10 @@
- [[PR 84]](https://github.com/parthenon-hpc-lab/athenapk/pull/84) Bump Parthenon to latest develop (2024-02-15)

### Fixed (not changing behavior/API/variables/...)
- [[PR 128]](https://github.com/parthenon-hpc-lab/athenapk/pull/128) Fixed `dt_diff` in RKL2

### Infrastructure
- [[PR 128]](https://github.com/parthenon-hpc-lab/athenapk/pull/128) Bump Parthenon to support `dn` based outputs
- [[PR 124]](https://github.com/parthenon-hpc-lab/athenapk/pull/124) Bump Kokkos 4.4.1 (and Parthenon to include view-of-view fix)
- [[PR 117]](https://github.com/parthenon-hpc-lab/athenapk/pull/117) Update devcontainer.json to latest CI container
- [[PR 114]](https://github.com/parthenon-hpc-lab/athenapk/pull/114) Bump Parthenon 24.08 and Kokkos to 4.4.00
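For context on the `dn` based outputs referenced in the PR 128 entry above: with the bumped Parthenon, an output block can presumably be triggered by cycle count instead of simulation time. A minimal input-deck sketch, assuming the standard Parthenon output block syntax; the block name, variable list, and cadence value are illustrative only:

```
<parthenon/output0>
file_type = hdf5     # assumed output format
variables = prim     # assumed variable selection
dn        = 100      # assumed: write every 100 cycles instead of using a dt-based cadence
```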
2 changes: 1 addition & 1 deletion external/parthenon
110 changes: 93 additions & 17 deletions src/hydro/hydro.cpp
@@ -57,12 +57,87 @@ parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin) {
return packages;
}

// Calculate minimum dx, which is used in calculating the divergence cleaning speed c_h
// TODO(PG) eventually move to calculating the timestep once the timestep calc
// has been moved to be done before Step()
Real CalculateGlobalMinDx(MeshData<Real> *md) {
auto *pmb = md->GetBlockData(0)->GetBlockPointer();
auto hydro_pkg = pmb->packages.Get("Hydro");

const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});

IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);

Real mindx = std::numeric_limits<Real>::max();

bool nx2 = prim_pack.GetDim(2) > 1;
bool nx3 = prim_pack.GetDim(3) > 1;
pmb->par_reduce(
"CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
const auto &coords = prim_pack.GetCoords(b);
lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
if (nx2) {
lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
}
if (nx3) {
lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
}
},
Kokkos::Min<Real>(mindx));

return mindx;
}
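For reference, the global minimum cell width computed here is what sets the divergence cleaning speed: at the end of PreStepMeshUserWorkInLoop below, c_h is updated as

```latex
c_h = \mathrm{cfl} \cdot \frac{\min_{b,k,j,i} \Delta x}{\Delta t_{\mathrm{hyp}}}
```

i.e. cfl * mindx / dt_hyp, the fastest signal speed that still respects the hyperbolic CFL constraint on the current grid.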

// Using this per-cycle function to populate various variables in
// Params that require global reduction *and* need to be set/known when
// the task list is constructed (versus when the task list is being executed).
// TODO(next person touching this function): If more/separate features are required,
// please separate concerns.
void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {}
void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {
auto hydro_pkg = pmesh->packages.Get("Hydro");

// Calculate hyperbolic divergence cleaning speed
// TODO(pgrete) Calculating mindx is only required after remeshing. Need to
// find a clean solution for this one-off global reduction.
if (hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) {

Real mindx = std::numeric_limits<Real>::max();
// Going over default partitions. Not using a (new) single partition containing
// all blocks here as this (default) split is also used in the main Step() function and
// thus does not create an overhead (such as creating a new MeshBlockPack that is just
// used here). All partitions are executed sequentially. Given that a par_reduce to a
// host var is blocking, it's safe to directly use the return value.
const int num_partitions = pmesh->DefaultNumPartitions();
for (int i = 0; i < num_partitions; i++) {
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
mindx = std::min(mindx, CalculateGlobalMinDx(mu0.get()));
}
#ifdef MPI_PARALLEL
Real mins[3];
mins[0] = mindx;
mins[1] = hydro_pkg->Param<Real>("dt_hyp");
mins[2] = hydro_pkg->Param<Real>("dt_diff");
PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL, MPI_MIN,
MPI_COMM_WORLD));

hydro_pkg->UpdateParam("mindx", mins[0]);
hydro_pkg->UpdateParam("dt_hyp", mins[1]);
hydro_pkg->UpdateParam("dt_diff", mins[2]);
#else
hydro_pkg->UpdateParam("mindx", mindx);
// dt_hyp and dt_diff are already set directly in Params when they're calculated
#endif
// Finally update c_h
const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
hydro_pkg->UpdateParam("c_h", cfl_hyp * mindx / dt_hyp);
}
}
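A note on when this hook runs: Parthenon invokes a registered PreStepMeshUserWorkInLoop once per cycle, before the task collections for Step() are built, which is why the global reductions above can live here instead of inside the task list (see the corresponding removal in hydro_driver.cpp below). The registration itself is not part of this diff; a minimal sketch, assuming AthenaPK wires the callback up through ApplicationInput in its main entry point (header paths and the ProcessPackages assignment are assumptions, not taken from this PR):

```cpp
// Hypothetical wiring sketch, not from this PR.
#include "hydro/hydro.hpp"         // assumed header declaring the Hydro:: callbacks
#include "parthenon_manager.hpp"   // assumed Parthenon manager header

int main(int argc, char *argv[]) {
  parthenon::ParthenonManager pman;
  // Register package initialization and the per-cycle hook before the driver runs.
  pman.app_input->ProcessPackages = Hydro::ProcessPackages;
  pman.app_input->PreStepMeshUserWorkInLoop = Hydro::PreStepMeshUserWorkInLoop;
  // ... environment/mesh setup and driver execution omitted for brevity.
  return 0;
}
```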

template <Hst hst, int idx = -1>
Real HydroHst(MeshData<Real> *md) {
@@ -227,13 +302,13 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
// they're all used in the main loop.
// TODO(pgrete) think about which approach (selective versus always is preferable)
pkg->AddParam<Real>(
"c_h", 0.0, Params::Mutability::Restart); // hyperbolic divergence cleaning speed
"c_h", 0.0, Params::Mutability::Mutable); // hyperbolic divergence cleaning speed
// global minimum dx (used to calc c_h)
pkg->AddParam<Real>("mindx", std::numeric_limits<Real>::max(),
Params::Mutability::Restart);
Params::Mutability::Mutable);
// hyperbolic timestep constraint
pkg->AddParam<Real>("dt_hyp", std::numeric_limits<Real>::max(),
Params::Mutability::Restart);
Params::Mutability::Mutable);

const auto recon_str = pin->GetString("hydro", "reconstruction");
int recon_need_nghost = 3; // largest number for the choices below
@@ -610,7 +685,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
pkg->AddParam<>("cfl_diff", cfl_diff);
}
pkg->AddParam<Real>("dt_diff", std::numeric_limits<Real>::max(),
Params::Mutability::Restart); // diffusive timestep constraint
Params::Mutability::Mutable); // diffusive timestep constraint
pkg->AddParam<>("diffint", diffint);

if (fluid == Fluid::euler) {
@@ -1189,10 +1264,10 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
const auto &u0_prim = u0_prim_pack(b);
auto &u0_cons = u0_cons_pack(b);

// In principle, the u_cons.fluxes could be updated in parallel by a different
// thread resulting in a race condition here.
// However, if the fluxes of a cell have been updated (anywhere) then the entire
// kernel will be called again anyway, and, at that point the already fixed
// In principle, the u_cons.fluxes could be updated in parallel by a
// different thread resulting in a race condition here. However, if the
// fluxes of a cell have been updated (anywhere) then the entire kernel will
// be called again anyway, and, at that point the already fixed
// u0_cons.fluxes will automatically be used here.
Real new_cons[NVAR];
for (auto v = 0; v < NVAR; v++) {
@@ -1220,13 +1295,13 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
lnum_need_floor += 1;
return;
}
// In principle, there could be a race condition as this loop goes over all k,j,i
// and we are updating the i+1 flux here.
// However, the results are identical because u0_prim is never updated in this
// kernel so we don't worry about it.
// TODO(pgrete) as we need to keep the function signature identical for now (due
// to a Cuda compiler bug) we could potentially template these functions and get
// rid of the `if constexpr`
// In principle, there could be a race condition as this loop goes over all
// k,j,i and we are updating the i+1 flux here. However, the results are
// identical because u0_prim is never updated in this kernel so we don't
// worry about it.
// TODO(pgrete) as we need to keep the function signature identical for now
// (due to a Cuda compiler bug) we could potentially template these functions
// and get rid of the `if constexpr`
riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h);
riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h);

@@ -1243,7 +1318,8 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
Kokkos::Sum<std::int64_t>(num_corrected),
Kokkos::Sum<std::int64_t>(num_need_floor));
// TODO(pgrete) make this optional and global (potentially store values in Params)
// std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " << num_attempts
// std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " <<
// num_attempts
// << " Corrected (center): " << num_corrected
// << " Failed (will rely on floor): " << num_need_floor << std::endl;
num_attempts += 1;
124 changes: 16 additions & 108 deletions src/hydro/hydro_driver.cpp
@@ -37,46 +37,6 @@ HydroDriver::HydroDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm
pin->CheckDesired("parthenon/time", "cfl");
}

// Calculate minimum dx, which is used in calculating the divergence cleaning speed c_h
TaskStatus CalculateGlobalMinDx(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
auto hydro_pkg = pmb->packages.Get("Hydro");

const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});

IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);

Real mindx = std::numeric_limits<Real>::max();

bool nx2 = prim_pack.GetDim(2) > 1;
bool nx3 = prim_pack.GetDim(3) > 1;
pmb->par_reduce(
"CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
const auto &coords = prim_pack.GetCoords(b);
lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
if (nx2) {
lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
}
if (nx3) {
lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
}
},
Kokkos::Min<Real>(mindx));

// Reduction to host var is blocking and only one of these tasks runs at the same
// time, so modifying the package should be safe.
auto mindx_pkg = hydro_pkg->Param<Real>("mindx");
if (mindx < mindx_pkg) {
hydro_pkg->UpdateParam("mindx", mindx);
}

return TaskStatus::complete;
}

// Sets all fluxes to 0
TaskStatus ResetFluxes(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
@@ -444,60 +404,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
}
}

// Calculate hyperbolic divergence cleaning speed
// TODO(pgrete) Calculating mindx is only required after remeshing. Need to find a clean
// solution for this one-off global reduction.
// TODO(PG) move this to PreStepMeshUserWorkInLoop
if ((hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) &&
(stage == 1)) {
// need to make sure that there's only one region in order for MPI_reduce to work
TaskRegion &single_task_region = tc.AddRegion(1);
auto &tl = single_task_region[0];
// Adding one task for each partition. Not using a (new) single partition containing
// all blocks here as this (default) split is also used for the following tasks and
// thus does not create an overhead (such as creating a new MeshBlockPack that is just
// used here). Given that all partitions are in one task list they'll be executed
// sequentially. Given that a par_reduce to a host var is blocking it's also safe to
// store the variable in the Params for now.
auto prev_task = none;
for (int i = 0; i < num_partitions; i++) {
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto new_mindx = tl.AddTask(prev_task, CalculateGlobalMinDx, mu0.get());
prev_task = new_mindx;
}
auto reduce_c_h = prev_task;
#ifdef MPI_PARALLEL
reduce_c_h = tl.AddTask(
prev_task,
[](StateDescriptor *hydro_pkg) {
Real mins[3];
mins[0] = hydro_pkg->Param<Real>("mindx");
mins[1] = hydro_pkg->Param<Real>("dt_hyp");
mins[2] = hydro_pkg->Param<Real>("dt_diff");
PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL,
MPI_MIN, MPI_COMM_WORLD));

hydro_pkg->UpdateParam("mindx", mins[0]);
hydro_pkg->UpdateParam("dt_hyp", mins[1]);
hydro_pkg->UpdateParam("dt_diff", mins[2]);
return TaskStatus::complete;
},
hydro_pkg.get());
#endif
// Finally update c_h
auto update_c_h = tl.AddTask(
reduce_c_h,
[](StateDescriptor *hydro_pkg) {
const auto &mindx = hydro_pkg->Param<Real>("mindx");
const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
hydro_pkg->UpdateParam("c_h", cfl_hyp * mindx / dt_hyp);
return TaskStatus::complete;
},
hydro_pkg.get());
}

// calculate magnetic tower scaling
if ((stage == 1) && hydro_pkg->AllParams().hasKey("magnetic_tower_power_scaling") &&
hydro_pkg->Param<bool>("magnetic_tower_power_scaling")) {
@@ -660,8 +566,24 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
pmesh->multilevel);
}

TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region_3[i];
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto fill_derived =
tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
}
const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
// If any tasks modify the conserved variables before this place and after FillDerived,
// then the STS tasks should be updated to not assume prim and cons are in sync.
if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
}
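Background on why a correctly reduced dt_diff matters at this call site: the RKL2 super-time-stepping tasks added above cover half of the hyperbolic step (0.5 * tm.dt) with a number of sub-stages derived from the ratio of that interval to the diffusive stability limit dt_diff. A hedged sketch of the standard RKL2 criterion (Meyer, Balsara & Aslam 2014); the exact expression inside AddSTSTasks may differ:

```latex
\Delta t_{\mathrm{STS}} \le \Delta t_{\mathrm{diff}} \, \frac{s^{2} + s - 2}{4}
\quad\Longrightarrow\quad
s = \min\left\{ s \in \mathbb{N} : \frac{s^{2} + s - 2}{4} \ge \frac{\Delta t_{\mathrm{STS}}}{\Delta t_{\mathrm{diff}}} \right\}
```

so an incorrect (e.g. not globally reduced) dt_diff either inflates the stage count or breaks the stability assumption behind the super step.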

// Single task in single (serial) region to reset global vars used in reductions in the
// first stage.
// TODO(pgrete) check if we logically need this reset or if we can reset within the
// timestep task
if (stage == integrator->nstages &&
(hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none)) {
@@ -678,20 +600,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
hydro_pkg.get());
}

TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region_3[i];
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto fill_derived =
tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
}
const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
// If any tasks modify the conserved variables before this place and after FillDerived,
// then the STS tasks should be updated to not assume prim and cons are in sync.
if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
}

if (stage == integrator->nstages) {
TaskRegion &tr = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {