From 59d00674e7f133968b80402719b193234b35833a Mon Sep 17 00:00:00 2001
From: Philipp Grete
Date: Thu, 14 Nov 2024 13:55:47 +0100
Subject: [PATCH 1/4] Only update energy in RKL2 int

---
 src/hydro/hydro_driver.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index eeb29d3d..b0619a51 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -185,19 +185,19 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
   // by enough work over the entire pack and it allows to not use any conditionals.
   parthenon::par_for(
       DEFAULT_LOOP_PATTERN, "RKL other step", parthenon::DevExecSpace(), 0,
-      Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
-      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+      Y0.GetDim(5) - 1, 0, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         // First calc this step
         const auto &coords = Yjm1.GetCoords(b);
         const Real MYjm1 =
-            parthenon::Update::FluxDivHelper(v, k, j, i, ndim, coords, Yjm1(b));
-        const Real Yj = mu_j * Yjm1(b, v, k, j, i) + nu_j * Yjm2(b, v, k, j, i) +
-                        (1.0 - mu_j - nu_j) * Y0(b, v, k, j, i) +
+            parthenon::Update::FluxDivHelper(IEN, k, j, i, ndim, coords, Yjm1(b));
+        const Real Yj = mu_j * Yjm1(b, IEN, k, j, i) + nu_j * Yjm2(b, IEN, k, j, i) +
+                        (1.0 - mu_j - nu_j) * Y0(b, IEN, k, j, i) +
                         mu_tilde_j * tau * MYjm1 +
-                        gamma_tilde_j * tau * MY0(b, v, k, j, i);
+                        gamma_tilde_j * tau * MY0(b, IEN, k, j, i);
         // Then shuffle vars for next step
-        Yjm2(b, v, k, j, i) = Yjm1(b, v, k, j, i);
-        Yjm1(b, v, k, j, i) = Yj;
+        Yjm2(b, IEN, k, j, i) = Yjm1(b, IEN, k, j, i);
+        Yjm1(b, IEN, k, j, i) = Yj;
       });
 
   return TaskStatus::complete;
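
Reviewer note (not part of the patches): written out, the kernel above evaluates the
RKL2 inner-stage recurrence of super-time-stepping (cf. Meyer, Balsara & Aslam 2014),
which this patch now applies to the energy component IEN only:

    Y_j = mu_j * Y_{j-1} + nu_j * Y_{j-2} + (1 - mu_j - nu_j) * Y_0
          + mu_tilde_j * tau * M(Y_{j-1}) + gamma_tilde_j * tau * M(Y_0)

where M(.) is the flux divergence computed by FluxDivHelper and tau is the diffusive
super-timestep.
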
From 1dbfccd3b05333afc4782bb44c2a0a794469d736 Mon Sep 17 00:00:00 2001
From: Philipp Grete
Date: Thu, 14 Nov 2024 15:06:48 +0100
Subject: [PATCH 2/4] Fix leftover index

---
 src/hydro/hydro_driver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index b0619a51..f01e4c2d 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -185,7 +185,7 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
   // by enough work over the entire pack and it allows to not use any conditionals.
   parthenon::par_for(
       DEFAULT_LOOP_PATTERN, "RKL other step", parthenon::DevExecSpace(), 0,
-      Y0.GetDim(5) - 1, 0, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      Y0.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
       KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         // First calc this step
         const auto &coords = Yjm1.GetCoords(b);

From a4155c927af6f56e63999fd346a3a61a08e6881c Mon Sep 17 00:00:00 2001
From: Philipp Grete
Date: Thu, 14 Nov 2024 15:23:13 +0100
Subject: [PATCH 3/4] Fix reset of dt_diff

---
 src/hydro/hydro_driver.cpp | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index f01e4c2d..afcba1d8 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -660,8 +660,24 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
                                  pmesh->multilevel);
   }
 
+  TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
+  for (int i = 0; i < num_partitions; i++) {
+    auto &tl = single_tasklist_per_pack_region_3[i];
+    auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto fill_derived =
+        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
+  }
+  const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
+  // If any tasks modify the conserved variables before this place and after FillDerived,
+  // then the STS tasks should be updated to not assume prim and cons are in sync.
+  if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
+    AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
+  }
+
   // Single task in single (serial) region to reset global vars used in reductions in the
   // first stage.
+  // TODO(pgrete) check if we logically need this reset or if we can reset within the
+  // timestep task
   if (stage == integrator->nstages &&
       (hydro_pkg->Param<bool>("calc_c_h") ||
        hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none)) {
@@ -678,20 +694,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
         hydro_pkg.get());
   }
 
-  TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
-  for (int i = 0; i < num_partitions; i++) {
-    auto &tl = single_tasklist_per_pack_region_3[i];
-    auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
-    auto fill_derived =
-        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
-  }
-  const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
-  // If any tasks modify the conserved variables before this place and after FillDerived,
-  // then the STS tasks should be updated to not assume prim and cons are in sync.
-  if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
-    AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
-  }
-
   if (stage == integrator->nstages) {
     TaskRegion &tr = tc.AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
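
Reviewer note (not part of the patches): the reordering above matters because the STS
task list is *built* from dt_diff, so resetting the global reduction variables before
AddSTSTasks changes which tasks get added. As a minimal standalone sketch of why, this
is how an RKL2 driver typically derives its stage count from the timestep ratio,
following the stability bound of Meyer, Balsara & Aslam (2014); NumRKL2Stages is a
hypothetical helper for illustration, not AthenaPK API, and assumes dt_diff > 0:

    #include <cmath>

    // An RKL2 super-step of length tau is stable for s stages if
    //   tau <= dt_diff * (s * s + s - 2) / 4,
    // so the smallest admissible s is the positive root of
    //   s^2 + s - (2 + 4 * tau / dt_diff) = 0, rounded up.
    int NumRKL2Stages(const double tau, const double dt_diff) {
      int s = static_cast<int>(
          std::ceil(0.5 * (std::sqrt(9.0 + 16.0 * tau / dt_diff) - 1.0)));
      // Stage counts are commonly rounded up to the next odd integer in practice.
      if (s % 2 == 0) {
        s += 1;
      }
      return s;
    }

A stale (unreset or unreduced) dt_diff thus directly changes the constructed task
collection, which is what PATCH 3 and PATCH 4 guard against.
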
From 487c6b82fc882e6f4973d93a31a0762b78d0d9ad Mon Sep 17 00:00:00 2001
From: Philipp Grete
Date: Tue, 19 Nov 2024 10:12:30 +0100
Subject: [PATCH 4/4] Ensure dt_diff is set when STS tasks are added

---
 src/hydro/hydro.cpp        | 102 ++++++++++++++++++++++++++++++++-----
 src/hydro/hydro_driver.cpp |  94 ----------------------------------
 2 files changed, 89 insertions(+), 107 deletions(-)

diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 5720346c..1f6e8e36 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -57,12 +57,87 @@ parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin) {
   return packages;
 }
 
+// Calculate minimum dx, which is used in calculating the divergence cleaning speed c_h
+// TODO(PG) eventually move to calculating the timestep once the timestep calc
+// has been moved to be done before Step()
+Real CalculateGlobalMinDx(MeshData<Real> *md) {
+  auto *pmb = md->GetBlockData(0)->GetBlockPointer();
+  auto hydro_pkg = pmb->packages.Get("Hydro");
+
+  const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});
+
+  IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
+  IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
+  IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);
+
+  Real mindx = std::numeric_limits<Real>::max();
+
+  bool nx2 = prim_pack.GetDim(2) > 1;
+  bool nx3 = prim_pack.GetDim(3) > 1;
+  pmb->par_reduce(
+      "CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
+      ib.e,
+      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
+        const auto &coords = prim_pack.GetCoords(b);
+        lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
+        if (nx2) {
+          lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
+        }
+        if (nx3) {
+          lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
+        }
+      },
+      Kokkos::Min<Real>(mindx));
+
+  return mindx;
+}
+
 // Using this per cycle function to populate various variables in
 // Params that require global reduction *and* need to be set/known when
 // the task list is constructed (versus when the task list is being executed).
 // TODO(next person touching this function): If more/separate feature are required
 // please separate concerns.
-void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {}
+void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {
+  auto hydro_pkg = pmesh->packages.Get("Hydro");
+
+  // Calculate hyperbolic divergence cleaning speed
+  // TODO(pgrete) Calculating mindx is only required after remeshing. Need to
+  // find a clean solution for this one-off global reduction.
+  if (hydro_pkg->Param<bool>("calc_c_h") ||
+      hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) {
+
+    Real mindx = std::numeric_limits<Real>::max();
+    // Going over default partitions. Not using a (new) single partition containing
+    // all blocks here as this (default) split is also used in the main Step() function
+    // and thus does not create an overhead (such as creating a new MeshBlockPack that
+    // is just used here). All partitions are executed sequentially. Given that a
+    // par_reduce to a host var is blocking, it's safe to directly use the return value.
+    const int num_partitions = pmesh->DefaultNumPartitions();
+    for (int i = 0; i < num_partitions; i++) {
+      auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
+      mindx = std::min(mindx, CalculateGlobalMinDx(mu0.get()));
+    }
+#ifdef MPI_PARALLEL
+    Real mins[3];
+    mins[0] = mindx;
+    mins[1] = hydro_pkg->Param<Real>("dt_hyp");
+    mins[2] = hydro_pkg->Param<Real>("dt_diff");
+    PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL, MPI_MIN,
+                                      MPI_COMM_WORLD));
+
+    hydro_pkg->UpdateParam("mindx", mins[0]);
+    hydro_pkg->UpdateParam("dt_hyp", mins[1]);
+    hydro_pkg->UpdateParam("dt_diff", mins[2]);
+#else
+    hydro_pkg->UpdateParam("mindx", mindx);
+    // dt_hyp and dt_diff are already set directly in Params when they're calculated
+#endif
+    // Finally update c_h (using the globally reduced minima from Params)
+    const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
+    const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
+    hydro_pkg->UpdateParam("c_h", cfl_hyp * hydro_pkg->Param<Real>("mindx") / dt_hyp);
+  }
+}
 
 template <Hst hst, int idx = -1>
 Real HydroHst(MeshData<Real> *md) {
@@ -1189,10 +1264,10 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
         const auto &u0_prim = u0_prim_pack(b);
         auto &u0_cons = u0_cons_pack(b);
 
-        // In principle, the u_cons.fluxes could be updated in parallel by a different
-        // thread resulting in a race conditon here.
-        // However, if the fluxes of a cell have been updated (anywhere) then the entire
-        // kernel will be called again anyway, and, at that point the already fixed
+        // In principle, the u_cons.fluxes could be updated in parallel by a
+        // different thread resulting in a race condition here. However, if the
+        // fluxes of a cell have been updated (anywhere) then the entire kernel will
+        // be called again anyway, and, at that point the already fixed
         // u0_cons.fluxes will automaticlly be used here.
         Real new_cons[NVAR];
         for (auto v = 0; v < NVAR; v++) {
@@ -1220,13 +1295,13 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
             lnum_need_floor += 1;
             return;
           }
-          // In principle, there could be a racecondion as this loop goes over all k,j,i
-          // and we updating the i+1 flux here.
-          // However, the results are idential because u0_prim is never updated in this
-          // kernel so we don't worry about it.
-          // TODO(pgrete) as we need to keep the function signature idential for now (due
-          // to Cuda compiler bug) we could potentially template these function and get
-          // rid of the `if constexpr`
+          // In principle, there could be a race condition as this loop goes over all
+          // k,j,i and we're updating the i+1 flux here. However, the results are
+          // identical because u0_prim is never updated in this kernel so we don't
+          // worry about it.
+          // TODO(pgrete) as we need to keep the function signature identical for now
+          // (due to Cuda compiler bug) we could potentially template these functions
+          // and get rid of the `if constexpr`
           riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h);
           riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h);
 
@@ -1243,7 +1318,8 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
       Kokkos::Sum(num_corrected), Kokkos::Sum(num_need_floor));
 
   // TODO(pgrete) make this optional and global (potentially store values in Params)
-  // std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " << num_attempts
+  // std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " <<
+  // num_attempts
   //           << " Corrected (center): " << num_corrected
   //           << " Failed (will rely on floor): " << num_need_floor << std::endl;
   num_attempts += 1;
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index afcba1d8..dd9d4540 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -37,46 +37,6 @@ HydroDriver::HydroDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm
   pin->CheckDesired("parthenon/time", "cfl");
 }
 
-// Calculate mininum dx, which is used in calculating the divergence cleaning speed c_h
-TaskStatus CalculateGlobalMinDx(MeshData<Real> *md) {
-  auto pmb = md->GetBlockData(0)->GetBlockPointer();
-  auto hydro_pkg = pmb->packages.Get("Hydro");
-
-  const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});
-
-  IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
-  IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
-  IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);
-
-  Real mindx = std::numeric_limits<Real>::max();
-
-  bool nx2 = prim_pack.GetDim(2) > 1;
-  bool nx3 = prim_pack.GetDim(3) > 1;
-  pmb->par_reduce(
-      "CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
-      ib.e,
-      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
-        const auto &coords = prim_pack.GetCoords(b);
-        lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
-        if (nx2) {
-          lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
-        }
-        if (nx3) {
-          lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
-        }
-      },
-      Kokkos::Min<Real>(mindx));
-
-  // Reduction to host var is blocking and only have one of this tasks run at the same
-  // time so modifying the package should be safe.
-  auto mindx_pkg = hydro_pkg->Param<Real>("mindx");
-  if (mindx < mindx_pkg) {
-    hydro_pkg->UpdateParam("mindx", mindx);
-  }
-
-  return TaskStatus::complete;
-}
-
 // Sets all fluxes to 0
 TaskStatus ResetFluxes(MeshData<Real> *md) {
   auto pmb = md->GetBlockData(0)->GetBlockPointer();
@@ -444,60 +404,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     }
   }
 
-  // Calculate hyperbolic divergence cleaning speed
-  // TODO(pgrete) Calculating mindx is only required after remeshing. Need to find a clean
-  // solution for this one-off global reduction.
-  // TODO(PG) move this to PreStepMeshUserWorkInLoop
-  if ((hydro_pkg->Param<bool>("calc_c_h") ||
-       hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) &&
-      (stage == 1)) {
-    // need to make sure that there's only one region in order to MPI_reduce to work
-    TaskRegion &single_task_region = tc.AddRegion(1);
-    auto &tl = single_task_region[0];
-    // Adding one task for each partition. Not using a (new) single partition containing
-    // all blocks here as this (default) split is also used for the following tasks and
-    // thus does not create an overhead (such as creating a new MeshBlockPack that is just
-    // used here). Given that all partitions are in one task list they'll be executed
-    // sequentially. Given that a par_reduce to a host var is blocking it's also save to
-    // store the variable in the Params for now.
-    auto prev_task = none;
-    for (int i = 0; i < num_partitions; i++) {
-      auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
-      auto new_mindx = tl.AddTask(prev_task, CalculateGlobalMinDx, mu0.get());
-      prev_task = new_mindx;
-    }
-    auto reduce_c_h = prev_task;
-#ifdef MPI_PARALLEL
-    reduce_c_h = tl.AddTask(
-        prev_task,
-        [](StateDescriptor *hydro_pkg) {
-          Real mins[3];
-          mins[0] = hydro_pkg->Param<Real>("mindx");
-          mins[1] = hydro_pkg->Param<Real>("dt_hyp");
-          mins[2] = hydro_pkg->Param<Real>("dt_diff");
-          PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL,
-                                            MPI_MIN, MPI_COMM_WORLD));
-
-          hydro_pkg->UpdateParam("mindx", mins[0]);
-          hydro_pkg->UpdateParam("dt_hyp", mins[1]);
-          hydro_pkg->UpdateParam("dt_diff", mins[2]);
-          return TaskStatus::complete;
-        },
-        hydro_pkg.get());
-#endif
-    // Finally update c_h
-    auto update_c_h = tl.AddTask(
-        reduce_c_h,
-        [](StateDescriptor *hydro_pkg) {
-          const auto &mindx = hydro_pkg->Param<Real>("mindx");
-          const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
-          const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
-          hydro_pkg->UpdateParam("c_h", cfl_hyp * mindx / dt_hyp);
-          return TaskStatus::complete;
-        },
-        hydro_pkg.get());
-  }
-
   // calculate magnetic tower scaling
   if ((stage == 1) && hydro_pkg->AllParams().hasKey("magnetic_tower_power_scaling") &&
       hydro_pkg->Param<bool>("magnetic_tower_power_scaling")) {
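
Reviewer note (not part of the patches): PATCH 4 replaces the per-partition reduction
tasks removed above with blocking per-partition calls in PreStepMeshUserWorkInLoop,
followed by one packed in-place MPI reduction. A minimal standalone sketch of that
reduction pattern; ReduceGlobalMins is a hypothetical helper, and MPI_PARTHENON_REAL
in the patch corresponds to MPI_DOUBLE when Real is double:

    #include <mpi.h>

    // One MPI_Allreduce computes the global minimum of several independent
    // scalars by packing them into a single buffer.
    void ReduceGlobalMins(double &mindx, double &dt_hyp, double &dt_diff) {
      double mins[3] = {mindx, dt_hyp, dt_diff};
      // MPI_IN_PLACE: every rank's send buffer doubles as its receive buffer.
      MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
      mindx = mins[0];
      dt_hyp = mins[1];
      dt_diff = mins[2];
    }

Because the reduction now completes before MakeTaskCollection runs, dt_diff is
guaranteed to be globally consistent at the moment AddSTSTasks sizes the STS task list.
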