Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flatter SetBounds #1196

Open
wants to merge 11 commits into
base: develop
Choose a base branch
from
33 changes: 25 additions & 8 deletions src/bvals/comms/boundary_communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,24 @@ TaskStatus SetBounds(std::shared_ptr<MeshData<Real>> &md) {
}
// const Real threshold = Globals::sparse_config.allocation_threshold;
auto &bnd_info = cache.bnd_info;
auto &bnd_info_h = cache.bnd_info_h;
size_t Nel_max = 0;
for (int b = 0; b < nbound; b++) {
for (int it = 0; it < bnd_info_h(b).ntopological_elements; ++it) {
const int tensor_size = bnd_info_h(b).idxer[it].sizeAtAndBelow(2);
if (tensor_size > Nel_max) {
Nel_max = tensor_size;
}
}
}
const int Nteam = Nel_max;

Kokkos::parallel_for(
PARTHENON_AUTO_LABEL,
Kokkos::TeamPolicy<>(parthenon::DevExecSpace(), nbound, Kokkos::AUTO),
Kokkos::TeamPolicy<>(parthenon::DevExecSpace(), nbound * Nteam, Kokkos::AUTO),
KOKKOS_LAMBDA(parthenon::team_mbr_t team_member) {
const int b = team_member.league_rank();
const int b = team_member.league_rank() / Nteam;
const int bteam = team_member.league_rank() % Nteam;
if (bnd_info(b).same_to_same) return;
int idx_offset = 0;
for (int it = 0; it < bnd_info(b).ntopological_elements; ++it) {
Expand All @@ -285,13 +298,17 @@ TaskStatus SetBounds(std::shared_ptr<MeshData<Real>> &md) {
lcoord_trans.InverseTransform(bnd_info(b).topo_idx[it]);
Real fac = ftemp; // Can't capture structured bindings
const int iel = static_cast<int>(tel) % 3;
const int Ni = idxer.template EndIdx<5>() - idxer.template StartIdx<5>() + 1;
// Element t, u, v in variable
const int Nel = idxer.sizeAtAndBelow(2);
const int Nidx = idxer.sizeAtAndAbove(3);
if (bteam >= Nel) return;
const int Ni = idxer.size(5);
if (bnd_info(b).buf_allocated && bnd_info(b).allocated) {
Kokkos::parallel_for(
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni),
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Nel / Ni),
[&](const int idx) {
Real *buf = &bnd_info(b).buf(idx * Ni + idx_offset);
const auto [t, u, v, k, j, i] = idxer(idx * Ni);
Real *buf = &bnd_info(b).buf(bteam * Nidx + idx * Ni + idx_offset);
const auto [t, u, v, k, j, i] = idxer(bteam * Nidx + idx * Ni);
// Have to do this because of some weird issue about structured bindings
// being captured
const int tt = t;
Expand All @@ -311,9 +328,9 @@ TaskStatus SetBounds(std::shared_ptr<MeshData<Real>> &md) {
} else if (bnd_info(b).allocated && bound_type != BoundaryType::flxcor_recv) {
const Real default_val = bnd_info(b).var.sparse_default_val;
Kokkos::parallel_for(
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni),
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Nel / Ni),
[&](const int idx) {
const auto [t, u, v, k, j, i] = idxer(idx * Ni);
const auto [t, u, v, k, j, i] = idxer(bteam * Nidx + idx * Ni);
const int tt = t;
const int uu = u;
const int vv = v;
Expand Down
19 changes: 19 additions & 0 deletions src/utils/indexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,25 @@ struct Indexer {
// Accessor for the stored `_size` member.
// NOTE(review): presumably the total number of index tuples covered by this
// indexer -- confirm against the constructor.
KOKKOS_FORCEINLINE_FUNCTION
std::size_t size() const {
  return _size;
}

// Extent of a single dimension: the number of indices in the inclusive range
// [start[dim], end[dim]].
// `dim` is used directly as an index into `start`/`end`; no bounds check is
// performed here.
KOKKOS_FORCEINLINE_FUNCTION
std::size_t size(int dim) const {
  // Keep the arithmetic in the element type of start/end; the conversion to
  // std::size_t happens only on return.
  const auto extent = end[dim] - start[dim] + 1;
  return extent;
}

// Product of the per-dimension extents for dimensions `dim`, dim + 1, ...,
// up to the last dimension below `rank`.
// When `dim < rank` is false no dimension is selected and the result is 0
// (not the empty product 1).
// NOTE(review): the outcome for a negative `dim` depends on whether `rank` is
// a signed or unsigned quantity -- confirm against its declaration.
KOKKOS_FORCEINLINE_FUNCTION
std::size_t sizeAtAndAbove(int dim) const {
  // Seed with 1 only if there is at least one dimension to multiply in.
  std::size_t product = (dim < rank) ? 1 : 0;
  int d = dim;
  while (d < rank) {
    product *= size(d);
    ++d;
  }
  return product;
}

// Product of the per-dimension extents for dimensions 0 through `dim`,
// inclusive.
//
// `dim` is the index of the last dimension to include.
//  - A negative `dim` selects no dimensions and yields 0 (not the empty
//    product 1); this matches the previous behavior.
//  - A `dim` at or beyond `rank` is clamped to the last dimension, so the
//    result is the product over all dimensions. Previously the loop would call
//    size(i) with i >= rank, indexing `start`/`end` out of range (assuming
//    those arrays hold `rank` entries).
KOKKOS_FORCEINLINE_FUNCTION
std::size_t sizeAtAndBelow(int dim) const {
  if (dim < 0) return 0;
  // Compare as signed ints so the clamp is correct whether `rank` is declared
  // signed or unsigned; `dim` is known to be non-negative at this point.
  const int ndim = static_cast<int>(rank);
  const int last = dim < ndim ? dim : ndim - 1;
  std::size_t out = 1;
  for (int i = 0; i <= last; ++i)
    out *= size(i);
  return out;
}

KOKKOS_FORCEINLINE_FUNCTION
std::tuple<Ts...> operator()(int idx) const {
return GetIndicesImpl(idx, std::make_index_sequence<sizeof...(Ts)>());
Expand Down
Loading