From b51464c26ddea3eb3e3438d44da66969300dbe0c Mon Sep 17 00:00:00 2001 From: Sam <6740307+streeve@users.noreply.github.com> Date: Wed, 13 Sep 2023 22:36:01 -0400 Subject: [PATCH] fixup: only get comm bounds once --- src/CabanaPD_Comm.hpp | 83 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/src/CabanaPD_Comm.hpp b/src/CabanaPD_Comm.hpp index 832f933c..e110d5e4 100644 --- a/src/CabanaPD_Comm.hpp +++ b/src/CabanaPD_Comm.hpp @@ -36,10 +36,18 @@ template struct HaloIds { static constexpr std::size_t num_space_dim = LocalGridType::num_space_dim; + // FIXME: 2d + static constexpr int topology_size = 26; + + using memory_space = MemorySpace; int _min_halo; - using memory_space = MemorySpace; + using coord_type = Kokkos::Array; + Kokkos::Array _min_coord; + Kokkos::Array _max_coord; + + Kokkos::Array _device_topology; using DestinationRankView = typename Kokkos::View; using CountView = @@ -65,36 +73,29 @@ struct HaloIds // Check within the halo width, within the local domain. _min_halo = minimum_halo_width; + auto topology = Cajita::getTopology( local_grid ); + _device_topology = vectorToArray( topology ); + + // Get the neighboring mesh bounds (only needed once unless load + // balancing). + neighborBounds( local_grid ); + build( - local_grid, positions, + positions, KOKKOS_LAMBDA( const int, const double[3] ) { return true; } ); } - //---------------------------------------------------------------------------// - // Locate particles within the local grid and determine if any from this - // rank need to be ghosted to one (or more) of the 26 neighbor ranks, - // keeping track of destination rank and index in the container. - template - void build( const LocalGridType& local_grid, - const PositionSliceType& positions, UserFunctor user_functor ) + // Find the bounds of each neighbor rank and store for determining which + // ghost particles to communicate. + void neighborBounds( const LocalGridType& local_grid ) { - using execution_space = typename PositionSliceType::execution_space; const auto& local_mesh = Cajita::createLocalMesh( local_grid ); - auto topology = Cajita::getTopology( local_grid ); - // FIXME: 2d - constexpr int topology_size = 26; Kokkos::Array, topology_size> index_spaces; - using coord_type = Kokkos::Array; - Kokkos::Array min_coord; - Kokkos::Array max_coord; - - // Look for ghosts within the halo width of the local mesh boundary, - // potentially for each of the 26 neighbors cells. Do this one neighbor - // rank at a time so that sends are contiguous. Store all neighboring - // mesh bounds so we only have to launch one kernel below. + // Store all neighboring shared index space mesh bounds so we only have + // to launch one kernel during the actual ghost search. int n = 0; for ( int k = -1; k < 2; ++k ) { @@ -116,20 +117,35 @@ struct HaloIds auto max_ind = sis.max(); local_mesh.coordinates( Cajita::Node(), min_ind.data(), - min_coord[n].data() ); + _min_coord[n].data() ); local_mesh.coordinates( Cajita::Node(), max_ind.data(), - max_coord[n].data() ); + _max_coord[n].data() ); } } } } } + } + + //---------------------------------------------------------------------------// + // Locate particles within the local grid and determine if any from this + // rank need to be ghosted to one (or more) of the 26 neighbor ranks, + // keeping track of destination rank and index. + template + void build( const PositionSliceType& positions, UserFunctor user_functor ) + { + using execution_space = typename PositionSliceType::execution_space; - auto device_topology = vectorToArray( topology ); + // Local copies of member variables for lambda capture. auto send_count = _send_count; auto destinations = _destinations; auto ids = _ids; + auto device_topology = _device_topology; + + // Look for ghosts within the halo width of the local mesh boundary, + // potentially for each of the 26 neighbors cells. + // Do this one neighbor rank at a time so that sends are contiguous. auto ghost_search = KOKKOS_LAMBDA( const int p ) { for ( std::size_t n = 0; n < topology_size; n++ ) @@ -142,12 +158,12 @@ struct HaloIds // space and the ghosted space of this neighbor // (ignore the current cell). bool within_halo = false; - if ( positions( p, 0 ) > min_coord[n][0] && - positions( p, 0 ) < max_coord[n][0] && - positions( p, 1 ) > min_coord[n][1] && - positions( p, 1 ) < max_coord[n][1] && - positions( p, 2 ) > min_coord[n][2] && - positions( p, 2 ) < max_coord[n][2] ) + if ( positions( p, 0 ) > _min_coord[n][0] && + positions( p, 0 ) < _max_coord[n][0] && + positions( p, 1 ) > _min_coord[n][1] && + positions( p, 1 ) < _max_coord[n][1] && + positions( p, 2 ) > _min_coord[n][2] && + positions( p, 2 ) < _max_coord[n][2] ) within_halo = true; if ( within_halo ) { @@ -181,8 +197,7 @@ struct HaloIds } template - void rebuild( const LocalGridType& local_grid, - const PositionSliceType& positions ) + void rebuild( const PositionSliceType& positions ) { // Resize views to actual send sizes. int dest_size = _destinations.extent( 0 ); @@ -200,7 +215,7 @@ struct HaloIds { Kokkos::deep_copy( _send_count, 0 ); build( - local_grid, positions, + positions, KOKKOS_LAMBDA( const int, const double[3] ) { return true; } ); } } @@ -244,7 +259,7 @@ class Comm auto halo_ids = createHaloIds( *local_grid, positions, halo_width, max_export ); // Rebuild if needed. - halo_ids.rebuild( *local_grid, positions ); + halo_ids.rebuild( positions ); // Create the Cabana Halo. halo = std::make_shared( local_grid->globalGrid().comm(),