From 2efff47b60400ab6c76ab79f701b7eb5bb053ef7 Mon Sep 17 00:00:00 2001 From: atmyers Date: Tue, 16 Apr 2024 04:13:22 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20main=20from=20@=20AMReX-Codes/?= =?UTF-8?q?amrex@e293ff68519961dd77a3e5c8a839c619e3be1803=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../amrex.pdf | Bin 8636051 -> 8636051 bytes .../AMReX__ParticleContainerI_8H_source.html | 2749 +++++++++-------- .../AMReX__ParticleContainer_8H_source.html | 20 +- .../doxygen/AMReX__ParticleContainerI_8H.xml | 2741 ++++++++-------- .../classamrex_1_1ParticleContainer__impl.xml | 22 +- 5 files changed, 2783 insertions(+), 2749 deletions(-) diff --git a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf index 2f1c713546128d5692997a93e9f20eea49cff6f2..dbb8d033b4d12426ee39e54aeffa35ce7f2aae8a 100644 GIT binary patch delta 370 zcmajV*;b4J0KoBsLX1{Y(qgGBtutTqeI;q;n;8^kEg{*nr;weP->u`Ki!SB?ynu6h z0=J%tb3A~H|2e;l|LV8zWvhQ%vB5^0lqt8_7F$)=X1jzPDpjdgqgI`rcB!{pgGPHa z*{fNLR&Dm#?|_5ab?9`+VMiR*rCX0)eflM(1O}vqB7=q;Gwiq#CydI-isj@LoOH@* zW5%6v)`UstoOi)Rms~bw+7&ady5_oBH_W-|mfP;Q>z@1OJ@C*Yk3I3!GYg(uwB&`C zURn0q8*jby-UlCj^4W?nzWyvGO65U+GL>GdDHR+3alNNhoK3{}%wSl^CZlXFN(}^I eCXOEE5J3fc=5nuy!4eIO>>AUAhI*dW0gejI6|QC-ge0&nc(%J0mA=z*&Qa z3_EAUc^6!C$*9Y&xaykg#@ukzEw|lq*SLG`d*Go*9(&@cXP%qz!b`8b_QqT9Oqw!n z#(N)p^vSF_pMCMwH{Z=$u;|C1LaJCEgi)AYt|=B8*16JIER3genJ9?!`8W=eTz|Sd e?8zo^m`kEen2*v)Hi-LLdjGeq{QX^6ZC(Qc&4R-K diff --git a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html index f3bca6448d..9124b17eb3 100644 --- a/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__ParticleContainerI_8H_source.html @@ -1206,1423 +1206,1436 @@
1115  });
1116  }
-
1118  }
-
1119 
-
1120  { // Create a scope for the temporary vector below
-
1121  RealVector tmp_real(np_total);
-
1122  for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) {
-
1123  auto src = ptile.GetStructOfArrays().GetRealData(comp).data();
-
1124  ParticleReal* dst = tmp_real.data();
-
1125  AMREX_HOST_DEVICE_FOR_1D( np_total, i,
-
1126  {
-
1127  dst[i] = i < np ? src[permutations[i]] : src[i];
-
1128  });
+
1118  } else {
+
1119  typename SoA::IdCPU tmp_idcpu(np_total);
+
1120 
+
1121  auto src = ptile.GetStructOfArrays().GetIdCPUData().data();
+
1122  uint64_t* dst = tmp_idcpu.data();
+
1123  AMREX_HOST_DEVICE_FOR_1D( np_total, i,
+
1124  {
+
1125  dst[i] = i < np ? src[permutations[i]] : src[i];
+
1126  });
+
1127 
+
1129 
- -
1131 
-
1132  ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real);
-
1133  }
-
1134  }
-
1135 
-
1136  IntVector tmp_int(np_total);
-
1137  for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) {
-
1138  auto src = ptile.GetStructOfArrays().GetIntData(comp).data();
-
1139  int* dst = tmp_int.data();
-
1140  AMREX_HOST_DEVICE_FOR_1D( np_total , i,
-
1141  {
-
1142  dst[i] = i < np ? src[permutations[i]] : src[i];
-
1143  });
+
1130  ptile.GetStructOfArrays().GetIdCPUData().swap(tmp_idcpu);
+
1131  }
+
1132 
+
1133  { // Create a scope for the temporary vector below
+
1134  RealVector tmp_real(np_total);
+
1135  for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) {
+
1136  auto src = ptile.GetStructOfArrays().GetRealData(comp).data();
+
1137  ParticleReal* dst = tmp_real.data();
+
1138  AMREX_HOST_DEVICE_FOR_1D( np_total, i,
+
1139  {
+
1140  dst[i] = i < np ? src[permutations[i]] : src[i];
+
1141  });
+
1142 
+
1144 
- -
1146 
-
1147  ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int);
-
1148  }
-
1149  } else {
-
1150  ParticleTileType ptile_tmp;
-
1151  ptile_tmp.define(m_num_runtime_real, m_num_runtime_int);
-
1152  ptile_tmp.resize(np_total);
-
1153  // copy re-ordered particles
-
1154  gatherParticles(ptile_tmp, ptile, np, permutations);
-
1155  // copy neighbor particles
-
1156  amrex::copyParticles(ptile_tmp, ptile, np, np, np_total-np);
-
1157  ptile.swap(ptile_tmp);
-
1158  }
-
1159 }
-
1160 
-
1161 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1162  template<class> class Allocator, class CellAssignor>
-
1163 void
- -
1165 {
-
1166  SortParticlesByBin(IntVect(AMREX_D_DECL(1, 1, 1)));
-
1167 }
-
1168 
-
1169 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1170  template<class> class Allocator, class CellAssignor>
-
1171 void
- - -
1174 {
-
1175  BL_PROFILE("ParticleContainer::SortParticlesByBin()");
-
1176 
-
1177  if (bin_size == IntVect::TheZeroVector()) { return; }
-
1178 
-
1179  for (int lev = 0; lev < numLevels(); ++lev)
-
1180  {
-
1181  const Geometry& geom = Geom(lev);
-
1182  const auto dxi = geom.InvCellSizeArray();
-
1183  const auto plo = geom.ProbLoArray();
-
1184  const auto domain = geom.Domain();
-
1185 
-
1186  for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
-
1187  {
-
1188  auto& ptile = ParticlesAt(lev, mfi);
-
1189  const size_t np = ptile.numParticles();
-
1190 
-
1191  const Box& box = mfi.validbox();
-
1192 
-
1193  int ntiles = numTilesInBox(box, true, bin_size);
- -
1195  m_bins.build(np, ptile.getParticleTileData(), ntiles,
-
1196  GetParticleBin{plo, dxi, domain, bin_size, box});
-
1197  ReorderParticles(lev, mfi, m_bins.permutationPtr());
-
1198  }
-
1199  }
-
1200 }
-
1201 
-
1202 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1203  template<class> class Allocator, class CellAssignor>
-
1204 void
-
1205 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
-
1206 ::SortParticlesForDeposition (IntVect idx_type)
-
1207 {
-
1208  BL_PROFILE("ParticleContainer::SortParticlesForDeposition()");
- -
1210  for (int lev = 0; lev < numLevels(); ++lev)
-
1211  {
-
1212  const Geometry& geom = Geom(lev);
-
1213 
-
1214  for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
-
1215  {
-
1216  const auto& ptile = ParticlesAt(lev, mfi);
-
1217  const size_t np = ptile.numParticles();
-
1218 
-
1219  const Box& box = mfi.validbox();
-
1220 
-
1221  using index_type = typename decltype(m_bins)::index_type;
- -
1223  PermutationForDeposition<index_type>(perm, np, ptile, box, geom, idx_type);
-
1224  ReorderParticles(lev, mfi, perm.dataPtr());
-
1225  }
-
1226  }
-
1227 }
-
1228 
-
1229 //
-
1230 // The GPU implementation of Redistribute
-
1231 //
-
1232 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1233  template<class> class Allocator, class CellAssignor>
-
1234 void
-
1235 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
-
1236 ::RedistributeGPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative)
-
1237 {
-
1238 #ifdef AMREX_USE_GPU
-
1239 
-
1240  if (local) { AMREX_ASSERT(numParticlesOutOfRange(*this, lev_min, lev_max, local) == 0); }
+
1145  ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real);
+
1146  }
+
1147  }
+
1148 
+
1149  IntVector tmp_int(np_total);
+
1150  for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) {
+
1151  auto src = ptile.GetStructOfArrays().GetIntData(comp).data();
+
1152  int* dst = tmp_int.data();
+
1153  AMREX_HOST_DEVICE_FOR_1D( np_total , i,
+
1154  {
+
1155  dst[i] = i < np ? src[permutations[i]] : src[i];
+
1156  });
+
1157 
+ +
1159 
+
1160  ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int);
+
1161  }
+
1162  } else {
+
1163  ParticleTileType ptile_tmp;
+
1164  ptile_tmp.define(m_num_runtime_real, m_num_runtime_int);
+
1165  ptile_tmp.resize(np_total);
+
1166  // copy re-ordered particles
+
1167  gatherParticles(ptile_tmp, ptile, np, permutations);
+
1168  // copy neighbor particles
+
1169  amrex::copyParticles(ptile_tmp, ptile, np, np, np_total-np);
+
1170  ptile.swap(ptile_tmp);
+
1171  }
+
1172 }
+
1173 
+
1174 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1175  template<class> class Allocator, class CellAssignor>
+
1176 void
+ +
1178 {
+
1179  SortParticlesByBin(IntVect(AMREX_D_DECL(1, 1, 1)));
+
1180 }
+
1181 
+
1182 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1183  template<class> class Allocator, class CellAssignor>
+
1184 void
+ + +
1187 {
+
1188  BL_PROFILE("ParticleContainer::SortParticlesByBin()");
+
1189 
+
1190  if (bin_size == IntVect::TheZeroVector()) { return; }
+
1191 
+
1192  for (int lev = 0; lev < numLevels(); ++lev)
+
1193  {
+
1194  const Geometry& geom = Geom(lev);
+
1195  const auto dxi = geom.InvCellSizeArray();
+
1196  const auto plo = geom.ProbLoArray();
+
1197  const auto domain = geom.Domain();
+
1198 
+
1199  for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+
1200  {
+
1201  auto& ptile = ParticlesAt(lev, mfi);
+
1202  const size_t np = ptile.numParticles();
+
1203 
+
1204  const Box& box = mfi.validbox();
+
1205 
+
1206  int ntiles = numTilesInBox(box, true, bin_size);
+
1207 
+
1208  m_bins.build(np, ptile.getParticleTileData(), ntiles,
+
1209  GetParticleBin{plo, dxi, domain, bin_size, box});
+
1210  ReorderParticles(lev, mfi, m_bins.permutationPtr());
+
1211  }
+
1212  }
+
1213 }
+
1214 
+
1215 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1216  template<class> class Allocator, class CellAssignor>
+
1217 void
+
1218 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
+
1219 ::SortParticlesForDeposition (IntVect idx_type)
+
1220 {
+
1221  BL_PROFILE("ParticleContainer::SortParticlesForDeposition()");
+
1222 
+
1223  for (int lev = 0; lev < numLevels(); ++lev)
+
1224  {
+
1225  const Geometry& geom = Geom(lev);
+
1226 
+
1227  for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+
1228  {
+
1229  const auto& ptile = ParticlesAt(lev, mfi);
+
1230  const size_t np = ptile.numParticles();
+
1231 
+
1232  const Box& box = mfi.validbox();
+
1233 
+
1234  using index_type = typename decltype(m_bins)::index_type;
+ +
1236  PermutationForDeposition<index_type>(perm, np, ptile, box, geom, idx_type);
+
1237  ReorderParticles(lev, mfi, perm.dataPtr());
+
1238  }
+
1239  }
+
1240 }
1241 
-
1242  // sanity check
-
1243  AMREX_ALWAYS_ASSERT(do_tiling == false);
-
1244 
-
1245  BL_PROFILE("ParticleContainer::RedistributeGPU()");
-
1246  BL_PROFILE_VAR_NS("Redistribute_partition", blp_partition);
-
1247 
-
1248  int theEffectiveFinestLevel = m_gdb->finestLevel();
-
1249  while (!m_gdb->LevelDefined(theEffectiveFinestLevel)) { theEffectiveFinestLevel--; }
-
1250 
-
1251  if (int(m_particles.size()) < theEffectiveFinestLevel+1) {
-
1252  if (Verbose()) {
-
1253  amrex::Print() << "ParticleContainer::Redistribute() resizing containers from "
-
1254  << m_particles.size() << " to "
-
1255  << theEffectiveFinestLevel + 1 << '\n';
-
1256  }
-
1257  m_particles.resize(theEffectiveFinestLevel+1);
-
1258  m_dummy_mf.resize(theEffectiveFinestLevel+1);
-
1259  }
+
1242 //
+
1243 // The GPU implementation of Redistribute
+
1244 //
+
1245 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1246  template<class> class Allocator, class CellAssignor>
+
1247 void
+ +
1249 ::RedistributeGPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative)
+
1250 {
+
1251 #ifdef AMREX_USE_GPU
+
1252 
+
1253  if (local) { AMREX_ASSERT(numParticlesOutOfRange(*this, lev_min, lev_max, local) == 0); }
+
1254 
+
1255  // sanity check
+
1256  AMREX_ALWAYS_ASSERT(do_tiling == false);
+
1257 
+
1258  BL_PROFILE("ParticleContainer::RedistributeGPU()");
+
1259  BL_PROFILE_VAR_NS("Redistribute_partition", blp_partition);
1260 
-
1261  for (int lev = 0; lev < theEffectiveFinestLevel+1; ++lev) { RedefineDummyMF(lev); }
-
1262 
-
1263  int finest_lev_particles;
-
1264  if (lev_max == -1) {
-
1265  lev_max = theEffectiveFinestLevel;
-
1266  finest_lev_particles = m_particles.size() - 1;
-
1267  } else {
-
1268  finest_lev_particles = lev_max;
-
1269  }
-
1270  AMREX_ASSERT(lev_max <= finestLevel());
-
1271 
-
1272  this->defineBufferMap();
+
1261  int theEffectiveFinestLevel = m_gdb->finestLevel();
+
1262  while (!m_gdb->LevelDefined(theEffectiveFinestLevel)) { theEffectiveFinestLevel--; }
+
1263 
+
1264  if (int(m_particles.size()) < theEffectiveFinestLevel+1) {
+
1265  if (Verbose()) {
+
1266  amrex::Print() << "ParticleContainer::Redistribute() resizing containers from "
+
1267  << m_particles.size() << " to "
+
1268  << theEffectiveFinestLevel + 1 << '\n';
+
1269  }
+
1270  m_particles.resize(theEffectiveFinestLevel+1);
+
1271  m_dummy_mf.resize(theEffectiveFinestLevel+1);
+
1272  }
1273 
-
1274  if (! m_particle_locator.isValid(GetParGDB())) { m_particle_locator.build(GetParGDB()); }
-
1275  m_particle_locator.setGeometry(GetParGDB());
-
1276  auto assign_grid = m_particle_locator.getGridAssignor();
-
1277 
-
1278  BL_PROFILE_VAR_START(blp_partition);
-
1279  ParticleCopyOp op;
-
1280  int num_levels = finest_lev_particles + 1;
-
1281  op.setNumLevels(num_levels);
-
1282  Vector<std::map<int, int> > new_sizes(num_levels);
-
1283  const auto plo = Geom(0).ProbLoArray();
-
1284  const auto phi = Geom(0).ProbHiArray();
-
1285  const auto rlo = Geom(0).ProbLoArrayInParticleReal();
-
1286  const auto rhi = Geom(0).ProbHiArrayInParticleReal();
-
1287  const auto is_per = Geom(0).isPeriodicArray();
-
1288  for (int lev = lev_min; lev <= finest_lev_particles; ++lev)
-
1289  {
-
1290  auto& plev = m_particles[lev];
-
1291  for (auto& kv : plev)
-
1292  {
-
1293  int gid = kv.first.first;
-
1294  int tid = kv.first.second;
-
1295  auto index = std::make_pair(gid, tid);
-
1296 
-
1297  auto& src_tile = plev[index];
-
1298  const size_t np = src_tile.numParticles();
-
1299 
-
1300  int num_stay = partitionParticlesByDest(src_tile, assign_grid,
-
1301  std::forward<CellAssignor>(CellAssignor{}),
-
1302  BufferMap(),
-
1303  plo, phi, rlo, rhi, is_per, lev, gid, tid,
-
1304  lev_min, lev_max, nGrow, remove_negative);
-
1305 
-
1306  int num_move = np - num_stay;
-
1307  new_sizes[lev][gid] = num_stay;
-
1308  op.resize(gid, lev, num_move);
+
1274  for (int lev = 0; lev < theEffectiveFinestLevel+1; ++lev) { RedefineDummyMF(lev); }
+
1275 
+
1276  int finest_lev_particles;
+
1277  if (lev_max == -1) {
+
1278  lev_max = theEffectiveFinestLevel;
+
1279  finest_lev_particles = m_particles.size() - 1;
+
1280  } else {
+
1281  finest_lev_particles = lev_max;
+
1282  }
+
1283  AMREX_ASSERT(lev_max <= finestLevel());
+
1284 
+
1285  this->defineBufferMap();
+
1286 
+
1287  if (! m_particle_locator.isValid(GetParGDB())) { m_particle_locator.build(GetParGDB()); }
+
1288  m_particle_locator.setGeometry(GetParGDB());
+
1289  auto assign_grid = m_particle_locator.getGridAssignor();
+
1290 
+
1291  BL_PROFILE_VAR_START(blp_partition);
+
1292  ParticleCopyOp op;
+
1293  int num_levels = finest_lev_particles + 1;
+
1294  op.setNumLevels(num_levels);
+
1295  Vector<std::map<int, int> > new_sizes(num_levels);
+
1296  const auto plo = Geom(0).ProbLoArray();
+
1297  const auto phi = Geom(0).ProbHiArray();
+
1298  const auto rlo = Geom(0).ProbLoArrayInParticleReal();
+
1299  const auto rhi = Geom(0).ProbHiArrayInParticleReal();
+
1300  const auto is_per = Geom(0).isPeriodicArray();
+
1301  for (int lev = lev_min; lev <= finest_lev_particles; ++lev)
+
1302  {
+
1303  auto& plev = m_particles[lev];
+
1304  for (auto& kv : plev)
+
1305  {
+
1306  int gid = kv.first.first;
+
1307  int tid = kv.first.second;
+
1308  auto index = std::make_pair(gid, tid);
1309 
-
1310  auto p_boxes = op.m_boxes[lev][gid].dataPtr();
-
1311  auto p_levs = op.m_levels[lev][gid].dataPtr();
-
1312  auto p_src_indices = op.m_src_indices[lev][gid].dataPtr();
-
1313  auto p_periodic_shift = op.m_periodic_shift[lev][gid].dataPtr();
-
1314  auto ptd = src_tile.getParticleTileData();
-
1315 
-
1316  AMREX_FOR_1D ( num_move, i,
-
1317  {
-
1318  const auto p = make_particle<ParticleType>{}(ptd,i + num_stay);
-
1319 
-
1320  if (p.id() < 0)
-
1321  {
-
1322  p_boxes[i] = -1;
-
1323  p_levs[i] = -1;
-
1324  }
-
1325  else
-
1326  {
-
1327  const auto tup = assign_grid(p, lev_min, lev_max, nGrow,
-
1328  std::forward<CellAssignor>(CellAssignor{}));
-
1329  p_boxes[i] = amrex::get<0>(tup);
-
1330  p_levs[i] = amrex::get<1>(tup);
-
1331  }
-
1332  p_periodic_shift[i] = IntVect(AMREX_D_DECL(0,0,0));
-
1333  p_src_indices[i] = i+num_stay;
-
1334  });
-
1335  }
-
1336  }
-
1337  BL_PROFILE_VAR_STOP(blp_partition);
-
1338 
-
1339  ParticleCopyPlan plan;
-
1340 
-
1341  plan.build(*this, op, h_redistribute_int_comp,
-
1342  h_redistribute_real_comp, local);
-
1343 
- -
1345  Gpu::DeviceVector<char> rcv_buffer;
-
1346 
-
1347  packBuffer(*this, op, plan, snd_buffer);
-
1348 
-
1349  // clear particles from container
-
1350  for (int lev = lev_min; lev <= lev_max; ++lev)
-
1351  {
-
1352  auto& plev = m_particles[lev];
-
1353  for (auto& kv : plev)
-
1354  {
-
1355  int gid = kv.first.first;
-
1356  int tid = kv.first.second;
-
1357  auto index = std::make_pair(gid, tid);
-
1358  auto& tile = plev[index];
-
1359  tile.resize(new_sizes[lev][gid]);
-
1360  }
-
1361  }
-
1362 
-
1363  for (int lev = lev_min; lev <= lev_max; lev++)
+
1310  auto& src_tile = plev[index];
+
1311  const size_t np = src_tile.numParticles();
+
1312 
+
1313  int num_stay = partitionParticlesByDest(src_tile, assign_grid,
+
1314  std::forward<CellAssignor>(CellAssignor{}),
+
1315  BufferMap(),
+
1316  plo, phi, rlo, rhi, is_per, lev, gid, tid,
+
1317  lev_min, lev_max, nGrow, remove_negative);
+
1318 
+
1319  int num_move = np - num_stay;
+
1320  new_sizes[lev][gid] = num_stay;
+
1321  op.resize(gid, lev, num_move);
+
1322 
+
1323  auto p_boxes = op.m_boxes[lev][gid].dataPtr();
+
1324  auto p_levs = op.m_levels[lev][gid].dataPtr();
+
1325  auto p_src_indices = op.m_src_indices[lev][gid].dataPtr();
+
1326  auto p_periodic_shift = op.m_periodic_shift[lev][gid].dataPtr();
+
1327  auto ptd = src_tile.getParticleTileData();
+
1328 
+
1329  AMREX_FOR_1D ( num_move, i,
+
1330  {
+
1331  const auto p = make_particle<ParticleType>{}(ptd,i + num_stay);
+
1332 
+
1333  if (p.id() < 0)
+
1334  {
+
1335  p_boxes[i] = -1;
+
1336  p_levs[i] = -1;
+
1337  }
+
1338  else
+
1339  {
+
1340  const auto tup = assign_grid(p, lev_min, lev_max, nGrow,
+
1341  std::forward<CellAssignor>(CellAssignor{}));
+
1342  p_boxes[i] = amrex::get<0>(tup);
+
1343  p_levs[i] = amrex::get<1>(tup);
+
1344  }
+
1345  p_periodic_shift[i] = IntVect(AMREX_D_DECL(0,0,0));
+
1346  p_src_indices[i] = i+num_stay;
+
1347  });
+
1348  }
+
1349  }
+
1350  BL_PROFILE_VAR_STOP(blp_partition);
+
1351 
+
1352  ParticleCopyPlan plan;
+
1353 
+
1354  plan.build(*this, op, h_redistribute_int_comp,
+
1355  h_redistribute_real_comp, local);
+
1356 
+ +
1358  Gpu::DeviceVector<char> rcv_buffer;
+
1359 
+
1360  packBuffer(*this, op, plan, snd_buffer);
+
1361 
+
1362  // clear particles from container
+
1363  for (int lev = lev_min; lev <= lev_max; ++lev)
1364  {
-
1365  particle_detail::clearEmptyEntries(m_particles[lev]);
-
1366  }
-
1367 
-
1368  if (int(m_particles.size()) > theEffectiveFinestLevel+1) {
-
1369  if (m_verbose > 0) {
-
1370  amrex::Print() << "ParticleContainer::Redistribute() resizing m_particles from "
-
1371  << m_particles.size() << " to " << theEffectiveFinestLevel+1 << '\n';
-
1372  }
-
1373  AMREX_ASSERT(int(m_particles.size()) >= 2);
-
1374 
-
1375  m_particles.resize(theEffectiveFinestLevel + 1);
-
1376  m_dummy_mf.resize(theEffectiveFinestLevel + 1);
-
1377  }
-
1378 
- -
1380  {
-
1381  plan.buildMPIFinish(BufferMap());
-
1382  communicateParticlesStart(*this, plan, snd_buffer, rcv_buffer);
-
1383  unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy());
- -
1385  unpackRemotes(*this, plan, rcv_buffer, RedistributeUnpackPolicy());
-
1386  }
-
1387  else
-
1388  {
- -
1390  Gpu::PinnedVector<char> pinned_snd_buffer;
-
1391  Gpu::PinnedVector<char> pinned_rcv_buffer;
-
1392 
-
1393  if (snd_buffer.arena()->isPinned()) {
-
1394  plan.buildMPIFinish(BufferMap());
- -
1396  communicateParticlesStart(*this, plan, snd_buffer, pinned_rcv_buffer);
-
1397  } else {
-
1398  pinned_snd_buffer.resize(snd_buffer.size());
-
1399  Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(), snd_buffer.dataPtr(), snd_buffer.size());
-
1400  plan.buildMPIFinish(BufferMap());
- -
1402  communicateParticlesStart(*this, plan, pinned_snd_buffer, pinned_rcv_buffer);
-
1403  }
-
1404 
-
1405  rcv_buffer.resize(pinned_rcv_buffer.size());
-
1406  unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy());
- -
1408  Gpu::htod_memcpy_async(rcv_buffer.dataPtr(), pinned_rcv_buffer.dataPtr(), pinned_rcv_buffer.size());
-
1409  unpackRemotes(*this, plan, rcv_buffer, RedistributeUnpackPolicy());
-
1410  }
- - -
1413  AMREX_ASSERT(numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0);
-
1414 #else
-
1415  amrex::ignore_unused(lev_min,lev_max,nGrow,local,remove_negative);
-
1416 #endif
-
1417 }
-
1418 
-
1419 //
-
1420 // The CPU implementation of Redistribute
-
1421 //
-
1422 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1423  template<class> class Allocator, class CellAssignor>
-
1424 void
- -
1426 ::RedistributeCPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative)
-
1427 {
-
1428  BL_PROFILE("ParticleContainer::RedistributeCPU()");
- -
1430  const int MyProc = ParallelContext::MyProcSub();
-
1431  auto strttime = amrex::second();
-
1432 
-
1433  if (local > 0) { BuildRedistributeMask(0, local); }
-
1434 
-
1435  // On startup there are cases where Redistribute() could be called
-
1436  // with a given finestLevel() where that AmrLevel has yet to be defined.
-
1437  int theEffectiveFinestLevel = m_gdb->finestLevel();
-
1438 
-
1439  while (!m_gdb->LevelDefined(theEffectiveFinestLevel)) {
-
1440  theEffectiveFinestLevel--;
-
1441  }
+
1365  auto& plev = m_particles[lev];
+
1366  for (auto& kv : plev)
+
1367  {
+
1368  int gid = kv.first.first;
+
1369  int tid = kv.first.second;
+
1370  auto index = std::make_pair(gid, tid);
+
1371  auto& tile = plev[index];
+
1372  tile.resize(new_sizes[lev][gid]);
+
1373  }
+
1374  }
+
1375 
+
1376  for (int lev = lev_min; lev <= lev_max; lev++)
+
1377  {
+
1378  particle_detail::clearEmptyEntries(m_particles[lev]);
+
1379  }
+
1380 
+
1381  if (int(m_particles.size()) > theEffectiveFinestLevel+1) {
+
1382  if (m_verbose > 0) {
+
1383  amrex::Print() << "ParticleContainer::Redistribute() resizing m_particles from "
+
1384  << m_particles.size() << " to " << theEffectiveFinestLevel+1 << '\n';
+
1385  }
+
1386  AMREX_ASSERT(int(m_particles.size()) >= 2);
+
1387 
+
1388  m_particles.resize(theEffectiveFinestLevel + 1);
+
1389  m_dummy_mf.resize(theEffectiveFinestLevel + 1);
+
1390  }
+
1391 
+ +
1393  {
+
1394  plan.buildMPIFinish(BufferMap());
+
1395  communicateParticlesStart(*this, plan, snd_buffer, rcv_buffer);
+
1396  unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy());
+ +
1398  unpackRemotes(*this, plan, rcv_buffer, RedistributeUnpackPolicy());
+
1399  }
+
1400  else
+
1401  {
+ +
1403  Gpu::PinnedVector<char> pinned_snd_buffer;
+
1404  Gpu::PinnedVector<char> pinned_rcv_buffer;
+
1405 
+
1406  if (snd_buffer.arena()->isPinned()) {
+
1407  plan.buildMPIFinish(BufferMap());
+ +
1409  communicateParticlesStart(*this, plan, snd_buffer, pinned_rcv_buffer);
+
1410  } else {
+
1411  pinned_snd_buffer.resize(snd_buffer.size());
+
1412  Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(), snd_buffer.dataPtr(), snd_buffer.size());
+
1413  plan.buildMPIFinish(BufferMap());
+ +
1415  communicateParticlesStart(*this, plan, pinned_snd_buffer, pinned_rcv_buffer);
+
1416  }
+
1417 
+
1418  rcv_buffer.resize(pinned_rcv_buffer.size());
+
1419  unpackBuffer(*this, plan, snd_buffer, RedistributeUnpackPolicy());
+ +
1421  Gpu::htod_memcpy_async(rcv_buffer.dataPtr(), pinned_rcv_buffer.dataPtr(), pinned_rcv_buffer.size());
+
1422  unpackRemotes(*this, plan, rcv_buffer, RedistributeUnpackPolicy());
+
1423  }
+
1424 
+ +
1426  AMREX_ASSERT(numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0);
+
1427 #else
+
1428  amrex::ignore_unused(lev_min,lev_max,nGrow,local,remove_negative);
+
1429 #endif
+
1430 }
+
1431 
+
1432 //
+
1433 // The CPU implementation of Redistribute
+
1434 //
+
1435 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1436  template<class> class Allocator, class CellAssignor>
+
1437 void
+ +
1439 ::RedistributeCPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative)
+
1440 {
+
1441  BL_PROFILE("ParticleContainer::RedistributeCPU()");
1442 
-
1443  if (int(m_particles.size()) < theEffectiveFinestLevel+1) {
-
1444  if (Verbose()) {
-
1445  amrex::Print() << "ParticleContainer::Redistribute() resizing containers from "
-
1446  << m_particles.size() << " to "
-
1447  << theEffectiveFinestLevel + 1 << '\n';
-
1448  }
-
1449  m_particles.resize(theEffectiveFinestLevel+1);
-
1450  m_dummy_mf.resize(theEffectiveFinestLevel+1);
-
1451  }
-
1452 
-
1453  // It is important to do this even if we don't have more levels because we may have changed the
-
1454  // grids at this level in a regrid.
-
1455  for (int lev = 0; lev < theEffectiveFinestLevel+1; ++lev) {
-
1456  RedefineDummyMF(lev);
-
1457  }
-
1458 
-
1459  int finest_lev_particles;
-
1460  if (lev_max == -1) {
-
1461  lev_max = theEffectiveFinestLevel;
-
1462  finest_lev_particles = m_particles.size() - 1;
-
1463  } else {
-
1464  finest_lev_particles = lev_max;
-
1465  }
-
1466  AMREX_ASSERT(lev_max <= finestLevel());
-
1467 
-
1468  // This will hold the valid particles that go to another process
-
1469  std::map<int, Vector<char> > not_ours;
-
1470 
-
1471  int num_threads = OpenMP::get_max_threads();
-
1472 
-
1473  // these are temporary buffers for each thread
-
1474  std::map<int, Vector<Vector<char> > > tmp_remote;
-
1475  Vector<std::map<std::pair<int, int>, Vector<ParticleVector> > > tmp_local;
-
1476  Vector<std::map<std::pair<int, int>, Vector<StructOfArrays<NArrayReal, NArrayInt, Allocator> > > > soa_local;
-
1477  tmp_local.resize(theEffectiveFinestLevel+1);
-
1478  soa_local.resize(theEffectiveFinestLevel+1);
-
1479 
-
1480  // we resize these buffers outside the parallel region
-
1481  for (int lev = lev_min; lev <= lev_max; lev++) {
-
1482  for (MFIter mfi(*m_dummy_mf[lev], this->do_tiling ? this->tile_size : IntVect::TheZeroVector());
-
1483  mfi.isValid(); ++mfi) {
-
1484  auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
-
1485  tmp_local[lev][index].resize(num_threads);
-
1486  soa_local[lev][index].resize(num_threads);
-
1487  for (int t = 0; t < num_threads; ++t) {
-
1488  soa_local[lev][index][t].define(m_num_runtime_real, m_num_runtime_int);
-
1489  }
-
1490  }
-
1491  }
-
1492  if (local) {
-
1493  for (int i = 0; i < neighbor_procs.size(); ++i) {
-
1494  tmp_remote[neighbor_procs[i]].resize(num_threads);
-
1495  }
-
1496  } else {
-
1497  for (int i = 0; i < ParallelContext::NProcsSub(); ++i) {
-
1498  tmp_remote[i].resize(num_threads);
-
1499  }
-
1500  }
-
1501 
-
1502  // first pass: for each tile in parallel, in each thread copies the particles that
-
1503  // need to be moved into it's own, temporary buffer.
-
1504  for (int lev = lev_min; lev <= finest_lev_particles; lev++) {
-
1505  auto& pmap = m_particles[lev];
-
1506 
-
1507  Vector<std::pair<int, int> > grid_tile_ids;
-
1508  Vector<ParticleTileType*> ptile_ptrs;
-
1509  for (auto& kv : pmap)
-
1510  {
-
1511  grid_tile_ids.push_back(kv.first);
-
1512  ptile_ptrs.push_back(&(kv.second));
-
1513  }
+
1443  const int MyProc = ParallelContext::MyProcSub();
+
1444  auto strttime = amrex::second();
+
1445 
+
1446  if (local > 0) { BuildRedistributeMask(0, local); }
+
1447 
+
1448  // On startup there are cases where Redistribute() could be called
+
1449  // with a given finestLevel() where that AmrLevel has yet to be defined.
+
1450  int theEffectiveFinestLevel = m_gdb->finestLevel();
+
1451 
+
1452  while (!m_gdb->LevelDefined(theEffectiveFinestLevel)) {
+
1453  theEffectiveFinestLevel--;
+
1454  }
+
1455 
+
1456  if (int(m_particles.size()) < theEffectiveFinestLevel+1) {
+
1457  if (Verbose()) {
+
1458  amrex::Print() << "ParticleContainer::Redistribute() resizing containers from "
+
1459  << m_particles.size() << " to "
+
1460  << theEffectiveFinestLevel + 1 << '\n';
+
1461  }
+
1462  m_particles.resize(theEffectiveFinestLevel+1);
+
1463  m_dummy_mf.resize(theEffectiveFinestLevel+1);
+
1464  }
+
1465 
+
1466  // It is important to do this even if we don't have more levels because we may have changed the
+
1467  // grids at this level in a regrid.
+
1468  for (int lev = 0; lev < theEffectiveFinestLevel+1; ++lev) {
+
1469  RedefineDummyMF(lev);
+
1470  }
+
1471 
+
1472  int finest_lev_particles;
+
1473  if (lev_max == -1) {
+
1474  lev_max = theEffectiveFinestLevel;
+
1475  finest_lev_particles = m_particles.size() - 1;
+
1476  } else {
+
1477  finest_lev_particles = lev_max;
+
1478  }
+
1479  AMREX_ASSERT(lev_max <= finestLevel());
+
1480 
+
1481  // This will hold the valid particles that go to another process
+
1482  std::map<int, Vector<char> > not_ours;
+
1483 
+
1484  int num_threads = OpenMP::get_max_threads();
+
1485 
+
1486  // these are temporary buffers for each thread
+
1487  std::map<int, Vector<Vector<char> > > tmp_remote;
+ + +
1490  tmp_local.resize(theEffectiveFinestLevel+1);
+
1491  soa_local.resize(theEffectiveFinestLevel+1);
+
1492 
+
1493  // we resize these buffers outside the parallel region
+
1494  for (int lev = lev_min; lev <= lev_max; lev++) {
+
1495  for (MFIter mfi(*m_dummy_mf[lev], this->do_tiling ? this->tile_size : IntVect::TheZeroVector());
+
1496  mfi.isValid(); ++mfi) {
+
1497  auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
+
1498  tmp_local[lev][index].resize(num_threads);
+
1499  soa_local[lev][index].resize(num_threads);
+
1500  for (int t = 0; t < num_threads; ++t) {
+
1501  soa_local[lev][index][t].define(m_num_runtime_real, m_num_runtime_int);
+
1502  }
+
1503  }
+
1504  }
+
1505  if (local) {
+
1506  for (int i = 0; i < neighbor_procs.size(); ++i) {
+
1507  tmp_remote[neighbor_procs[i]].resize(num_threads);
+
1508  }
+
1509  } else {
+
1510  for (int i = 0; i < ParallelContext::NProcsSub(); ++i) {
+
1511  tmp_remote[i].resize(num_threads);
+
1512  }
+
1513  }
1514 
-
1515 #ifdef AMREX_USE_OMP
-
1516 #pragma omp parallel for
-
1517 #endif
-
1518  for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
-
1519  {
-
1520  int thread_num = OpenMP::get_thread_num();
-
1521  int grid = grid_tile_ids[pmap_it].first;
-
1522  int tile = grid_tile_ids[pmap_it].second;
-
1523  auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays();
-
1524  auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs();
-
1525 
-
1526  // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0)
-
1527  // || aos.size() == soa.size(),
-
1528  // "The AoS and SoA data on this tile are different sizes - "
-
1529  // "perhaps particles have not been initialized correctly?");
-
1530  unsigned npart = ptile_ptrs[pmap_it]->numParticles();
-
1531  ParticleLocData pld;
-
1532 
-
1533  if constexpr (!ParticleType::is_soa_particle){
-
1534 
-
1535  if (npart != 0) {
-
1536  Long last = npart - 1;
-
1537  Long pindex = 0;
-
1538  while (pindex <= last) {
-
1539  ParticleType& p = aos[pindex];
-
1540 
-
1541  if ((remove_negative == false) && (p.id() < 0)) {
-
1542  ++pindex;
-
1543  continue;
-
1544  }
+
1515  // first pass: for each tile in parallel, in each thread copies the particles that
+
1516  // need to be moved into it's own, temporary buffer.
+
1517  for (int lev = lev_min; lev <= finest_lev_particles; lev++) {
+
1518  auto& pmap = m_particles[lev];
+
1519 
+
1520  Vector<std::pair<int, int> > grid_tile_ids;
+
1521  Vector<ParticleTileType*> ptile_ptrs;
+
1522  for (auto& kv : pmap)
+
1523  {
+
1524  grid_tile_ids.push_back(kv.first);
+
1525  ptile_ptrs.push_back(&(kv.second));
+
1526  }
+
1527 
+
1528 #ifdef AMREX_USE_OMP
+
1529 #pragma omp parallel for
+
1530 #endif
+
1531  for (int pmap_it = 0; pmap_it < static_cast<int>(ptile_ptrs.size()); ++pmap_it)
+
1532  {
+
1533  int thread_num = OpenMP::get_thread_num();
+
1534  int grid = grid_tile_ids[pmap_it].first;
+
1535  int tile = grid_tile_ids[pmap_it].second;
+
1536  auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays();
+
1537  auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs();
+
1538 
+
1539  // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0)
+
1540  // || aos.size() == soa.size(),
+
1541  // "The AoS and SoA data on this tile are different sizes - "
+
1542  // "perhaps particles have not been initialized correctly?");
+
1543  unsigned npart = ptile_ptrs[pmap_it]->numParticles();
+
1544  ParticleLocData pld;
1545 
-
1546  if (p.id() < 0)
-
1547  {
-
1548  aos[pindex] = aos[last];
-
1549  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1550  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1551  }
-
1552  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1553  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
-
1554  }
-
1555  correctCellVectors(last, pindex, grid, aos[pindex]);
-
1556  --last;
-
1557  continue;
-
1558  }
-
1559 
-
1560  locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
-
1561 
-
1562  particlePostLocate(p, pld, lev);
-
1563 
-
1564  if (p.id() < 0)
-
1565  {
-
1566  aos[pindex] = aos[last];
-
1567  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1568  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1569  }
-
1570  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1571  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
-
1572  }
-
1573  correctCellVectors(last, pindex, grid, aos[pindex]);
-
1574  --last;
-
1575  continue;
-
1576  }
-
1577 
-
1578  const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]);
-
1579  if (who == MyProc) {
-
1580  if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) {
-
1581  // We own it but must shift it to another place.
-
1582  auto index = std::make_pair(pld.m_grid, pld.m_tile);
-
1583  AMREX_ASSERT(tmp_local[pld.m_lev][index].size() == num_threads);
-
1584  tmp_local[pld.m_lev][index][thread_num].push_back(p);
-
1585  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
1586  RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
-
1587  arr.push_back(soa.GetRealData(comp)[pindex]);
-
1588  }
-
1589  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
1590  IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp);
-
1591  arr.push_back(soa.GetIntData(comp)[pindex]);
-
1592  }
-
1593 
-
1594  p.id() = -p.id(); // Invalidate the particle
-
1595  }
-
1596  }
-
1597  else {
-
1598  auto& particles_to_send = tmp_remote[who][thread_num];
-
1599  auto old_size = particles_to_send.size();
-
1600  auto new_size = old_size + superparticle_size;
-
1601  particles_to_send.resize(new_size);
-
1602  std::memcpy(&particles_to_send[old_size], &p, particle_size);
-
1603  char* dst = &particles_to_send[old_size] + particle_size;
-
1604  int array_comp_start = AMREX_SPACEDIM + NStructReal;
-
1605  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1606  if (h_redistribute_real_comp[array_comp_start + comp]) {
-
1607  std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
-
1608  dst += sizeof(ParticleReal);
-
1609  }
-
1610  }
-
1611  array_comp_start = 2 + NStructInt;
-
1612  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1613  if (h_redistribute_int_comp[array_comp_start + comp]) {
-
1614  std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
-
1615  dst += sizeof(int);
-
1616  }
-
1617  }
-
1618 
-
1619  p.id() = -p.id(); // Invalidate the particle
-
1620  }
-
1621 
-
1622  if (p.id() < 0)
-
1623  {
-
1624  aos[pindex] = aos[last];
-
1625  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1626  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1627  }
-
1628  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1629  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1546  if constexpr (!ParticleType::is_soa_particle){
+
1547 
+
1548  if (npart != 0) {
+
1549  Long last = npart - 1;
+
1550  Long pindex = 0;
+
1551  while (pindex <= last) {
+
1552  ParticleType& p = aos[pindex];
+
1553 
+
1554  if ((remove_negative == false) && (p.id() < 0)) {
+
1555  ++pindex;
+
1556  continue;
+
1557  }
+
1558 
+
1559  if (p.id() < 0)
+
1560  {
+
1561  aos[pindex] = aos[last];
+
1562  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1563  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1564  }
+
1565  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1566  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1567  }
+
1568  correctCellVectors(last, pindex, grid, aos[pindex]);
+
1569  --last;
+
1570  continue;
+
1571  }
+
1572 
+
1573  locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
+
1574 
+
1575  particlePostLocate(p, pld, lev);
+
1576 
+
1577  if (p.id() < 0)
+
1578  {
+
1579  aos[pindex] = aos[last];
+
1580  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1581  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1582  }
+
1583  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1584  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1585  }
+
1586  correctCellVectors(last, pindex, grid, aos[pindex]);
+
1587  --last;
+
1588  continue;
+
1589  }
+
1590 
+
1591  const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]);
+
1592  if (who == MyProc) {
+
1593  if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) {
+
1594  // We own it but must shift it to another place.
+
1595  auto index = std::make_pair(pld.m_grid, pld.m_tile);
+
1596  AMREX_ASSERT(tmp_local[pld.m_lev][index].size() == num_threads);
+
1597  tmp_local[pld.m_lev][index][thread_num].push_back(p);
+
1598  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
1599  RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
+
1600  arr.push_back(soa.GetRealData(comp)[pindex]);
+
1601  }
+
1602  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
1603  IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp);
+
1604  arr.push_back(soa.GetIntData(comp)[pindex]);
+
1605  }
+
1606 
+
1607  p.id() = -p.id(); // Invalidate the particle
+
1608  }
+
1609  }
+
1610  else {
+
1611  auto& particles_to_send = tmp_remote[who][thread_num];
+
1612  auto old_size = particles_to_send.size();
+
1613  auto new_size = old_size + superparticle_size;
+
1614  particles_to_send.resize(new_size);
+
1615  std::memcpy(&particles_to_send[old_size], &p, particle_size);
+
1616  char* dst = &particles_to_send[old_size] + particle_size;
+
1617  int array_comp_start = AMREX_SPACEDIM + NStructReal;
+
1618  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1619  if (h_redistribute_real_comp[array_comp_start + comp]) {
+
1620  std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
+
1621  dst += sizeof(ParticleReal);
+
1622  }
+
1623  }
+
1624  array_comp_start = 2 + NStructInt;
+
1625  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1626  if (h_redistribute_int_comp[array_comp_start + comp]) {
+
1627  std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
+
1628  dst += sizeof(int);
+
1629  }
1630  }
-
1631  correctCellVectors(last, pindex, grid, aos[pindex]);
-
1632  --last;
-
1633  continue;
-
1634  }
-
1635 
-
1636  ++pindex;
-
1637  }
-
1638 
-
1639  aos().erase(aos().begin() + last + 1, aos().begin() + npart);
-
1640  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1641  RealVector& rdata = soa.GetRealData(comp);
-
1642  rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
-
1643  }
-
1644  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1645  IntVector& idata = soa.GetIntData(comp);
-
1646  idata.erase(idata.begin() + last + 1, idata.begin() + npart);
-
1647  }
-
1648  }
-
1649 
-
1650  } else { // soa particle
+
1631 
+
1632  p.id() = -p.id(); // Invalidate the particle
+
1633  }
+
1634 
+
1635  if (p.id() < 0)
+
1636  {
+
1637  aos[pindex] = aos[last];
+
1638  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1639  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1640  }
+
1641  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1642  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1643  }
+
1644  correctCellVectors(last, pindex, grid, aos[pindex]);
+
1645  --last;
+
1646  continue;
+
1647  }
+
1648 
+
1649  ++pindex;
+
1650  }
1651 
-
1652  auto particle_tile = ptile_ptrs[pmap_it];
-
1653  if (npart != 0) {
-
1654  Long last = npart - 1;
-
1655  Long pindex = 0;
-
1656  auto ptd = particle_tile->getParticleTileData();
-
1657  while (pindex <= last) {
-
1658  ParticleType p(ptd,pindex);
-
1659 
-
1660  if ((remove_negative == false) && (p.id() < 0)) {
-
1661  ++pindex;
-
1662  continue;
-
1663  }
+
1652  aos().erase(aos().begin() + last + 1, aos().begin() + npart);
+
1653  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1654  RealVector& rdata = soa.GetRealData(comp);
+
1655  rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
+
1656  }
+
1657  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1658  IntVector& idata = soa.GetIntData(comp);
+
1659  idata.erase(idata.begin() + last + 1, idata.begin() + npart);
+
1660  }
+
1661  }
+
1662 
+
1663  } else { // soa particle
1664 
-
1665  if (p.id() < 0){
-
1666  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
-
1667  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1668  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1669  }
-
1670  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1671  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
-
1672  }
-
1673  correctCellVectors(last, pindex, grid, ptd[pindex]);
-
1674  --last;
+
1665  auto particle_tile = ptile_ptrs[pmap_it];
+
1666  if (npart != 0) {
+
1667  Long last = npart - 1;
+
1668  Long pindex = 0;
+
1669  auto ptd = particle_tile->getParticleTileData();
+
1670  while (pindex <= last) {
+
1671  ParticleType p(ptd,pindex);
+
1672 
+
1673  if ((remove_negative == false) && (p.id() < 0)) {
+
1674  ++pindex;
1675  continue;
1676  }
1677 
-
1678  locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
-
1679 
-
1680  particlePostLocate(p, pld, lev);
-
1681 
-
1682  if (p.id() < 0) {
-
1683  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
-
1684  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1685  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1686  }
-
1687  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1688  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
-
1689  }
-
1690  correctCellVectors(last, pindex, grid, ptd[pindex]);
-
1691  --last;
-
1692  continue;
-
1693  }
+
1678  if (p.id() < 0){
+
1679  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
+
1680  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1681  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1682  }
+
1683  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1684  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1685  }
+
1686  correctCellVectors(last, pindex, grid, ptd[pindex]);
+
1687  --last;
+
1688  continue;
+
1689  }
+
1690 
+
1691  locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
+
1692 
+
1693  particlePostLocate(p, pld, lev);
1694 
-
1695  const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]);
-
1696  if (who == MyProc) {
-
1697  if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) {
-
1698  // We own it but must shift it to another place.
-
1699  auto index = std::make_pair(pld.m_grid, pld.m_tile);
-
1700  AMREX_ASSERT(soa_local[pld.m_lev][index].size() == num_threads);
-
1701  {
-
1702  auto& arr = soa_local[pld.m_lev][index][thread_num].GetIdCPUData();
-
1703  arr.push_back(soa.GetIdCPUData()[pindex]);
-
1704  }
-
1705  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
1706  RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
-
1707  arr.push_back(soa.GetRealData(comp)[pindex]);
-
1708  }
-
1709  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
1710  IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp);
-
1711  arr.push_back(soa.GetIntData(comp)[pindex]);
-
1712  }
-
1713 
-
1714  p.id() = -p.id(); // Invalidate the particle
-
1715  }
-
1716  }
-
1717  else {
-
1718  auto& particles_to_send = tmp_remote[who][thread_num];
-
1719  auto old_size = particles_to_send.size();
-
1720  auto new_size = old_size + superparticle_size;
-
1721  particles_to_send.resize(new_size);
-
1722 
-
1723  char* dst = &particles_to_send[old_size];
-
1724  {
-
1725  std::memcpy(dst, &soa.GetIdCPUData()[pindex], sizeof(uint64_t));
-
1726  dst += sizeof(uint64_t);
-
1727  }
-
1728  int array_comp_start = AMREX_SPACEDIM + NStructReal;
-
1729  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1730  if (h_redistribute_real_comp[array_comp_start + comp]) {
-
1731  std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
-
1732  dst += sizeof(ParticleReal);
-
1733  }
-
1734  }
-
1735  array_comp_start = 2 + NStructInt;
-
1736  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1737  if (h_redistribute_int_comp[array_comp_start + comp]) {
-
1738  std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
-
1739  dst += sizeof(int);
-
1740  }
-
1741  }
-
1742  p.id() = -p.id(); // Invalidate the particle
-
1743  }
-
1744 
-
1745  if (p.id() < 0){
-
1746  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
-
1747  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1748  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
-
1749  }
-
1750  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1751  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
-
1752  }
-
1753  correctCellVectors(last, pindex, grid, ptd[pindex]);
-
1754  --last;
-
1755  continue;
+
1695  if (p.id() < 0) {
+
1696  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
+
1697  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1698  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1699  }
+
1700  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1701  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1702  }
+
1703  correctCellVectors(last, pindex, grid, ptd[pindex]);
+
1704  --last;
+
1705  continue;
+
1706  }
+
1707 
+
1708  const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]);
+
1709  if (who == MyProc) {
+
1710  if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) {
+
1711  // We own it but must shift it to another place.
+
1712  auto index = std::make_pair(pld.m_grid, pld.m_tile);
+
1713  AMREX_ASSERT(soa_local[pld.m_lev][index].size() == num_threads);
+
1714  {
+
1715  auto& arr = soa_local[pld.m_lev][index][thread_num].GetIdCPUData();
+
1716  arr.push_back(soa.GetIdCPUData()[pindex]);
+
1717  }
+
1718  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
1719  RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
+
1720  arr.push_back(soa.GetRealData(comp)[pindex]);
+
1721  }
+
1722  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
1723  IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp);
+
1724  arr.push_back(soa.GetIntData(comp)[pindex]);
+
1725  }
+
1726 
+
1727  p.id() = -p.id(); // Invalidate the particle
+
1728  }
+
1729  }
+
1730  else {
+
1731  auto& particles_to_send = tmp_remote[who][thread_num];
+
1732  auto old_size = particles_to_send.size();
+
1733  auto new_size = old_size + superparticle_size;
+
1734  particles_to_send.resize(new_size);
+
1735 
+
1736  char* dst = &particles_to_send[old_size];
+
1737  {
+
1738  std::memcpy(dst, &soa.GetIdCPUData()[pindex], sizeof(uint64_t));
+
1739  dst += sizeof(uint64_t);
+
1740  }
+
1741  int array_comp_start = AMREX_SPACEDIM + NStructReal;
+
1742  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1743  if (h_redistribute_real_comp[array_comp_start + comp]) {
+
1744  std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
+
1745  dst += sizeof(ParticleReal);
+
1746  }
+
1747  }
+
1748  array_comp_start = 2 + NStructInt;
+
1749  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1750  if (h_redistribute_int_comp[array_comp_start + comp]) {
+
1751  std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
+
1752  dst += sizeof(int);
+
1753  }
+
1754  }
+
1755  p.id() = -p.id(); // Invalidate the particle
1756  }
1757 
-
1758  ++pindex;
-
1759  }
-
1760 
-
1761  {
-
1762  auto& iddata = soa.GetIdCPUData();
-
1763  iddata.erase(iddata.begin() + last + 1, iddata.begin() + npart);
-
1764  }
-
1765  for (int comp = 0; comp < NumRealComps(); comp++) {
-
1766  RealVector& rdata = soa.GetRealData(comp);
-
1767  rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
-
1768  }
-
1769  for (int comp = 0; comp < NumIntComps(); comp++) {
-
1770  IntVector& idata = soa.GetIntData(comp);
-
1771  idata.erase(idata.begin() + last + 1, idata.begin() + npart);
+
1758  if (p.id() < 0){
+
1759  soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
+
1760  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1761  soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
+
1762  }
+
1763  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1764  soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last];
+
1765  }
+
1766  correctCellVectors(last, pindex, grid, ptd[pindex]);
+
1767  --last;
+
1768  continue;
+
1769  }
+
1770 
+
1771  ++pindex;
1772  }
-
1773  }
-
1774  }
-
1775  }
-
1776  }
-
1777 
-
1778  for (int lev = lev_min; lev <= lev_max; lev++) {
-
1779  particle_detail::clearEmptyEntries(m_particles[lev]);
-
1780  }
-
1781 
-
1782  // Second pass - for each tile in parallel, collect the particles we are owed from all thread's buffers.
-
1783  for (int lev = lev_min; lev <= lev_max; lev++) {
-
1784  typename std::map<std::pair<int, int>, Vector<ParticleVector > >::iterator pmap_it;
-
1785 
-
1786  if constexpr(!ParticleType::is_soa_particle) {
-
1787  Vector<std::pair<int, int> > grid_tile_ids;
-
1788  Vector<Vector<ParticleVector>* > pvec_ptrs;
-
1789 
-
1790  // we need to create any missing map entries in serial here
-
1791  for (pmap_it=tmp_local[lev].begin(); pmap_it != tmp_local[lev].end(); pmap_it++)
-
1792  {
-
1793  m_particles[lev][pmap_it->first];
-
1794  grid_tile_ids.push_back(pmap_it->first);
-
1795  pvec_ptrs.push_back(&(pmap_it->second));
-
1796  }
-
1797 
-
1798 #ifdef AMREX_USE_OMP
-
1799 #pragma omp parallel for
-
1800 #endif
-
1801  for (int pit = 0; pit < static_cast<int>(pvec_ptrs.size()); ++pit)
-
1802  {
-
1803  auto index = grid_tile_ids[pit];
-
1804  auto& ptile = DefineAndReturnParticleTile(lev, index.first, index.second);
-
1805  auto& aos = ptile.GetArrayOfStructs();
-
1806  auto& soa = ptile.GetStructOfArrays();
-
1807  auto& aos_tmp = *(pvec_ptrs[pit]);
-
1808  auto& soa_tmp = soa_local[lev][index];
-
1809  for (int i = 0; i < num_threads; ++i) {
-
1810  aos.insert(aos.end(), aos_tmp[i].begin(), aos_tmp[i].end());
-
1811  aos_tmp[i].erase(aos_tmp[i].begin(), aos_tmp[i].end());
-
1812  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
1813  RealVector& arr = soa.GetRealData(comp);
-
1814  RealVector& tmp = soa_tmp[i].GetRealData(comp);
-
1815  arr.insert(arr.end(), tmp.begin(), tmp.end());
-
1816  tmp.erase(tmp.begin(), tmp.end());
-
1817  }
-
1818  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
1819  IntVector& arr = soa.GetIntData(comp);
-
1820  IntVector& tmp = soa_tmp[i].GetIntData(comp);
-
1821  arr.insert(arr.end(), tmp.begin(), tmp.end());
-
1822  tmp.erase(tmp.begin(), tmp.end());
-
1823  }
-
1824  }
-
1825  }
-
1826  } else { // soa particle
-
1827  Vector<std::pair<int, int> > grid_tile_ids;
-
1828 
-
1829  // we need to create any missing map entries in serial here
-
1830  for (auto soa_map_it=soa_local[lev].begin(); soa_map_it != soa_local[lev].end(); soa_map_it++)
-
1831  {
-
1832  m_particles[lev][soa_map_it->first];
-
1833  grid_tile_ids.push_back(soa_map_it->first);
-
1834  }
-
1835 
-
1836 #ifdef AMREX_USE_OMP
-
1837 #pragma omp parallel for
-
1838 #endif
-
1839  for (int pit = 0; pit < static_cast<int>(grid_tile_ids.size()); ++pit) // NOLINT(modernize-loop-convert)
-
1840  {
-
1841  auto index = grid_tile_ids[pit];
-
1842  auto& ptile = DefineAndReturnParticleTile(lev, index.first, index.second);
-
1843  auto& soa = ptile.GetStructOfArrays();
-
1844  auto& soa_tmp = soa_local[lev][index];
-
1845  for (int i = 0; i < num_threads; ++i) {
-
1846  {
-
1847  auto& arr = soa.GetIdCPUData();
-
1848  auto& tmp = soa_tmp[i].GetIdCPUData();
-
1849  arr.insert(arr.end(), tmp.begin(), tmp.end());
-
1850  tmp.erase(tmp.begin(), tmp.end());
-
1851  }
-
1852  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
1853  RealVector& arr = soa.GetRealData(comp);
-
1854  RealVector& tmp = soa_tmp[i].GetRealData(comp);
-
1855  arr.insert(arr.end(), tmp.begin(), tmp.end());
-
1856  tmp.erase(tmp.begin(), tmp.end());
-
1857  }
-
1858  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
1859  IntVector& arr = soa.GetIntData(comp);
-
1860  IntVector& tmp = soa_tmp[i].GetIntData(comp);
-
1861  arr.insert(arr.end(), tmp.begin(), tmp.end());
-
1862  tmp.erase(tmp.begin(), tmp.end());
-
1863  }
-
1864  }
-
1865  }
-
1866  }
-
1867  }
-
1868 
-
1869  for (auto& map_it : tmp_remote) {
-
1870  int who = map_it.first;
-
1871  not_ours[who];
-
1872  }
-
1873 
-
1874  Vector<int> dest_proc_ids;
-
1875  Vector<Vector<Vector<char> >* > pbuff_ptrs;
-
1876  for (auto& kv : tmp_remote)
-
1877  {
-
1878  dest_proc_ids.push_back(kv.first);
-
1879  pbuff_ptrs.push_back(&(kv.second));
+
1773 
+
1774  {
+
1775  auto& iddata = soa.GetIdCPUData();
+
1776  iddata.erase(iddata.begin() + last + 1, iddata.begin() + npart);
+
1777  }
+
1778  for (int comp = 0; comp < NumRealComps(); comp++) {
+
1779  RealVector& rdata = soa.GetRealData(comp);
+
1780  rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
+
1781  }
+
1782  for (int comp = 0; comp < NumIntComps(); comp++) {
+
1783  IntVector& idata = soa.GetIntData(comp);
+
1784  idata.erase(idata.begin() + last + 1, idata.begin() + npart);
+
1785  }
+
1786  }
+
1787  }
+
1788  }
+
1789  }
+
1790 
+
1791  for (int lev = lev_min; lev <= lev_max; lev++) {
+
1792  particle_detail::clearEmptyEntries(m_particles[lev]);
+
1793  }
+
1794 
+
1795  // Second pass - for each tile in parallel, collect the particles we are owed from all thread's buffers.
+
1796  for (int lev = lev_min; lev <= lev_max; lev++) {
+
1797  typename std::map<std::pair<int, int>, Vector<ParticleVector > >::iterator pmap_it;
+
1798 
+
1799  if constexpr(!ParticleType::is_soa_particle) {
+
1800  Vector<std::pair<int, int> > grid_tile_ids;
+
1801  Vector<Vector<ParticleVector>* > pvec_ptrs;
+
1802 
+
1803  // we need to create any missing map entries in serial here
+
1804  for (pmap_it=tmp_local[lev].begin(); pmap_it != tmp_local[lev].end(); pmap_it++)
+
1805  {
+
1806  m_particles[lev][pmap_it->first];
+
1807  grid_tile_ids.push_back(pmap_it->first);
+
1808  pvec_ptrs.push_back(&(pmap_it->second));
+
1809  }
+
1810 
+
1811 #ifdef AMREX_USE_OMP
+
1812 #pragma omp parallel for
+
1813 #endif
+
1814  for (int pit = 0; pit < static_cast<int>(pvec_ptrs.size()); ++pit)
+
1815  {
+
1816  auto index = grid_tile_ids[pit];
+
1817  auto& ptile = DefineAndReturnParticleTile(lev, index.first, index.second);
+
1818  auto& aos = ptile.GetArrayOfStructs();
+
1819  auto& soa = ptile.GetStructOfArrays();
+
1820  auto& aos_tmp = *(pvec_ptrs[pit]);
+
1821  auto& soa_tmp = soa_local[lev][index];
+
1822  for (int i = 0; i < num_threads; ++i) {
+
1823  aos.insert(aos.end(), aos_tmp[i].begin(), aos_tmp[i].end());
+
1824  aos_tmp[i].erase(aos_tmp[i].begin(), aos_tmp[i].end());
+
1825  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
1826  RealVector& arr = soa.GetRealData(comp);
+
1827  RealVector& tmp = soa_tmp[i].GetRealData(comp);
+
1828  arr.insert(arr.end(), tmp.begin(), tmp.end());
+
1829  tmp.erase(tmp.begin(), tmp.end());
+
1830  }
+
1831  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
1832  IntVector& arr = soa.GetIntData(comp);
+
1833  IntVector& tmp = soa_tmp[i].GetIntData(comp);
+
1834  arr.insert(arr.end(), tmp.begin(), tmp.end());
+
1835  tmp.erase(tmp.begin(), tmp.end());
+
1836  }
+
1837  }
+
1838  }
+
1839  } else { // soa particle
+
1840  Vector<std::pair<int, int> > grid_tile_ids;
+
1841 
+
1842  // we need to create any missing map entries in serial here
+
1843  for (auto soa_map_it=soa_local[lev].begin(); soa_map_it != soa_local[lev].end(); soa_map_it++)
+
1844  {
+
1845  m_particles[lev][soa_map_it->first];
+
1846  grid_tile_ids.push_back(soa_map_it->first);
+
1847  }
+
1848 
+
1849 #ifdef AMREX_USE_OMP
+
1850 #pragma omp parallel for
+
1851 #endif
+
1852  for (int pit = 0; pit < static_cast<int>(grid_tile_ids.size()); ++pit) // NOLINT(modernize-loop-convert)
+
1853  {
+
1854  auto index = grid_tile_ids[pit];
+
1855  auto& ptile = DefineAndReturnParticleTile(lev, index.first, index.second);
+
1856  auto& soa = ptile.GetStructOfArrays();
+
1857  auto& soa_tmp = soa_local[lev][index];
+
1858  for (int i = 0; i < num_threads; ++i) {
+
1859  {
+
1860  auto& arr = soa.GetIdCPUData();
+
1861  auto& tmp = soa_tmp[i].GetIdCPUData();
+
1862  arr.insert(arr.end(), tmp.begin(), tmp.end());
+
1863  tmp.erase(tmp.begin(), tmp.end());
+
1864  }
+
1865  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
1866  RealVector& arr = soa.GetRealData(comp);
+
1867  RealVector& tmp = soa_tmp[i].GetRealData(comp);
+
1868  arr.insert(arr.end(), tmp.begin(), tmp.end());
+
1869  tmp.erase(tmp.begin(), tmp.end());
+
1870  }
+
1871  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
1872  IntVector& arr = soa.GetIntData(comp);
+
1873  IntVector& tmp = soa_tmp[i].GetIntData(comp);
+
1874  arr.insert(arr.end(), tmp.begin(), tmp.end());
+
1875  tmp.erase(tmp.begin(), tmp.end());
+
1876  }
+
1877  }
+
1878  }
+
1879  }
1880  }
1881 
-
1882 #ifdef AMREX_USE_OMP
-
1883 #pragma omp parallel for
-
1884 #endif
-
1885  for (int pmap_it = 0; pmap_it < static_cast<int>(pbuff_ptrs.size()); ++pmap_it)
-
1886  {
-
1887  int who = dest_proc_ids[pmap_it];
-
1888  Vector<Vector<char> >& tmp = *(pbuff_ptrs[pmap_it]);
-
1889  for (int i = 0; i < num_threads; ++i) {
-
1890  not_ours[who].insert(not_ours[who].end(), tmp[i].begin(), tmp[i].end());
-
1891  tmp[i].erase(tmp[i].begin(), tmp[i].end());
-
1892  }
+
1882  for (auto& map_it : tmp_remote) {
+
1883  int who = map_it.first;
+
1884  not_ours[who];
+
1885  }
+
1886 
+
1887  Vector<int> dest_proc_ids;
+
1888  Vector<Vector<Vector<char> >* > pbuff_ptrs;
+
1889  for (auto& kv : tmp_remote)
+
1890  {
+
1891  dest_proc_ids.push_back(kv.first);
+
1892  pbuff_ptrs.push_back(&(kv.second));
1893  }
1894 
- -
1896 
-
1897  if (int(m_particles.size()) > theEffectiveFinestLevel+1) {
-
1898  // Looks like we lost an AmrLevel on a regrid.
-
1899  if (m_verbose > 0) {
-
1900  amrex::Print() << "ParticleContainer::Redistribute() resizing m_particles from "
-
1901  << m_particles.size() << " to " << theEffectiveFinestLevel+1 << '\n';
-
1902  }
-
1903  AMREX_ASSERT(int(m_particles.size()) >= 2);
-
1904 
-
1905  m_particles.resize(theEffectiveFinestLevel + 1);
-
1906  m_dummy_mf.resize(theEffectiveFinestLevel + 1);
-
1907  }
-
1908 
-
1909  if (ParallelContext::NProcsSub() == 1) {
-
1910  AMREX_ASSERT(not_ours.empty());
-
1911  }
-
1912  else {
-
1913  RedistributeMPI(not_ours, lev_min, lev_max, nGrow, local);
-
1914  }
-
1915 
-
1916  AMREX_ASSERT(OK(lev_min, lev_max, nGrow));
+
1895 #ifdef AMREX_USE_OMP
+
1896 #pragma omp parallel for
+
1897 #endif
+
1898  for (int pmap_it = 0; pmap_it < static_cast<int>(pbuff_ptrs.size()); ++pmap_it)
+
1899  {
+
1900  int who = dest_proc_ids[pmap_it];
+
1901  Vector<Vector<char> >& tmp = *(pbuff_ptrs[pmap_it]);
+
1902  for (int i = 0; i < num_threads; ++i) {
+
1903  not_ours[who].insert(not_ours[who].end(), tmp[i].begin(), tmp[i].end());
+
1904  tmp[i].erase(tmp[i].begin(), tmp[i].end());
+
1905  }
+
1906  }
+
1907 
+ +
1909 
+
1910  if (int(m_particles.size()) > theEffectiveFinestLevel+1) {
+
1911  // Looks like we lost an AmrLevel on a regrid.
+
1912  if (m_verbose > 0) {
+
1913  amrex::Print() << "ParticleContainer::Redistribute() resizing m_particles from "
+
1914  << m_particles.size() << " to " << theEffectiveFinestLevel+1 << '\n';
+
1915  }
+
1916  AMREX_ASSERT(int(m_particles.size()) >= 2);
1917 
-
1918  if (m_verbose > 0) {
-
1919  auto stoptime = amrex::second() - strttime;
-
1920 
-
1921  ByteSpread();
-
1922 
-
1923 #ifdef AMREX_LAZY
-
1924  Lazy::QueueReduction( [=] () mutable {
-
1925 #endif
- - +
1918  m_particles.resize(theEffectiveFinestLevel + 1);
+
1919  m_dummy_mf.resize(theEffectiveFinestLevel + 1);
+
1920  }
+
1921 
+
1922  if (ParallelContext::NProcsSub() == 1) {
+
1923  AMREX_ASSERT(not_ours.empty());
+
1924  }
+
1925  else {
+
1926  RedistributeMPI(not_ours, lev_min, lev_max, nGrow, local);
+
1927  }
1928 
-
1929  amrex::Print() << "ParticleContainer::Redistribute() time: " << stoptime << "\n\n";
-
1930 #ifdef AMREX_LAZY
-
1931  });
-
1932 #endif
-
1933  }
-
1934 }
+
1929  AMREX_ASSERT(OK(lev_min, lev_max, nGrow));
+
1930 
+
1931  if (m_verbose > 0) {
+
1932  auto stoptime = amrex::second() - strttime;
+
1933 
+
1934  ByteSpread();
1935 
-
1936 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
1937  template<class> class Allocator, class CellAssignor>
-
1938 void
- -
1940 RedistributeMPI (std::map<int, Vector<char> >& not_ours,
-
1941  int lev_min, int lev_max, int nGrow, int local)
-
1942 {
-
1943  BL_PROFILE("ParticleContainer::RedistributeMPI()");
-
1944  BL_PROFILE_VAR_NS("RedistributeMPI_locate", blp_locate);
-
1945  BL_PROFILE_VAR_NS("RedistributeMPI_copy", blp_copy);
-
1946 
-
1947 #ifdef AMREX_USE_MPI
+
1936 #ifdef AMREX_LAZY
+
1937  Lazy::QueueReduction( [=] () mutable {
+
1938 #endif
+ + +
1941 
+
1942  amrex::Print() << "ParticleContainer::Redistribute() time: " << stoptime << "\n\n";
+
1943 #ifdef AMREX_LAZY
+
1944  });
+
1945 #endif
+
1946  }
+
1947 }
1948 
-
1949  using buffer_type = unsigned long long;
-
1950 
-
1951  std::map<int, Vector<buffer_type> > mpi_snd_data;
-
1952  for (const auto& kv : not_ours)
-
1953  {
-
1954  auto nbt = (kv.second.size() + sizeof(buffer_type)-1)/sizeof(buffer_type);
-
1955  mpi_snd_data[kv.first].resize(nbt);
-
1956  std::memcpy((char*) mpi_snd_data[kv.first].data(), kv.second.data(), kv.second.size());
-
1957  }
-
1958 
-
1959  const int NProcs = ParallelContext::NProcsSub();
-
1960  const int NNeighborProcs = neighbor_procs.size();
+
1949 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
1950  template<class> class Allocator, class CellAssignor>
+
1951 void
+ +
1953 RedistributeMPI (std::map<int, Vector<char> >& not_ours,
+
1954  int lev_min, int lev_max, int nGrow, int local)
+
1955 {
+
1956  BL_PROFILE("ParticleContainer::RedistributeMPI()");
+
1957  BL_PROFILE_VAR_NS("RedistributeMPI_locate", blp_locate);
+
1958  BL_PROFILE_VAR_NS("RedistributeMPI_copy", blp_copy);
+
1959 
+
1960 #ifdef AMREX_USE_MPI
1961 
-
1962  // We may now have particles that are rightfully owned by another CPU.
-
1963  Vector<Long> Snds(NProcs, 0), Rcvs(NProcs, 0); // bytes!
-
1964 
-
1965  Long NumSnds = 0;
-
1966  if (local > 0)
-
1967  {
-
1968  AMREX_ALWAYS_ASSERT(lev_min == 0);
-
1969  AMREX_ALWAYS_ASSERT(lev_max == 0);
-
1970  BuildRedistributeMask(0, local);
-
1971  NumSnds = doHandShakeLocal(not_ours, neighbor_procs, Snds, Rcvs);
-
1972  }
-
1973  else
-
1974  {
-
1975  NumSnds = doHandShake(not_ours, Snds, Rcvs);
-
1976  }
+
1962  using buffer_type = unsigned long long;
+
1963 
+
1964  std::map<int, Vector<buffer_type> > mpi_snd_data;
+
1965  for (const auto& kv : not_ours)
+
1966  {
+
1967  auto nbt = (kv.second.size() + sizeof(buffer_type)-1)/sizeof(buffer_type);
+
1968  mpi_snd_data[kv.first].resize(nbt);
+
1969  std::memcpy((char*) mpi_snd_data[kv.first].data(), kv.second.data(), kv.second.size());
+
1970  }
+
1971 
+
1972  const int NProcs = ParallelContext::NProcsSub();
+
1973  const int NNeighborProcs = neighbor_procs.size();
+
1974 
+
1975  // We may now have particles that are rightfully owned by another CPU.
+
1976  Vector<Long> Snds(NProcs, 0), Rcvs(NProcs, 0); // bytes!
1977 
-
1978  const int SeqNum = ParallelDescriptor::SeqNum();
-
1979 
-
1980  if ((! local) && NumSnds == 0) {
-
1981  return; // There's no parallel work to do.
-
1982  }
-
1983 
-
1984  if (local)
-
1985  {
-
1986  Long tot_snds_this_proc = 0;
-
1987  Long tot_rcvs_this_proc = 0;
-
1988  for (int i = 0; i < NNeighborProcs; ++i) {
-
1989  tot_snds_this_proc += Snds[neighbor_procs[i]];
-
1990  tot_rcvs_this_proc += Rcvs[neighbor_procs[i]];
-
1991  }
-
1992  if ( (tot_snds_this_proc == 0) && (tot_rcvs_this_proc == 0) ) {
-
1993  return; // There's no parallel work to do.
-
1994  }
+
1978  Long NumSnds = 0;
+
1979  if (local > 0)
+
1980  {
+
1981  AMREX_ALWAYS_ASSERT(lev_min == 0);
+
1982  AMREX_ALWAYS_ASSERT(lev_max == 0);
+
1983  BuildRedistributeMask(0, local);
+
1984  NumSnds = doHandShakeLocal(not_ours, neighbor_procs, Snds, Rcvs);
+
1985  }
+
1986  else
+
1987  {
+
1988  NumSnds = doHandShake(not_ours, Snds, Rcvs);
+
1989  }
+
1990 
+
1991  const int SeqNum = ParallelDescriptor::SeqNum();
+
1992 
+
1993  if ((! local) && NumSnds == 0) {
+
1994  return; // There's no parallel work to do.
1995  }
1996 
-
1997  Vector<int> RcvProc;
-
1998  Vector<std::size_t> rOffset; // Offset (in bytes) in the receive buffer
-
1999 
-
2000  std::size_t TotRcvInts = 0;
-
2001  std::size_t TotRcvBytes = 0;
-
2002  for (int i = 0; i < NProcs; ++i) {
-
2003  if (Rcvs[i] > 0) {
-
2004  RcvProc.push_back(i);
-
2005  rOffset.push_back(TotRcvInts);
-
2006  TotRcvBytes += Rcvs[i];
-
2007  auto nbt = (Rcvs[i] + sizeof(buffer_type)-1)/sizeof(buffer_type);
-
2008  TotRcvInts += nbt;
-
2009  }
-
2010  }
-
2011 
-
2012  const auto nrcvs = static_cast<int>(RcvProc.size());
-
2013  Vector<MPI_Status> stats(nrcvs);
-
2014  Vector<MPI_Request> rreqs(nrcvs);
-
2015 
-
2016  // Allocate data for rcvs as one big chunk.
-
2017  Vector<unsigned long long> recvdata(TotRcvInts);
-
2018 
-
2019  // Post receives.
-
2020  for (int i = 0; i < nrcvs; ++i) {
-
2021  const auto Who = RcvProc[i];
-
2022  const auto offset = rOffset[i];
-
2023  const auto Cnt = (Rcvs[Who] + sizeof(buffer_type)-1)/sizeof(buffer_type);
-
2024  AMREX_ASSERT(Cnt > 0);
- -
2026  AMREX_ASSERT(Who >= 0 && Who < NProcs);
-
2027 
-
2028  rreqs[i] = ParallelDescriptor::Arecv(&recvdata[offset], Cnt, Who, SeqNum,
- -
2030  }
+
1997  if (local)
+
1998  {
+
1999  Long tot_snds_this_proc = 0;
+
2000  Long tot_rcvs_this_proc = 0;
+
2001  for (int i = 0; i < NNeighborProcs; ++i) {
+
2002  tot_snds_this_proc += Snds[neighbor_procs[i]];
+
2003  tot_rcvs_this_proc += Rcvs[neighbor_procs[i]];
+
2004  }
+
2005  if ( (tot_snds_this_proc == 0) && (tot_rcvs_this_proc == 0) ) {
+
2006  return; // There's no parallel work to do.
+
2007  }
+
2008  }
+
2009 
+
2010  Vector<int> RcvProc;
+
2011  Vector<std::size_t> rOffset; // Offset (in bytes) in the receive buffer
+
2012 
+
2013  std::size_t TotRcvInts = 0;
+
2014  std::size_t TotRcvBytes = 0;
+
2015  for (int i = 0; i < NProcs; ++i) {
+
2016  if (Rcvs[i] > 0) {
+
2017  RcvProc.push_back(i);
+
2018  rOffset.push_back(TotRcvInts);
+
2019  TotRcvBytes += Rcvs[i];
+
2020  auto nbt = (Rcvs[i] + sizeof(buffer_type)-1)/sizeof(buffer_type);
+
2021  TotRcvInts += nbt;
+
2022  }
+
2023  }
+
2024 
+
2025  const auto nrcvs = static_cast<int>(RcvProc.size());
+
2026  Vector<MPI_Status> stats(nrcvs);
+
2027  Vector<MPI_Request> rreqs(nrcvs);
+
2028 
+
2029  // Allocate data for rcvs as one big chunk.
+
2030  Vector<unsigned long long> recvdata(TotRcvInts);
2031 
-
2032  // Send.
-
2033  for (const auto& kv : mpi_snd_data) {
-
2034  const auto Who = kv.first;
-
2035  const auto Cnt = kv.second.size();
-
2036 
+
2032  // Post receives.
+
2033  for (int i = 0; i < nrcvs; ++i) {
+
2034  const auto Who = RcvProc[i];
+
2035  const auto offset = rOffset[i];
+
2036  const auto Cnt = (Rcvs[Who] + sizeof(buffer_type)-1)/sizeof(buffer_type);
2037  AMREX_ASSERT(Cnt > 0);
-
2038  AMREX_ASSERT(Who >= 0 && Who < NProcs);
- + +
2039  AMREX_ASSERT(Who >= 0 && Who < NProcs);
2040 
-
2041  ParallelDescriptor::Send(kv.second.data(), Cnt, Who, SeqNum,
- +
2041  rreqs[i] = ParallelDescriptor::Arecv(&recvdata[offset], Cnt, Who, SeqNum,
+
2043  }
2044 
-
2045  if (nrcvs > 0) {
-
2046  ParallelDescriptor::Waitall(rreqs, stats);
-
2047 
-
2048  BL_PROFILE_VAR_START(blp_locate);
+
2045  // Send.
+
2046  for (const auto& kv : mpi_snd_data) {
+
2047  const auto Who = kv.first;
+
2048  const auto Cnt = kv.second.size();
2049 
-
2050  int npart = TotRcvBytes / superparticle_size;
-
2051 
-
2052  Vector<int> rcv_levs(npart);
-
2053  Vector<int> rcv_grid(npart);
-
2054  Vector<int> rcv_tile(npart);
-
2055 
-
2056  int ipart = 0;
-
2057  ParticleLocData pld;
-
2058  for (int j = 0; j < nrcvs; ++j)
-
2059  {
-
2060  const auto offset = rOffset[j];
-
2061  const auto Who = RcvProc[j];
-
2062  const auto Cnt = Rcvs[Who] / superparticle_size;
-
2063  for (int i = 0; i < int(Cnt); ++i)
-
2064  {
-
2065  char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
-
2066 
- +
2050  AMREX_ASSERT(Cnt > 0);
+
2051  AMREX_ASSERT(Who >= 0 && Who < NProcs);
+ +
2053 
+
2054  ParallelDescriptor::Send(kv.second.data(), Cnt, Who, SeqNum,
+ +
2056  }
+
2057 
+
2058  if (nrcvs > 0) {
+
2059  ParallelDescriptor::Waitall(rreqs, stats);
+
2060 
+
2061  BL_PROFILE_VAR_START(blp_locate);
+
2062 
+
2063  int npart = TotRcvBytes / superparticle_size;
+
2064 
+
2065  Vector<int> rcv_levs(npart);
+
2066  Vector<int> rcv_grid(npart);
+
2067  Vector<int> rcv_tile(npart);
2068 
-
2069  if constexpr (ParticleType::is_soa_particle) {
-
2070  std::memcpy(&p.m_idcpu, pbuf, sizeof(uint64_t));
-
2071 
-
2072  ParticleReal pos[AMREX_SPACEDIM];
-
2073  std::memcpy(&pos[0], pbuf + sizeof(uint64_t), AMREX_SPACEDIM*sizeof(ParticleReal));
-
2074  AMREX_D_TERM(p.pos(0) = pos[0];,
-
2075  p.pos(1) = pos[1];,
-
2076  p.pos(2) = pos[2]);
-
2077  } else {
-
2078  std::memcpy(&p, pbuf, sizeof(ParticleType));
-
2079  }
-
2080 
-
2081  bool success = Where(p, pld, lev_min, lev_max, 0);
-
2082  if (!success)
-
2083  {
-
2084  success = (nGrow > 0) && Where(p, pld, lev_min, lev_min, nGrow);
-
2085  pld.m_grown_gridbox = pld.m_gridbox; // reset grown box for subsequent calls.
-
2086  }
-
2087  if (!success)
-
2088  {
-
2089  amrex::Abort("RedistributeMPI_locate:: invalid particle.");
-
2090  }
-
2091 
-
2092  rcv_levs[ipart] = pld.m_lev;
-
2093  rcv_grid[ipart] = pld.m_grid;
-
2094  rcv_tile[ipart] = pld.m_tile;
-
2095 
-
2096  ++ipart;
-
2097  }
-
2098  }
-
2099 
-
2100  BL_PROFILE_VAR_STOP(blp_locate);
-
2101 
-
2102  BL_PROFILE_VAR_START(blp_copy);
-
2103 
-
2104 #ifndef AMREX_USE_GPU
-
2105  ipart = 0;
-
2106  for (int i = 0; i < nrcvs; ++i)
-
2107  {
-
2108  const auto offset = rOffset[i];
-
2109  const auto Who = RcvProc[i];
-
2110  const auto Cnt = Rcvs[Who] / superparticle_size;
-
2111  for (int j = 0; j < int(Cnt); ++j)
-
2112  {
-
2113  auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart],
-
2114  rcv_tile[ipart])];
-
2115  char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
+
2069  int ipart = 0;
+
2070  ParticleLocData pld;
+
2071  for (int j = 0; j < nrcvs; ++j)
+
2072  {
+
2073  const auto offset = rOffset[j];
+
2074  const auto Who = RcvProc[j];
+
2075  const auto Cnt = Rcvs[Who] / superparticle_size;
+
2076  for (int i = 0; i < int(Cnt); ++i)
+
2077  {
+
2078  char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
+
2079 
+ +
2081 
+
2082  if constexpr (ParticleType::is_soa_particle) {
+
2083  std::memcpy(&p.m_idcpu, pbuf, sizeof(uint64_t));
+
2084 
+
2085  ParticleReal pos[AMREX_SPACEDIM];
+
2086  std::memcpy(&pos[0], pbuf + sizeof(uint64_t), AMREX_SPACEDIM*sizeof(ParticleReal));
+
2087  AMREX_D_TERM(p.pos(0) = pos[0];,
+
2088  p.pos(1) = pos[1];,
+
2089  p.pos(2) = pos[2]);
+
2090  } else {
+
2091  std::memcpy(&p, pbuf, sizeof(ParticleType));
+
2092  }
+
2093 
+
2094  bool success = Where(p, pld, lev_min, lev_max, 0);
+
2095  if (!success)
+
2096  {
+
2097  success = (nGrow > 0) && Where(p, pld, lev_min, lev_min, nGrow);
+
2098  pld.m_grown_gridbox = pld.m_gridbox; // reset grown box for subsequent calls.
+
2099  }
+
2100  if (!success)
+
2101  {
+
2102  amrex::Abort("RedistributeMPI_locate:: invalid particle.");
+
2103  }
+
2104 
+
2105  rcv_levs[ipart] = pld.m_lev;
+
2106  rcv_grid[ipart] = pld.m_grid;
+
2107  rcv_tile[ipart] = pld.m_tile;
+
2108 
+
2109  ++ipart;
+
2110  }
+
2111  }
+
2112 
+
2113  BL_PROFILE_VAR_STOP(blp_locate);
+
2114 
+
2115  BL_PROFILE_VAR_START(blp_copy);
2116 
-
2117  if constexpr (ParticleType::is_soa_particle) {
-
2118  uint64_t idcpudata;
-
2119  std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
-
2120  pbuf += sizeof(uint64_t);
-
2121  ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata);
-
2122  } else {
-
2123  ParticleType p;
-
2124  std::memcpy(&p, pbuf, sizeof(ParticleType));
-
2125  pbuf += sizeof(ParticleType);
-
2126  ptile.push_back(p);
-
2127  }
-
2128 
-
2129  int array_comp_start = AMREX_SPACEDIM + NStructReal;
-
2130  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
2131  if (h_redistribute_real_comp[array_comp_start + comp]) {
-
2132  ParticleReal rdata;
-
2133  std::memcpy(&rdata, pbuf, sizeof(ParticleReal));
-
2134  pbuf += sizeof(ParticleReal);
-
2135  ptile.push_back_real(comp, rdata);
-
2136  } else {
-
2137  ptile.push_back_real(comp, 0.0);
-
2138  }
-
2139  }
-
2140 
-
2141  array_comp_start = 2 + NStructInt;
-
2142  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
2143  if (h_redistribute_int_comp[array_comp_start + comp]) {
-
2144  int idata;
-
2145  std::memcpy(&idata, pbuf, sizeof(int));
-
2146  pbuf += sizeof(int);
-
2147  ptile.push_back_int(comp, idata);
-
2148  } else {
-
2149  ptile.push_back_int(comp, 0);
-
2150  }
-
2151  }
-
2152  ++ipart;
-
2153  }
-
2154  }
-
2155 
-
2156 #else
- -
2158  host_particles.reserve(15);
-
2159  host_particles.resize(finestLevel()+1);
-
2160 
- -
2162  std::vector<Gpu::HostVector<ParticleReal> > > > host_real_attribs;
-
2163  host_real_attribs.reserve(15);
-
2164  host_real_attribs.resize(finestLevel()+1);
-
2165 
- -
2167  std::vector<Gpu::HostVector<int> > > > host_int_attribs;
-
2168  host_int_attribs.reserve(15);
-
2169  host_int_attribs.resize(finestLevel()+1);
-
2170 
- -
2172  host_idcpu.reserve(15);
-
2173  host_idcpu.resize(finestLevel()+1);
-
2174 
-
2175  ipart = 0;
-
2176  for (int i = 0; i < nrcvs; ++i)
-
2177  {
-
2178  const auto offset = rOffset[i];
-
2179  const auto Who = RcvProc[i];
-
2180  const auto Cnt = Rcvs[Who] / superparticle_size;
-
2181  for (auto j = decltype(Cnt)(0); j < Cnt; ++j)
-
2182  {
-
2183  int lev = rcv_levs[ipart];
-
2184  std::pair<int, int> ind(std::make_pair(rcv_grid[ipart], rcv_tile[ipart]));
-
2185 
-
2186  char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
+
2117 #ifndef AMREX_USE_GPU
+
2118  ipart = 0;
+
2119  for (int i = 0; i < nrcvs; ++i)
+
2120  {
+
2121  const auto offset = rOffset[i];
+
2122  const auto Who = RcvProc[i];
+
2123  const auto Cnt = Rcvs[Who] / superparticle_size;
+
2124  for (int j = 0; j < int(Cnt); ++j)
+
2125  {
+
2126  auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart],
+
2127  rcv_tile[ipart])];
+
2128  char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
+
2129 
+
2130  if constexpr (ParticleType::is_soa_particle) {
+
2131  uint64_t idcpudata;
+
2132  std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
+
2133  pbuf += sizeof(uint64_t);
+
2134  ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata);
+
2135  } else {
+
2136  ParticleType p;
+
2137  std::memcpy(&p, pbuf, sizeof(ParticleType));
+
2138  pbuf += sizeof(ParticleType);
+
2139  ptile.push_back(p);
+
2140  }
+
2141 
+
2142  int array_comp_start = AMREX_SPACEDIM + NStructReal;
+
2143  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
2144  if (h_redistribute_real_comp[array_comp_start + comp]) {
+
2145  ParticleReal rdata;
+
2146  std::memcpy(&rdata, pbuf, sizeof(ParticleReal));
+
2147  pbuf += sizeof(ParticleReal);
+
2148  ptile.push_back_real(comp, rdata);
+
2149  } else {
+
2150  ptile.push_back_real(comp, 0.0);
+
2151  }
+
2152  }
+
2153 
+
2154  array_comp_start = 2 + NStructInt;
+
2155  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
2156  if (h_redistribute_int_comp[array_comp_start + comp]) {
+
2157  int idata;
+
2158  std::memcpy(&idata, pbuf, sizeof(int));
+
2159  pbuf += sizeof(int);
+
2160  ptile.push_back_int(comp, idata);
+
2161  } else {
+
2162  ptile.push_back_int(comp, 0);
+
2163  }
+
2164  }
+
2165  ++ipart;
+
2166  }
+
2167  }
+
2168 
+
2169 #else
+ +
2171  host_particles.reserve(15);
+
2172  host_particles.resize(finestLevel()+1);
+
2173 
+ +
2175  std::vector<Gpu::HostVector<ParticleReal> > > > host_real_attribs;
+
2176  host_real_attribs.reserve(15);
+
2177  host_real_attribs.resize(finestLevel()+1);
+
2178 
+ +
2180  std::vector<Gpu::HostVector<int> > > > host_int_attribs;
+
2181  host_int_attribs.reserve(15);
+
2182  host_int_attribs.resize(finestLevel()+1);
+
2183 
+ +
2185  host_idcpu.reserve(15);
+
2186  host_idcpu.resize(finestLevel()+1);
2187 
-
2188  host_real_attribs[lev][ind].resize(NumRealComps());
-
2189  host_int_attribs[lev][ind].resize(NumIntComps());
-
2190 
-
2191  if constexpr (ParticleType::is_soa_particle) {
-
2192  uint64_t idcpudata;
-
2193  std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
-
2194  pbuf += sizeof(uint64_t);
-
2195  host_idcpu[lev][ind].push_back(idcpudata);
-
2196  } else {
-
2197  ParticleType p;
-
2198  std::memcpy(&p, pbuf, sizeof(ParticleType));
-
2199  pbuf += sizeof(ParticleType);
-
2200  host_particles[lev][ind].push_back(p);
-
2201  }
-
2202 
-
2203  host_real_attribs[lev][ind].resize(NumRealComps());
-
2204  host_int_attribs[lev][ind].resize(NumIntComps());
-
2205 
-
2206  // add the real...
-
2207  int array_comp_start = AMREX_SPACEDIM + NStructReal;
-
2208  for (int comp = 0; comp < NumRealComps(); ++comp) {
-
2209  if (h_redistribute_real_comp[array_comp_start + comp]) {
-
2210  Real rdata;
-
2211  std::memcpy(&rdata, pbuf, sizeof(Real));
-
2212  pbuf += sizeof(Real);
-
2213  host_real_attribs[lev][ind][comp].push_back(rdata);
-
2214  } else {
-
2215  host_real_attribs[lev][ind][comp].push_back(0.0);
-
2216  }
-
2217  }
+
2188  ipart = 0;
+
2189  for (int i = 0; i < nrcvs; ++i)
+
2190  {
+
2191  const auto offset = rOffset[i];
+
2192  const auto Who = RcvProc[i];
+
2193  const auto Cnt = Rcvs[Who] / superparticle_size;
+
2194  for (auto j = decltype(Cnt)(0); j < Cnt; ++j)
+
2195  {
+
2196  int lev = rcv_levs[ipart];
+
2197  std::pair<int, int> ind(std::make_pair(rcv_grid[ipart], rcv_tile[ipart]));
+
2198 
+
2199  char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
+
2200 
+
2201  host_real_attribs[lev][ind].resize(NumRealComps());
+
2202  host_int_attribs[lev][ind].resize(NumIntComps());
+
2203 
+
2204  if constexpr (ParticleType::is_soa_particle) {
+
2205  uint64_t idcpudata;
+
2206  std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
+
2207  pbuf += sizeof(uint64_t);
+
2208  host_idcpu[lev][ind].push_back(idcpudata);
+
2209  } else {
+
2210  ParticleType p;
+
2211  std::memcpy(&p, pbuf, sizeof(ParticleType));
+
2212  pbuf += sizeof(ParticleType);
+
2213  host_particles[lev][ind].push_back(p);
+
2214  }
+
2215 
+
2216  host_real_attribs[lev][ind].resize(NumRealComps());
+
2217  host_int_attribs[lev][ind].resize(NumIntComps());
2218 
-
2219  // ... and int array data
-
2220  array_comp_start = 2 + NStructInt;
-
2221  for (int comp = 0; comp < NumIntComps(); ++comp) {
-
2222  if (h_redistribute_int_comp[array_comp_start + comp]) {
-
2223  int idata;
-
2224  std::memcpy(&idata, pbuf, sizeof(int));
-
2225  pbuf += sizeof(int);
-
2226  host_int_attribs[lev][ind][comp].push_back(idata);
+
2219  // add the real...
+
2220  int array_comp_start = AMREX_SPACEDIM + NStructReal;
+
2221  for (int comp = 0; comp < NumRealComps(); ++comp) {
+
2222  if (h_redistribute_real_comp[array_comp_start + comp]) {
+
2223  Real rdata;
+
2224  std::memcpy(&rdata, pbuf, sizeof(Real));
+
2225  pbuf += sizeof(Real);
+
2226  host_real_attribs[lev][ind][comp].push_back(rdata);
2227  } else {
-
2228  host_int_attribs[lev][ind][comp].push_back(0);
+
2228  host_real_attribs[lev][ind][comp].push_back(0.0);
2229  }
2230  }
-
2231  ++ipart;
-
2232  }
-
2233  }
-
2234 
-
2235  for (int host_lev = 0; host_lev < static_cast<int>(host_particles.size()); ++host_lev)
-
2236  {
-
2237  for (auto& kv : host_particles[host_lev]) {
-
2238  auto grid = kv.first.first;
-
2239  auto tile = kv.first.second;
-
2240  const auto& src_tile = kv.second;
-
2241 
-
2242  auto& dst_tile = GetParticles(host_lev)[std::make_pair(grid,tile)];
-
2243  auto old_size = dst_tile.size();
-
2244  auto new_size = old_size + src_tile.size();
-
2245  dst_tile.resize(new_size);
-
2246 
-
2247  if constexpr (ParticleType::is_soa_particle) {
- -
2249  host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
-
2250  host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
-
2251  dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
-
2252  } else {
- -
2254  src_tile.begin(), src_tile.end(),
-
2255  dst_tile.GetArrayOfStructs().begin() + old_size);
-
2256  }
-
2257 
-
2258  for (int i = 0; i < NumRealComps(); ++i) {
- -
2260  host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),
-
2261  host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(),
-
2262  dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);
-
2263  }
-
2264 
-
2265  for (int i = 0; i < NumIntComps(); ++i) {
+
2231 
+
2232  // ... and int array data
+
2233  array_comp_start = 2 + NStructInt;
+
2234  for (int comp = 0; comp < NumIntComps(); ++comp) {
+
2235  if (h_redistribute_int_comp[array_comp_start + comp]) {
+
2236  int idata;
+
2237  std::memcpy(&idata, pbuf, sizeof(int));
+
2238  pbuf += sizeof(int);
+
2239  host_int_attribs[lev][ind][comp].push_back(idata);
+
2240  } else {
+
2241  host_int_attribs[lev][ind][comp].push_back(0);
+
2242  }
+
2243  }
+
2244  ++ipart;
+
2245  }
+
2246  }
+
2247 
+
2248  for (int host_lev = 0; host_lev < static_cast<int>(host_particles.size()); ++host_lev)
+
2249  {
+
2250  for (auto& kv : host_particles[host_lev]) {
+
2251  auto grid = kv.first.first;
+
2252  auto tile = kv.first.second;
+
2253  const auto& src_tile = kv.second;
+
2254 
+
2255  auto& dst_tile = GetParticles(host_lev)[std::make_pair(grid,tile)];
+
2256  auto old_size = dst_tile.size();
+
2257  auto new_size = old_size + src_tile.size();
+
2258  dst_tile.resize(new_size);
+
2259 
+
2260  if constexpr (ParticleType::is_soa_particle) {
+ +
2262  host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
+
2263  host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
+
2264  dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
+
2265  } else {
-
2267  host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),
-
2268  host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(),
-
2269  dst_tile.GetStructOfArrays().GetIntData(i).begin() + old_size);
-
2270  }
-
2271  }
-
2272  }
-
2273 
- -
2275 #endif
-
2276 
-
2277  BL_PROFILE_VAR_STOP(blp_copy);
-
2278  }
-
2279 #else
-
2280  amrex::ignore_unused(not_ours,lev_min,lev_max,nGrow,local);
-
2281 #endif
-
2282 }
-
2283 
-
2284 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2285  template<class> class Allocator, class CellAssignor>
-
2286 bool
- -
2288 {
-
2289  BL_PROFILE("ParticleContainer::OK()");
-
2290 
-
2291  if (lev_max == -1) {
-
2292  lev_max = finestLevel();
-
2293 }
-
2294 
-
2295  return (numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0);
-
2296 }
-
2297 
-
2298 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2299  template<class> class Allocator, class CellAssignor>
-
2300 void
- -
2302 ::AddParticlesAtLevel (AoS& particles, int level, int nGrow)
-
2303 {
-
2304  ParticleTileType ptile;
-
2305  ptile.GetArrayOfStructs().swap(particles);
-
2306  AddParticlesAtLevel(ptile, level, nGrow);
-
2307 }
-
2308 
-
2309 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2310  template<class> class Allocator, class CellAssignor>
-
2311 void
-
2312 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
-
2313 ::AddParticlesAtLevel (ParticleTileType& particles, int level, int nGrow)
-
2314 {
-
2315  BL_PROFILE("ParticleContainer::AddParticlesAtLevel()");
-
2316 
-
2317  if (int(m_particles.size()) < level+1)
-
2318  {
-
2319  if (Verbose())
-
2320  {
-
2321  amrex::Print() << "ParticleContainer::AddParticlesAtLevel resizing m_particles from "
-
2322  << m_particles.size()
-
2323  << " to "
-
2324  << level+1 << '\n';
-
2325  }
-
2326  m_particles.resize(level+1);
-
2327  m_dummy_mf.resize(level+1);
-
2328  for (int lev = 0; lev < level+1; ++lev) {
-
2329  RedefineDummyMF(lev);
-
2330  }
-
2331  }
-
2332 
-
2333  auto& ptile = DefineAndReturnParticleTile(level, 0, 0);
-
2334  int old_np = ptile.size();
-
2335  int num_to_add = particles.size();
-
2336  int new_np = old_np + num_to_add;
-
2337  ptile.resize(new_np);
-
2338  amrex::copyParticles(ptile, particles, 0, old_np, num_to_add);
-
2339  Redistribute(level, level, nGrow);
-
2340  particles.resize(0);
-
2341 }
-
2342 
-
2343 // This is the single-level version for cell-centered density
-
2344 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2345  template<class> class Allocator, class CellAssignor>
-
2346 void
- -
2348 AssignCellDensitySingleLevel (int rho_index,
-
2349  MultiFab& mf_to_be_filled,
-
2350  int lev,
-
2351  int ncomp,
-
2352  int particle_lvl_offset) const
-
2353 {
-
2354  BL_PROFILE("ParticleContainer::AssignCellDensitySingleLevel()");
+
2267  src_tile.begin(), src_tile.end(),
+
2268  dst_tile.GetArrayOfStructs().begin() + old_size);
+
2269  }
+
2270 
+
2271  for (int i = 0; i < NumRealComps(); ++i) {
+ +
2273  host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),
+
2274  host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(),
+
2275  dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size);
+
2276  }
+
2277 
+
2278  for (int i = 0; i < NumIntComps(); ++i) {
+ +
2280  host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(),
+
2281  host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(),
+
2282  dst_tile.GetStructOfArrays().GetIntData(i).begin() + old_size);
+
2283  }
+
2284  }
+
2285  }
+
2286 
+ +
2288 #endif
+
2289 
+
2290  BL_PROFILE_VAR_STOP(blp_copy);
+
2291  }
+
2292 #else
+
2293  amrex::ignore_unused(not_ours,lev_min,lev_max,nGrow,local);
+
2294 #endif
+
2295 }
+
2296 
+
2297 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2298  template<class> class Allocator, class CellAssignor>
+
2299 bool
+ +
2301 {
+
2302  BL_PROFILE("ParticleContainer::OK()");
+
2303 
+
2304  if (lev_max == -1) {
+
2305  lev_max = finestLevel();
+
2306 }
+
2307 
+
2308  return (numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0);
+
2309 }
+
2310 
+
2311 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2312  template<class> class Allocator, class CellAssignor>
+
2313 void
+ +
2315 ::AddParticlesAtLevel (AoS& particles, int level, int nGrow)
+
2316 {
+
2317  ParticleTileType ptile;
+
2318  ptile.GetArrayOfStructs().swap(particles);
+
2319  AddParticlesAtLevel(ptile, level, nGrow);
+
2320 }
+
2321 
+
2322 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2323  template<class> class Allocator, class CellAssignor>
+
2324 void
+
2325 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>
+
2326 ::AddParticlesAtLevel (ParticleTileType& particles, int level, int nGrow)
+
2327 {
+
2328  BL_PROFILE("ParticleContainer::AddParticlesAtLevel()");
+
2329 
+
2330  if (int(m_particles.size()) < level+1)
+
2331  {
+
2332  if (Verbose())
+
2333  {
+
2334  amrex::Print() << "ParticleContainer::AddParticlesAtLevel resizing m_particles from "
+
2335  << m_particles.size()
+
2336  << " to "
+
2337  << level+1 << '\n';
+
2338  }
+
2339  m_particles.resize(level+1);
+
2340  m_dummy_mf.resize(level+1);
+
2341  for (int lev = 0; lev < level+1; ++lev) {
+
2342  RedefineDummyMF(lev);
+
2343  }
+
2344  }
+
2345 
+
2346  auto& ptile = DefineAndReturnParticleTile(level, 0, 0);
+
2347  int old_np = ptile.size();
+
2348  int num_to_add = particles.size();
+
2349  int new_np = old_np + num_to_add;
+
2350  ptile.resize(new_np);
+
2351  amrex::copyParticles(ptile, particles, 0, old_np, num_to_add);
+
2352  Redistribute(level, level, nGrow);
+
2353  particles.resize(0);
+
2354 }
2355 
-
2356  if (rho_index != 0) { amrex::Abort("AssignCellDensitySingleLevel only works if rho_index = 0"); }
-
2357 
-
2358  MultiFab* mf_pointer;
-
2359 
-
2360  if (OnSameGrids(lev, mf_to_be_filled)) {
-
2361  // If we are already working with the internal mf defined on the
-
2362  // particle_box_array, then we just work with this.
-
2363  mf_pointer = &mf_to_be_filled;
-
2364  }
-
2365  else {
-
2366  // If mf_to_be_filled is not defined on the particle_box_array, then we need
-
2367  // to make a temporary here and copy into mf_to_be_filled at the end.
-
2368  mf_pointer = new MultiFab(ParticleBoxArray(lev),
-
2369  ParticleDistributionMap(lev),
-
2370  ncomp, mf_to_be_filled.nGrow());
-
2371  }
+
2356 // This is the single-level version for cell-centered density
+
2357 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2358  template<class> class Allocator, class CellAssignor>
+
2359 void
+ +
2361 AssignCellDensitySingleLevel (int rho_index,
+
2362  MultiFab& mf_to_be_filled,
+
2363  int lev,
+
2364  int ncomp,
+
2365  int particle_lvl_offset) const
+
2366 {
+
2367  BL_PROFILE("ParticleContainer::AssignCellDensitySingleLevel()");
+
2368 
+
2369  if (rho_index != 0) { amrex::Abort("AssignCellDensitySingleLevel only works if rho_index = 0"); }
+
2370 
+
2371  MultiFab* mf_pointer;
2372 
-
2373  // We must have ghost cells for each FAB so that a particle in one grid can spread
-
2374  // its effect to an adjacent grid by first putting the value into ghost cells of its
-
2375  // own grid. The mf->SumBoundary call then adds the value from one grid's ghost cell
-
2376  // to another grid's valid region.
-
2377  if (mf_pointer->nGrow() < 1) {
-
2378  amrex::Error("Must have at least one ghost cell when in AssignCellDensitySingleLevel");
-
2379  }
-
2380 
-
2381  const auto strttime = amrex::second();
-
2382 
-
2383  const auto dxi = Geom(lev).InvCellSizeArray();
-
2384  const auto plo = Geom(lev).ProbLoArray();
-
2385  const auto pdxi = Geom(lev + particle_lvl_offset).InvCellSizeArray();
-
2386 
-
2387  if (Geom(lev).isAnyPeriodic() && ! Geom(lev).isAllPeriodic())
-
2388  {
-
2389  amrex::Error("AssignCellDensitySingleLevel: problem must be periodic in no or all directions");
-
2390  }
-
2391 
-
2392  mf_pointer->setVal(0);
+
2373  if (OnSameGrids(lev, mf_to_be_filled)) {
+
2374  // If we are already working with the internal mf defined on the
+
2375  // particle_box_array, then we just work with this.
+
2376  mf_pointer = &mf_to_be_filled;
+
2377  }
+
2378  else {
+
2379  // If mf_to_be_filled is not defined on the particle_box_array, then we need
+
2380  // to make a temporary here and copy into mf_to_be_filled at the end.
+
2381  mf_pointer = new MultiFab(ParticleBoxArray(lev),
+
2382  ParticleDistributionMap(lev),
+
2383  ncomp, mf_to_be_filled.nGrow());
+
2384  }
+
2385 
+
2386  // We must have ghost cells for each FAB so that a particle in one grid can spread
+
2387  // its effect to an adjacent grid by first putting the value into ghost cells of its
+
2388  // own grid. The mf->SumBoundary call then adds the value from one grid's ghost cell
+
2389  // to another grid's valid region.
+
2390  if (mf_pointer->nGrow() < 1) {
+
2391  amrex::Error("Must have at least one ghost cell when in AssignCellDensitySingleLevel");
+
2392  }
2393 
- -
2395 #ifdef AMREX_USE_OMP
-
2396 #pragma omp parallel if (Gpu::notInLaunchRegion())
-
2397 #endif
-
2398  {
-
2399  FArrayBox local_rho;
-
2400  for (ParConstIter pti(*this, lev); pti.isValid(); ++pti) {
-
2401  const Long np = pti.numParticles();
-
2402  auto ptd = pti.GetParticleTile().getConstParticleTileData();
-
2403  FArrayBox& fab = (*mf_pointer)[pti];
-
2404  auto rhoarr = fab.array();
-
2405 #ifdef AMREX_USE_OMP
-
2406  Box tile_box;
-
2407  if (Gpu::notInLaunchRegion())
-
2408  {
-
2409  tile_box = pti.tilebox();
-
2410  tile_box.grow(mf_pointer->nGrow());
-
2411  local_rho.resize(tile_box,ncomp);
-
2412  local_rho.setVal<RunOn::Host>(0.0);
-
2413  rhoarr = local_rho.array();
-
2414  }
-
2415 #endif
-
2416 
-
2417  if (particle_lvl_offset == 0)
-
2418  {
-
2419  AMREX_HOST_DEVICE_FOR_1D( np, i,
-
2420  {
-
2421  auto p = make_particle<ParticleType>{}(ptd,i);
-
2422  amrex_deposit_cic(p, ncomp, rhoarr, plo, dxi);
-
2423  });
-
2424  }
-
2425  else
-
2426  {
-
2427  AMREX_HOST_DEVICE_FOR_1D( np, i,
-
2428  {
-
2429  auto p = make_particle<ParticleType>{}(ptd,i);
-
2430  amrex_deposit_particle_dx_cic(p, ncomp, rhoarr, plo, dxi, pdxi);
-
2431  });
-
2432  }
-
2433 
-
2434 #ifdef AMREX_USE_OMP
-
2435  if (Gpu::notInLaunchRegion())
-
2436  {
-
2437  fab.atomicAdd<RunOn::Host>(local_rho, tile_box, tile_box, 0, 0, ncomp);
-
2438  }
-
2439 #endif
-
2440  }
-
2441  }
-
2442 
-
2443  mf_pointer->SumBoundary(Geom(lev).periodicity());
-
2444 
-
2445  // If ncomp > 1, first divide the momenta (component n)
-
2446  // by the mass (component 0) in order to get velocities.
-
2447  // Be careful not to divide by zero.
-
2448  for (int n = 1; n < ncomp; n++)
-
2449  {
-
2450  for (MFIter mfi(*mf_pointer); mfi.isValid(); ++mfi)
-
2451  {
-
2452  (*mf_pointer)[mfi].protected_divide<RunOn::Device>((*mf_pointer)[mfi],0,n,1);
+
2394  const auto strttime = amrex::second();
+
2395 
+
2396  const auto dxi = Geom(lev).InvCellSizeArray();
+
2397  const auto plo = Geom(lev).ProbLoArray();
+
2398  const auto pdxi = Geom(lev + particle_lvl_offset).InvCellSizeArray();
+
2399 
+
2400  if (Geom(lev).isAnyPeriodic() && ! Geom(lev).isAllPeriodic())
+
2401  {
+
2402  amrex::Error("AssignCellDensitySingleLevel: problem must be periodic in no or all directions");
+
2403  }
+
2404 
+
2405  mf_pointer->setVal(0);
+
2406 
+ +
2408 #ifdef AMREX_USE_OMP
+
2409 #pragma omp parallel if (Gpu::notInLaunchRegion())
+
2410 #endif
+
2411  {
+
2412  FArrayBox local_rho;
+
2413  for (ParConstIter pti(*this, lev); pti.isValid(); ++pti) {
+
2414  const Long np = pti.numParticles();
+
2415  auto ptd = pti.GetParticleTile().getConstParticleTileData();
+
2416  FArrayBox& fab = (*mf_pointer)[pti];
+
2417  auto rhoarr = fab.array();
+
2418 #ifdef AMREX_USE_OMP
+
2419  Box tile_box;
+
2420  if (Gpu::notInLaunchRegion())
+
2421  {
+
2422  tile_box = pti.tilebox();
+
2423  tile_box.grow(mf_pointer->nGrow());
+
2424  local_rho.resize(tile_box,ncomp);
+
2425  local_rho.setVal<RunOn::Host>(0.0);
+
2426  rhoarr = local_rho.array();
+
2427  }
+
2428 #endif
+
2429 
+
2430  if (particle_lvl_offset == 0)
+
2431  {
+
2432  AMREX_HOST_DEVICE_FOR_1D( np, i,
+
2433  {
+
2434  auto p = make_particle<ParticleType>{}(ptd,i);
+
2435  amrex_deposit_cic(p, ncomp, rhoarr, plo, dxi);
+
2436  });
+
2437  }
+
2438  else
+
2439  {
+
2440  AMREX_HOST_DEVICE_FOR_1D( np, i,
+
2441  {
+
2442  auto p = make_particle<ParticleType>{}(ptd,i);
+
2443  amrex_deposit_particle_dx_cic(p, ncomp, rhoarr, plo, dxi, pdxi);
+
2444  });
+
2445  }
+
2446 
+
2447 #ifdef AMREX_USE_OMP
+
2448  if (Gpu::notInLaunchRegion())
+
2449  {
+
2450  fab.atomicAdd<RunOn::Host>(local_rho, tile_box, tile_box, 0, 0, ncomp);
+
2451  }
+
2452 #endif
2453  }
2454  }
2455 
-
2456  // Only multiply the first component by (1/vol) because this converts mass
-
2457  // to density. If there are additional components (like velocity), we don't
-
2458  // want to divide those by volume.
-
2459  const Real* dx = Geom(lev).CellSize();
-
2460  const Real vol = AMREX_D_TERM(dx[0], *dx[1], *dx[2]);
-
2461 
-
2462  mf_pointer->mult(Real(1.0)/vol, 0, 1, mf_pointer->nGrow());
-
2463 
-
2464  // If mf_to_be_filled is not defined on the particle_box_array, then we need
-
2465  // to copy here from mf_pointer into mf_to_be_filled.
-
2466  if (mf_pointer != &mf_to_be_filled)
-
2467  {
-
2468  mf_to_be_filled.ParallelCopy(*mf_pointer,0,0,ncomp,0,0);
-
2469  delete mf_pointer;
-
2470  }
-
2471 
-
2472  if (m_verbose > 1)
-
2473  {
-
2474  auto stoptime = amrex::second() - strttime;
-
2475 
- - -
2478 
-
2479  amrex::Print() << "ParticleContainer::AssignCellDensitySingleLevel) time: "
-
2480  << stoptime << '\n';
-
2481  }
-
2482 }
-
2483 
-
2484 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2485  template<class> class Allocator, class CellAssignor>
-
2486 void
- -
2488 ResizeRuntimeRealComp (int new_size, bool communicate)
-
2489 {
-
2490  int old_size = m_num_runtime_real;
+
2456  mf_pointer->SumBoundary(Geom(lev).periodicity());
+
2457 
+
2458  // If ncomp > 1, first divide the momenta (component n)
+
2459  // by the mass (component 0) in order to get velocities.
+
2460  // Be careful not to divide by zero.
+
2461  for (int n = 1; n < ncomp; n++)
+
2462  {
+
2463  for (MFIter mfi(*mf_pointer); mfi.isValid(); ++mfi)
+
2464  {
+
2465  (*mf_pointer)[mfi].protected_divide<RunOn::Device>((*mf_pointer)[mfi],0,n,1);
+
2466  }
+
2467  }
+
2468 
+
2469  // Only multiply the first component by (1/vol) because this converts mass
+
2470  // to density. If there are additional components (like velocity), we don't
+
2471  // want to divide those by volume.
+
2472  const Real* dx = Geom(lev).CellSize();
+
2473  const Real vol = AMREX_D_TERM(dx[0], *dx[1], *dx[2]);
+
2474 
+
2475  mf_pointer->mult(Real(1.0)/vol, 0, 1, mf_pointer->nGrow());
+
2476 
+
2477  // If mf_to_be_filled is not defined on the particle_box_array, then we need
+
2478  // to copy here from mf_pointer into mf_to_be_filled.
+
2479  if (mf_pointer != &mf_to_be_filled)
+
2480  {
+
2481  mf_to_be_filled.ParallelCopy(*mf_pointer,0,0,ncomp,0,0);
+
2482  delete mf_pointer;
+
2483  }
+
2484 
+
2485  if (m_verbose > 1)
+
2486  {
+
2487  auto stoptime = amrex::second() - strttime;
+
2488 
+ +
2491 
-
2492  m_runtime_comps_defined = (new_size > 0);
-
2493  m_num_runtime_real = new_size;
-
2494  int cur_size = h_redistribute_real_comp.size();
-
2495  h_redistribute_real_comp.resize(cur_size-old_size+new_size, communicate);
-
2496  SetParticleSize();
-
2497 
-
2498  for (int lev = 0; lev < numLevels(); ++lev) {
-
2499  for (ParIterType pti(*this,lev); pti.isValid(); ++pti) {
-
2500  auto& tile = DefineAndReturnParticleTile(lev, pti);
-
2501  auto np = tile.numParticles();
-
2502  if (np > 0 && new_size > old_size) {
-
2503  auto& soa = tile.GetStructOfArrays();
-
2504  soa.resize(np);
-
2505  }
-
2506  }
-
2507  }
-
2508 }
-
2509 
-
2510 template <typename ParticleType, int NArrayReal, int NArrayInt,
-
2511  template<class> class Allocator, class CellAssignor>
-
2512 void
- -
2514 ResizeRuntimeIntComp (int new_size, bool communicate)
-
2515 {
-
2516  int old_size = m_num_runtime_int;
-
2517 
-
2518  m_runtime_comps_defined = (new_size > 0);
-
2519  m_num_runtime_int = new_size;
-
2520  int cur_size = h_redistribute_int_comp.size();
-
2521  h_redistribute_int_comp.resize(cur_size-old_size+new_size, communicate);
-
2522  SetParticleSize();
-
2523 
-
2524  for (int lev = 0; lev < numLevels(); ++lev) {
-
2525  for (ParIterType pti(*this,lev); pti.isValid(); ++pti) {
-
2526  auto& tile = DefineAndReturnParticleTile(lev, pti);
-
2527  auto np = tile.numParticles();
-
2528  if (np > 0 && new_size > old_size) {
-
2529  auto& soa = tile.GetStructOfArrays();
-
2530  soa.resize(np);
-
2531  }
-
2532  }
-
2533  }
-
2534 }
+
2492  amrex::Print() << "ParticleContainer::AssignCellDensitySingleLevel) time: "
+
2493  << stoptime << '\n';
+
2494  }
+
2495 }
+
2496 
+
2497 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2498  template<class> class Allocator, class CellAssignor>
+
2499 void
+ +
2501 ResizeRuntimeRealComp (int new_size, bool communicate)
+
2502 {
+
2503  int old_size = m_num_runtime_real;
+
2504 
+
2505  m_runtime_comps_defined = (new_size > 0);
+
2506  m_num_runtime_real = new_size;
+
2507  int cur_size = h_redistribute_real_comp.size();
+
2508  h_redistribute_real_comp.resize(cur_size-old_size+new_size, communicate);
+
2509  SetParticleSize();
+
2510 
+
2511  for (int lev = 0; lev < numLevels(); ++lev) {
+
2512  for (ParIterType pti(*this,lev); pti.isValid(); ++pti) {
+
2513  auto& tile = DefineAndReturnParticleTile(lev, pti);
+
2514  auto np = tile.numParticles();
+
2515  if (np > 0 && new_size > old_size) {
+
2516  auto& soa = tile.GetStructOfArrays();
+
2517  soa.resize(np);
+
2518  }
+
2519  }
+
2520  }
+
2521 }
+
2522 
+
2523 template <typename ParticleType, int NArrayReal, int NArrayInt,
+
2524  template<class> class Allocator, class CellAssignor>
+
2525 void
+ +
2527 ResizeRuntimeIntComp (int new_size, bool communicate)
+
2528 {
+
2529  int old_size = m_num_runtime_int;
+
2530 
+
2531  m_runtime_comps_defined = (new_size > 0);
+
2532  m_num_runtime_int = new_size;
+
2533  int cur_size = h_redistribute_int_comp.size();
+
2534  h_redistribute_int_comp.resize(cur_size-old_size+new_size, communicate);
+
2535  SetParticleSize();
+
2536 
+
2537  for (int lev = 0; lev < numLevels(); ++lev) {
+
2538  for (ParIterType pti(*this,lev); pti.isValid(); ++pti) {
+
2539  auto& tile = DefineAndReturnParticleTile(lev, pti);
+
2540  auto np = tile.numParticles();
+
2541  if (np > 0 && new_size > old_size) {
+
2542  auto& soa = tile.GetStructOfArrays();
+
2543  soa.resize(np);
+
2544  }
+
2545  }
+
2546  }
+
2547 }
#define BL_PROFILE_VAR_START(vname)
Definition: AMReX_BLProfiler.H:562
#define BL_PROFILE(a)
Definition: AMReX_BLProfiler.H:551
#define BL_PROFILE_VAR_STOP(vname)
Definition: AMReX_BLProfiler.H:563
@@ -2803,6 +2816,13 @@
Definition: AMReX_ParticleUtil.H:341
Definition: AMReX_Array.H:33
uint64_t m_idcpu
Definition: AMReX_Particle.H:252
+
Definition: AMReX_ParticleCommunication.H:58
+
void setNumLevels(int num_levels)
Definition: AMReX_ParticleCommunication.cpp:14
+
Vector< std::map< int, Gpu::DeviceVector< IntVect > > > m_periodic_shift
Definition: AMReX_ParticleCommunication.H:62
+
Vector< std::map< int, Gpu::DeviceVector< int > > > m_boxes
Definition: AMReX_ParticleCommunication.H:59
+
Vector< std::map< int, Gpu::DeviceVector< int > > > m_levels
Definition: AMReX_ParticleCommunication.H:60
+
void resize(int gid, int lev, int size)
Definition: AMReX_ParticleCommunication.cpp:22
+
Vector< std::map< int, Gpu::DeviceVector< int > > > m_src_indices
Definition: AMReX_ParticleCommunication.H:61
A struct used for storing a particle's position in the AMR hierarchy.
Definition: AMReX_ParticleContainer.H:90
Box m_grown_gridbox
Definition: AMReX_ParticleContainer.H:97
IntVect m_cell
Definition: AMReX_ParticleContainer.H:94
@@ -2824,6 +2844,7 @@
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealType & rdata(int index) &
Definition: AMReX_Particle.H:356
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int & idata(int index) &
Definition: AMReX_Particle.H:427
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE ParticleCPUWrapper cpu() &
Definition: AMReX_Particle.H:312
+
Definition: AMReX_ParticleCommunication.H:34
Definition: AMReX_MakeParticle.H:16
diff --git a/amrex/docs_html/doxygen/AMReX__ParticleContainer_8H_source.html b/amrex/docs_html/doxygen/AMReX__ParticleContainer_8H_source.html index db34e9cebb..1e3eab6a2d 100644 --- a/amrex/docs_html/doxygen/AMReX__ParticleContainer_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__ParticleContainer_8H_source.html @@ -898,7 +898,7 @@
void WriteParticles(int level, std::ofstream &ofs, int fnum, Vector< int > &which, Vector< int > &count, Vector< Long > &where, const Vector< int > &write_real_comp, const Vector< int > &write_int_comp, const Vector< std::map< std::pair< int, int >, IntVector >> &particle_io_flags, bool is_checkpoint) const
Definition: AMReX_ParticleIO.H:580
ParticleContainer_impl(const Geometry &geom, const DistributionMapping &dmap, const BoxArray &ba)
Construct a particle container using a given Geometry, DistributionMapping, and BoxArray....
Definition: AMReX_ParticleContainer.H:232
Long NumberOfParticlesAtLevel(int level, bool only_valid=true, bool only_local=false) const
Returns # of particles at specified the level.
Definition: AMReX_ParticleContainerI.H:421
-
void AssignCellDensitySingleLevel(int rho_index, MultiFab &mf, int level, int ncomp=1, int particle_lvl_offset=0) const
Definition: AMReX_ParticleContainerI.H:2348
+
void AssignCellDensitySingleLevel(int rho_index, MultiFab &mf, int level, int ncomp=1, int particle_lvl_offset=0) const
Definition: AMReX_ParticleContainerI.H:2361
void Increment(MultiFab &mf, int level)
Definition: AMReX_ParticleContainerI.H:571
void SetLevelDirectoriesCreated(bool tf)
Definition: AMReX_ParticleContainer.H:1220
void ReorderParticles(int lev, const MFIter &mfi, const index_type *permutations)
Reorder particles on the tile given by lev and mfi using a the permutations array.
@@ -919,17 +919,17 @@
typename ParticleTileType::SoA SoA
Definition: AMReX_ParticleContainer.H:187
virtual void correctCellVectors(int, int, int, const ParticleType &)
Definition: AMReX_ParticleContainer.H:1419
void WritePlotFile(const std::string &dir, const std::string &name, const Vector< std::string > &real_comp_names, const Vector< std::string > &int_comp_names, F &&f) const
This version of WritePlotFile writes all components and allows the user to specify the names of the c...
-
bool OK(int lev_min=0, int lev_max=-1, int nGrow=0) const
OK checks that all particles are in the right places (for some value of right)
Definition: AMReX_ParticleContainerI.H:2287
+
bool OK(int lev_min=0, int lev_max=-1, int nGrow=0) const
OK checks that all particles are in the right places (for some value of right)
Definition: AMReX_ParticleContainerI.H:2300
void locateParticle(P &p, ParticleLocData &pld, int lev_min, int lev_max, int nGrow, int local_grid=-1) const
int NumRuntimeIntComps() const
Definition: AMReX_ParticleContainer.H:1297
Long GetNParticlesPrePost() const
Definition: AMReX_ParticleContainer.H:1232
void InitRandom(Long icount, ULong iseed, const ParticleInitData &pdata, bool serialize=false, RealBox bx=RealBox())
This initializes the particle container with icount randomly distributed particles....
Definition: AMReX_ParticleInit.H:968
-
void ResizeRuntimeRealComp(int new_size, bool communicate)
Definition: AMReX_ParticleContainerI.H:2488
+
void ResizeRuntimeRealComp(int new_size, bool communicate)
Definition: AMReX_ParticleContainerI.H:2501
T_CellAssignor CellAssignor
Definition: AMReX_ParticleContainer.H:148
void RemoveParticlesNotAtFinestLevel()
Definition: AMReX_ParticleContainerI.H:622
DenseBins< typename ParticleTileType::ParticleTileDataType > m_bins
Definition: AMReX_ParticleContainer.H:1413
void ReadParticleRealData(void *data, size_t size, std::istream &is)
Read a contiguous chunk of real particle data from an istream.
Definition: AMReX_ParticleIO.H:25
-
void RedistributeCPU(int lev_min=0, int lev_max=-1, int nGrow=0, int local=0, bool remove_negative=true)
Definition: AMReX_ParticleContainerI.H:1426
+
void RedistributeCPU(int lev_min=0, int lev_max=-1, int nGrow=0, int local=0, bool remove_negative=true)
Definition: AMReX_ParticleContainerI.H:1439
int m_num_runtime_int
Definition: AMReX_ParticleContainer.H:1433
ParticleContainer_impl(ParGDBBase *gdb)
Construct a particle container using a ParGDB object. The container will track changes in the grid st...
Definition: AMReX_ParticleContainer.H:214
void Restart(const std::string &dir, const std::string &file)
Restart from checkpoint.
Definition: AMReX_ParticleIO.H:644
@@ -948,7 +948,7 @@
void CheckpointPre()
Definition: AMReX_ParticleIO.H:447
ParticleLocData Reset(ParticleType &prt, bool update, bool verbose=true, ParticleLocData pld=ParticleLocData()) const
Updates a particle's location (Where), tries to periodic shift any particles that have left the domai...
Definition: AMReX_ParticleContainerI.H:262
Vector< std::string > filePrefixPrePost
Definition: AMReX_ParticleContainer.H:1360
-
void RedistributeGPU(int lev_min=0, int lev_max=-1, int nGrow=0, int local=0, bool remove_negative=true)
Definition: AMReX_ParticleContainerI.H:1236
+
void RedistributeGPU(int lev_min=0, int lev_max=-1, int nGrow=0, int local=0, bool remove_negative=true)
Definition: AMReX_ParticleContainerI.H:1249
int numLocalTilesAtLevel(int lev) const
The total number of tiles on this rank on this level.
Definition: AMReX_ParticleContainer.H:368
void reserveData() override
This reserves data in the vector of dummy MultiFabs used by the ParticleContainer for the maximum num...
Definition: AMReX_ParticleContainerI.H:296
void WritePlotFile(const std::string &dir, const std::string &name, const Vector< int > &write_real_comp, const Vector< int > &write_int_comp, const Vector< std::string > &real_comp_names, const Vector< std::string > &int_comp_names, F &&f) const
This is the most general version of WritePlotFile, which takes component names and flags for whether ...
@@ -959,7 +959,7 @@
Vector< Long > NumberOfParticlesInGrid(int level, bool only_valid=true, bool only_local=false) const
Definition: AMReX_ParticleContainerI.H:366
int NumIntComps() const
Definition: AMReX_ParticleContainer.H:1300
Allocator< T > AllocatorType
The memory allocator in use.
Definition: AMReX_ParticleContainer.H:167
-
void RedistributeMPI(std::map< int, Vector< char > > &not_ours, int lev_min=0, int lev_max=0, int nGrow=0, int local=0)
Definition: AMReX_ParticleContainerI.H:1940
+
void RedistributeMPI(std::map< int, Vector< char > > &not_ours, int lev_min=0, int lev_max=0, int nGrow=0, int local=0)
Definition: AMReX_ParticleContainerI.H:1953
void AddIntComp(T communicate=true)
Definition: AMReX_ParticleContainer.H:1275
ParticleLevel & GetParticles(int lev)
Return the ParticleLevel for level "lev". Non-const version.
Definition: AMReX_ParticleContainer.H:1021
void CreateGhostParticles(int level, int ngrow, ParticleTileType &ghosts) const
Create ghost particles for a given level that are copies of particles near coarse->fine boundaries in...
@@ -976,14 +976,14 @@
typename SoA::IntVector IntVector
Definition: AMReX_ParticleContainer.H:190
int num_int_comm_comps
Definition: AMReX_ParticleContainer.H:1436
size_t particle_size
Definition: AMReX_ParticleContainer.H:1435
-
void SortParticlesByBin(IntVect bin_size)
Sort the particles on each tile by groups of cells, given an IntVect bin_size.
Definition: AMReX_ParticleContainerI.H:1173
+
void SortParticlesByBin(IntVect bin_size)
Sort the particles on each tile by groups of cells, given an IntVect bin_size.
Definition: AMReX_ParticleContainerI.H:1186
virtual void particlePostLocate(ParticleType &, const ParticleLocData &, const int)
Definition: AMReX_ParticleContainer.H:1416
void CreateGhostParticles(int level, int ngrow, AoS &ghosts) const
Create ghost particles for a given level that are copies of particles near coarse->fine boundaries in...
void Define(const Vector< Geometry > &geom, const Vector< DistributionMapping > &dmap, const Vector< BoxArray > &ba, const Vector< int > &rr)
Define a default-constructed ParticleContainer using a ParGDB object. Multi-level version.
Definition: AMReX_ParticleContainer.H:338
void AddParticlesAtLevel(AoS &particles, int level, int nGrow=0)
Add particles from a pbox to the grid at this level.
-
void SortParticlesByCell()
Sort the particles on each tile by cell, using Fortran ordering.
Definition: AMReX_ParticleContainerI.H:1164
+
void SortParticlesByCell()
Sort the particles on each tile by cell, using Fortran ordering.
Definition: AMReX_ParticleContainerI.H:1177
ParticleTileType & ParticlesAt(int lev, int grid, int tile)
Return the ParticleTile for level "lev", grid "grid" and tile "tile." Non-const version.
Definition: AMReX_ParticleContainer.H:1070
-
void ResizeRuntimeIntComp(int new_size, bool communicate)
Definition: AMReX_ParticleContainerI.H:2514
+
void ResizeRuntimeIntComp(int new_size, bool communicate)
Definition: AMReX_ParticleContainerI.H:2527
void InitFromBinaryMetaFile(const std::string &file, int extradata)
Definition: AMReX_ParticleInit.H:929
void resizeData() override
This resizes the vector of dummy MultiFabs used by the ParticleContainer for the current number of le...
Definition: AMReX_ParticleContainerI.H:305
int num_real_comm_comps
Definition: AMReX_ParticleContainer.H:1436
@@ -994,7 +994,7 @@
const Vector< ParticleLevel > & GetParticles() const
Return the underlying Vector (over AMR levels) of ParticleLevels. Const version.
Definition: AMReX_ParticleContainer.H:985
void InitOnePerCell(Real x_off, Real y_off, Real z_off, const ParticleInitData &pdata)
This initializes the particle container with one particle per cell, where the other particle data and...
Definition: AMReX_ParticleInit.H:1463
const ParticleTileType & ParticlesAt(int lev, int grid, int tile) const
Return the ParticleTile for level "lev", grid "grid" and tile "tile." Const version.
Definition: AMReX_ParticleContainer.H:1045
-
void SortParticlesForDeposition(IntVect idx_type)
Sort particles on each tile such that particles adjacent in memory are likely to map to adjacent cell...
Definition: AMReX_ParticleContainerI.H:1206
+
void SortParticlesForDeposition(IntVect idx_type)
Sort particles on each tile such that particles adjacent in memory are likely to map to adjacent cell...
Definition: AMReX_ParticleContainerI.H:1219
~ParticleContainer_impl() override=default
void CheckpointPost()
Definition: AMReX_ParticleIO.H:504
T_ParticleType ParticleType
Definition: AMReX_ParticleContainer.H:146
diff --git a/amrex/docs_xml/doxygen/AMReX__ParticleContainerI_8H.xml b/amrex/docs_xml/doxygen/AMReX__ParticleContainerI_8H.xml index 999a0bb1bc..d6015c7ef4 100644 --- a/amrex/docs_xml/doxygen/AMReX__ParticleContainerI_8H.xml +++ b/amrex/docs_xml/doxygen/AMReX__ParticleContainerI_8H.xml @@ -1200,1423 +1200,1436 @@ }); } Gpu::streamSynchronize(); -} - -{//Createascopeforthetemporaryvectorbelow -RealVectortmp_real(np_total); -for(intcomp=0;comp<NArrayReal+m_num_runtime_real;++comp){ -autosrc=ptile.GetStructOfArrays().GetRealData(comp).data(); -ParticleReal*dst=tmp_real.data(); -AMREX_HOST_DEVICE_FOR_1D(np_total,i, -{ -dst[i]=i<np?src[permutations[i]]:src[i]; -}); +}else{ +typenameSoA::IdCPUtmp_idcpu(np_total); + +autosrc=ptile.GetStructOfArrays().GetIdCPUData().data(); +uint64_t*dst=tmp_idcpu.data(); +AMREX_HOST_DEVICE_FOR_1D(np_total,i, +{ +dst[i]=i<np?src[permutations[i]]:src[i]; +}); + +Gpu::streamSynchronize(); -Gpu::streamSynchronize(); - -ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); -} -} - -IntVectortmp_int(np_total); -for(intcomp=0;comp<NArrayInt+m_num_runtime_int;++comp){ -autosrc=ptile.GetStructOfArrays().GetIntData(comp).data(); -int*dst=tmp_int.data(); -AMREX_HOST_DEVICE_FOR_1D(np_total,i, -{ -dst[i]=i<np?src[permutations[i]]:src[i]; -}); +ptile.GetStructOfArrays().GetIdCPUData().swap(tmp_idcpu); +} + +{//Createascopeforthetemporaryvectorbelow +RealVectortmp_real(np_total); +for(intcomp=0;comp<NArrayReal+m_num_runtime_real;++comp){ +autosrc=ptile.GetStructOfArrays().GetRealData(comp).data(); +ParticleReal*dst=tmp_real.data(); +AMREX_HOST_DEVICE_FOR_1D(np_total,i, +{ +dst[i]=i<np?src[permutations[i]]:src[i]; +}); + +Gpu::streamSynchronize(); -Gpu::streamSynchronize(); - -ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int); -} -}else{ -ParticleTileTypeptile_tmp; -ptile_tmp.define(m_num_runtime_real,m_num_runtime_int); -ptile_tmp.resize(np_total); -//copyre-orderedparticles -gatherParticles(ptile_tmp,ptile,np,permutations); -//copyneighborparticles -amrex::copyParticles(ptile_tmp,ptile,np,np,np_total-np); -ptile.swap(ptile_tmp); -} -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::SortParticlesByCell() -{ -SortParticlesByBin(IntVect(AMREX_D_DECL(1,1,1))); -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> -::SortParticlesByBin(IntVectbin_size) -{ -BL_PROFILE("ParticleContainer::SortParticlesByBin()"); - -if(bin_size==IntVect::TheZeroVector()){return;} - -for(intlev=0;lev<numLevels();++lev) -{ -constGeometry&geom=Geom(lev); -constautodxi=geom.InvCellSizeArray(); -constautoplo=geom.ProbLoArray(); -constautodomain=geom.Domain(); - -for(MFItermfi=MakeMFIter(lev);mfi.isValid();++mfi) -{ -auto&ptile=ParticlesAt(lev,mfi); -constsize_tnp=ptile.numParticles(); - -constBox&box=mfi.validbox(); - -intntiles=numTilesInBox(box,true,bin_size); - -m_bins.build(np,ptile.getParticleTileData(),ntiles, -GetParticleBin{plo,dxi,domain,bin_size,box}); -ReorderParticles(lev,mfi,m_bins.permutationPtr()); -} -} -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType,NArrayReal,NArrayInt,Allocator,CellAssignor> -::SortParticlesForDeposition(IntVectidx_type) -{ -BL_PROFILE("ParticleContainer::SortParticlesForDeposition()"); - -for(intlev=0;lev<numLevels();++lev) -{ -constGeometry&geom=Geom(lev); - -for(MFItermfi=MakeMFIter(lev);mfi.isValid();++mfi) -{ -constauto&ptile=ParticlesAt(lev,mfi); -constsize_tnp=ptile.numParticles(); - -constBox&box=mfi.validbox(); - -usingindex_type=typenamedecltype(m_bins)::index_type; -Gpu::DeviceVector<index_type>perm; -PermutationForDeposition<index_type>(perm,np,ptile,box,geom,idx_type); -ReorderParticles(lev,mfi,perm.dataPtr()); -} -} -} - -// -//TheGPUimplementationofRedistribute -// -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType,NArrayReal,NArrayInt,Allocator,CellAssignor> -::RedistributeGPU(intlev_min,intlev_max,intnGrow,intlocal,boolremove_negative) -{ -#ifdefAMREX_USE_GPU - -if(local){AMREX_ASSERT(numParticlesOutOfRange(*this,lev_min,lev_max,local)==0);} +ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); +} +} + +IntVectortmp_int(np_total); +for(intcomp=0;comp<NArrayInt+m_num_runtime_int;++comp){ +autosrc=ptile.GetStructOfArrays().GetIntData(comp).data(); +int*dst=tmp_int.data(); +AMREX_HOST_DEVICE_FOR_1D(np_total,i, +{ +dst[i]=i<np?src[permutations[i]]:src[i]; +}); + +Gpu::streamSynchronize(); + +ptile.GetStructOfArrays().GetIntData(comp).swap(tmp_int); +} +}else{ +ParticleTileTypeptile_tmp; +ptile_tmp.define(m_num_runtime_real,m_num_runtime_int); +ptile_tmp.resize(np_total); +//copyre-orderedparticles +gatherParticles(ptile_tmp,ptile,np,permutations); +//copyneighborparticles +amrex::copyParticles(ptile_tmp,ptile,np,np,np_total-np); +ptile.swap(ptile_tmp); +} +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::SortParticlesByCell() +{ +SortParticlesByBin(IntVect(AMREX_D_DECL(1,1,1))); +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> +::SortParticlesByBin(IntVectbin_size) +{ +BL_PROFILE("ParticleContainer::SortParticlesByBin()"); + +if(bin_size==IntVect::TheZeroVector()){return;} + +for(intlev=0;lev<numLevels();++lev) +{ +constGeometry&geom=Geom(lev); +constautodxi=geom.InvCellSizeArray(); +constautoplo=geom.ProbLoArray(); +constautodomain=geom.Domain(); + +for(MFItermfi=MakeMFIter(lev);mfi.isValid();++mfi) +{ +auto&ptile=ParticlesAt(lev,mfi); +constsize_tnp=ptile.numParticles(); + +constBox&box=mfi.validbox(); + +intntiles=numTilesInBox(box,true,bin_size); + +m_bins.build(np,ptile.getParticleTileData(),ntiles, +GetParticleBin{plo,dxi,domain,bin_size,box}); +ReorderParticles(lev,mfi,m_bins.permutationPtr()); +} +} +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType,NArrayReal,NArrayInt,Allocator,CellAssignor> +::SortParticlesForDeposition(IntVectidx_type) +{ +BL_PROFILE("ParticleContainer::SortParticlesForDeposition()"); + +for(intlev=0;lev<numLevels();++lev) +{ +constGeometry&geom=Geom(lev); + +for(MFItermfi=MakeMFIter(lev);mfi.isValid();++mfi) +{ +constauto&ptile=ParticlesAt(lev,mfi); +constsize_tnp=ptile.numParticles(); + +constBox&box=mfi.validbox(); + +usingindex_type=typenamedecltype(m_bins)::index_type; +Gpu::DeviceVector<index_type>perm; +PermutationForDeposition<index_type>(perm,np,ptile,box,geom,idx_type); +ReorderParticles(lev,mfi,perm.dataPtr()); +} +} +} -//sanitycheck -AMREX_ALWAYS_ASSERT(do_tiling==false); - -BL_PROFILE("ParticleContainer::RedistributeGPU()"); -BL_PROFILE_VAR_NS("Redistribute_partition",blp_partition); - -inttheEffectiveFinestLevel=m_gdb->finestLevel(); -while(!m_gdb->LevelDefined(theEffectiveFinestLevel)){theEffectiveFinestLevel--;} - -if(int(m_particles.size())<theEffectiveFinestLevel+1){ -if(Verbose()){ -amrex::Print()<<"ParticleContainer::Redistribute()resizingcontainersfrom" -<<m_particles.size()<<"to" -<<theEffectiveFinestLevel+1<<'\n'; -} -m_particles.resize(theEffectiveFinestLevel+1); -m_dummy_mf.resize(theEffectiveFinestLevel+1); -} +// +//TheGPUimplementationofRedistribute +// +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> +::RedistributeGPU(intlev_min,intlev_max,intnGrow,intlocal,boolremove_negative) +{ +#ifdefAMREX_USE_GPU + +if(local){AMREX_ASSERT(numParticlesOutOfRange(*this,lev_min,lev_max,local)==0);} + +//sanitycheck +AMREX_ALWAYS_ASSERT(do_tiling==false); + +BL_PROFILE("ParticleContainer::RedistributeGPU()"); +BL_PROFILE_VAR_NS("Redistribute_partition",blp_partition); -for(intlev=0;lev<theEffectiveFinestLevel+1;++lev){RedefineDummyMF(lev);} - -intfinest_lev_particles; -if(lev_max==-1){ -lev_max=theEffectiveFinestLevel; -finest_lev_particles=m_particles.size()-1; -}else{ -finest_lev_particles=lev_max; -} -AMREX_ASSERT(lev_max<=finestLevel()); - -this->defineBufferMap(); +inttheEffectiveFinestLevel=m_gdb->finestLevel(); +while(!m_gdb->LevelDefined(theEffectiveFinestLevel)){theEffectiveFinestLevel--;} + +if(int(m_particles.size())<theEffectiveFinestLevel+1){ +if(Verbose()){ +amrex::Print()<<"ParticleContainer::Redistribute()resizingcontainersfrom" +<<m_particles.size()<<"to" +<<theEffectiveFinestLevel+1<<'\n'; +} +m_particles.resize(theEffectiveFinestLevel+1); +m_dummy_mf.resize(theEffectiveFinestLevel+1); +} -if(!m_particle_locator.isValid(GetParGDB())){m_particle_locator.build(GetParGDB());} -m_particle_locator.setGeometry(GetParGDB()); -autoassign_grid=m_particle_locator.getGridAssignor(); - -BL_PROFILE_VAR_START(blp_partition); -ParticleCopyOpop; -intnum_levels=finest_lev_particles+1; -op.setNumLevels(num_levels); -Vector<std::map<int,int>>new_sizes(num_levels); -constautoplo=Geom(0).ProbLoArray(); -constautophi=Geom(0).ProbHiArray(); -constautorlo=Geom(0).ProbLoArrayInParticleReal(); -constautorhi=Geom(0).ProbHiArrayInParticleReal(); -constautois_per=Geom(0).isPeriodicArray(); -for(intlev=lev_min;lev<=finest_lev_particles;++lev) -{ -auto&plev=m_particles[lev]; -for(auto&kv:plev) -{ -intgid=kv.first.first; -inttid=kv.first.second; -autoindex=std::make_pair(gid,tid); - -auto&src_tile=plev[index]; -constsize_tnp=src_tile.numParticles(); - -intnum_stay=partitionParticlesByDest(src_tile,assign_grid, -std::forward<CellAssignor>(CellAssignor{}), -BufferMap(), -plo,phi,rlo,rhi,is_per,lev,gid,tid, -lev_min,lev_max,nGrow,remove_negative); - -intnum_move=np-num_stay; -new_sizes[lev][gid]=num_stay; -op.resize(gid,lev,num_move); +for(intlev=0;lev<theEffectiveFinestLevel+1;++lev){RedefineDummyMF(lev);} + +intfinest_lev_particles; +if(lev_max==-1){ +lev_max=theEffectiveFinestLevel; +finest_lev_particles=m_particles.size()-1; +}else{ +finest_lev_particles=lev_max; +} +AMREX_ASSERT(lev_max<=finestLevel()); + +this->defineBufferMap(); + +if(!m_particle_locator.isValid(GetParGDB())){m_particle_locator.build(GetParGDB());} +m_particle_locator.setGeometry(GetParGDB()); +autoassign_grid=m_particle_locator.getGridAssignor(); + +BL_PROFILE_VAR_START(blp_partition); +ParticleCopyOpop; +intnum_levels=finest_lev_particles+1; +op.setNumLevels(num_levels); +Vector<std::map<int, int>>new_sizes(num_levels); +constautoplo=Geom(0).ProbLoArray(); +constautophi=Geom(0).ProbHiArray(); +constautorlo=Geom(0).ProbLoArrayInParticleReal(); +constautorhi=Geom(0).ProbHiArrayInParticleReal(); +constautois_per=Geom(0).isPeriodicArray(); +for(intlev=lev_min;lev<=finest_lev_particles;++lev) +{ +auto&plev=m_particles[lev]; +for(auto&kv:plev) +{ +intgid=kv.first.first; +inttid=kv.first.second; +autoindex=std::make_pair(gid,tid); -autop_boxes=op.m_boxes[lev][gid].dataPtr(); -autop_levs=op.m_levels[lev][gid].dataPtr(); -autop_src_indices=op.m_src_indices[lev][gid].dataPtr(); -autop_periodic_shift=op.m_periodic_shift[lev][gid].dataPtr(); -autoptd=src_tile.getParticleTileData(); - -AMREX_FOR_1D(num_move,i, -{ -constautop=make_particle<ParticleType>{}(ptd,i+num_stay); - -if(p.id()<0) -{ -p_boxes[i]=-1; -p_levs[i]=-1; -} -else -{ -constautotup=assign_grid(p,lev_min,lev_max,nGrow, -std::forward<CellAssignor>(CellAssignor{})); -p_boxes[i]=amrex::get<0>(tup); -p_levs[i]=amrex::get<1>(tup); -} -p_periodic_shift[i]=IntVect(AMREX_D_DECL(0,0,0)); -p_src_indices[i]=i+num_stay; -}); -} -} -BL_PROFILE_VAR_STOP(blp_partition); - -ParticleCopyPlanplan; - -plan.build(*this,op,h_redistribute_int_comp, -h_redistribute_real_comp,local); - -amrex::PODVector<char, PolymorphicArenaAllocator<char>>snd_buffer; -Gpu::DeviceVector<char>rcv_buffer; - -packBuffer(*this,op,plan,snd_buffer); - -//clearparticlesfromcontainer -for(intlev=lev_min;lev<=lev_max;++lev) -{ -auto&plev=m_particles[lev]; -for(auto&kv:plev) -{ -intgid=kv.first.first; -inttid=kv.first.second; -autoindex=std::make_pair(gid,tid); -auto&tile=plev[index]; -tile.resize(new_sizes[lev][gid]); -} -} - -for(intlev=lev_min;lev<=lev_max;lev++) +auto&src_tile=plev[index]; +constsize_tnp=src_tile.numParticles(); + +intnum_stay=partitionParticlesByDest(src_tile,assign_grid, +std::forward<CellAssignor>(CellAssignor{}), +BufferMap(), +plo,phi,rlo,rhi,is_per,lev,gid,tid, +lev_min,lev_max,nGrow,remove_negative); + +intnum_move=np-num_stay; +new_sizes[lev][gid]=num_stay; +op.resize(gid,lev,num_move); + +autop_boxes=op.m_boxes[lev][gid].dataPtr(); +autop_levs=op.m_levels[lev][gid].dataPtr(); +autop_src_indices=op.m_src_indices[lev][gid].dataPtr(); +autop_periodic_shift=op.m_periodic_shift[lev][gid].dataPtr(); +autoptd=src_tile.getParticleTileData(); + +AMREX_FOR_1D(num_move,i, +{ +constautop=make_particle<ParticleType>{}(ptd,i+num_stay); + +if(p.id()<0) +{ +p_boxes[i]=-1; +p_levs[i]=-1; +} +else +{ +constautotup=assign_grid(p,lev_min,lev_max,nGrow, +std::forward<CellAssignor>(CellAssignor{})); +p_boxes[i]=amrex::get<0>(tup); +p_levs[i]=amrex::get<1>(tup); +} +p_periodic_shift[i]=IntVect(AMREX_D_DECL(0,0,0)); +p_src_indices[i]=i+num_stay; +}); +} +} +BL_PROFILE_VAR_STOP(blp_partition); + +ParticleCopyPlanplan; + +plan.build(*this,op,h_redistribute_int_comp, +h_redistribute_real_comp,local); + +amrex::PODVector<char, PolymorphicArenaAllocator<char>>snd_buffer; +Gpu::DeviceVector<char>rcv_buffer; + +packBuffer(*this,op,plan,snd_buffer); + +//clearparticlesfromcontainer +for(intlev=lev_min;lev<=lev_max;++lev) { -particle_detail::clearEmptyEntries(m_particles[lev]); -} - -if(int(m_particles.size())>theEffectiveFinestLevel+1){ -if(m_verbose>0){ -amrex::Print()<<"ParticleContainer::Redistribute()resizingm_particlesfrom" -<<m_particles.size()<<"to"<<theEffectiveFinestLevel+1<<'\n'; -} -AMREX_ASSERT(int(m_particles.size())>=2); - -m_particles.resize(theEffectiveFinestLevel+1); -m_dummy_mf.resize(theEffectiveFinestLevel+1); -} - -if(ParallelDescriptor::UseGpuAwareMpi()) -{ -plan.buildMPIFinish(BufferMap()); -communicateParticlesStart(*this,plan,snd_buffer,rcv_buffer); -unpackBuffer(*this,plan,snd_buffer,RedistributeUnpackPolicy()); -communicateParticlesFinish(plan); -unpackRemotes(*this,plan,rcv_buffer,RedistributeUnpackPolicy()); -} -else -{ -Gpu::Device::streamSynchronize(); -Gpu::PinnedVector<char>pinned_snd_buffer; -Gpu::PinnedVector<char>pinned_rcv_buffer; - -if(snd_buffer.arena()->isPinned()){ -plan.buildMPIFinish(BufferMap()); -Gpu::Device::streamSynchronize(); -communicateParticlesStart(*this,plan,snd_buffer,pinned_rcv_buffer); -}else{ -pinned_snd_buffer.resize(snd_buffer.size()); -Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(),snd_buffer.dataPtr(),snd_buffer.size()); -plan.buildMPIFinish(BufferMap()); -Gpu::Device::streamSynchronize(); -communicateParticlesStart(*this,plan,pinned_snd_buffer,pinned_rcv_buffer); -} - -rcv_buffer.resize(pinned_rcv_buffer.size()); -unpackBuffer(*this,plan,snd_buffer,RedistributeUnpackPolicy()); -communicateParticlesFinish(plan); -Gpu::htod_memcpy_async(rcv_buffer.dataPtr(),pinned_rcv_buffer.dataPtr(),pinned_rcv_buffer.size()); -unpackRemotes(*this,plan,rcv_buffer,RedistributeUnpackPolicy()); -} - -Gpu::Device::streamSynchronize(); -AMREX_ASSERT(numParticlesOutOfRange(*this,lev_min,lev_max,nGrow)==0); -#else -amrex::ignore_unused(lev_min,lev_max,nGrow,local,remove_negative); -#endif -} - -// -//TheCPUimplementationofRedistribute -// -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> -::RedistributeCPU(intlev_min,intlev_max,intnGrow,intlocal,boolremove_negative) -{ -BL_PROFILE("ParticleContainer::RedistributeCPU()"); - -constintMyProc=ParallelContext::MyProcSub(); -autostrttime=amrex::second(); - -if(local>0){BuildRedistributeMask(0,local);} - -//OnstartuptherearecaseswhereRedistribute()couldbecalled -//withagivenfinestLevel()wherethatAmrLevelhasyettobedefined. -inttheEffectiveFinestLevel=m_gdb->finestLevel(); - -while(!m_gdb->LevelDefined(theEffectiveFinestLevel)){ -theEffectiveFinestLevel--; -} +auto&plev=m_particles[lev]; +for(auto&kv:plev) +{ +intgid=kv.first.first; +inttid=kv.first.second; +autoindex=std::make_pair(gid,tid); +auto&tile=plev[index]; +tile.resize(new_sizes[lev][gid]); +} +} + +for(intlev=lev_min;lev<=lev_max;lev++) +{ +particle_detail::clearEmptyEntries(m_particles[lev]); +} + +if(int(m_particles.size())>theEffectiveFinestLevel+1){ +if(m_verbose>0){ +amrex::Print()<<"ParticleContainer::Redistribute()resizingm_particlesfrom" +<<m_particles.size()<<"to"<<theEffectiveFinestLevel+1<<'\n'; +} +AMREX_ASSERT(int(m_particles.size())>=2); + +m_particles.resize(theEffectiveFinestLevel+1); +m_dummy_mf.resize(theEffectiveFinestLevel+1); +} + +if(ParallelDescriptor::UseGpuAwareMpi()) +{ +plan.buildMPIFinish(BufferMap()); +communicateParticlesStart(*this,plan,snd_buffer,rcv_buffer); +unpackBuffer(*this,plan,snd_buffer,RedistributeUnpackPolicy()); +communicateParticlesFinish(plan); +unpackRemotes(*this,plan,rcv_buffer,RedistributeUnpackPolicy()); +} +else +{ +Gpu::Device::streamSynchronize(); +Gpu::PinnedVector<char>pinned_snd_buffer; +Gpu::PinnedVector<char>pinned_rcv_buffer; + +if(snd_buffer.arena()->isPinned()){ +plan.buildMPIFinish(BufferMap()); +Gpu::Device::streamSynchronize(); +communicateParticlesStart(*this,plan,snd_buffer,pinned_rcv_buffer); +}else{ +pinned_snd_buffer.resize(snd_buffer.size()); +Gpu::dtoh_memcpy_async(pinned_snd_buffer.dataPtr(),snd_buffer.dataPtr(),snd_buffer.size()); +plan.buildMPIFinish(BufferMap()); +Gpu::Device::streamSynchronize(); +communicateParticlesStart(*this,plan,pinned_snd_buffer,pinned_rcv_buffer); +} + +rcv_buffer.resize(pinned_rcv_buffer.size()); +unpackBuffer(*this,plan,snd_buffer,RedistributeUnpackPolicy()); +communicateParticlesFinish(plan); +Gpu::htod_memcpy_async(rcv_buffer.dataPtr(),pinned_rcv_buffer.dataPtr(),pinned_rcv_buffer.size()); +unpackRemotes(*this,plan,rcv_buffer,RedistributeUnpackPolicy()); +} + +Gpu::Device::streamSynchronize(); +AMREX_ASSERT(numParticlesOutOfRange(*this,lev_min,lev_max,nGrow)==0); +#else +amrex::ignore_unused(lev_min,lev_max,nGrow,local,remove_negative); +#endif +} + +// +//TheCPUimplementationofRedistribute +// +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> +::RedistributeCPU(intlev_min,intlev_max,intnGrow,intlocal,boolremove_negative) +{ +BL_PROFILE("ParticleContainer::RedistributeCPU()"); -if(int(m_particles.size())<theEffectiveFinestLevel+1){ -if(Verbose()){ -amrex::Print()<<"ParticleContainer::Redistribute()resizingcontainersfrom" -<<m_particles.size()<<"to" -<<theEffectiveFinestLevel+1<<'\n'; -} -m_particles.resize(theEffectiveFinestLevel+1); -m_dummy_mf.resize(theEffectiveFinestLevel+1); -} - -//Itisimportanttodothisevenifwedon'thavemorelevelsbecausewemayhavechangedthe -//gridsatthislevelinaregrid. -for(intlev=0;lev<theEffectiveFinestLevel+1;++lev){ -RedefineDummyMF(lev); -} - -intfinest_lev_particles; -if(lev_max==-1){ -lev_max=theEffectiveFinestLevel; -finest_lev_particles=m_particles.size()-1; -}else{ -finest_lev_particles=lev_max; -} -AMREX_ASSERT(lev_max<=finestLevel()); - -//Thiswillholdthevalidparticlesthatgotoanotherprocess -std::map<int,Vector<char>>not_ours; - -intnum_threads=OpenMP::get_max_threads(); - -//thesearetemporarybuffersforeachthread -std::map<int,Vector<Vector<char>>>tmp_remote; -Vector<std::map<std::pair<int,int>,Vector<ParticleVector>>>tmp_local; -Vector<std::map<std::pair<int,int>,Vector<StructOfArrays<NArrayReal,NArrayInt,Allocator>>>>soa_local; -tmp_local.resize(theEffectiveFinestLevel+1); -soa_local.resize(theEffectiveFinestLevel+1); - -//weresizethesebuffersoutsidetheparallelregion -for(intlev=lev_min;lev<=lev_max;lev++){ -for(MFItermfi(*m_dummy_mf[lev],this->do_tiling?this->tile_size:IntVect::TheZeroVector()); -mfi.isValid();++mfi){ -autoindex=std::make_pair(mfi.index(),mfi.LocalTileIndex()); -tmp_local[lev][index].resize(num_threads); -soa_local[lev][index].resize(num_threads); -for(intt=0;t<num_threads;++t){ -soa_local[lev][index][t].define(m_num_runtime_real,m_num_runtime_int); -} -} -} -if(local){ -for(inti=0;i<neighbor_procs.size();++i){ -tmp_remote[neighbor_procs[i]].resize(num_threads); -} -}else{ -for(inti=0;i<ParallelContext::NProcsSub();++i){ -tmp_remote[i].resize(num_threads); -} -} - -//firstpass:foreachtileinparallel,ineachthreadcopiestheparticlesthat -//needtobemovedintoit'sown,temporarybuffer. -for(intlev=lev_min;lev<=finest_lev_particles;lev++){ -auto&pmap=m_particles[lev]; - -Vector<std::pair<int,int>>grid_tile_ids; -Vector<ParticleTileType*>ptile_ptrs; -for(auto&kv:pmap) -{ -grid_tile_ids.push_back(kv.first); -ptile_ptrs.push_back(&(kv.second)); -} +constintMyProc=ParallelContext::MyProcSub(); +autostrttime=amrex::second(); + +if(local>0){BuildRedistributeMask(0,local);} + +//OnstartuptherearecaseswhereRedistribute()couldbecalled +//withagivenfinestLevel()wherethatAmrLevelhasyettobedefined. +inttheEffectiveFinestLevel=m_gdb->finestLevel(); + +while(!m_gdb->LevelDefined(theEffectiveFinestLevel)){ +theEffectiveFinestLevel--; +} + +if(int(m_particles.size())<theEffectiveFinestLevel+1){ +if(Verbose()){ +amrex::Print()<<"ParticleContainer::Redistribute()resizingcontainersfrom" +<<m_particles.size()<<"to" +<<theEffectiveFinestLevel+1<<'\n'; +} +m_particles.resize(theEffectiveFinestLevel+1); +m_dummy_mf.resize(theEffectiveFinestLevel+1); +} + +//Itisimportanttodothisevenifwedon'thavemorelevelsbecausewemayhavechangedthe +//gridsatthislevelinaregrid. +for(intlev=0;lev<theEffectiveFinestLevel+1;++lev){ +RedefineDummyMF(lev); +} + +intfinest_lev_particles; +if(lev_max==-1){ +lev_max=theEffectiveFinestLevel; +finest_lev_particles=m_particles.size()-1; +}else{ +finest_lev_particles=lev_max; +} +AMREX_ASSERT(lev_max<=finestLevel()); + +//Thiswillholdthevalidparticlesthatgotoanotherprocess +std::map<int,Vector<char>>not_ours; + +intnum_threads=OpenMP::get_max_threads(); + +//thesearetemporarybuffersforeachthread +std::map<int,Vector<Vector<char>>>tmp_remote; +Vector<std::map<std::pair<int, int>,Vector<ParticleVector>>>tmp_local; +Vector<std::map<std::pair<int, int>,Vector<StructOfArrays<NArrayReal, NArrayInt, Allocator>>>>soa_local; +tmp_local.resize(theEffectiveFinestLevel+1); +soa_local.resize(theEffectiveFinestLevel+1); + +//weresizethesebuffersoutsidetheparallelregion +for(intlev=lev_min;lev<=lev_max;lev++){ +for(MFItermfi(*m_dummy_mf[lev],this->do_tiling?this->tile_size:IntVect::TheZeroVector()); +mfi.isValid();++mfi){ +autoindex=std::make_pair(mfi.index(),mfi.LocalTileIndex()); +tmp_local[lev][index].resize(num_threads); +soa_local[lev][index].resize(num_threads); +for(intt=0;t<num_threads;++t){ +soa_local[lev][index][t].define(m_num_runtime_real,m_num_runtime_int); +} +} +} +if(local){ +for(inti=0;i<neighbor_procs.size();++i){ +tmp_remote[neighbor_procs[i]].resize(num_threads); +} +}else{ +for(inti=0;i<ParallelContext::NProcsSub();++i){ +tmp_remote[i].resize(num_threads); +} +} -#ifdefAMREX_USE_OMP -#pragmaompparallelfor -#endif -for(intpmap_it=0;pmap_it<static_cast<int>(ptile_ptrs.size());++pmap_it) -{ -intthread_num=OpenMP::get_thread_num(); -intgrid=grid_tile_ids[pmap_it].first; -inttile=grid_tile_ids[pmap_it].second; -auto&soa=ptile_ptrs[pmap_it]->GetStructOfArrays(); -auto&aos=ptile_ptrs[pmap_it]->GetArrayOfStructs(); - -//AMREX_ASSERT_WITH_MESSAGE((NumRealComps()==0&&NumIntComps()==0) -//||aos.size()==soa.size(), -//"TheAoSandSoAdataonthistilearedifferentsizes-" -//"perhapsparticleshavenotbeeninitializedcorrectly?"); -unsignednpart=ptile_ptrs[pmap_it]->numParticles(); -ParticleLocDatapld; - -ifconstexpr(!ParticleType::is_soa_particle){ - -if(npart!=0){ -Longlast=npart-1; -Longpindex=0; -while(pindex<=last){ -ParticleType&p=aos[pindex]; - -if((remove_negative==false)&&(p.id()<0)){ -++pindex; -continue; -} +//firstpass:foreachtileinparallel,ineachthreadcopiestheparticlesthat +//needtobemovedintoit'sown,temporarybuffer. +for(intlev=lev_min;lev<=finest_lev_particles;lev++){ +auto&pmap=m_particles[lev]; + +Vector<std::pair<int, int>>grid_tile_ids; +Vector<ParticleTileType*>ptile_ptrs; +for(auto&kv:pmap) +{ +grid_tile_ids.push_back(kv.first); +ptile_ptrs.push_back(&(kv.second)); +} + +#ifdefAMREX_USE_OMP +#pragmaompparallelfor +#endif +for(intpmap_it=0;pmap_it<static_cast<int>(ptile_ptrs.size());++pmap_it) +{ +intthread_num=OpenMP::get_thread_num(); +intgrid=grid_tile_ids[pmap_it].first; +inttile=grid_tile_ids[pmap_it].second; +auto&soa=ptile_ptrs[pmap_it]->GetStructOfArrays(); +auto&aos=ptile_ptrs[pmap_it]->GetArrayOfStructs(); + +//AMREX_ASSERT_WITH_MESSAGE((NumRealComps()==0&&NumIntComps()==0) +//||aos.size()==soa.size(), +//"TheAoSandSoAdataonthistilearedifferentsizes-" +//"perhapsparticleshavenotbeeninitializedcorrectly?"); +unsignednpart=ptile_ptrs[pmap_it]->numParticles(); +ParticleLocDatapld; -if(p.id()<0) -{ -aos[pindex]=aos[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; -} -correctCellVectors(last,pindex,grid,aos[pindex]); ---last; -continue; -} - -locateParticle(p,pld,lev_min,lev_max,nGrow,local?grid:-1); - -particlePostLocate(p,pld,lev); - -if(p.id()<0) -{ -aos[pindex]=aos[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; -} -correctCellVectors(last,pindex,grid,aos[pindex]); ---last; -continue; -} - -constintwho=ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); -if(who==MyProc){ -if(pld.m_lev!=lev||pld.m_grid!=grid||pld.m_tile!=tile){ -//Weownitbutmustshiftittoanotherplace. -autoindex=std::make_pair(pld.m_grid,pld.m_tile); -AMREX_ASSERT(tmp_local[pld.m_lev][index].size()==num_threads); -tmp_local[pld.m_lev][index][thread_num].push_back(p); -for(intcomp=0;comp<NumRealComps();++comp){ -RealVector&arr=soa_local[pld.m_lev][index][thread_num].GetRealData(comp); -arr.push_back(soa.GetRealData(comp)[pindex]); -} -for(intcomp=0;comp<NumIntComps();++comp){ -IntVector&arr=soa_local[pld.m_lev][index][thread_num].GetIntData(comp); -arr.push_back(soa.GetIntData(comp)[pindex]); -} - -p.id()=-p.id();//Invalidatetheparticle -} -} -else{ -auto&particles_to_send=tmp_remote[who][thread_num]; -autoold_size=particles_to_send.size(); -autonew_size=old_size+superparticle_size; -particles_to_send.resize(new_size); -std::memcpy(&particles_to_send[old_size],&p,particle_size); -char*dst=&particles_to_send[old_size]+particle_size; -intarray_comp_start=AMREX_SPACEDIM+NStructReal; -for(intcomp=0;comp<NumRealComps();comp++){ -if(h_redistribute_real_comp[array_comp_start+comp]){ -std::memcpy(dst,&soa.GetRealData(comp)[pindex],sizeof(ParticleReal)); -dst+=sizeof(ParticleReal); -} -} -array_comp_start=2+NStructInt; -for(intcomp=0;comp<NumIntComps();comp++){ -if(h_redistribute_int_comp[array_comp_start+comp]){ -std::memcpy(dst,&soa.GetIntData(comp)[pindex],sizeof(int)); -dst+=sizeof(int); -} -} - -p.id()=-p.id();//Invalidatetheparticle -} - -if(p.id()<0) -{ -aos[pindex]=aos[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +ifconstexpr(!ParticleType::is_soa_particle){ + +if(npart!=0){ +Longlast=npart-1; +Longpindex=0; +while(pindex<=last){ +ParticleType&p=aos[pindex]; + +if((remove_negative==false)&&(p.id()<0)){ +++pindex; +continue; +} + +if(p.id()<0) +{ +aos[pindex]=aos[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,aos[pindex]); +--last; +continue; +} + +locateParticle(p,pld,lev_min,lev_max,nGrow,local?grid:-1); + +particlePostLocate(p,pld,lev); + +if(p.id()<0) +{ +aos[pindex]=aos[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,aos[pindex]); +--last; +continue; +} + +constintwho=ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); +if(who==MyProc){ +if(pld.m_lev!=lev||pld.m_grid!=grid||pld.m_tile!=tile){ +//Weownitbutmustshiftittoanotherplace. +autoindex=std::make_pair(pld.m_grid,pld.m_tile); +AMREX_ASSERT(tmp_local[pld.m_lev][index].size()==num_threads); +tmp_local[pld.m_lev][index][thread_num].push_back(p); +for(intcomp=0;comp<NumRealComps();++comp){ +RealVector&arr=soa_local[pld.m_lev][index][thread_num].GetRealData(comp); +arr.push_back(soa.GetRealData(comp)[pindex]); +} +for(intcomp=0;comp<NumIntComps();++comp){ +IntVector&arr=soa_local[pld.m_lev][index][thread_num].GetIntData(comp); +arr.push_back(soa.GetIntData(comp)[pindex]); +} + +p.id()=-p.id();//Invalidatetheparticle +} +} +else{ +auto&particles_to_send=tmp_remote[who][thread_num]; +autoold_size=particles_to_send.size(); +autonew_size=old_size+superparticle_size; +particles_to_send.resize(new_size); +std::memcpy(&particles_to_send[old_size],&p,particle_size); +char*dst=&particles_to_send[old_size]+particle_size; +intarray_comp_start=AMREX_SPACEDIM+NStructReal; +for(intcomp=0;comp<NumRealComps();comp++){ +if(h_redistribute_real_comp[array_comp_start+comp]){ +std::memcpy(dst,&soa.GetRealData(comp)[pindex],sizeof(ParticleReal)); +dst+=sizeof(ParticleReal); +} +} +array_comp_start=2+NStructInt; +for(intcomp=0;comp<NumIntComps();comp++){ +if(h_redistribute_int_comp[array_comp_start+comp]){ +std::memcpy(dst,&soa.GetIntData(comp)[pindex],sizeof(int)); +dst+=sizeof(int); +} } -correctCellVectors(last,pindex,grid,aos[pindex]); ---last; -continue; -} - -++pindex; -} - -aos().erase(aos().begin()+last+1,aos().begin()+npart); -for(intcomp=0;comp<NumRealComps();comp++){ -RealVector&rdata=soa.GetRealData(comp); -rdata.erase(rdata.begin()+last+1,rdata.begin()+npart); -} -for(intcomp=0;comp<NumIntComps();comp++){ -IntVector&idata=soa.GetIntData(comp); -idata.erase(idata.begin()+last+1,idata.begin()+npart); -} -} - -}else{//soaparticle + +p.id()=-p.id();//Invalidatetheparticle +} + +if(p.id()<0) +{ +aos[pindex]=aos[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,aos[pindex]); +--last; +continue; +} + +++pindex; +} -autoparticle_tile=ptile_ptrs[pmap_it]; -if(npart!=0){ -Longlast=npart-1; -Longpindex=0; -autoptd=particle_tile->getParticleTileData(); -while(pindex<=last){ -ParticleTypep(ptd,pindex); - -if((remove_negative==false)&&(p.id()<0)){ -++pindex; -continue; -} +aos().erase(aos().begin()+last+1,aos().begin()+npart); +for(intcomp=0;comp<NumRealComps();comp++){ +RealVector&rdata=soa.GetRealData(comp); +rdata.erase(rdata.begin()+last+1,rdata.begin()+npart); +} +for(intcomp=0;comp<NumIntComps();comp++){ +IntVector&idata=soa.GetIntData(comp); +idata.erase(idata.begin()+last+1,idata.begin()+npart); +} +} + +}else{//soaparticle -if(p.id()<0){ -soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; -} -correctCellVectors(last,pindex,grid,ptd[pindex]); ---last; +autoparticle_tile=ptile_ptrs[pmap_it]; +if(npart!=0){ +Longlast=npart-1; +Longpindex=0; +autoptd=particle_tile->getParticleTileData(); +while(pindex<=last){ +ParticleTypep(ptd,pindex); + +if((remove_negative==false)&&(p.id()<0)){ +++pindex; continue; } -locateParticle(p,pld,lev_min,lev_max,nGrow,local?grid:-1); - -particlePostLocate(p,pld,lev); - -if(p.id()<0){ -soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; -} -correctCellVectors(last,pindex,grid,ptd[pindex]); ---last; -continue; -} +if(p.id()<0){ +soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,ptd[pindex]); +--last; +continue; +} + +locateParticle(p,pld,lev_min,lev_max,nGrow,local?grid:-1); + +particlePostLocate(p,pld,lev); -constintwho=ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); -if(who==MyProc){ -if(pld.m_lev!=lev||pld.m_grid!=grid||pld.m_tile!=tile){ -//Weownitbutmustshiftittoanotherplace. -autoindex=std::make_pair(pld.m_grid,pld.m_tile); -AMREX_ASSERT(soa_local[pld.m_lev][index].size()==num_threads); -{ -auto&arr=soa_local[pld.m_lev][index][thread_num].GetIdCPUData(); -arr.push_back(soa.GetIdCPUData()[pindex]); -} -for(intcomp=0;comp<NumRealComps();++comp){ -RealVector&arr=soa_local[pld.m_lev][index][thread_num].GetRealData(comp); -arr.push_back(soa.GetRealData(comp)[pindex]); -} -for(intcomp=0;comp<NumIntComps();++comp){ -IntVector&arr=soa_local[pld.m_lev][index][thread_num].GetIntData(comp); -arr.push_back(soa.GetIntData(comp)[pindex]); -} - -p.id()=-p.id();//Invalidatetheparticle -} -} -else{ -auto&particles_to_send=tmp_remote[who][thread_num]; -autoold_size=particles_to_send.size(); -autonew_size=old_size+superparticle_size; -particles_to_send.resize(new_size); - -char*dst=&particles_to_send[old_size]; -{ -std::memcpy(dst,&soa.GetIdCPUData()[pindex],sizeof(uint64_t)); -dst+=sizeof(uint64_t); -} -intarray_comp_start=AMREX_SPACEDIM+NStructReal; -for(intcomp=0;comp<NumRealComps();comp++){ -if(h_redistribute_real_comp[array_comp_start+comp]){ -std::memcpy(dst,&soa.GetRealData(comp)[pindex],sizeof(ParticleReal)); -dst+=sizeof(ParticleReal); -} -} -array_comp_start=2+NStructInt; -for(intcomp=0;comp<NumIntComps();comp++){ -if(h_redistribute_int_comp[array_comp_start+comp]){ -std::memcpy(dst,&soa.GetIntData(comp)[pindex],sizeof(int)); -dst+=sizeof(int); -} -} -p.id()=-p.id();//Invalidatetheparticle -} - -if(p.id()<0){ -soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; -for(intcomp=0;comp<NumRealComps();comp++){ -soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; -} -for(intcomp=0;comp<NumIntComps();comp++){ -soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; -} -correctCellVectors(last,pindex,grid,ptd[pindex]); ---last; -continue; +if(p.id()<0){ +soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,ptd[pindex]); +--last; +continue; +} + +constintwho=ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); +if(who==MyProc){ +if(pld.m_lev!=lev||pld.m_grid!=grid||pld.m_tile!=tile){ +//Weownitbutmustshiftittoanotherplace. +autoindex=std::make_pair(pld.m_grid,pld.m_tile); +AMREX_ASSERT(soa_local[pld.m_lev][index].size()==num_threads); +{ +auto&arr=soa_local[pld.m_lev][index][thread_num].GetIdCPUData(); +arr.push_back(soa.GetIdCPUData()[pindex]); +} +for(intcomp=0;comp<NumRealComps();++comp){ +RealVector&arr=soa_local[pld.m_lev][index][thread_num].GetRealData(comp); +arr.push_back(soa.GetRealData(comp)[pindex]); +} +for(intcomp=0;comp<NumIntComps();++comp){ +IntVector&arr=soa_local[pld.m_lev][index][thread_num].GetIntData(comp); +arr.push_back(soa.GetIntData(comp)[pindex]); +} + +p.id()=-p.id();//Invalidatetheparticle +} +} +else{ +auto&particles_to_send=tmp_remote[who][thread_num]; +autoold_size=particles_to_send.size(); +autonew_size=old_size+superparticle_size; +particles_to_send.resize(new_size); + +char*dst=&particles_to_send[old_size]; +{ +std::memcpy(dst,&soa.GetIdCPUData()[pindex],sizeof(uint64_t)); +dst+=sizeof(uint64_t); +} +intarray_comp_start=AMREX_SPACEDIM+NStructReal; +for(intcomp=0;comp<NumRealComps();comp++){ +if(h_redistribute_real_comp[array_comp_start+comp]){ +std::memcpy(dst,&soa.GetRealData(comp)[pindex],sizeof(ParticleReal)); +dst+=sizeof(ParticleReal); +} +} +array_comp_start=2+NStructInt; +for(intcomp=0;comp<NumIntComps();comp++){ +if(h_redistribute_int_comp[array_comp_start+comp]){ +std::memcpy(dst,&soa.GetIntData(comp)[pindex],sizeof(int)); +dst+=sizeof(int); +} +} +p.id()=-p.id();//Invalidatetheparticle } -++pindex; -} - -{ -auto&iddata=soa.GetIdCPUData(); -iddata.erase(iddata.begin()+last+1,iddata.begin()+npart); -} -for(intcomp=0;comp<NumRealComps();comp++){ -RealVector&rdata=soa.GetRealData(comp); -rdata.erase(rdata.begin()+last+1,rdata.begin()+npart); -} -for(intcomp=0;comp<NumIntComps();comp++){ -IntVector&idata=soa.GetIntData(comp); -idata.erase(idata.begin()+last+1,idata.begin()+npart); +if(p.id()<0){ +soa.GetIdCPUData()[pindex]=soa.GetIdCPUData()[last]; +for(intcomp=0;comp<NumRealComps();comp++){ +soa.GetRealData(comp)[pindex]=soa.GetRealData(comp)[last]; +} +for(intcomp=0;comp<NumIntComps();comp++){ +soa.GetIntData(comp)[pindex]=soa.GetIntData(comp)[last]; +} +correctCellVectors(last,pindex,grid,ptd[pindex]); +--last; +continue; +} + +++pindex; } -} -} -} -} - -for(intlev=lev_min;lev<=lev_max;lev++){ -particle_detail::clearEmptyEntries(m_particles[lev]); -} - -//Secondpass-foreachtileinparallel,collecttheparticlesweareowedfromallthread'sbuffers. -for(intlev=lev_min;lev<=lev_max;lev++){ -typenamestd::map<std::pair<int,int>,Vector<ParticleVector>>::iteratorpmap_it; - -ifconstexpr(!ParticleType::is_soa_particle){ -Vector<std::pair<int,int>>grid_tile_ids; -Vector<Vector<ParticleVector>*>pvec_ptrs; - -//weneedtocreateanymissingmapentriesinserialhere -for(pmap_it=tmp_local[lev].begin();pmap_it!=tmp_local[lev].end();pmap_it++) -{ -m_particles[lev][pmap_it->first]; -grid_tile_ids.push_back(pmap_it->first); -pvec_ptrs.push_back(&(pmap_it->second)); -} - -#ifdefAMREX_USE_OMP -#pragmaompparallelfor -#endif -for(intpit=0;pit<static_cast<int>(pvec_ptrs.size());++pit) -{ -autoindex=grid_tile_ids[pit]; -auto&ptile=DefineAndReturnParticleTile(lev,index.first,index.second); -auto&aos=ptile.GetArrayOfStructs(); -auto&soa=ptile.GetStructOfArrays(); -auto&aos_tmp=*(pvec_ptrs[pit]); -auto&soa_tmp=soa_local[lev][index]; -for(inti=0;i<num_threads;++i){ -aos.insert(aos.end(),aos_tmp[i].begin(),aos_tmp[i].end()); -aos_tmp[i].erase(aos_tmp[i].begin(),aos_tmp[i].end()); -for(intcomp=0;comp<NumRealComps();++comp){ -RealVector&arr=soa.GetRealData(comp); -RealVector&tmp=soa_tmp[i].GetRealData(comp); -arr.insert(arr.end(),tmp.begin(),tmp.end()); -tmp.erase(tmp.begin(),tmp.end()); -} -for(intcomp=0;comp<NumIntComps();++comp){ -IntVector&arr=soa.GetIntData(comp); -IntVector&tmp=soa_tmp[i].GetIntData(comp); -arr.insert(arr.end(),tmp.begin(),tmp.end()); -tmp.erase(tmp.begin(),tmp.end()); -} -} -} -}else{//soaparticle -Vector<std::pair<int,int>>grid_tile_ids; - -//weneedtocreateanymissingmapentriesinserialhere -for(autosoa_map_it=soa_local[lev].begin();soa_map_it!=soa_local[lev].end();soa_map_it++) -{ -m_particles[lev][soa_map_it->first]; -grid_tile_ids.push_back(soa_map_it->first); -} - -#ifdefAMREX_USE_OMP -#pragmaompparallelfor -#endif -for(intpit=0;pit<static_cast<int>(grid_tile_ids.size());++pit)//NOLINT(modernize-loop-convert) -{ -autoindex=grid_tile_ids[pit]; -auto&ptile=DefineAndReturnParticleTile(lev,index.first,index.second); -auto&soa=ptile.GetStructOfArrays(); -auto&soa_tmp=soa_local[lev][index]; -for(inti=0;i<num_threads;++i){ -{ -auto&arr=soa.GetIdCPUData(); -auto&tmp=soa_tmp[i].GetIdCPUData(); -arr.insert(arr.end(),tmp.begin(),tmp.end()); -tmp.erase(tmp.begin(),tmp.end()); -} -for(intcomp=0;comp<NumRealComps();++comp){ -RealVector&arr=soa.GetRealData(comp); -RealVector&tmp=soa_tmp[i].GetRealData(comp); -arr.insert(arr.end(),tmp.begin(),tmp.end()); -tmp.erase(tmp.begin(),tmp.end()); -} -for(intcomp=0;comp<NumIntComps();++comp){ -IntVector&arr=soa.GetIntData(comp); -IntVector&tmp=soa_tmp[i].GetIntData(comp); -arr.insert(arr.end(),tmp.begin(),tmp.end()); -tmp.erase(tmp.begin(),tmp.end()); -} -} -} -} -} - -for(auto&map_it:tmp_remote){ -intwho=map_it.first; -not_ours[who]; -} - -Vector<int>dest_proc_ids; -Vector<Vector<Vector<char>>*>pbuff_ptrs; -for(auto&kv:tmp_remote) -{ -dest_proc_ids.push_back(kv.first); -pbuff_ptrs.push_back(&(kv.second)); + +{ +auto&iddata=soa.GetIdCPUData(); +iddata.erase(iddata.begin()+last+1,iddata.begin()+npart); +} +for(intcomp=0;comp<NumRealComps();comp++){ +RealVector&rdata=soa.GetRealData(comp); +rdata.erase(rdata.begin()+last+1,rdata.begin()+npart); +} +for(intcomp=0;comp<NumIntComps();comp++){ +IntVector&idata=soa.GetIntData(comp); +idata.erase(idata.begin()+last+1,idata.begin()+npart); +} +} +} +} +} + +for(intlev=lev_min;lev<=lev_max;lev++){ +particle_detail::clearEmptyEntries(m_particles[lev]); +} + +//Secondpass-foreachtileinparallel,collecttheparticlesweareowedfromallthread'sbuffers. +for(intlev=lev_min;lev<=lev_max;lev++){ +typenamestd::map<std::pair<int,int>,Vector<ParticleVector >>::iteratorpmap_it; + +ifconstexpr(!ParticleType::is_soa_particle){ +Vector<std::pair<int, int>>grid_tile_ids; +Vector<Vector<ParticleVector>*>pvec_ptrs; + +//weneedtocreateanymissingmapentriesinserialhere +for(pmap_it=tmp_local[lev].begin();pmap_it!=tmp_local[lev].end();pmap_it++) +{ +m_particles[lev][pmap_it->first]; +grid_tile_ids.push_back(pmap_it->first); +pvec_ptrs.push_back(&(pmap_it->second)); +} + +#ifdefAMREX_USE_OMP +#pragmaompparallelfor +#endif +for(intpit=0;pit<static_cast<int>(pvec_ptrs.size());++pit) +{ +autoindex=grid_tile_ids[pit]; +auto&ptile=DefineAndReturnParticleTile(lev,index.first,index.second); +auto&aos=ptile.GetArrayOfStructs(); +auto&soa=ptile.GetStructOfArrays(); +auto&aos_tmp=*(pvec_ptrs[pit]); +auto&soa_tmp=soa_local[lev][index]; +for(inti=0;i<num_threads;++i){ +aos.insert(aos.end(),aos_tmp[i].begin(),aos_tmp[i].end()); +aos_tmp[i].erase(aos_tmp[i].begin(),aos_tmp[i].end()); +for(intcomp=0;comp<NumRealComps();++comp){ +RealVector&arr=soa.GetRealData(comp); +RealVector&tmp=soa_tmp[i].GetRealData(comp); +arr.insert(arr.end(),tmp.begin(),tmp.end()); +tmp.erase(tmp.begin(),tmp.end()); +} +for(intcomp=0;comp<NumIntComps();++comp){ +IntVector&arr=soa.GetIntData(comp); +IntVector&tmp=soa_tmp[i].GetIntData(comp); +arr.insert(arr.end(),tmp.begin(),tmp.end()); +tmp.erase(tmp.begin(),tmp.end()); +} +} +} +}else{//soaparticle +Vector<std::pair<int, int>>grid_tile_ids; + +//weneedtocreateanymissingmapentriesinserialhere +for(autosoa_map_it=soa_local[lev].begin();soa_map_it!=soa_local[lev].end();soa_map_it++) +{ +m_particles[lev][soa_map_it->first]; +grid_tile_ids.push_back(soa_map_it->first); +} + +#ifdefAMREX_USE_OMP +#pragmaompparallelfor +#endif +for(intpit=0;pit<static_cast<int>(grid_tile_ids.size());++pit)//NOLINT(modernize-loop-convert) +{ +autoindex=grid_tile_ids[pit]; +auto&ptile=DefineAndReturnParticleTile(lev,index.first,index.second); +auto&soa=ptile.GetStructOfArrays(); +auto&soa_tmp=soa_local[lev][index]; +for(inti=0;i<num_threads;++i){ +{ +auto&arr=soa.GetIdCPUData(); +auto&tmp=soa_tmp[i].GetIdCPUData(); +arr.insert(arr.end(),tmp.begin(),tmp.end()); +tmp.erase(tmp.begin(),tmp.end()); +} +for(intcomp=0;comp<NumRealComps();++comp){ +RealVector&arr=soa.GetRealData(comp); +RealVector&tmp=soa_tmp[i].GetRealData(comp); +arr.insert(arr.end(),tmp.begin(),tmp.end()); +tmp.erase(tmp.begin(),tmp.end()); +} +for(intcomp=0;comp<NumIntComps();++comp){ +IntVector&arr=soa.GetIntData(comp); +IntVector&tmp=soa_tmp[i].GetIntData(comp); +arr.insert(arr.end(),tmp.begin(),tmp.end()); +tmp.erase(tmp.begin(),tmp.end()); +} +} +} +} } -#ifdefAMREX_USE_OMP -#pragmaompparallelfor -#endif -for(intpmap_it=0;pmap_it<static_cast<int>(pbuff_ptrs.size());++pmap_it) -{ -intwho=dest_proc_ids[pmap_it]; -Vector<Vector<char>>&tmp=*(pbuff_ptrs[pmap_it]); -for(inti=0;i<num_threads;++i){ -not_ours[who].insert(not_ours[who].end(),tmp[i].begin(),tmp[i].end()); -tmp[i].erase(tmp[i].begin(),tmp[i].end()); -} +for(auto&map_it:tmp_remote){ +intwho=map_it.first; +not_ours[who]; +} + +Vector<int>dest_proc_ids; +Vector<Vector<Vector<char>>*>pbuff_ptrs; +for(auto&kv:tmp_remote) +{ +dest_proc_ids.push_back(kv.first); +pbuff_ptrs.push_back(&(kv.second)); } -particle_detail::clearEmptyEntries(not_ours); - -if(int(m_particles.size())>theEffectiveFinestLevel+1){ -//LookslikewelostanAmrLevelonaregrid. -if(m_verbose>0){ -amrex::Print()<<"ParticleContainer::Redistribute()resizingm_particlesfrom" -<<m_particles.size()<<"to"<<theEffectiveFinestLevel+1<<'\n'; -} -AMREX_ASSERT(int(m_particles.size())>=2); - -m_particles.resize(theEffectiveFinestLevel+1); -m_dummy_mf.resize(theEffectiveFinestLevel+1); -} - -if(ParallelContext::NProcsSub()==1){ -AMREX_ASSERT(not_ours.empty()); -} -else{ -RedistributeMPI(not_ours,lev_min,lev_max,nGrow,local); -} - -AMREX_ASSERT(OK(lev_min,lev_max,nGrow)); +#ifdefAMREX_USE_OMP +#pragmaompparallelfor +#endif +for(intpmap_it=0;pmap_it<static_cast<int>(pbuff_ptrs.size());++pmap_it) +{ +intwho=dest_proc_ids[pmap_it]; +Vector<Vector<char>>&tmp=*(pbuff_ptrs[pmap_it]); +for(inti=0;i<num_threads;++i){ +not_ours[who].insert(not_ours[who].end(),tmp[i].begin(),tmp[i].end()); +tmp[i].erase(tmp[i].begin(),tmp[i].end()); +} +} + +particle_detail::clearEmptyEntries(not_ours); + +if(int(m_particles.size())>theEffectiveFinestLevel+1){ +//LookslikewelostanAmrLevelonaregrid. +if(m_verbose>0){ +amrex::Print()<<"ParticleContainer::Redistribute()resizingm_particlesfrom" +<<m_particles.size()<<"to"<<theEffectiveFinestLevel+1<<'\n'; +} +AMREX_ASSERT(int(m_particles.size())>=2); -if(m_verbose>0){ -autostoptime=amrex::second()-strttime; - -ByteSpread(); - -#ifdefAMREX_LAZY -Lazy::QueueReduction([=]()mutable{ -#endif -ParallelReduce::Max(stoptime,ParallelContext::IOProcessorNumberSub(), -ParallelContext::CommunicatorSub()); +m_particles.resize(theEffectiveFinestLevel+1); +m_dummy_mf.resize(theEffectiveFinestLevel+1); +} + +if(ParallelContext::NProcsSub()==1){ +AMREX_ASSERT(not_ours.empty()); +} +else{ +RedistributeMPI(not_ours,lev_min,lev_max,nGrow,local); +} -amrex::Print()<<"ParticleContainer::Redistribute()time:"<<stoptime<<"\n\n"; -#ifdefAMREX_LAZY -}); -#endif -} -} +AMREX_ASSERT(OK(lev_min,lev_max,nGrow)); + +if(m_verbose>0){ +autostoptime=amrex::second()-strttime; + +ByteSpread(); -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: -RedistributeMPI(std::map<int,Vector<char>>&not_ours, -intlev_min,intlev_max,intnGrow,intlocal) -{ -BL_PROFILE("ParticleContainer::RedistributeMPI()"); -BL_PROFILE_VAR_NS("RedistributeMPI_locate",blp_locate); -BL_PROFILE_VAR_NS("RedistributeMPI_copy",blp_copy); - -#ifdefAMREX_USE_MPI +#ifdefAMREX_LAZY +Lazy::QueueReduction([=]()mutable{ +#endif +ParallelReduce::Max(stoptime,ParallelContext::IOProcessorNumberSub(), +ParallelContext::CommunicatorSub()); + +amrex::Print()<<"ParticleContainer::Redistribute()time:"<<stoptime<<"\n\n"; +#ifdefAMREX_LAZY +}); +#endif +} +} -usingbuffer_type=unsignedlonglong; - -std::map<int,Vector<buffer_type>>mpi_snd_data; -for(constauto&kv:not_ours) -{ -autonbt=(kv.second.size()+sizeof(buffer_type)-1)/sizeof(buffer_type); -mpi_snd_data[kv.first].resize(nbt); -std::memcpy((char*)mpi_snd_data[kv.first].data(),kv.second.data(),kv.second.size()); -} - -constintNProcs=ParallelContext::NProcsSub(); -constintNNeighborProcs=neighbor_procs.size(); +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: +RedistributeMPI(std::map<int,Vector<char>>&not_ours, +intlev_min,intlev_max,intnGrow,intlocal) +{ +BL_PROFILE("ParticleContainer::RedistributeMPI()"); +BL_PROFILE_VAR_NS("RedistributeMPI_locate",blp_locate); +BL_PROFILE_VAR_NS("RedistributeMPI_copy",blp_copy); + +#ifdefAMREX_USE_MPI -//WemaynowhaveparticlesthatarerightfullyownedbyanotherCPU. -Vector<Long>Snds(NProcs,0),Rcvs(NProcs,0);//bytes! - -LongNumSnds=0; -if(local>0) -{ -AMREX_ALWAYS_ASSERT(lev_min==0); -AMREX_ALWAYS_ASSERT(lev_max==0); -BuildRedistributeMask(0,local); -NumSnds=doHandShakeLocal(not_ours,neighbor_procs,Snds,Rcvs); -} -else -{ -NumSnds=doHandShake(not_ours,Snds,Rcvs); -} +usingbuffer_type=unsignedlonglong; + +std::map<int,Vector<buffer_type>>mpi_snd_data; +for(constauto&kv:not_ours) +{ +autonbt=(kv.second.size()+sizeof(buffer_type)-1)/sizeof(buffer_type); +mpi_snd_data[kv.first].resize(nbt); +std::memcpy((char*)mpi_snd_data[kv.first].data(),kv.second.data(),kv.second.size()); +} + +constintNProcs=ParallelContext::NProcsSub(); +constintNNeighborProcs=neighbor_procs.size(); + +//WemaynowhaveparticlesthatarerightfullyownedbyanotherCPU. +Vector<Long>Snds(NProcs,0),Rcvs(NProcs,0);//bytes! -constintSeqNum=ParallelDescriptor::SeqNum(); - -if((!local)&&NumSnds==0){ -return;//There'snoparallelworktodo. -} - -if(local) -{ -Longtot_snds_this_proc=0; -Longtot_rcvs_this_proc=0; -for(inti=0;i<NNeighborProcs;++i){ -tot_snds_this_proc+=Snds[neighbor_procs[i]]; -tot_rcvs_this_proc+=Rcvs[neighbor_procs[i]]; -} -if((tot_snds_this_proc==0)&&(tot_rcvs_this_proc==0)){ -return;//There'snoparallelworktodo. -} +LongNumSnds=0; +if(local>0) +{ +AMREX_ALWAYS_ASSERT(lev_min==0); +AMREX_ALWAYS_ASSERT(lev_max==0); +BuildRedistributeMask(0,local); +NumSnds=doHandShakeLocal(not_ours,neighbor_procs,Snds,Rcvs); +} +else +{ +NumSnds=doHandShake(not_ours,Snds,Rcvs); +} + +constintSeqNum=ParallelDescriptor::SeqNum(); + +if((!local)&&NumSnds==0){ +return;//There'snoparallelworktodo. } -Vector<int>RcvProc; -Vector<std::size_t>rOffset;//Offset(inbytes)inthereceivebuffer - -std::size_tTotRcvInts=0; -std::size_tTotRcvBytes=0; -for(inti=0;i<NProcs;++i){ -if(Rcvs[i]>0){ -RcvProc.push_back(i); -rOffset.push_back(TotRcvInts); -TotRcvBytes+=Rcvs[i]; -autonbt=(Rcvs[i]+sizeof(buffer_type)-1)/sizeof(buffer_type); -TotRcvInts+=nbt; -} -} - -constautonrcvs=static_cast<int>(RcvProc.size()); -Vector<MPI_Status>stats(nrcvs); -Vector<MPI_Request>rreqs(nrcvs); - -//Allocatedataforrcvsasonebigchunk. -Vector<unsigned long long>recvdata(TotRcvInts); - -//Postreceives. -for(inti=0;i<nrcvs;++i){ -constautoWho=RcvProc[i]; -constautooffset=rOffset[i]; -constautoCnt=(Rcvs[Who]+sizeof(buffer_type)-1)/sizeof(buffer_type); -AMREX_ASSERT(Cnt>0); -AMREX_ASSERT(Cnt<size_t(std::numeric_limits<int>::max())); -AMREX_ASSERT(Who>=0&&Who<NProcs); - -rreqs[i]=ParallelDescriptor::Arecv(&recvdata[offset],Cnt,Who,SeqNum, -ParallelContext::CommunicatorSub()).req(); -} +if(local) +{ +Longtot_snds_this_proc=0; +Longtot_rcvs_this_proc=0; +for(inti=0;i<NNeighborProcs;++i){ +tot_snds_this_proc+=Snds[neighbor_procs[i]]; +tot_rcvs_this_proc+=Rcvs[neighbor_procs[i]]; +} +if((tot_snds_this_proc==0)&&(tot_rcvs_this_proc==0)){ +return;//There'snoparallelworktodo. +} +} + +Vector<int>RcvProc; +Vector<std::size_t>rOffset;//Offset(inbytes)inthereceivebuffer + +std::size_tTotRcvInts=0; +std::size_tTotRcvBytes=0; +for(inti=0;i<NProcs;++i){ +if(Rcvs[i]>0){ +RcvProc.push_back(i); +rOffset.push_back(TotRcvInts); +TotRcvBytes+=Rcvs[i]; +autonbt=(Rcvs[i]+sizeof(buffer_type)-1)/sizeof(buffer_type); +TotRcvInts+=nbt; +} +} + +constautonrcvs=static_cast<int>(RcvProc.size()); +Vector<MPI_Status>stats(nrcvs); +Vector<MPI_Request>rreqs(nrcvs); + +//Allocatedataforrcvsasonebigchunk. +Vector<unsigned long long>recvdata(TotRcvInts); -//Send. -for(constauto&kv:mpi_snd_data){ -constautoWho=kv.first; -constautoCnt=kv.second.size(); - +//Postreceives. +for(inti=0;i<nrcvs;++i){ +constautoWho=RcvProc[i]; +constautooffset=rOffset[i]; +constautoCnt=(Rcvs[Who]+sizeof(buffer_type)-1)/sizeof(buffer_type); AMREX_ASSERT(Cnt>0); -AMREX_ASSERT(Who>=0&&Who<NProcs); -AMREX_ASSERT(Cnt<std::numeric_limits<int>::max()); +AMREX_ASSERT(Cnt<size_t(std::numeric_limits<int>::max())); +AMREX_ASSERT(Who>=0&&Who<NProcs); -ParallelDescriptor::Send(kv.second.data(),Cnt,Who,SeqNum, -ParallelContext::CommunicatorSub()); +rreqs[i]=ParallelDescriptor::Arecv(&recvdata[offset],Cnt,Who,SeqNum, +ParallelContext::CommunicatorSub()).req(); } -if(nrcvs>0){ -ParallelDescriptor::Waitall(rreqs,stats); - -BL_PROFILE_VAR_START(blp_locate); +//Send. +for(constauto&kv:mpi_snd_data){ +constautoWho=kv.first; +constautoCnt=kv.second.size(); -intnpart=TotRcvBytes/superparticle_size; - -Vector<int>rcv_levs(npart); -Vector<int>rcv_grid(npart); -Vector<int>rcv_tile(npart); - -intipart=0; -ParticleLocDatapld; -for(intj=0;j<nrcvs;++j) -{ -constautooffset=rOffset[j]; -constautoWho=RcvProc[j]; -constautoCnt=Rcvs[Who]/superparticle_size; -for(inti=0;i<int(Cnt);++i) -{ -char*pbuf=((char*)&recvdata[offset])+i*superparticle_size; - -Particle<NStructReal, NStructInt>p; +AMREX_ASSERT(Cnt>0); +AMREX_ASSERT(Who>=0&&Who<NProcs); +AMREX_ASSERT(Cnt<std::numeric_limits<int>::max()); + +ParallelDescriptor::Send(kv.second.data(),Cnt,Who,SeqNum, +ParallelContext::CommunicatorSub()); +} + +if(nrcvs>0){ +ParallelDescriptor::Waitall(rreqs,stats); + +BL_PROFILE_VAR_START(blp_locate); + +intnpart=TotRcvBytes/superparticle_size; + +Vector<int>rcv_levs(npart); +Vector<int>rcv_grid(npart); +Vector<int>rcv_tile(npart); -ifconstexpr(ParticleType::is_soa_particle){ -std::memcpy(&p.m_idcpu,pbuf,sizeof(uint64_t)); - -ParticleRealpos[AMREX_SPACEDIM]; -std::memcpy(&pos[0],pbuf+sizeof(uint64_t),AMREX_SPACEDIM*sizeof(ParticleReal)); -AMREX_D_TERM(p.pos(0)=pos[0];, -p.pos(1)=pos[1];, -p.pos(2)=pos[2]); -}else{ -std::memcpy(&p,pbuf,sizeof(ParticleType)); -} - -boolsuccess=Where(p,pld,lev_min,lev_max,0); -if(!success) -{ -success=(nGrow>0)&&Where(p,pld,lev_min,lev_min,nGrow); -pld.m_grown_gridbox=pld.m_gridbox;//resetgrownboxforsubsequentcalls. -} -if(!success) -{ -amrex::Abort("RedistributeMPI_locate::invalidparticle."); -} - -rcv_levs[ipart]=pld.m_lev; -rcv_grid[ipart]=pld.m_grid; -rcv_tile[ipart]=pld.m_tile; - -++ipart; -} -} - -BL_PROFILE_VAR_STOP(blp_locate); - -BL_PROFILE_VAR_START(blp_copy); - -#ifndefAMREX_USE_GPU -ipart=0; -for(inti=0;i<nrcvs;++i) -{ -constautooffset=rOffset[i]; -constautoWho=RcvProc[i]; -constautoCnt=Rcvs[Who]/superparticle_size; -for(intj=0;j<int(Cnt);++j) -{ -auto&ptile=m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart], -rcv_tile[ipart])]; -char*pbuf=((char*)&recvdata[offset])+j*superparticle_size; +intipart=0; +ParticleLocDatapld; +for(intj=0;j<nrcvs;++j) +{ +constautooffset=rOffset[j]; +constautoWho=RcvProc[j]; +constautoCnt=Rcvs[Who]/superparticle_size; +for(inti=0;i<int(Cnt);++i) +{ +char*pbuf=((char*)&recvdata[offset])+i*superparticle_size; + +Particle<NStructReal, NStructInt>p; + +ifconstexpr(ParticleType::is_soa_particle){ +std::memcpy(&p.m_idcpu,pbuf,sizeof(uint64_t)); + +ParticleRealpos[AMREX_SPACEDIM]; +std::memcpy(&pos[0],pbuf+sizeof(uint64_t),AMREX_SPACEDIM*sizeof(ParticleReal)); +AMREX_D_TERM(p.pos(0)=pos[0];, +p.pos(1)=pos[1];, +p.pos(2)=pos[2]); +}else{ +std::memcpy(&p,pbuf,sizeof(ParticleType)); +} + +boolsuccess=Where(p,pld,lev_min,lev_max,0); +if(!success) +{ +success=(nGrow>0)&&Where(p,pld,lev_min,lev_min,nGrow); +pld.m_grown_gridbox=pld.m_gridbox;//resetgrownboxforsubsequentcalls. +} +if(!success) +{ +amrex::Abort("RedistributeMPI_locate::invalidparticle."); +} + +rcv_levs[ipart]=pld.m_lev; +rcv_grid[ipart]=pld.m_grid; +rcv_tile[ipart]=pld.m_tile; + +++ipart; +} +} + +BL_PROFILE_VAR_STOP(blp_locate); + +BL_PROFILE_VAR_START(blp_copy); -ifconstexpr(ParticleType::is_soa_particle){ -uint64_tidcpudata; -std::memcpy(&idcpudata,pbuf,sizeof(uint64_t)); -pbuf+=sizeof(uint64_t); -ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata); -}else{ -ParticleTypep; -std::memcpy(&p,pbuf,sizeof(ParticleType)); -pbuf+=sizeof(ParticleType); -ptile.push_back(p); -} - -intarray_comp_start=AMREX_SPACEDIM+NStructReal; -for(intcomp=0;comp<NumRealComps();++comp){ -if(h_redistribute_real_comp[array_comp_start+comp]){ -ParticleRealrdata; -std::memcpy(&rdata,pbuf,sizeof(ParticleReal)); -pbuf+=sizeof(ParticleReal); -ptile.push_back_real(comp,rdata); -}else{ -ptile.push_back_real(comp,0.0); -} -} - -array_comp_start=2+NStructInt; -for(intcomp=0;comp<NumIntComps();++comp){ -if(h_redistribute_int_comp[array_comp_start+comp]){ -intidata; -std::memcpy(&idata,pbuf,sizeof(int)); -pbuf+=sizeof(int); -ptile.push_back_int(comp,idata); -}else{ -ptile.push_back_int(comp,0); -} -} -++ipart; -} -} - -#else -Vector<std::map<std::pair<int, int>,Gpu::HostVector<ParticleType>>>host_particles; -host_particles.reserve(15); -host_particles.resize(finestLevel()+1); - -Vector<std::map<std::pair<int, int>, -std::vector<Gpu::HostVector<ParticleReal>>>>host_real_attribs; -host_real_attribs.reserve(15); -host_real_attribs.resize(finestLevel()+1); - -Vector<std::map<std::pair<int, int>, -std::vector<Gpu::HostVector<int>>>>host_int_attribs; -host_int_attribs.reserve(15); -host_int_attribs.resize(finestLevel()+1); - -Vector<std::map<std::pair<int, int>,Gpu::HostVector<uint64_t>>>host_idcpu; -host_idcpu.reserve(15); -host_idcpu.resize(finestLevel()+1); - -ipart=0; -for(inti=0;i<nrcvs;++i) -{ -constautooffset=rOffset[i]; -constautoWho=RcvProc[i]; -constautoCnt=Rcvs[Who]/superparticle_size; -for(autoj=decltype(Cnt)(0);j<Cnt;++j) -{ -intlev=rcv_levs[ipart]; -std::pair<int,int>ind(std::make_pair(rcv_grid[ipart],rcv_tile[ipart])); - -char*pbuf=((char*)&recvdata[offset])+j*superparticle_size; +#ifndefAMREX_USE_GPU +ipart=0; +for(inti=0;i<nrcvs;++i) +{ +constautooffset=rOffset[i]; +constautoWho=RcvProc[i]; +constautoCnt=Rcvs[Who]/superparticle_size; +for(intj=0;j<int(Cnt);++j) +{ +auto&ptile=m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart], +rcv_tile[ipart])]; +char*pbuf=((char*)&recvdata[offset])+j*superparticle_size; + +ifconstexpr(ParticleType::is_soa_particle){ +uint64_tidcpudata; +std::memcpy(&idcpudata,pbuf,sizeof(uint64_t)); +pbuf+=sizeof(uint64_t); +ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata); +}else{ +ParticleTypep; +std::memcpy(&p,pbuf,sizeof(ParticleType)); +pbuf+=sizeof(ParticleType); +ptile.push_back(p); +} + +intarray_comp_start=AMREX_SPACEDIM+NStructReal; +for(intcomp=0;comp<NumRealComps();++comp){ +if(h_redistribute_real_comp[array_comp_start+comp]){ +ParticleRealrdata; +std::memcpy(&rdata,pbuf,sizeof(ParticleReal)); +pbuf+=sizeof(ParticleReal); +ptile.push_back_real(comp,rdata); +}else{ +ptile.push_back_real(comp,0.0); +} +} + +array_comp_start=2+NStructInt; +for(intcomp=0;comp<NumIntComps();++comp){ +if(h_redistribute_int_comp[array_comp_start+comp]){ +intidata; +std::memcpy(&idata,pbuf,sizeof(int)); +pbuf+=sizeof(int); +ptile.push_back_int(comp,idata); +}else{ +ptile.push_back_int(comp,0); +} +} +++ipart; +} +} + +#else +Vector<std::map<std::pair<int, int>,Gpu::HostVector<ParticleType>>>host_particles; +host_particles.reserve(15); +host_particles.resize(finestLevel()+1); + +Vector<std::map<std::pair<int, int>, +std::vector<Gpu::HostVector<ParticleReal>>>>host_real_attribs; +host_real_attribs.reserve(15); +host_real_attribs.resize(finestLevel()+1); + +Vector<std::map<std::pair<int, int>, +std::vector<Gpu::HostVector<int>>>>host_int_attribs; +host_int_attribs.reserve(15); +host_int_attribs.resize(finestLevel()+1); + +Vector<std::map<std::pair<int, int>,Gpu::HostVector<uint64_t>>>host_idcpu; +host_idcpu.reserve(15); +host_idcpu.resize(finestLevel()+1); -host_real_attribs[lev][ind].resize(NumRealComps()); -host_int_attribs[lev][ind].resize(NumIntComps()); - -ifconstexpr(ParticleType::is_soa_particle){ -uint64_tidcpudata; -std::memcpy(&idcpudata,pbuf,sizeof(uint64_t)); -pbuf+=sizeof(uint64_t); -host_idcpu[lev][ind].push_back(idcpudata); -}else{ -ParticleTypep; -std::memcpy(&p,pbuf,sizeof(ParticleType)); -pbuf+=sizeof(ParticleType); -host_particles[lev][ind].push_back(p); -} - -host_real_attribs[lev][ind].resize(NumRealComps()); -host_int_attribs[lev][ind].resize(NumIntComps()); - -//addthereal... -intarray_comp_start=AMREX_SPACEDIM+NStructReal; -for(intcomp=0;comp<NumRealComps();++comp){ -if(h_redistribute_real_comp[array_comp_start+comp]){ -Realrdata; -std::memcpy(&rdata,pbuf,sizeof(Real)); -pbuf+=sizeof(Real); -host_real_attribs[lev][ind][comp].push_back(rdata); -}else{ -host_real_attribs[lev][ind][comp].push_back(0.0); -} -} +ipart=0; +for(inti=0;i<nrcvs;++i) +{ +constautooffset=rOffset[i]; +constautoWho=RcvProc[i]; +constautoCnt=Rcvs[Who]/superparticle_size; +for(autoj=decltype(Cnt)(0);j<Cnt;++j) +{ +intlev=rcv_levs[ipart]; +std::pair<int,int>ind(std::make_pair(rcv_grid[ipart],rcv_tile[ipart])); + +char*pbuf=((char*)&recvdata[offset])+j*superparticle_size; + +host_real_attribs[lev][ind].resize(NumRealComps()); +host_int_attribs[lev][ind].resize(NumIntComps()); + +ifconstexpr(ParticleType::is_soa_particle){ +uint64_tidcpudata; +std::memcpy(&idcpudata,pbuf,sizeof(uint64_t)); +pbuf+=sizeof(uint64_t); +host_idcpu[lev][ind].push_back(idcpudata); +}else{ +ParticleTypep; +std::memcpy(&p,pbuf,sizeof(ParticleType)); +pbuf+=sizeof(ParticleType); +host_particles[lev][ind].push_back(p); +} + +host_real_attribs[lev][ind].resize(NumRealComps()); +host_int_attribs[lev][ind].resize(NumIntComps()); -//...andintarraydata -array_comp_start=2+NStructInt; -for(intcomp=0;comp<NumIntComps();++comp){ -if(h_redistribute_int_comp[array_comp_start+comp]){ -intidata; -std::memcpy(&idata,pbuf,sizeof(int)); -pbuf+=sizeof(int); -host_int_attribs[lev][ind][comp].push_back(idata); +//addthereal... +intarray_comp_start=AMREX_SPACEDIM+NStructReal; +for(intcomp=0;comp<NumRealComps();++comp){ +if(h_redistribute_real_comp[array_comp_start+comp]){ +Realrdata; +std::memcpy(&rdata,pbuf,sizeof(Real)); +pbuf+=sizeof(Real); +host_real_attribs[lev][ind][comp].push_back(rdata); }else{ -host_int_attribs[lev][ind][comp].push_back(0); +host_real_attribs[lev][ind][comp].push_back(0.0); } } -++ipart; -} -} - -for(inthost_lev=0;host_lev<static_cast<int>(host_particles.size());++host_lev) -{ -for(auto&kv:host_particles[host_lev]){ -autogrid=kv.first.first; -autotile=kv.first.second; -constauto&src_tile=kv.second; - -auto&dst_tile=GetParticles(host_lev)[std::make_pair(grid,tile)]; -autoold_size=dst_tile.size(); -autonew_size=old_size+src_tile.size(); -dst_tile.resize(new_size); - -ifconstexpr(ParticleType::is_soa_particle){ -Gpu::copyAsync(Gpu::hostToDevice, -host_idcpu[host_lev][std::make_pair(grid,tile)].begin(), -host_idcpu[host_lev][std::make_pair(grid,tile)].end(), -dst_tile.GetStructOfArrays().GetIdCPUData().begin()+old_size); -}else{ -Gpu::copyAsync(Gpu::hostToDevice, -src_tile.begin(),src_tile.end(), -dst_tile.GetArrayOfStructs().begin()+old_size); -} - -for(inti=0;i<NumRealComps();++i){ -Gpu::copyAsync(Gpu::hostToDevice, -host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), -host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(), -dst_tile.GetStructOfArrays().GetRealData(i).begin()+old_size); -} - -for(inti=0;i<NumIntComps();++i){ + +//...andintarraydata +array_comp_start=2+NStructInt; +for(intcomp=0;comp<NumIntComps();++comp){ +if(h_redistribute_int_comp[array_comp_start+comp]){ +intidata; +std::memcpy(&idata,pbuf,sizeof(int)); +pbuf+=sizeof(int); +host_int_attribs[lev][ind][comp].push_back(idata); +}else{ +host_int_attribs[lev][ind][comp].push_back(0); +} +} +++ipart; +} +} + +for(inthost_lev=0;host_lev<static_cast<int>(host_particles.size());++host_lev) +{ +for(auto&kv:host_particles[host_lev]){ +autogrid=kv.first.first; +autotile=kv.first.second; +constauto&src_tile=kv.second; + +auto&dst_tile=GetParticles(host_lev)[std::make_pair(grid,tile)]; +autoold_size=dst_tile.size(); +autonew_size=old_size+src_tile.size(); +dst_tile.resize(new_size); + +ifconstexpr(ParticleType::is_soa_particle){ +Gpu::copyAsync(Gpu::hostToDevice, +host_idcpu[host_lev][std::make_pair(grid,tile)].begin(), +host_idcpu[host_lev][std::make_pair(grid,tile)].end(), +dst_tile.GetStructOfArrays().GetIdCPUData().begin()+old_size); +}else{ Gpu::copyAsync(Gpu::hostToDevice, -host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), -host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(), -dst_tile.GetStructOfArrays().GetIntData(i).begin()+old_size); -} -} -} - -Gpu::Device::streamSynchronize(); -#endif - -BL_PROFILE_VAR_STOP(blp_copy); -} -#else -amrex::ignore_unused(not_ours,lev_min,lev_max,nGrow,local); -#endif -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -bool -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::OK(intlev_min,intlev_max,intnGrow)const -{ -BL_PROFILE("ParticleContainer::OK()"); - -if(lev_max==-1){ -lev_max=finestLevel(); -} - -return(numParticlesOutOfRange(*this,lev_min,lev_max,nGrow)==0); -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> -::AddParticlesAtLevel(AoS&particles,intlevel,intnGrow) -{ -ParticleTileTypeptile; -ptile.GetArrayOfStructs().swap(particles); -AddParticlesAtLevel(ptile,level,nGrow); -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType,NArrayReal,NArrayInt,Allocator,CellAssignor> -::AddParticlesAtLevel(ParticleTileType&particles,intlevel,intnGrow) -{ -BL_PROFILE("ParticleContainer::AddParticlesAtLevel()"); - -if(int(m_particles.size())<level+1) -{ -if(Verbose()) -{ -amrex::Print()<<"ParticleContainer::AddParticlesAtLevelresizingm_particlesfrom" -<<m_particles.size() -<<"to" -<<level+1<<'\n'; -} -m_particles.resize(level+1); -m_dummy_mf.resize(level+1); -for(intlev=0;lev<level+1;++lev){ -RedefineDummyMF(lev); -} -} - -auto&ptile=DefineAndReturnParticleTile(level,0,0); -intold_np=ptile.size(); -intnum_to_add=particles.size(); -intnew_np=old_np+num_to_add; -ptile.resize(new_np); -amrex::copyParticles(ptile,particles,0,old_np,num_to_add); -Redistribute(level,level,nGrow); -particles.resize(0); -} - -//Thisisthesingle-levelversionforcell-centereddensity -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: -AssignCellDensitySingleLevel(intrho_index, -MultiFab&mf_to_be_filled, -intlev, -intncomp, -intparticle_lvl_offset)const -{ -BL_PROFILE("ParticleContainer::AssignCellDensitySingleLevel()"); +src_tile.begin(),src_tile.end(), +dst_tile.GetArrayOfStructs().begin()+old_size); +} + +for(inti=0;i<NumRealComps();++i){ +Gpu::copyAsync(Gpu::hostToDevice, +host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), +host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(), +dst_tile.GetStructOfArrays().GetRealData(i).begin()+old_size); +} + +for(inti=0;i<NumIntComps();++i){ +Gpu::copyAsync(Gpu::hostToDevice, +host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), +host_int_attribs[host_lev][std::make_pair(grid,tile)][i].end(), +dst_tile.GetStructOfArrays().GetIntData(i).begin()+old_size); +} +} +} + +Gpu::Device::streamSynchronize(); +#endif + +BL_PROFILE_VAR_STOP(blp_copy); +} +#else +amrex::ignore_unused(not_ours,lev_min,lev_max,nGrow,local); +#endif +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +bool +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::OK(intlev_min,intlev_max,intnGrow)const +{ +BL_PROFILE("ParticleContainer::OK()"); + +if(lev_max==-1){ +lev_max=finestLevel(); +} + +return(numParticlesOutOfRange(*this,lev_min,lev_max,nGrow)==0); +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor> +::AddParticlesAtLevel(AoS&particles,intlevel,intnGrow) +{ +ParticleTileTypeptile; +ptile.GetArrayOfStructs().swap(particles); +AddParticlesAtLevel(ptile,level,nGrow); +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType,NArrayReal,NArrayInt,Allocator,CellAssignor> +::AddParticlesAtLevel(ParticleTileType&particles,intlevel,intnGrow) +{ +BL_PROFILE("ParticleContainer::AddParticlesAtLevel()"); + +if(int(m_particles.size())<level+1) +{ +if(Verbose()) +{ +amrex::Print()<<"ParticleContainer::AddParticlesAtLevelresizingm_particlesfrom" +<<m_particles.size() +<<"to" +<<level+1<<'\n'; +} +m_particles.resize(level+1); +m_dummy_mf.resize(level+1); +for(intlev=0;lev<level+1;++lev){ +RedefineDummyMF(lev); +} +} + +auto&ptile=DefineAndReturnParticleTile(level,0,0); +intold_np=ptile.size(); +intnum_to_add=particles.size(); +intnew_np=old_np+num_to_add; +ptile.resize(new_np); +amrex::copyParticles(ptile,particles,0,old_np,num_to_add); +Redistribute(level,level,nGrow); +particles.resize(0); +} -if(rho_index!=0){amrex::Abort("AssignCellDensitySingleLevelonlyworksifrho_index=0");} - -MultiFab*mf_pointer; - -if(OnSameGrids(lev,mf_to_be_filled)){ -//Ifwearealreadyworkingwiththeinternalmfdefinedonthe -//particle_box_array,thenwejustworkwiththis. -mf_pointer=&mf_to_be_filled; -} -else{ -//Ifmf_to_be_filledisnotdefinedontheparticle_box_array,thenweneed -//tomakeatemporaryhereandcopyintomf_to_be_filledattheend. -mf_pointer=newMultiFab(ParticleBoxArray(lev), -ParticleDistributionMap(lev), -ncomp,mf_to_be_filled.nGrow()); -} +//Thisisthesingle-levelversionforcell-centereddensity +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: +AssignCellDensitySingleLevel(intrho_index, +MultiFab&mf_to_be_filled, +intlev, +intncomp, +intparticle_lvl_offset)const +{ +BL_PROFILE("ParticleContainer::AssignCellDensitySingleLevel()"); + +if(rho_index!=0){amrex::Abort("AssignCellDensitySingleLevelonlyworksifrho_index=0");} + +MultiFab*mf_pointer; -//WemusthaveghostcellsforeachFABsothataparticleinonegridcanspread -//itseffecttoanadjacentgridbyfirstputtingthevalueintoghostcellsofits -//owngrid.Themf->SumBoundarycallthenaddsthevaluefromonegrid'sghostcell -//toanothergrid'svalidregion. -if(mf_pointer->nGrow()<1){ -amrex::Error("MusthaveatleastoneghostcellwheninAssignCellDensitySingleLevel"); -} - -constautostrttime=amrex::second(); - -constautodxi=Geom(lev).InvCellSizeArray(); -constautoplo=Geom(lev).ProbLoArray(); -constautopdxi=Geom(lev+particle_lvl_offset).InvCellSizeArray(); - -if(Geom(lev).isAnyPeriodic()&&!Geom(lev).isAllPeriodic()) -{ -amrex::Error("AssignCellDensitySingleLevel:problemmustbeperiodicinnooralldirections"); -} - -mf_pointer->setVal(0); +if(OnSameGrids(lev,mf_to_be_filled)){ +//Ifwearealreadyworkingwiththeinternalmfdefinedonthe +//particle_box_array,thenwejustworkwiththis. +mf_pointer=&mf_to_be_filled; +} +else{ +//Ifmf_to_be_filledisnotdefinedontheparticle_box_array,thenweneed +//tomakeatemporaryhereandcopyintomf_to_be_filledattheend. +mf_pointer=newMultiFab(ParticleBoxArray(lev), +ParticleDistributionMap(lev), +ncomp,mf_to_be_filled.nGrow()); +} + +//WemusthaveghostcellsforeachFABsothataparticleinonegridcanspread +//itseffecttoanadjacentgridbyfirstputtingthevalueintoghostcellsofits +//owngrid.Themf->SumBoundarycallthenaddsthevaluefromonegrid'sghostcell +//toanothergrid'svalidregion. +if(mf_pointer->nGrow()<1){ +amrex::Error("MusthaveatleastoneghostcellwheninAssignCellDensitySingleLevel"); +} -usingParConstIter=ParConstIter_impl<ParticleType, NArrayReal, NArrayInt, Allocator>; -#ifdefAMREX_USE_OMP -#pragmaompparallelif(Gpu::notInLaunchRegion()) -#endif -{ -FArrayBoxlocal_rho; -for(ParConstIterpti(*this,lev);pti.isValid();++pti){ -constLongnp=pti.numParticles(); -autoptd=pti.GetParticleTile().getConstParticleTileData(); -FArrayBox&fab=(*mf_pointer)[pti]; -autorhoarr=fab.array(); -#ifdefAMREX_USE_OMP -Boxtile_box; -if(Gpu::notInLaunchRegion()) -{ -tile_box=pti.tilebox(); -tile_box.grow(mf_pointer->nGrow()); -local_rho.resize(tile_box,ncomp); -local_rho.setVal<RunOn::Host>(0.0); -rhoarr=local_rho.array(); -} -#endif - -if(particle_lvl_offset==0) -{ -AMREX_HOST_DEVICE_FOR_1D(np,i, -{ -autop=make_particle<ParticleType>{}(ptd,i); -amrex_deposit_cic(p,ncomp,rhoarr,plo,dxi); -}); -} -else -{ -AMREX_HOST_DEVICE_FOR_1D(np,i, -{ -autop=make_particle<ParticleType>{}(ptd,i); -amrex_deposit_particle_dx_cic(p,ncomp,rhoarr,plo,dxi,pdxi); -}); -} - -#ifdefAMREX_USE_OMP -if(Gpu::notInLaunchRegion()) -{ -fab.atomicAdd<RunOn::Host>(local_rho,tile_box,tile_box,0,0,ncomp); -} -#endif -} -} - -mf_pointer->SumBoundary(Geom(lev).periodicity()); - -//Ifncomp>1,firstdividethemomenta(componentn) -//bythemass(component0)inordertogetvelocities. -//Becarefulnottodividebyzero. -for(intn=1;n<ncomp;n++) -{ -for(MFItermfi(*mf_pointer);mfi.isValid();++mfi) -{ -(*mf_pointer)[mfi].protected_divide<RunOn::Device>((*mf_pointer)[mfi],0,n,1); +constautostrttime=amrex::second(); + +constautodxi=Geom(lev).InvCellSizeArray(); +constautoplo=Geom(lev).ProbLoArray(); +constautopdxi=Geom(lev+particle_lvl_offset).InvCellSizeArray(); + +if(Geom(lev).isAnyPeriodic()&&!Geom(lev).isAllPeriodic()) +{ +amrex::Error("AssignCellDensitySingleLevel:problemmustbeperiodicinnooralldirections"); +} + +mf_pointer->setVal(0); + +usingParConstIter=ParConstIter_impl<ParticleType, NArrayReal, NArrayInt, Allocator>; +#ifdefAMREX_USE_OMP +#pragmaompparallelif(Gpu::notInLaunchRegion()) +#endif +{ +FArrayBoxlocal_rho; +for(ParConstIterpti(*this,lev);pti.isValid();++pti){ +constLongnp=pti.numParticles(); +autoptd=pti.GetParticleTile().getConstParticleTileData(); +FArrayBox&fab=(*mf_pointer)[pti]; +autorhoarr=fab.array(); +#ifdefAMREX_USE_OMP +Boxtile_box; +if(Gpu::notInLaunchRegion()) +{ +tile_box=pti.tilebox(); +tile_box.grow(mf_pointer->nGrow()); +local_rho.resize(tile_box,ncomp); +local_rho.setVal<RunOn::Host>(0.0); +rhoarr=local_rho.array(); +} +#endif + +if(particle_lvl_offset==0) +{ +AMREX_HOST_DEVICE_FOR_1D(np,i, +{ +autop=make_particle<ParticleType>{}(ptd,i); +amrex_deposit_cic(p,ncomp,rhoarr,plo,dxi); +}); +} +else +{ +AMREX_HOST_DEVICE_FOR_1D(np,i, +{ +autop=make_particle<ParticleType>{}(ptd,i); +amrex_deposit_particle_dx_cic(p,ncomp,rhoarr,plo,dxi,pdxi); +}); +} + +#ifdefAMREX_USE_OMP +if(Gpu::notInLaunchRegion()) +{ +fab.atomicAdd<RunOn::Host>(local_rho,tile_box,tile_box,0,0,ncomp); +} +#endif } } -//Onlymultiplythefirstcomponentby(1/vol)becausethisconvertsmass -//todensity.Ifthereareadditionalcomponents(likevelocity),wedon't -//wanttodividethosebyvolume. -constReal*dx=Geom(lev).CellSize(); -constRealvol=AMREX_D_TERM(dx[0],*dx[1],*dx[2]); - -mf_pointer->mult(Real(1.0)/vol,0,1,mf_pointer->nGrow()); - -//Ifmf_to_be_filledisnotdefinedontheparticle_box_array,thenweneed -//tocopyherefrommf_pointerintomf_to_be_filled. -if(mf_pointer!=&mf_to_be_filled) -{ -mf_to_be_filled.ParallelCopy(*mf_pointer,0,0,ncomp,0,0); -deletemf_pointer; -} - -if(m_verbose>1) -{ -autostoptime=amrex::second()-strttime; - -ParallelReduce::Max(stoptime,ParallelContext::IOProcessorNumberSub(), -ParallelContext::CommunicatorSub()); - -amrex::Print()<<"ParticleContainer::AssignCellDensitySingleLevel)time:" -<<stoptime<<'\n'; -} -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: -ResizeRuntimeRealComp(intnew_size,boolcommunicate) -{ -intold_size=m_num_runtime_real; +mf_pointer->SumBoundary(Geom(lev).periodicity()); + +//Ifncomp>1,firstdividethemomenta(componentn) +//bythemass(component0)inordertogetvelocities. +//Becarefulnottodividebyzero. +for(intn=1;n<ncomp;n++) +{ +for(MFItermfi(*mf_pointer);mfi.isValid();++mfi) +{ +(*mf_pointer)[mfi].protected_divide<RunOn::Device>((*mf_pointer)[mfi],0,n,1); +} +} + +//Onlymultiplythefirstcomponentby(1/vol)becausethisconvertsmass +//todensity.Ifthereareadditionalcomponents(likevelocity),wedon't +//wanttodividethosebyvolume. +constReal*dx=Geom(lev).CellSize(); +constRealvol=AMREX_D_TERM(dx[0],*dx[1],*dx[2]); + +mf_pointer->mult(Real(1.0)/vol,0,1,mf_pointer->nGrow()); + +//Ifmf_to_be_filledisnotdefinedontheparticle_box_array,thenweneed +//tocopyherefrommf_pointerintomf_to_be_filled. +if(mf_pointer!=&mf_to_be_filled) +{ +mf_to_be_filled.ParallelCopy(*mf_pointer,0,0,ncomp,0,0); +deletemf_pointer; +} + +if(m_verbose>1) +{ +autostoptime=amrex::second()-strttime; + +ParallelReduce::Max(stoptime,ParallelContext::IOProcessorNumberSub(), +ParallelContext::CommunicatorSub()); -m_runtime_comps_defined=(new_size>0); -m_num_runtime_real=new_size; -intcur_size=h_redistribute_real_comp.size(); -h_redistribute_real_comp.resize(cur_size-old_size+new_size,communicate); -SetParticleSize(); - -for(intlev=0;lev<numLevels();++lev){ -for(ParIterTypepti(*this,lev);pti.isValid();++pti){ -auto&tile=DefineAndReturnParticleTile(lev,pti); -autonp=tile.numParticles(); -if(np>0&&new_size>old_size){ -auto&soa=tile.GetStructOfArrays(); -soa.resize(np); -} -} -} -} - -template<typenameParticleType,intNArrayReal,intNArrayInt, -template<class>classAllocator,classCellAssignor> -void -ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: -ResizeRuntimeIntComp(intnew_size,boolcommunicate) -{ -intold_size=m_num_runtime_int; - -m_runtime_comps_defined=(new_size>0); -m_num_runtime_int=new_size; -intcur_size=h_redistribute_int_comp.size(); -h_redistribute_int_comp.resize(cur_size-old_size+new_size,communicate); -SetParticleSize(); - -for(intlev=0;lev<numLevels();++lev){ -for(ParIterTypepti(*this,lev);pti.isValid();++pti){ -auto&tile=DefineAndReturnParticleTile(lev,pti); -autonp=tile.numParticles(); -if(np>0&&new_size>old_size){ -auto&soa=tile.GetStructOfArrays(); -soa.resize(np); -} -} -} -} +amrex::Print()<<"ParticleContainer::AssignCellDensitySingleLevel)time:" +<<stoptime<<'\n'; +} +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: +ResizeRuntimeRealComp(intnew_size,boolcommunicate) +{ +intold_size=m_num_runtime_real; + +m_runtime_comps_defined=(new_size>0); +m_num_runtime_real=new_size; +intcur_size=h_redistribute_real_comp.size(); +h_redistribute_real_comp.resize(cur_size-old_size+new_size,communicate); +SetParticleSize(); + +for(intlev=0;lev<numLevels();++lev){ +for(ParIterTypepti(*this,lev);pti.isValid();++pti){ +auto&tile=DefineAndReturnParticleTile(lev,pti); +autonp=tile.numParticles(); +if(np>0&&new_size>old_size){ +auto&soa=tile.GetStructOfArrays(); +soa.resize(np); +} +} +} +} + +template<typenameParticleType,intNArrayReal,intNArrayInt, +template<class>classAllocator,classCellAssignor> +void +ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>:: +ResizeRuntimeIntComp(intnew_size,boolcommunicate) +{ +intold_size=m_num_runtime_int; + +m_runtime_comps_defined=(new_size>0); +m_num_runtime_int=new_size; +intcur_size=h_redistribute_int_comp.size(); +h_redistribute_int_comp.resize(cur_size-old_size+new_size,communicate); +SetParticleSize(); + +for(intlev=0;lev<numLevels();++lev){ +for(ParIterTypepti(*this,lev);pti.isValid();++pti){ +auto&tile=DefineAndReturnParticleTile(lev,pti); +autonp=tile.numParticles(); +if(np>0&&new_size>old_size){ +auto&soa=tile.GetStructOfArrays(); +soa.resize(np); +} +} +} +} diff --git a/amrex/docs_xml/doxygen/classamrex_1_1ParticleContainer__impl.xml b/amrex/docs_xml/doxygen/classamrex_1_1ParticleContainer__impl.xml index 7165a4ba90..e002b5e288 100644 --- a/amrex/docs_xml/doxygen/classamrex_1_1ParticleContainer__impl.xml +++ b/amrex/docs_xml/doxygen/classamrex_1_1ParticleContainer__impl.xml @@ -1788,7 +1788,7 @@ - + void @@ -1802,7 +1802,7 @@ - + void @@ -1821,7 +1821,7 @@ - + bool @@ -1877,7 +1877,7 @@ - + std::array< Long, 3 > @@ -4426,7 +4426,7 @@ - + @@ -4709,7 +4709,7 @@ - + void @@ -4747,7 +4747,7 @@ - + Long @@ -4906,7 +4906,7 @@ - + void @@ -4946,7 +4946,7 @@ - + @@ -5385,7 +5385,7 @@ - + @@ -5993,7 +5993,7 @@ - +