Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pure SoA Particle: Separate Array for IdCPU #3585

Merged
77 changes: 58 additions & 19 deletions Src/Particle/AMReX_ParticleContainerI.H
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
if (h_redistribute_int_comp[i]) {++num_int_comm_comps;}
}

if constexpr(!ParticleType::is_soa_particle) {
particle_size = sizeof(ParticleType);
if constexpr (ParticleType::is_soa_particle) {
particle_size = sizeof(uint64_t); // idcpu
} else {
particle_size = 0;
particle_size = sizeof(ParticleType);
}
superparticle_size = particle_size +
num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int);
Expand Down Expand Up @@ -1095,7 +1095,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
const size_t np_total = np + ptile.numNeighborParticles();

if (memEfficientSort) {
if constexpr(!ParticleType::is_soa_particle) {
if constexpr (!ParticleType::is_soa_particle) {
static_assert(sizeof(ParticleType)%4 == 0 && sizeof(uint32_t) == 4);
using tmp_t = std::conditional_t<sizeof(ParticleType)%8 == 0,
uint64_t, uint32_t>;
Expand Down Expand Up @@ -1530,7 +1530,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
unsigned npart = ptile_ptrs[pmap_it]->numParticles();
ParticleLocData pld;

if constexpr(!ParticleType::is_soa_particle){
if constexpr (!ParticleType::is_soa_particle){

if (npart != 0) {
Long last = npart - 1;
Expand Down Expand Up @@ -1647,7 +1647,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
}
}

} else{ // soa particle
} else { // soa particle

auto particle_tile = ptile_ptrs[pmap_it];
if (npart != 0) {
Expand All @@ -1663,6 +1663,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
}

if (p.id() < 0){
soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
for (int comp = 0; comp < NumRealComps(); comp++) {
soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
}
Expand All @@ -1679,6 +1680,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
particlePostLocate(p, pld, lev);

if (p.id() < 0) {
soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
for (int comp = 0; comp < NumRealComps(); comp++) {
soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
}
Expand All @@ -1696,6 +1698,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
// We own it but must shift it to another place.
auto index = std::make_pair(pld.m_grid, pld.m_tile);
AMREX_ASSERT(soa_local[pld.m_lev][index].size() == num_threads);
{
auto& arr = soa_local[pld.m_lev][index][thread_num].GetIdCPUData();
arr.push_back(soa.GetIdCPUData()[pindex]);
}
for (int comp = 0; comp < NumRealComps(); ++comp) {
RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp);
arr.push_back(soa.GetRealData(comp)[pindex]);
Expand All @@ -1715,6 +1721,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
particles_to_send.resize(new_size);

char* dst = &particles_to_send[old_size];
{
std::memcpy(dst, &soa.GetIdCPUData()[pindex], sizeof(uint64_t));
dst += sizeof(uint64_t);
}
int array_comp_start = AMREX_SPACEDIM + NStructReal;
for (int comp = 0; comp < NumRealComps(); comp++) {
if (h_redistribute_real_comp[array_comp_start + comp]) {
Expand All @@ -1733,6 +1743,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
}

if (p.id() < 0){
soa.GetIdCPUData()[pindex] = soa.GetIdCPUData()[last];
for (int comp = 0; comp < NumRealComps(); comp++) {
soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
}
Expand All @@ -1747,6 +1758,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
++pindex;
}

{
auto& iddata = soa.GetIdCPUData();
iddata.erase(iddata.begin() + last + 1, iddata.begin() + npart);
}
for (int comp = 0; comp < NumRealComps(); comp++) {
RealVector& rdata = soa.GetRealData(comp);
rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
Expand Down Expand Up @@ -1828,6 +1843,12 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
auto& soa = ptile.GetStructOfArrays();
auto& soa_tmp = soa_local[lev][index];
for (int i = 0; i < num_threads; ++i) {
{
auto& arr = soa.GetIdCPUData();
auto& tmp = soa_tmp[i].GetIdCPUData();
arr.insert(arr.end(), tmp.begin(), tmp.end());
tmp.erase(tmp.begin(), tmp.end());
}
for (int comp = 0; comp < NumRealComps(); ++comp) {
RealVector& arr = soa.GetRealData(comp);
RealVector& tmp = soa_tmp[i].GetRealData(comp);
Expand Down Expand Up @@ -2045,20 +2066,16 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,

Particle<NStructReal, NStructInt> p;

if constexpr (!ParticleType::is_soa_particle) {
std::memcpy(&p, pbuf, sizeof(ParticleType));
} else {
if constexpr (ParticleType::is_soa_particle) {
std::memcpy(&p.m_idcpu, pbuf, sizeof(uint64_t));

ParticleReal pos[AMREX_SPACEDIM];
std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
std::memcpy(&pos[0], pbuf + sizeof(uint64_t), AMREX_SPACEDIM*sizeof(ParticleReal));
AMREX_D_TERM(p.pos(0) = pos[0];,
p.pos(1) = pos[1];,
p.pos(2) = pos[2]);

int idcpu[2];
std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int));

p.id() = idcpu[0];
p.cpu() = idcpu[1];
} else {
std::memcpy(&p, pbuf, sizeof(ParticleType));
}

bool success = Where(p, pld, lev_min, lev_max, 0);
Expand Down Expand Up @@ -2097,7 +2114,12 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
rcv_tile[ipart])];
char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;

if constexpr(! ParticleType::is_soa_particle) {
if constexpr (ParticleType::is_soa_particle) {
uint64_t idcpudata;
std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
pbuf += sizeof(uint64_t);
ptile.GetStructOfArrays().GetIdCPUData().push_back(idcpudata);
} else {
ParticleType p;
std::memcpy(&p, pbuf, sizeof(ParticleType));
pbuf += sizeof(ParticleType);
Expand Down Expand Up @@ -2146,6 +2168,10 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
host_int_attribs.reserve(15);
host_int_attribs.resize(finestLevel()+1);

Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
host_idcpu.reserve(15);
host_idcpu.resize(finestLevel()+1);

ipart = 0;
for (int i = 0; i < nrcvs; ++i)
{
Expand All @@ -2159,7 +2185,15 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,

char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;

if constexpr(! ParticleType::is_soa_particle) {
host_real_attribs[lev][ind].resize(NumRealComps());
host_int_attribs[lev][ind].resize(NumIntComps());

if constexpr (ParticleType::is_soa_particle) {
uint64_t idcpudata;
std::memcpy(&idcpudata, pbuf, sizeof(uint64_t));
pbuf += sizeof(uint64_t);
host_idcpu[lev][ind].push_back(idcpudata);
} else {
ParticleType p;
std::memcpy(&p, pbuf, sizeof(ParticleType));
pbuf += sizeof(ParticleType);
Expand Down Expand Up @@ -2210,7 +2244,12 @@ RedistributeMPI (std::map<int, Vector<char> >& not_ours,
auto new_size = old_size + src_tile.size();
dst_tile.resize(new_size);

if constexpr(! ParticleType::is_soa_particle) {
if constexpr (ParticleType::is_soa_particle) {
Gpu::copyAsync(Gpu::hostToDevice,
host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
} else {
Gpu::copyAsync(Gpu::hostToDevice,
src_tile.begin(), src_tile.end(),
dst_tile.GetArrayOfStructs().begin() + old_size);
Expand Down
14 changes: 11 additions & 3 deletions Src/Particle/AMReX_ParticleIO.H
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,10 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
host_int_attribs.reserve(15);
host_int_attribs.resize(finest_level_in_file+1);

Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
host_idcpu.reserve(15);
host_idcpu.resize(finestLevel()+1);

for (int i = 0; i < cnt; i++) {
// note: for pure SoA particle layouts, we do write the id, cpu and positions as a struct
// for backwards compatibility with readers
Expand Down Expand Up @@ -1031,8 +1035,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
host_real_attribs[pld.m_lev][ind][j].push_back(ptemp.pos(j));
}

host_int_attribs[pld.m_lev][ind][0].push_back(ptemp.id());
host_int_attribs[pld.m_lev][ind][1].push_back(ptemp.cpu());
host_idcpu[pld.m_lev][ind].push_back(ptemp.m_idcpu);

// read all other SoA
// add the real...
Expand All @@ -1042,7 +1045,7 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
}

// ... and int array data
for (int icomp = 2; icomp < NumIntComps(); icomp++) {
for (int icomp = 0; icomp < NumIntComps(); icomp++) {
host_int_attribs[lev][ind][icomp].push_back(*iptr);
++iptr;
}
Expand Down Expand Up @@ -1071,6 +1074,11 @@ ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssig
{
Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
dst_tile.GetArrayOfStructs().begin() + old_size);
} else {
Gpu::copyAsync(Gpu::hostToDevice,
host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
}

for (int i = 0; i < NumRealComps(); ++i) { // NOLINT(readability-misleading-indentation)
Expand Down
28 changes: 24 additions & 4 deletions Src/Particle/AMReX_ParticleInit.H
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,10 @@ InitRandom (Long icount,
host_int_attribs.reserve(15);
host_int_attribs.resize(finestLevel()+1);

Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
host_idcpu.reserve(15);
host_idcpu.resize(finestLevel()+1);

for (Long j = 0; j < icount; j++)
{
Particle<0, 0> ptest;
Expand Down Expand Up @@ -1117,8 +1121,9 @@ InitRandom (Long icount,
host_real_attribs[pld.m_lev][ind][i].push_back(pos[j*AMREX_SPACEDIM+i]);
}

host_int_attribs[pld.m_lev][ind][0].push_back(ParticleType::NextID());
host_int_attribs[pld.m_lev][ind][1].push_back(MyProc);
host_idcpu[pld.m_lev][ind].push_back(0);
ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();
ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();

host_particles[pld.m_lev][ind];

Expand Down Expand Up @@ -1157,6 +1162,11 @@ InitRandom (Long icount,
{
Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
dst_tile.GetArrayOfStructs().begin() + old_size);
} else {
Gpu::copyAsync(Gpu::hostToDevice,
host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
}

for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)
Expand Down Expand Up @@ -1201,6 +1211,10 @@ InitRandom (Long icount,
host_int_attribs.reserve(15);
host_int_attribs.resize(finestLevel()+1);

Vector<std::map<std::pair<int, int>, Gpu::HostVector<uint64_t> > > host_idcpu;
host_idcpu.reserve(15);
host_idcpu.resize(finestLevel()+1);

for (Long icnt = 0; icnt < M; icnt++) {
Particle<0, 0> ptest;
for (int i = 0; i < AMREX_SPACEDIM; i++) {
Expand Down Expand Up @@ -1261,8 +1275,9 @@ InitRandom (Long icount,
host_real_attribs[pld.m_lev][ind][i].push_back(ptest.pos(i));
}

host_int_attribs[pld.m_lev][ind][0].push_back(ptest.id());
host_int_attribs[pld.m_lev][ind][1].push_back(ptest.cpu());
host_idcpu[pld.m_lev][ind].push_back(0);
ParticleIDWrapper(host_idcpu[pld.m_lev][ind].back()) = ParticleType::NextID();
ParticleCPUWrapper(host_idcpu[pld.m_lev][ind].back()) = ParallelDescriptor::MyProc();

host_particles[pld.m_lev][ind];

Expand Down Expand Up @@ -1300,6 +1315,11 @@ InitRandom (Long icount,
{
Gpu::copyAsync(Gpu::hostToDevice, src_tile.begin(), src_tile.end(),
dst_tile.GetArrayOfStructs().begin() + old_size);
} else {
Gpu::copyAsync(Gpu::hostToDevice,
host_idcpu[host_lev][std::make_pair(grid,tile)].begin(),
host_idcpu[host_lev][std::make_pair(grid,tile)].end(),
dst_tile.GetStructOfArrays().GetIdCPUData().begin() + old_size);
}

for (int i = 0; i < NArrayReal; ++i) { // NOLINT(readability-misleading-indentation)
Expand Down
Loading