Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor ListEntry used in collision #4811

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions include/picongpu/particles/collision/InterCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ namespace picongpu::particles::collision

PMACC_SMEM(worker, nppc, memory::Array<uint32_t, numCellsPerSuperCell>);

PMACC_SMEM(worker, parCellList0, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList1, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList0, detail::ListEntry<T_ParBox0, numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList1, detail::ListEntry<T_ParBox1, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray0, memory::Array<float_X, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray1, memory::Array<float_X, numCellsPerSuperCell>);

Expand Down Expand Up @@ -187,8 +187,8 @@ namespace picongpu::particles::collision
superCellIdx,
densityArray0[linearIdx],
densityArray1[linearIdx],
parCellList0.template getParticlesAccessor<FramePtr0>(linearIdx),
parCellList1.template getParticlesAccessor<FramePtr1>(linearIdx),
parCellList0.getParticlesAccessor(linearIdx),
parCellList1.getParticlesAccessor(linearIdx),
linearIdx);
});

Expand Down
4 changes: 2 additions & 2 deletions include/picongpu/particles/collision/IntraCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace picongpu::particles::collision
constexpr uint32_t numCellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;

PMACC_SMEM(worker, nppc, memory::Array<uint32_t, numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList, detail::ListEntry<T_ParBox, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray, memory::Array<float_X, numCellsPerSuperCell>);

constexpr bool ifAverageLog = !std::is_same<T_SumCoulombLogBox, std::nullptr_t>::value;
Expand Down Expand Up @@ -148,7 +148,7 @@ namespace picongpu::particles::collision
auto collisionFunctorCtx = forEachCell(
[&](int32_t const idx)
{
auto parAccess = parCellList.template getParticlesAccessor<FramePtr>(idx);
auto parAccess = parCellList.getParticlesAccessor(idx);
uint32_t const sizeAll = parAccess.size();
uint32_t potentialPartners = sizeAll - 1u + sizeAll % 2u;
auto collisionFunctor = srcCollisionFunctor(
Expand Down
123 changes: 54 additions & 69 deletions include/picongpu/particles/collision/detail/ListEntry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,52 +32,12 @@ namespace picongpu::particles::collision
{
namespace detail
{
/** Access all particles in a cell.
*
* An accessor provides access to all particles within a cell (NOT supercell).
*
* @tparam T_FramePtrType frame pointer type
*/
template<typename T_FramePtrType>
struct ParticleAccessor
{
T_FramePtrType* m_framePtrList = nullptr;
uint32_t* m_parIdxList = nullptr;
uint32_t m_numPars = 0u;

static constexpr uint32_t frameSize = T_FramePtrType::type::frameSize;

DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, T_FramePtrType* framePtrList)
: m_framePtrList(framePtrList)
, m_parIdxList(parIdxList)
, m_numPars(numParticles)
{
}

/** Number of particles within the cell
*/
DINLINE uint32_t size() const
{
return m_numPars;
}

/** get particle
*
* @param idx index of the particle, range [0;size())
*/
DINLINE auto operator[](uint32_t idx) const
{
const uint32_t inSuperCellIdx = m_parIdxList[idx];
return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize];
}
};

/* Storage for particle.
*
* Storage for particles of collision eligible macro particles. It comes with a simple
* shuffling algorithm.
*/
template<uint32_t T_numElem>
template<typename T_ParticlesBox, uint32_t T_numElem>
struct ListEntry
{
private:
Expand All @@ -87,24 +47,58 @@ namespace picongpu::particles::collision
//! number of particles per cell
memory::Array<uint32_t, T_numElem> numParticles;

/** Frame pointer array.
using FramePtrType = typename T_ParticlesBox::FramePtr;

/**! pointer to an array of frames for the selected supercell index
*
* A Frame pointer contains only a pointer therefore storing data as void* is allowed (but not
* nice). This keeps the ListEntry signature equal for all species.
* Since ListEntry is designed to be located in shared memory the list located in global memory will be
* shared by all workers.
*/
void** framePtr = nullptr;
FramePtrType* framePtr = nullptr;

public:
DINLINE uint32_t& size(uint32_t cellIdx)
/** Access all particles in a cell.
*
* An accessor provides access to all particles within a cell (NOT supercell).
*
*/

struct ParticleAccessor
{
return numParticles[cellIdx];
}
FramePtrType* m_framePtrList = nullptr;
uint32_t* m_parIdxList = nullptr;
uint32_t m_numPars = 0u;

static constexpr uint32_t frameSize = FramePtrType::type::frameSize;

DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, FramePtrType* framePtrList)
: m_framePtrList(framePtrList)
, m_parIdxList(parIdxList)
, m_numPars(numParticles)
{
}

/** Number of particles within the cell
*/
DINLINE uint32_t size() const
{
return m_numPars;
}

/** get particle
*
* @param idx index of the particle, range [0;size())
*/
DINLINE auto operator[](uint32_t idx) const
{
const uint32_t inSuperCellIdx = m_parIdxList[idx];
return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it guaranteed that the division inSuperCellIdx / frameSize does not yield a floating point number? (A comment may state that)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With both integers? I don't think so

Copy link
Member Author

@psychocoderHPC psychocoderHPC Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both types a integral types, they can not end in floating point numbers, that are defined by the language.
A comment is not useful because it will increase the code size without extra new knowledge.

}
};

template<typename T_FramePtrType>
DINLINE T_FramePtrType& frameData(uint32_t frameIdx)
DINLINE uint32_t& size(uint32_t cellIdx)
{
static_assert(sizeof(void*) == sizeof(T_FramePtrType));
return reinterpret_cast<T_FramePtrType*>(framePtr)[frameIdx];
return numParticles[cellIdx];
}

/** Get particle index array.
Expand All @@ -127,15 +121,11 @@ namespace picongpu::particles::collision
* @param superCellIdx supercell index, relative to the guard origin
* @param numParArray array with number of particles for each cell
*/
template<
typename T_Worker,
typename T_DeviceHeapHandle,
typename T_ParticlesBox,
typename T_NumParticlesArray>
template<typename T_Worker, typename T_DeviceHeapHandle, typename T_NumParticlesArray>
DINLINE void init(
T_Worker const& worker,
T_DeviceHeapHandle& deviceHeapHandle,
T_ParticlesBox pb,
T_ParticlesBox& pb,
DataSpace<simDim> superCellIdx,
T_NumParticlesArray& numParArray)
{
Expand All @@ -152,14 +142,14 @@ namespace picongpu::particles::collision

// Chunk size in bytes based on the typical initial number of frames within a supercell.
constexpr uint32_t frameListChunkSize = cellListChunkSize * framePtrBytes;
framePtr = (void**)
framePtr = (FramePtrType*)
allocMem<frameListChunkSize>(worker, numFrames * framePtrBytes, deviceHeapHandle);

auto frame = pb.getFirstFrame(superCellIdx);
uint32_t frameId = 0u;
while(frame.isValid())
{
frameData<decltype(frame)>(frameId) = frame;
framePtr[frameId] = frame;
frame = pb.getNextFrame(frame);
++frameId;
}
Expand Down Expand Up @@ -265,13 +255,9 @@ namespace picongpu::particles::collision
* @param cellIdx cell index within the supercell, range [0, number of cells in supercell)
* @return accessor to access particles via index
*/
template<typename T_FramePtrType>
DINLINE auto getParticlesAccessor(uint32_t cellIdx)
{
return ParticleAccessor<T_FramePtrType>(
particleIds(cellIdx),
size(cellIdx),
reinterpret_cast<T_FramePtrType*>(framePtr));
return ParticleAccessor(particleIds(cellIdx), size(cellIdx), framePtr);
}

private:
Expand Down Expand Up @@ -375,20 +361,19 @@ namespace picongpu::particles::collision
template<
typename T_Worker,
typename T_ForEachCell,
typename T_ParticlesBox,
typename T_DeviceHeapHandle,
typename T_Array,
typename T_Filter,
typename T_ParticlesBox,
uint32_t T_numListEntryCells>
DINLINE void prepareList(
T_Worker const& worker,
T_ForEachCell forEachCell,
T_ParticlesBox pb,
T_ParticlesBox& pb,
DataSpace<simDim> superCellIdx,
T_DeviceHeapHandle deviceHeapHandle,
ListEntry<T_numListEntryCells>& listEntrys,
ListEntry<T_ParticlesBox, T_numListEntryCells>& listEntrys,
T_Array& nppc,

T_Filter filter)
{
// Initialize nppc with zeros.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace picongpu::particles::collision::detail
forEachCell(
[&](uint32_t const linearIdx)
{
auto parAccess = parCellList.template getParticlesAccessor<T_FramePtr>(linearIdx);
auto parAccess = parCellList.getParticlesAccessor(linearIdx);
uint32_t const numParInCell = parAccess.size();
float_X density(0.0);
for(uint32_t partIdx = 0; partIdx < numParInCell; partIdx++)
Expand Down