Skip to content

Commit

Permalink
fix collision particle invalid memory
Browse files Browse the repository at this point in the history
Fix changes introduced in #4745 to avoid invalid memory access.
Avoid using void pointer as place holder for the later used frame list.
  • Loading branch information
psychocoderHPC committed Feb 23, 2024
1 parent 52f3e38 commit 0dd9aac
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 76 deletions.
8 changes: 4 additions & 4 deletions include/picongpu/particles/collision/InterCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ namespace picongpu::particles::collision

PMACC_SMEM(worker, nppc, memory::Array<uint32_t, numCellsPerSuperCell>);

PMACC_SMEM(worker, parCellList0, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList1, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList0, detail::ListEntry<T_ParBox0, numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList1, detail::ListEntry<T_ParBox1, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray0, memory::Array<float_X, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray1, memory::Array<float_X, numCellsPerSuperCell>);

Expand Down Expand Up @@ -187,8 +187,8 @@ namespace picongpu::particles::collision
superCellIdx,
densityArray0[linearIdx],
densityArray1[linearIdx],
parCellList0.template getParticlesAccessor<FramePtr0>(linearIdx),
parCellList1.template getParticlesAccessor<FramePtr1>(linearIdx),
parCellList0.getParticlesAccessor(linearIdx),
parCellList1.getParticlesAccessor(linearIdx),
linearIdx);
});

Expand Down
4 changes: 2 additions & 2 deletions include/picongpu/particles/collision/IntraCollision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace picongpu::particles::collision
constexpr uint32_t numCellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;

PMACC_SMEM(worker, nppc, memory::Array<uint32_t, numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList, detail::ListEntry<numCellsPerSuperCell>);
PMACC_SMEM(worker, parCellList, detail::ListEntry<T_ParBox, numCellsPerSuperCell>);
PMACC_SMEM(worker, densityArray, memory::Array<float_X, numCellsPerSuperCell>);

constexpr bool ifAverageLog = !std::is_same<T_SumCoulombLogBox, std::nullptr_t>::value;
Expand Down Expand Up @@ -148,7 +148,7 @@ namespace picongpu::particles::collision
auto collisionFunctorCtx = forEachCell(
[&](int32_t const idx)
{
auto parAccess = parCellList.template getParticlesAccessor<FramePtr>(idx);
auto parAccess = parCellList.getParticlesAccessor(idx);
uint32_t const sizeAll = parAccess.size();
uint32_t potentialPartners = sizeAll - 1u + sizeAll % 2u;
auto collisionFunctor = srcCollisionFunctor(
Expand Down
123 changes: 54 additions & 69 deletions include/picongpu/particles/collision/detail/ListEntry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,52 +32,12 @@ namespace picongpu::particles::collision
{
namespace detail
{
/** Access all particles in a cell.
*
* An accessor provides access to all particles within a cell (NOT supercell).
*
* @tparam T_FramePtrType frame pointer type
*/
template<typename T_FramePtrType>
struct ParticleAccessor
{
T_FramePtrType* m_framePtrList = nullptr;
uint32_t* m_parIdxList = nullptr;
uint32_t m_numPars = 0u;

static constexpr uint32_t frameSize = T_FramePtrType::type::frameSize;

DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, T_FramePtrType* framePtrList)
: m_framePtrList(framePtrList)
, m_parIdxList(parIdxList)
, m_numPars(numParticles)
{
}

/** Number of particles within the cell
*/
DINLINE uint32_t size() const
{
return m_numPars;
}

/** get particle
*
* @param idx index of the particle, range [0;size())
*/
DINLINE auto operator[](uint32_t idx) const
{
const uint32_t inSuperCellIdx = m_parIdxList[idx];
return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize];
}
};

/* Storage for particle.
*
* Storage for particles of collision eligible macro particles. It comes with a simple
* shuffling algorithm.
*/
template<uint32_t T_numElem>
template<typename T_ParticlesBox, uint32_t T_numElem>
struct ListEntry
{
private:
Expand All @@ -87,24 +47,58 @@ namespace picongpu::particles::collision
//! number of particles per cell
memory::Array<uint32_t, T_numElem> numParticles;

/** Frame pointer array.
using FramePtrType = typename T_ParticlesBox::FramePtr;

/**! pointer to an array of frames for the selected supercell index
*
* A Frame pointer contains only a pointer therefore storing data as void* is allowed (but not
* nice). This keeps the ListEntry signature equal for all species.
* Since ListEntry is designed to be located in shared memory the list located in global memory will be
* shared by all workers.
*/
void** framePtr = nullptr;
FramePtrType* framePtr = nullptr;

public:
DINLINE uint32_t& size(uint32_t cellIdx)
/** Access all particles in a cell.
*
* An accessor provides access to all particles within a cell (NOT supercell).
*
*/

struct ParticleAccessor
{
return numParticles[cellIdx];
}
FramePtrType* m_framePtrList = nullptr;
uint32_t* m_parIdxList = nullptr;
uint32_t m_numPars = 0u;

static constexpr uint32_t frameSize = FramePtrType::type::frameSize;

DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, FramePtrType* framePtrList)
: m_framePtrList(framePtrList)
, m_parIdxList(parIdxList)
, m_numPars(numParticles)
{
}

/** Number of particles within the cell
*/
DINLINE uint32_t size() const
{
return m_numPars;
}

/** get particle
*
* @param idx index of the particle, range [0;size())
*/
DINLINE auto operator[](uint32_t idx) const
{
const uint32_t inSuperCellIdx = m_parIdxList[idx];
return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize];
}
};

template<typename T_FramePtrType>
DINLINE T_FramePtrType& frameData(uint32_t frameIdx)
DINLINE uint32_t& size(uint32_t cellIdx)
{
static_assert(sizeof(void*) == sizeof(T_FramePtrType));
return reinterpret_cast<T_FramePtrType*>(framePtr)[frameIdx];
return numParticles[cellIdx];
}

/** Get particle index array.
Expand All @@ -127,15 +121,11 @@ namespace picongpu::particles::collision
* @param superCellIdx supercell index, relative to the guard origin
* @param numParArray array with number of particles for each cell
*/
template<
typename T_Worker,
typename T_DeviceHeapHandle,
typename T_ParticlesBox,
typename T_NumParticlesArray>
template<typename T_Worker, typename T_DeviceHeapHandle, typename T_NumParticlesArray>
DINLINE void init(
T_Worker const& worker,
T_DeviceHeapHandle& deviceHeapHandle,
T_ParticlesBox pb,
T_ParticlesBox& pb,
DataSpace<simDim> superCellIdx,
T_NumParticlesArray& numParArray)
{
Expand All @@ -152,14 +142,14 @@ namespace picongpu::particles::collision

// Chunk size in bytes based on the typical initial number of frames within a supercell.
constexpr uint32_t frameListChunkSize = cellListChunkSize * framePtrBytes;
framePtr = (void**)
framePtr = (FramePtrType*)
allocMem<frameListChunkSize>(worker, numFrames * framePtrBytes, deviceHeapHandle);

auto frame = pb.getFirstFrame(superCellIdx);
uint32_t frameId = 0u;
while(frame.isValid())
{
frameData<decltype(frame)>(frameId) = frame;
framePtr[frameId] = frame;
frame = pb.getNextFrame(frame);
++frameId;
}
Expand Down Expand Up @@ -265,13 +255,9 @@ namespace picongpu::particles::collision
* @param cellIdx cell index within the supercell, range [0, number of cells in supercell)
* @return accessor to access particles via index
*/
template<typename T_FramePtrType>
DINLINE auto getParticlesAccessor(uint32_t cellIdx)
{
return ParticleAccessor<T_FramePtrType>(
particleIds(cellIdx),
size(cellIdx),
reinterpret_cast<T_FramePtrType*>(framePtr));
return ParticleAccessor(particleIds(cellIdx), size(cellIdx), framePtr);
}

private:
Expand Down Expand Up @@ -375,20 +361,19 @@ namespace picongpu::particles::collision
template<
typename T_Worker,
typename T_ForEachCell,
typename T_ParticlesBox,
typename T_DeviceHeapHandle,
typename T_Array,
typename T_Filter,
typename T_ParticlesBox,
uint32_t T_numListEntryCells>
DINLINE void prepareList(
T_Worker const& worker,
T_ForEachCell forEachCell,
T_ParticlesBox pb,
T_ParticlesBox& pb,
DataSpace<simDim> superCellIdx,
T_DeviceHeapHandle deviceHeapHandle,
ListEntry<T_numListEntryCells>& listEntrys,
ListEntry<T_ParticlesBox, T_numListEntryCells>& listEntrys,
T_Array& nppc,

T_Filter filter)
{
// Initialize nppc with zeros.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace picongpu::particles::collision::detail
forEachCell(
[&](uint32_t const linearIdx)
{
auto parAccess = parCellList.template getParticlesAccessor<T_FramePtr>(linearIdx);
auto parAccess = parCellList.getParticlesAccessor(linearIdx);
uint32_t const numParInCell = parAccess.size();
float_X density(0.0);
for(uint32_t partIdx = 0; partIdx < numParInCell; partIdx++)
Expand Down

0 comments on commit 0dd9aac

Please sign in to comment.