From 0dd9aac9d8d994f9adc14e9d1fec942ef2d4d45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Widera?= Date: Thu, 8 Feb 2024 11:50:51 +0100 Subject: [PATCH] fix collision particle invalid memory Fix changes introduced in #4745 to avoid invalid memory access. Avoid using void pointer as place holder for the later used frame list. --- .../particles/collision/InterCollision.hpp | 8 +- .../particles/collision/IntraCollision.hpp | 4 +- .../particles/collision/detail/ListEntry.hpp | 123 ++++++++---------- .../collision/detail/cellDensity.hpp | 2 +- 4 files changed, 61 insertions(+), 76 deletions(-) diff --git a/include/picongpu/particles/collision/InterCollision.hpp b/include/picongpu/particles/collision/InterCollision.hpp index d91f9fda36..8d6e321103 100644 --- a/include/picongpu/particles/collision/InterCollision.hpp +++ b/include/picongpu/particles/collision/InterCollision.hpp @@ -127,8 +127,8 @@ namespace picongpu::particles::collision PMACC_SMEM(worker, nppc, memory::Array); - PMACC_SMEM(worker, parCellList0, detail::ListEntry); - PMACC_SMEM(worker, parCellList1, detail::ListEntry); + PMACC_SMEM(worker, parCellList0, detail::ListEntry); + PMACC_SMEM(worker, parCellList1, detail::ListEntry); PMACC_SMEM(worker, densityArray0, memory::Array); PMACC_SMEM(worker, densityArray1, memory::Array); @@ -187,8 +187,8 @@ namespace picongpu::particles::collision superCellIdx, densityArray0[linearIdx], densityArray1[linearIdx], - parCellList0.template getParticlesAccessor(linearIdx), - parCellList1.template getParticlesAccessor(linearIdx), + parCellList0.getParticlesAccessor(linearIdx), + parCellList1.getParticlesAccessor(linearIdx), linearIdx); }); diff --git a/include/picongpu/particles/collision/IntraCollision.hpp b/include/picongpu/particles/collision/IntraCollision.hpp index 86e2c539be..5898343bd2 100644 --- a/include/picongpu/particles/collision/IntraCollision.hpp +++ b/include/picongpu/particles/collision/IntraCollision.hpp @@ -105,7 +105,7 @@ namespace picongpu::particles::collision constexpr uint32_t numCellsPerSuperCell = pmacc::math::CT::volume::type::value; PMACC_SMEM(worker, nppc, memory::Array); - PMACC_SMEM(worker, parCellList, detail::ListEntry); + PMACC_SMEM(worker, parCellList, detail::ListEntry); PMACC_SMEM(worker, densityArray, memory::Array); constexpr bool ifAverageLog = !std::is_same::value; @@ -148,7 +148,7 @@ namespace picongpu::particles::collision auto collisionFunctorCtx = forEachCell( [&](int32_t const idx) { - auto parAccess = parCellList.template getParticlesAccessor(idx); + auto parAccess = parCellList.getParticlesAccessor(idx); uint32_t const sizeAll = parAccess.size(); uint32_t potentialPartners = sizeAll - 1u + sizeAll % 2u; auto collisionFunctor = srcCollisionFunctor( diff --git a/include/picongpu/particles/collision/detail/ListEntry.hpp b/include/picongpu/particles/collision/detail/ListEntry.hpp index 012d45bb7c..5fa194ddbb 100644 --- a/include/picongpu/particles/collision/detail/ListEntry.hpp +++ b/include/picongpu/particles/collision/detail/ListEntry.hpp @@ -32,52 +32,12 @@ namespace picongpu::particles::collision { namespace detail { - /** Access all particles in a cell. - * - * An accessor provides access to all particles within a cell (NOT supercell). - * - * @tparam T_FramePtrType frame pointer type - */ - template - struct ParticleAccessor - { - T_FramePtrType* m_framePtrList = nullptr; - uint32_t* m_parIdxList = nullptr; - uint32_t m_numPars = 0u; - - static constexpr uint32_t frameSize = T_FramePtrType::type::frameSize; - - DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, T_FramePtrType* framePtrList) - : m_framePtrList(framePtrList) - , m_parIdxList(parIdxList) - , m_numPars(numParticles) - { - } - - /** Number of particles within the cell - */ - DINLINE uint32_t size() const - { - return m_numPars; - } - - /** get particle - * - * @param idx index of the particle, range [0;size()) - */ - DINLINE auto operator[](uint32_t idx) const - { - const uint32_t inSuperCellIdx = m_parIdxList[idx]; - return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize]; - } - }; - /* Storage for particle. * * Storage for particles of collision eligible macro particles. It comes with a simple * shuffling algorithm. */ - template + template struct ListEntry { private: @@ -87,24 +47,58 @@ namespace picongpu::particles::collision //! number of particles per cell memory::Array numParticles; - /** Frame pointer array. + using FramePtrType = typename T_ParticlesBox::FramePtr; + + /**! pointer to an array of frames for the selected supercell index * - * A Frame pointer contains only a pointer therefore storing data as void* is allowed (but not - * nice). This keeps the ListEntry signature equal for all species. + * Since ListEntry is designed to be located in shared memory the list located in global memory will be + * shared by all workers. */ - void** framePtr = nullptr; + FramePtrType* framePtr = nullptr; public: - DINLINE uint32_t& size(uint32_t cellIdx) + /** Access all particles in a cell. + * + * An accessor provides access to all particles within a cell (NOT supercell). + * + */ + + struct ParticleAccessor { - return numParticles[cellIdx]; - } + FramePtrType* m_framePtrList = nullptr; + uint32_t* m_parIdxList = nullptr; + uint32_t m_numPars = 0u; + + static constexpr uint32_t frameSize = FramePtrType::type::frameSize; + + DINLINE ParticleAccessor(uint32_t* parIdxList, uint32_t const numParticles, FramePtrType* framePtrList) + : m_framePtrList(framePtrList) + , m_parIdxList(parIdxList) + , m_numPars(numParticles) + { + } + + /** Number of particles within the cell + */ + DINLINE uint32_t size() const + { + return m_numPars; + } + + /** get particle + * + * @param idx index of the particle, range [0;size()) + */ + DINLINE auto operator[](uint32_t idx) const + { + const uint32_t inSuperCellIdx = m_parIdxList[idx]; + return m_framePtrList[inSuperCellIdx / frameSize][inSuperCellIdx % frameSize]; + } + }; - template - DINLINE T_FramePtrType& frameData(uint32_t frameIdx) + DINLINE uint32_t& size(uint32_t cellIdx) { - static_assert(sizeof(void*) == sizeof(T_FramePtrType)); - return reinterpret_cast(framePtr)[frameIdx]; + return numParticles[cellIdx]; } /** Get particle index array. @@ -127,15 +121,11 @@ namespace picongpu::particles::collision * @param superCellIdx supercell index, relative to the guard origin * @param numParArray array with number of particles for each cell */ - template< - typename T_Worker, - typename T_DeviceHeapHandle, - typename T_ParticlesBox, - typename T_NumParticlesArray> + template DINLINE void init( T_Worker const& worker, T_DeviceHeapHandle& deviceHeapHandle, - T_ParticlesBox pb, + T_ParticlesBox& pb, DataSpace superCellIdx, T_NumParticlesArray& numParArray) { @@ -152,14 +142,14 @@ namespace picongpu::particles::collision // Chunk size in bytes based on the typical initial number of frames within a supercell. constexpr uint32_t frameListChunkSize = cellListChunkSize * framePtrBytes; - framePtr = (void**) + framePtr = (FramePtrType*) allocMem(worker, numFrames * framePtrBytes, deviceHeapHandle); auto frame = pb.getFirstFrame(superCellIdx); uint32_t frameId = 0u; while(frame.isValid()) { - frameData(frameId) = frame; + framePtr[frameId] = frame; frame = pb.getNextFrame(frame); ++frameId; } @@ -265,13 +255,9 @@ namespace picongpu::particles::collision * @param cellIdx cell index within the supercell, range [0, number of cells in supercell) * @return accessor to access particles via index */ - template DINLINE auto getParticlesAccessor(uint32_t cellIdx) { - return ParticleAccessor( - particleIds(cellIdx), - size(cellIdx), - reinterpret_cast(framePtr)); + return ParticleAccessor(particleIds(cellIdx), size(cellIdx), framePtr); } private: @@ -375,20 +361,19 @@ namespace picongpu::particles::collision template< typename T_Worker, typename T_ForEachCell, - typename T_ParticlesBox, typename T_DeviceHeapHandle, typename T_Array, typename T_Filter, + typename T_ParticlesBox, uint32_t T_numListEntryCells> DINLINE void prepareList( T_Worker const& worker, T_ForEachCell forEachCell, - T_ParticlesBox pb, + T_ParticlesBox& pb, DataSpace superCellIdx, T_DeviceHeapHandle deviceHeapHandle, - ListEntry& listEntrys, + ListEntry& listEntrys, T_Array& nppc, - T_Filter filter) { // Initialize nppc with zeros. diff --git a/include/picongpu/particles/collision/detail/cellDensity.hpp b/include/picongpu/particles/collision/detail/cellDensity.hpp index 8ceea4e60a..61c3fc54c7 100644 --- a/include/picongpu/particles/collision/detail/cellDensity.hpp +++ b/include/picongpu/particles/collision/detail/cellDensity.hpp @@ -42,7 +42,7 @@ namespace picongpu::particles::collision::detail forEachCell( [&](uint32_t const linearIdx) { - auto parAccess = parCellList.template getParticlesAccessor(linearIdx); + auto parAccess = parCellList.getParticlesAccessor(linearIdx); uint32_t const numParInCell = parAccess.size(); float_X density(0.0); for(uint32_t partIdx = 0; partIdx < numParInCell; partIdx++)