Skip to content

Commit

Permalink
Use make_workdiv and uniform_elements
Browse files Browse the repository at this point in the history
  • Loading branch information
ariostas committed Dec 17, 2024
1 parent f912613 commit c2ea148
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 291 deletions.
23 changes: 0 additions & 23 deletions RecoTracker/LSTCore/interface/alpaka/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

using namespace ::lst;

Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));

// Build the work division for a kernel launch, overriding the requested
// grid/block extents where the backend configuration calls for it:
//  - when Platform is alpaka::PlatformCpu, the grid is collapsed to one block
//    in every dimension;
//  - when, in addition, TAcc matches alpaka::TagCpuSerial, the block is
//    collapsed to one thread in every dimension as well.
// elementsPerThreadArg is always forwarded unchanged.
template <typename Vec, typename TAcc = Acc<typename Vec::Dim>>
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv<typename Vec::Dim> createWorkDiv(const Vec& blocksPerGrid,
                                                                         const Vec& threadsPerBlock,
                                                                         const Vec& elementsPerThreadArg) {
  using Dim = typename Vec::Dim;

  Vec gridBlocks = blocksPerGrid;
  Vec blockThreads = threadsPerBlock;

  // CPU/host backends get special extents
  if constexpr (std::is_same_v<Platform, alpaka::PlatformCpu>) {
    const Vec unit = Vec::all(static_cast<Idx>(1));
    gridBlocks = unit;

    if constexpr (alpaka::accMatchesTags<TAcc, alpaka::TagCpuSerial>) {
      // Serial backend: a single thread per block too (probably redundant)
      blockThreads = unit;
    }
  }

  return WorkDiv<Dim>(gridBlocks, blockThreads, elementsPerThreadArg);
}

// The constants below are usually used in functions like alpaka::math::min(),
// expecting a reference (T const&) in the arguments. Hence,
// ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr.
Expand Down
11 changes: 4 additions & 7 deletions RecoTracker/LSTCore/src/alpaka/Hit.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h
#define RecoTracker_LSTCore_src_alpaka_Hit_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h"
Expand Down Expand Up @@ -83,10 +85,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
ModulesConst modules,
HitsRanges hitsRanges,
int nLowerModules) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) {
for (int lowerIndex : cms::alpakatools::uniform_elements(acc, nLowerModules)) {
uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex];
if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) {
hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0];
Expand Down Expand Up @@ -115,9 +114,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
{
auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map
auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) {
for (unsigned int ihit : cms::alpakatools::uniform_elements(acc, nHits)) {
float ihit_x = hits.xs()[ihit];
float ihit_y = hits.ys()[ihit];
float ihit_z = hits.zs()[ihit];
Expand Down
42 changes: 14 additions & 28 deletions RecoTracker/LSTCore/src/alpaka/Kernels.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
#define RecoTracker_LSTCore_src_alpaka_Kernels_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
Expand Down Expand Up @@ -145,20 +147,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
for (unsigned int lowmod : cms::alpakatools::uniform_elements_z(acc, modules.nLowerModules())) {
unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];

for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
for (unsigned int ix1 : cms::alpakatools::uniform_elements_y(acc, nQuintuplets_lowmod)) {
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
float eta1 = __H2F(quintuplets.eta()[ix]);
float phi1 = __H2F(quintuplets.phi()[ix]);
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);

for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
for (unsigned int jx1 : cms::alpakatools::uniform_elements_x(acc, ix1 + 1, nQuintuplets_lowmod)) {
unsigned int jx = quintupletModuleIndices_lowmod + jx1;

float eta2 = __H2F(quintuplets.eta()[jx]);
Expand Down Expand Up @@ -194,20 +193,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
lowmodIdx1 += gridThreadExtent[1]) {
for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y(acc, ranges.nEligibleT5Modules())) {
uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
if (nQuintuplets_lowmod1 == 0)
continue;

unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];

for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
lowmodIdx2 += gridThreadExtent[2]) {
for (unsigned int lowmodIdx2 :
cms::alpakatools::uniform_elements_x(acc, lowmodIdx1, ranges.nEligibleT5Modules())) {
uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
if (nQuintuplets_lowmod2 == 0)
Expand Down Expand Up @@ -274,11 +269,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
struct RemoveDupPixelTripletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, pixelTriplets.nPixelTriplets())) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, pixelTriplets.nPixelTriplets())) {
if (ix == jx)
continue;

Expand Down Expand Up @@ -308,13 +300,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
struct RemoveDupPixelQuintupletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
float score1 = __H2F(pixelQuintuplets.score()[ix]);
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
if (ix == jx)
continue;

Expand All @@ -339,16 +328,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
SegmentsOccupancyConst segmentsOccupancy,
SegmentsPixel segmentsPixel,
bool secondpass) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

int pixelModuleIndex = modules.nLowerModules();
unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];

if (nPixelSegments > n_max_pixel_segments_per_module)
nPixelSegments = n_max_pixel_segments_per_module;

for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) {
if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
continue;

Expand All @@ -360,7 +346,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
float eta_pix1 = segmentsPixel.eta()[ix];
float phi_pix1 = segmentsPixel.phi()[ix];

for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) {
float eta_pix2 = segmentsPixel.eta()[jx];
float phi_pix2 = segmentsPixel.phi()[jx];

Expand Down
Loading

0 comments on commit c2ea148

Please sign in to comment.