Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration PR followups: make_workdiv, uniform_elements, concrete kernel dimensions #141

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions RecoTracker/LSTCore/interface/HitsSoA.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ namespace lst {
SOA_COLUMN(ArrayIx2, hitRanges),
SOA_COLUMN(int, hitRangesLower),
SOA_COLUMN(int, hitRangesUpper),
SOA_COLUMN(int8_t, hitRangesnLower),
SOA_COLUMN(int8_t, hitRangesnUpper))
SOA_COLUMN(int16_t, hitRangesnLower),
SOA_COLUMN(int16_t, hitRangesnUpper))

using HitsSoA = HitsSoALayout<>;
using HitsRangesSoA = HitsRangesSoALayout<>;
Expand Down
28 changes: 1 addition & 27 deletions RecoTracker/LSTCore/interface/alpaka/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

using namespace ::lst;

// Compile-time Vec3D whose every component is 1 (built with Vec3D::all); by its name, the
// elements-per-thread extent used when constructing work divisions.
Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));

// Streams `warning` into an edm::LogWarning message with the category string "LST".
// The message is taken by value. The trailing `return;` has no effect in a void function.
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string warning) {
edm::LogWarning("LST") << warning;
return;
}

// Adjust grid and block sizes based on backend configuration.
//
// Builds a WorkDiv<typename Vec::Dim> from the requested blocks-per-grid, threads-per-block and
// elements-per-thread extents. The requested extents are used unchanged unless Platform is
// alpaka::PlatformCpu, in which case:
//   - the block extent is replaced by 1 in every dimension;
//   - if TAcc additionally matches alpaka::TagCpuSerial, the thread extent is replaced by 1
//     in every dimension as well.
// elementsPerThreadArg is always forwarded exactly as given.
// Platform and Idx are not declared in this block; they come from the enclosing scope.
// TAcc defaults to Acc<typename Vec::Dim>, so callers normally only supply the three extents.
template <typename Vec, typename TAcc = Acc<typename Vec::Dim>>
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv<typename Vec::Dim> createWorkDiv(const Vec& blocksPerGrid,
const Vec& threadsPerBlock,
const Vec& elementsPerThreadArg) {
// Start from the caller's request; the overrides below only apply to CPU platforms.
Vec adjustedBlocks = blocksPerGrid;
Vec adjustedThreads = threadsPerBlock;

// special overrides for CPU/host cases
if constexpr (std::is_same_v<Platform, alpaka::PlatformCpu>) {
adjustedBlocks = Vec::all(static_cast<Idx>(1));

if constexpr (alpaka::accMatchesTags<TAcc, alpaka::TagCpuSerial>) {
// Serial execution, set threads to 1 as well
adjustedThreads = Vec::all(static_cast<Idx>(1)); // probably redundant
}
}

return WorkDiv<typename Vec::Dim>(adjustedBlocks, adjustedThreads, elementsPerThreadArg);
}
// Streams `warning` into an edm::LogWarning message with the category string "LST".
// The message is taken by value.
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string warning) { edm::LogWarning("LST") << warning; }

// The constants below are usually used in functions like alpaka::math::min(),
// expecting a reference (T const&) in the arguments. Hence,
Expand Down
17 changes: 6 additions & 11 deletions RecoTracker/LSTCore/src/alpaka/Hit.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h
#define RecoTracker_LSTCore_src_alpaka_Hit_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h"
Expand Down Expand Up @@ -57,15 +59,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
}

struct ModuleRangesKernel {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
ModulesConst modules,
HitsRanges hitsRanges,
int nLowerModules) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) {
for (int lowerIndex : cms::alpakatools::uniform_elements(acc, nLowerModules)) {
uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex];
if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) {
hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0];
Expand All @@ -80,8 +78,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct HitLoopKernel {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
uint16_t Endcap, // Integer corresponding to endcap in module subdets
uint16_t TwoS, // Integer corresponding to TwoS in moduleType
unsigned int nModules, // Number of modules
Expand All @@ -94,9 +91,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
{
auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map
auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) {
for (unsigned int ihit : cms::alpakatools::uniform_elements(acc, nHits)) {
float ihit_x = hits.xs()[ihit];
float ihit_y = hits.ys()[ihit];
float ihit_z = hits.zs()[ihit];
Expand Down
57 changes: 19 additions & 38 deletions RecoTracker/LSTCore/src/alpaka/Kernels.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
#define RecoTracker_LSTCore_src_alpaka_Kernels_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
Expand Down Expand Up @@ -139,26 +141,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
}

struct RemoveDupQuintupletsAfterBuild {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc3D const& acc,
ModulesConst modules,
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
for (unsigned int lowmod : cms::alpakatools::uniform_elements_z(acc, modules.nLowerModules())) {
unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];

for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
for (unsigned int ix1 : cms::alpakatools::uniform_elements_y(acc, nQuintuplets_lowmod)) {
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
float eta1 = __H2F(quintuplets.eta()[ix]);
float phi1 = __H2F(quintuplets.phi()[ix]);
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);

for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
for (unsigned int jx1 : cms::alpakatools::uniform_elements_x(acc, ix1 + 1, nQuintuplets_lowmod)) {
unsigned int jx = quintupletModuleIndices_lowmod + jx1;

float eta2 = __H2F(quintuplets.eta()[jx]);
Expand Down Expand Up @@ -189,25 +187,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupQuintupletsBeforeTC {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
lowmodIdx1 += gridThreadExtent[1]) {
for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y(acc, ranges.nEligibleT5Modules())) {
uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
if (nQuintuplets_lowmod1 == 0)
continue;

unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];

for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
lowmodIdx2 += gridThreadExtent[2]) {
for (unsigned int lowmodIdx2 :
cms::alpakatools::uniform_elements_x(acc, lowmodIdx1, ranges.nEligibleT5Modules())) {
uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
if (nQuintuplets_lowmod2 == 0)
Expand Down Expand Up @@ -272,13 +265,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupPixelTripletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelTriplets pixelTriplets) const {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, pixelTriplets.nPixelTriplets())) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, pixelTriplets.nPixelTriplets())) {
if (ix == jx)
continue;

Expand Down Expand Up @@ -306,15 +295,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupPixelQuintupletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const {
unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
float score1 = __H2F(pixelQuintuplets.score()[ix]);
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
if (ix == jx)
continue;

Expand All @@ -333,22 +318,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct CheckHitspLS {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
ModulesConst modules,
SegmentsOccupancyConst segmentsOccupancy,
SegmentsPixel segmentsPixel,
bool secondpass) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

int pixelModuleIndex = modules.nLowerModules();
unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];

if (nPixelSegments > n_max_pixel_segments_per_module)
nPixelSegments = n_max_pixel_segments_per_module;

for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) {
if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
continue;

Expand All @@ -360,7 +341,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
float eta_pix1 = segmentsPixel.eta()[ix];
float phi_pix1 = segmentsPixel.phi()[ix];

for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) {
float eta_pix2 = segmentsPixel.eta()[jx];
float phi_pix2 = segmentsPixel.phi()[jx];

Expand Down
Loading