Skip to content

Commit

Permalink
Make threads and blocks configurable from CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas committed Sep 9, 2024
1 parent 46615df commit 99b5a69
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 65 deletions.
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ cmake_minimum_required(VERSION 3.27.1 FATAL_ERROR)
# it's important to specify accurately the list of languages. for instance C and
# C++ as we _do_ have some C files to compile explicitely as C (e.g. gl3w.c)
project(O2 LANGUAGES C CXX VERSION 1.2.0)

add_definitions(-DGPUCA_NO_FAST_MATH=1)
set(GPUCA_NO_FAST_MATH 1)
set(GPUCA_NO_FAST_MATH_WHOLEO2 1)
include(CTest)

# Project wide setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ class TimeFrameGPU : public TimeFrame
void registerHostMemory(const int);
void unregisterHostMemory(const int);
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
void initialiseHybrid(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
void initDeviceSAFitting();
void loadTrackingFrameInfoDevice(const int);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ void trackSeedHandler(CellSeed* trackSeeds,
float maxChi2ClusterAttachment,
float maxChi2NDF,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType);
const o2::base::PropagatorF::MatCorrType matCorrType,
const int nBlocks,
const int nThreads);
} // namespace o2::its
#endif // ITSTRACKINGGPU_TRACKINGKERNELS_H_
21 changes: 0 additions & 21 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -362,27 +362,6 @@ void TimeFrameGPU<nLayers>::initialise(const int iteration,
const int maxLayers,
IndexTableUtils* utils,
const TimeFrameGPUParameters* gpuParam)
{
mGpuStreams.resize(mGpuParams.nTimeFrameChunks);
mHostNTracklets.resize((nLayers - 1) * mGpuParams.nTimeFrameChunks, 0);
mHostNCells.resize((nLayers - 2) * mGpuParams.nTimeFrameChunks, 0);

auto init = [&]() -> void {
this->initDevice(utils, trkParam, *gpuParam, maxLayers, iteration);
};
std::thread t1{init};
RANGE("tf_cpu_initialisation", 1);
o2::its::TimeFrame::initialise(iteration, trkParam, maxLayers);
// registerHostMemory(maxLayers);
t1.join();
}

template <int nLayers>
void TimeFrameGPU<nLayers>::initialiseHybrid(const int iteration,
const TrackingParameters& trkParam,
const int maxLayers,
IndexTableUtils* utils,
const TimeFrameGPUParameters* gpuParam)
{
mGpuStreams.resize(mGpuParams.nTimeFrameChunks);
o2::its::TimeFrame::initialise(iteration, trkParam, maxLayers);
Expand Down
9 changes: 6 additions & 3 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include "ITStrackingGPU/TrackerTraitsGPU.h"
#include "ITStrackingGPU/TrackingKernels.h"
#include "ITStracking/TrackingConfigParam.h"

namespace o2::its
{
Expand All @@ -28,7 +29,7 @@ constexpr int UnusedIndex{-1};
template <int nLayers>
void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
{
mTimeFrameGPU->initialiseHybrid(iteration, mTrkParams[iteration], nLayers);
mTimeFrameGPU->initialise(iteration, mTrkParams[iteration], nLayers);
mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration);
}

Expand Down Expand Up @@ -397,7 +398,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
}
mTimeFrameGPU->createTrackITSExtDevice(trackSeeds);
mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);

auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
trackSeedHandler(
mTimeFrameGPU->getDeviceTrackSeeds(), // CellSeed* trackSeeds,
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), // TrackingFrameInfo** foundTrackingFrameInfo,
Expand All @@ -408,7 +409,9 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment,
mTrkParams[0].MaxChi2NDF, // float maxChi2NDF,
mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator
mCorrType); // o2::base::PropagatorImpl<float>::MatCorrType
mCorrType, // o2::base::PropagatorImpl<float>::MatCorrType
conf.nBlocks,
conf.nThreads);

mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);

Expand Down
8 changes: 5 additions & 3 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -717,9 +717,11 @@ void trackSeedHandler(CellSeed* trackSeeds,
float maxChi2ClusterAttachment,
float maxChi2NDF,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType)
const o2::base::PropagatorF::MatCorrType matCorrType,
const int nBlocks,
const int nThreads)
{
gpu::fitTrackSeedsKernel<<<20, 256>>>(
gpu::fitTrackSeedsKernel<<<nBlocks, nThreads>>>(
trackSeeds, // CellSeed* trackSeeds,
foundTrackingFrameInfo, // TrackingFrameInfo** foundTrackingFrameInfo,
tracks, // o2::its::TrackITSExt* tracks,
Expand All @@ -734,4 +736,4 @@ void trackSeedHandler(CellSeed* trackSeeds,
gpuCheckError(cudaPeekAtLastError());
gpuCheckError(cudaDeviceSynchronize());
}
} // namespace o2::its
} // namespace o2::its
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void VertexerTraitsGPU::initialise(const TrackingParameters& trackingParams, con
{
mTimeFrameGPU->initialise(0, trackingParams, 3, &mIndexTableUtils, &mTfGPUParams);
}

void VertexerTraitsGPU::updateVertexingParameters(const std::vector<VertexingParameters>& vrtPar, const TimeFrameGPUParameters& tfPar)
{
mVrtParams = vrtPar;
Expand Down
41 changes: 28 additions & 13 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,23 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2
const unsigned int startRofId,
const unsigned int rofSize,
const float phiCut,
const size_t maxTrackletsPerCluster)
const unsigned int maxTrackletsPerCluster,
const int nBlocks,
const int nThreads)
{
gpu::trackleterKernelMultipleRof<Mode><<<nBlocks, nThreads>>>(
clustersNextLayer, // const Cluster* clustersNextLayer, // 0 2
clustersCurrentLayer, // const Cluster* clustersCurrentLayer, // 1 1
sizeNextLClusters, // const int* sizeNextLClusters,
sizeCurrentLClusters, // const int* sizeCurrentLClusters,
nextIndexTables, // const int* nextIndexTables,
Tracklets, // Tracklet* Tracklets,
foundTracklets, // int* foundTracklets,
utils, // const IndexTableUtils* utils,
startRofId, // const unsigned int startRofId,
rofSize, // const unsigned int rofSize,
phiCut, // const float phiCut,
maxTrackletsPerCluster); // const unsigned int maxTrackletsPerCluster = 1e2
}
/*
GPUd() float smallestAngleDifference(float a, float b)
Expand Down Expand Up @@ -96,7 +111,7 @@ GPUd() void printOnBlock(const unsigned int bId, const char* str, Args... args)
}
}
GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150, const unsigned int tId = 0)
GPUg() void printBufferOnThread(const int* v, unsigned int size, const int len = 150, const unsigned int tId = 0)
{
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
for (int i{0}; i < size; ++i) {
Expand All @@ -109,7 +124,7 @@ GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150,
}
}
GPUg() void printBufferOnThreadF(const float* v, size_t size, const unsigned int tId = 0)
GPUg() void printBufferOnThreadF(const float* v, unsigned int size, const unsigned int tId = 0)
{
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
printf("vector :");
Expand All @@ -127,7 +142,7 @@ GPUg() void resetTrackletsKernel(Tracklet* tracklets, const int nTracklets)
}
}
GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const size_t nClustersMiddleLayer, const int maxTrackletsPerCluster)
GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const unsigned int nClustersMiddleLayer, const int maxTrackletsPerCluster)
{
for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < nClustersMiddleLayer; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
const int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
Expand Down Expand Up @@ -160,15 +175,15 @@ GPUg() void trackleterKernelSingleRof(
int* foundTracklets,
const IndexTableUtils* utils,
const short rofId,
const size_t maxTrackletsPerCluster = 1e2)
const unsigned int maxTrackletsPerCluster = 1e2)
{
const int phiBins{utils->getNphiBins()};
const int zBins{utils->getNzBins()};
// loop on layer1 clusters
for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < sizeCurrentLClusters; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
if (iCurrentLayerClusterIndex < sizeCurrentLClusters) {
unsigned int storedTracklets{0};
const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
const Cluster& currentCluster = clustersCurrentLayer[iCurrentLayerClusterIndex];
const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
Expand Down Expand Up @@ -218,7 +233,7 @@ GPUg() void trackleterKernelMultipleRof(
const short startRofId,
const short rofSize,
const float phiCut,
const size_t maxTrackletsPerCluster = 1e2)
const unsigned int maxTrackletsPerCluster = 1e2)
{
const int phiBins{utils->getNphiBins()};
const int zBins{utils->getNzBins()};
Expand All @@ -235,7 +250,7 @@ GPUg() void trackleterKernelMultipleRof(
// single rof loop on layer1 clusters
for (int iCurrentLayerClusterIndex = threadIdx.x; iCurrentLayerClusterIndex < nClustersCurrentLayerRof; iCurrentLayerClusterIndex += blockDim.x) {
unsigned int storedTracklets{0};
const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
const Cluster& currentCluster = clustersCurrentLayerRof[iCurrentLayerClusterIndex];
const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
Expand Down Expand Up @@ -276,7 +291,7 @@ template <bool initRun>
GPUg() void trackletSelectionKernelSingleRof(
const Cluster* clusters0,
const Cluster* clusters1,
const size_t nClustersMiddleLayer,
const unsigned int nClustersMiddleLayer,
Tracklet* tracklets01,
Tracklet* tracklets12,
const int* nFoundTracklet01,
Expand Down Expand Up @@ -436,7 +451,7 @@ GPUg() void computeCentroidsKernel(
Line* lines,
int* nFoundLines,
int* nExclusiveFoundLines,
const size_t nClustersMiddleLayer,
const unsigned int nClustersMiddleLayer,
float* centroids,
const float lowHistX,
const float highHistX,
Expand All @@ -446,7 +461,7 @@ GPUg() void computeCentroidsKernel(
{
const int nLines = nExclusiveFoundLines[nClustersMiddleLayer - 1] + nFoundLines[nClustersMiddleLayer - 1];
const int maxIterations{nLines * (nLines - 1) / 2};
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
int iFirstLine = currentThreadIndex / nLines;
int iSecondLine = currentThreadIndex % nLines;
// All unique pairs
Expand Down Expand Up @@ -496,7 +511,7 @@ GPUg() void computeZCentroidsKernel(
const int binOpeningX,
const int binOpeningY)
{
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
if (tmpVtX[0].value || tmpVtX[1].value) {
float tmpX{lowHistX + tmpVtX[0].key * binSizeHistX + binSizeHistX / 2};
int sumWX{tmpVtX[0].value};
Expand Down Expand Up @@ -543,7 +558,7 @@ GPUg() void computeVertexKernel(
const int minContributors,
const int binOpeningZ)
{
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
if (currentThreadIndex == 0) {
if (tmpVertexBins[2].value > 1 && (tmpVertexBins[0].value || tmpVertexBins[1].value)) {
float z{lowHistZ + tmpVertexBins[2].key * binSizeHistZ + binSizeHistZ / 2};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper<VertexerPa
};

struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerParamConfig> {

// Use TGeo for mat. budget
bool useMatCorrTGeo = false;
bool useFastMaterial = false;
Expand Down Expand Up @@ -89,24 +88,13 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerPara
O2ParamDef(TrackerParamConfig, "ITSCATrackerParam");
};

struct GpuRecoParamConfig : public o2::conf::ConfigurableParamHelper<GpuRecoParamConfig> {
struct ITSGpuTrackingParamConfig : public o2::conf::ConfigurableParamHelper<ITSGpuTrackingParamConfig> {
// GPU-specific parameters
size_t tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes
size_t maxTrackletsPerCluster = 1e2;
size_t clustersPerLayerCapacity = 2.5e5;
size_t clustersPerROfCapacity = 1.5e3;
// size_t trackletsCapacity = maxTrackletsPerCluster * clustersPerLayerCapacity;
size_t validatedTrackletsCapacity = 1e5;
size_t cellsLUTsize = validatedTrackletsCapacity;
size_t maxNeighboursSize = 1e4;
size_t neighboursLUTsize = maxNeighboursSize;
size_t maxRoadPerRofSize = 5e2; // pp!
size_t maxLinesCapacity = 1e2;
size_t maxVerticesCapacity = 5e4;
size_t nTimeFramePartitions = 3;
int maxGPUMemoryGB = -1;
unsigned int tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes
int nBlocks = 20;
int nThreads = 256;

O2ParamDef(GpuRecoParamConfig, "ITSGpuRecoParam");
O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam");
};

} // namespace its
Expand Down
4 changes: 2 additions & 2 deletions Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ namespace its
{
static auto& sVertexerParamITS = o2::its::VertexerParamConfig::Instance();
static auto& sCATrackerParamITS = o2::its::TrackerParamConfig::Instance();
static auto& sGpuRecoParamITS = o2::its::GpuRecoParamConfig::Instance();
static auto& sGpuRecoParamITS = o2::its::ITSGpuTrackingParamConfig::Instance();

O2ParamImpl(o2::its::VertexerParamConfig);
O2ParamImpl(o2::its::TrackerParamConfig);
O2ParamImpl(o2::its::GpuRecoParamConfig);
O2ParamImpl(o2::its::ITSGpuTrackingParamConfig);
} // namespace its
} // namespace o2
4 changes: 2 additions & 2 deletions Detectors/ITSMFT/ITS/tracking/src/TrackingLinkDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#pragma link C++ class o2::its::TrackerParamConfig + ;
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::TrackerParamConfig> + ;

#pragma link C++ class o2::its::GpuRecoParamConfig + ;
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::GpuRecoParamConfig> + ;
#pragma link C++ class o2::its::ITSGpuTrackingParamConfig + ;
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::ITSGpuTrackingParamConfig> + ;

#endif
3 changes: 2 additions & 1 deletion Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ void Vertexer::getGlobalConfiguration()
{
auto& vc = o2::its::VertexerParamConfig::Instance();
vc.printKeyValues(true, true);
auto& grc = o2::its::GpuRecoParamConfig::Instance();
auto& grc = o2::its::ITSGpuTrackingParamConfig::Instance();


// This is odd: we override only the parameters for the first iteration.
// Variations for the next iterations are set in the trackingInterfrace.
Expand Down

0 comments on commit 99b5a69

Please sign in to comment.