Skip to content

Commit

Permalink
Add tracklet writing on the buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas committed Nov 29, 2024
1 parent 1bd7e9f commit 208ea84
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class TimeFrameGPU : public TimeFrame
std::vector<unsigned int> getClusterSizes();
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
Expand All @@ -142,6 +142,7 @@ class TimeFrameGPU : public TimeFrame
// Host-available device getters
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
gsl::span<int, nLayers - 2> getNCellsDevice() { return mNCells; }

Expand Down Expand Up @@ -175,7 +176,7 @@ class TimeFrameGPU : public TimeFrame
const unsigned char** mUsedClustersDeviceArray;
const int** mROFrameClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
const Tracklet** mTrackletsDeviceArray;
Tracklet** mTrackletsDeviceArray;
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
std::array<int*, nLayers - 2> mCellsLUTDevice;
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
Tracklet* tracklets,
Tracklet** tracklets,
int** trackletsLUTs,
const int iteration,
const float NSigmaCut,
Expand All @@ -113,7 +113,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
void countCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
Expand All @@ -130,7 +130,7 @@ void countCellsHandler(const Cluster** sortedClusters,
void computeCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
Expand Down
9 changes: 4 additions & 5 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,12 @@ void TimeFrameGPU<nLayers>::createTrackletsBuffers()
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
mNTracklets[iLayer] = 0;
checkGPUError(cudaMemcpyAsync(&mNTracklets[iLayer], mTrackletsLUTDevice[iLayer] + mClusters[iLayer].size(), sizeof(int), cudaMemcpyDeviceToHost));
LOGP(info, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB);
LOGP(debug, "gpu-transfer: creating tracklets buffer for {} elements on layer {}, for {} MB.", mNTracklets[iLayer], iLayer, mNTracklets[iLayer] * sizeof(Tracklet) / MB);
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mNTracklets[iLayer] * sizeof(Tracklet), nullptr, getExtAllocator());
}
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), nullptr, getExtAllocator());
checkGPUError(cudaHostRegister(mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaHostRegisterPortable));
checkGPUError(cudaMemcpyAsync(mTrackletsDeviceArray, mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
}

Expand All @@ -282,13 +285,9 @@ void TimeFrameGPU<nLayers>::loadTrackletsDevice()
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "loading tracklets");
for (auto iLayer{0}; iLayer < nLayers - 1; ++iLayer) {
LOGP(debug, "gpu-transfer: loading {} tracklets on layer {}, for {} MB.", mTracklets[iLayer].size(), iLayer, mTracklets[iLayer].size() * sizeof(Tracklet) / MB);
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDevice[iLayer]), mTracklets[iLayer].size() * sizeof(Tracklet), nullptr, getExtAllocator());
checkGPUError(cudaHostRegister(mTracklets[iLayer].data(), mTracklets[iLayer].size() * sizeof(Tracklet), cudaHostRegisterPortable));
checkGPUError(cudaMemcpyAsync(mTrackletsDevice[iLayer], mTracklets[iLayer].data(), mTracklets[iLayer].size() * sizeof(Tracklet), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
}
allocMemAsync(reinterpret_cast<void**>(&mTrackletsDeviceArray), (nLayers - 1) * sizeof(Tracklet*), nullptr, getExtAllocator());
checkGPUError(cudaHostRegister(mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaHostRegisterPortable));
checkGPUError(cudaMemcpyAsync(mTrackletsDeviceArray, mTrackletsDevice.data(), (nLayers - 1) * sizeof(Tracklet*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
}

Expand Down
30 changes: 29 additions & 1 deletion Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
mTimeFrameGPU->getDeviceArrayUsedClusters(),
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
mTimeFrameGPU->getDeviceTrackletsLUTs(),
mTimeFrameGPU->getDeviceTrackletsLUTs(), // Required for the exclusive sums
iteration,
mTrkParams[iteration].NSigmaCut,
mTimeFrameGPU->getPhiCuts(),
Expand All @@ -128,6 +128,34 @@ void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int
conf.nBlocks,
conf.nThreads);
mTimeFrameGPU->createTrackletsBuffers();
computeTrackletsInROFsHandler<nLayers>(mTimeFrameGPU->getDeviceIndexTableUtils(),
mTimeFrameGPU->getDeviceMultCutMask(),
startROF,
endROF,
mTimeFrameGPU->getNrof(),
mTrkParams[iteration].DeltaROF,
iVertex,
mTimeFrameGPU->getDeviceVertices(),
mTimeFrameGPU->getDeviceROFramesPV(),
mTimeFrameGPU->getPrimaryVerticesNum(),
mTimeFrameGPU->getDeviceArrayClusters(),
mTimeFrameGPU->getClusterSizes(),
mTimeFrameGPU->getDeviceROframeClusters(),
mTimeFrameGPU->getDeviceArrayUsedClusters(),
mTimeFrameGPU->getDeviceArrayClustersIndexTables(),
mTimeFrameGPU->getDeviceArrayTracklets(),
mTimeFrameGPU->getDeviceArrayTrackletsLUT(),
iteration,
mTrkParams[iteration].NSigmaCut,
mTimeFrameGPU->getPhiCuts(),
mTrkParams[iteration].PVres,
mTimeFrameGPU->getMinRs(),
mTimeFrameGPU->getMaxRs(),
mTimeFrameGPU->getPositionResolutions(),
mTrkParams[iteration].LayerRadii,
mTimeFrameGPU->getMSangles(),
conf.nBlocks,
conf.nThreads);
}

template <int nLayers>
Expand Down
42 changes: 20 additions & 22 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ GPUg() void computeLayerCellsKernel(
const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
Tracklet** tracklets,
int** trackletsLUT,
const int nTrackletsCurrent,
const int layer,
Expand Down Expand Up @@ -462,11 +462,11 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
const int* rofPV,
const int nVertices,
const int vertexId,
const Cluster** clusters, // input data rof0
const Cluster** clusters, // Input data rof0
const int** ROFClusters, // Number of clusters on layers per ROF
const unsigned char** usedClusters, // Used clusters
const int** indexTables, // input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
Tracklet* tracklets, // output data
const int** indexTables, // Input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
Tracklet** tracklets, // Output data
int** trackletsLUT,
const int iteration,
const float NSigmaCut,
Expand All @@ -475,18 +475,18 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
const float minR,
const float maxR,
const float positionResolution,
const float meanDeltaR = -666.f,
const float MSAngle = -666.f)
const float meanDeltaR = -42.f,
const float MSAngle = -42.f)
{
const int phiBins{utils->getNphiBins()};
const int zBins{utils->getNzBins()};
for (unsigned int iROF{blockIdx.x}; iROF < endROF - startROF; iROF += gridDim.x) {
const int rof0 = iROF + startROF;
const short rof0 = iROF + startROF;
auto primaryVertices = getPrimaryVertices(rof0, rofPV, totalROFs, multMask, vertices);
const auto startVtx{vertexId >= 0 ? vertexId : 0};
const auto endVtx{vertexId >= 0 ? o2::gpu::CAMath::Min(vertexId + 1, static_cast<int>(primaryVertices.size())) : static_cast<int>(primaryVertices.size())};
auto minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
auto maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
const short minROF = o2::gpu::CAMath::Max(startROF, static_cast<int>(rof0 - deltaROF));
const short maxROF = o2::gpu::CAMath::Min(endROF - 1, static_cast<int>(rof0 + deltaROF));
auto clustersCurrentLayer = getClustersOnLayer(rof0, totalROFs, layerIndex, ROFClusters, clusters);
if (clustersCurrentLayer.empty()) {
continue;
Expand Down Expand Up @@ -523,7 +523,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
}

const int tableSize{phiBins * zBins + 1};
for (int rof1{minROF}; rof1 <= maxROF; ++rof1) {
for (short rof1{minROF}; rof1 <= maxROF; ++rof1) {
auto clustersNextLayer = getClustersOnLayer(rof1, totalROFs, layerIndex + 1, ROFClusters, clusters);
if (clustersNextLayer.empty()) {
continue;
Expand All @@ -534,26 +534,24 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
const int maxBinIndex{firstBinIndex + selectedBinsRect.z - selectedBinsRect.x + 1};
const int firstRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + firstBinIndex];
const int maxRowClusterIndex = indexTables[layerIndex + 1][(rof1 - startROF) * tableSize + maxBinIndex];
for (int iNextCluster{firstRowClusterIndex}; iNextCluster < maxRowClusterIndex; ++iNextCluster) {
if (iNextCluster >= clustersNextLayer.size()) {
for (int nextClusterIndex{firstRowClusterIndex}; nextClusterIndex < maxRowClusterIndex; ++nextClusterIndex) {
if (nextClusterIndex >= clustersNextLayer.size()) {
break;
}
const Cluster& nextCluster{clustersNextLayer[iNextCluster]};
const Cluster& nextCluster{clustersNextLayer[nextClusterIndex]};
if (usedClusters[layerIndex + 1][nextCluster.clusterId]) {
continue;
}
const float deltaPhi{o2::gpu::CAMath::Abs(currentCluster.phi - nextCluster.phi)};
const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) +
currentCluster.zCoordinate - nextCluster.zCoordinate)};
const float deltaZ{o2::gpu::CAMath::Abs(tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate)};
const int nextSortedIndex{ROFClusters[layerIndex + 1][rof1] + nextClusterIndex};
if (deltaZ / sigmaZ < NSigmaCut && (deltaPhi < phiCut || o2::gpu::CAMath::Abs(deltaPhi - constants::math::TwoPi) < phiCut)) {
// if (layerIndex > 0) {
if constexpr (initRun) {
trackletsLUT[layerIndex][currentSortedIndex]++; // we need l0 as well for usual exclusive sums.
} else {
// }
const float phi{o2::gpu::CAMath::ATan2(currentCluster.yCoordinate - nextCluster.yCoordinate, currentCluster.xCoordinate - nextCluster.xCoordinate)};
const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
// tf->getTracklets()[layerIndex].emplace_back(currentSortedIndex, tf->getSortedIndex(rof1, layerIndex + 1, iNextCluster), tanL, phi, rof0, rof1);
new (tracklets[layerIndex] + trackletsLUT[layerIndex][currentSortedIndex] + storedTracklets) Tracklet{currentSortedIndex, nextSortedIndex, tanL, phi, rof0, rof1};
}
++storedTracklets;
}
Expand Down Expand Up @@ -809,7 +807,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
Tracklet* tracklets,
Tracklet** tracklets,
int** trackletsLUTs,
const int iteration,
const float NSigmaCut,
Expand Down Expand Up @@ -859,7 +857,7 @@ void countCellsHandler(
const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
Expand Down Expand Up @@ -909,7 +907,7 @@ void computeCellsHandler(
const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
Expand Down Expand Up @@ -1128,7 +1126,7 @@ template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
Tracklet* tracklets,
Tracklet** tracklets,
int** trackletsLUTs,
const int iteration,
const float NSigmaCut,
Expand Down
6 changes: 3 additions & 3 deletions Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,9 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
return;
}

for (auto iLayer{0}; iLayer < tf->getTracklets().size(); ++iLayer) {
std::cout << "tracklets layer " << iLayer << ": " << tf->getTracklets()[iLayer].size() << std::endl;
}
// for (auto iLayer{0}; iLayer < tf->getTracklets().size(); ++iLayer) {
// std::cout << "tracklets layer " << iLayer << ": " << tf->getTracklets()[iLayer].size() << std::endl;
// }

// for (auto iLayer{0}; iLayer < tf->getTrackletsLookupTable().size(); ++iLayer) {
// auto lut = tf->getTrackletsLookupTable()[iLayer];
Expand Down

0 comments on commit 208ea84

Please sign in to comment.