cp

mconcas · Oct 26, 2024 · 5b6577c · 5b6577c
1 parent 7393473
commit 5b6577c
Show file tree

Hide file tree

Showing 11 changed files with 427 additions and 214 deletions.
diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h
@@ -446,6 +446,8 @@ class SMatrixGPU
   GPUdi() SMatrixGPU(SMatrixNoInit) {}
   GPUd() SMatrixGPU(SMatrixIdentity);
   GPUd() SMatrixGPU(const SMatrixGPU<T, D1, D2, R>& rhs);
+  template <class R2>
+  GPUd() SMatrixGPU(const SMatrixGPU<T, D1, D2, R2>& rhs);
   template <class A, class R2>
   GPUd() SMatrixGPU(const Expr<A, T, D1, D2, R2>& rhs);
   template <class M>
@@ -497,6 +499,11 @@ class SMatrixGPU
   GPUd() SMatrixRowGPU operator[](unsigned int i) { return SMatrixRowGPU(*this, i); }
   template <class R2>
   GPUd() SMatrixGPU<T, D1, D2, R>& operator+=(const SMatrixGPU<T, D1, D2, R2>& rhs);
+  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const T& rhs);
+  template <class R2>
+  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs);
+  template <class A, class R2>
+  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const Expr<A, T, D1, D2, R2>& rhs);
 
   GPUd() bool Invert();
   GPUd() bool IsInUse(const T* p) const;
@@ -528,6 +535,13 @@ GPUdi() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const SMatrixGPU<T, D1, D2, R>& rhs
   mRep = rhs.mRep;
 }
 
+// Converting constructor: initialises this matrix from another SMatrixGPU with
+// the same element type and dimensions but a (possibly different)
+// representation R2. All work is delegated to the assignment operator that
+// overload resolution selects for `rhs`.
+template <class T, unsigned int D1, unsigned int D2, class R>
+template <class R2>
+GPUd() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const SMatrixGPU<T, D1, D2, R2>& rhs)
+{
+  *this = rhs;
+}
+
 template <class T, unsigned int D1, unsigned int D2, class R>
 GPUdi() T* SMatrixGPU<T, D1, D2, R>::begin()
 {
@@ -1387,6 +1401,29 @@ GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator+=(const SMa
   return *this;
 }
 
+// In-place scaling: multiplies each of the R::kSize stored elements of the
+// representation by `rhs` and returns a reference to this matrix.
+template <class T, unsigned int D1, unsigned int D2, class R>
+GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const T& rhs)
+{
+  T* const elements = mRep.Array();
+  for (unsigned int idx = 0; idx < R::kSize; ++idx) {
+    elements[idx] *= rhs;
+  }
+  return *this;
+}
+
+// In-place product with a matrix of the same dimensions: evaluates
+// (*this) * rhs and assigns the result back to this matrix.
+template <class T, unsigned int D1, unsigned int D2, class R>
+template <class R2>
+GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs)
+{
+  return *this = (*this) * rhs;
+}
+
+// In-place product with a matrix expression of the same dimensions: evaluates
+// (*this) * rhs and assigns the result back to this matrix.
+template <class T, unsigned int D1, unsigned int D2, class R>
+template <class A, class R2>
+GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const Expr<A, T, D1, D2, R2>& rhs)
+{
+  return *this = (*this) * rhs;
+}
+
 template <class T, unsigned int D1, unsigned int D2, class R>
 struct TranspPolicyGPU {
   enum {

diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h
@@ -18,6 +18,7 @@
 #define INCLUDE_RECONSTRUCTIONDATAFORMATS_TRACKPARAMETRIZATIONWITHERROR_H_
 
 #include "ReconstructionDataFormats/TrackParametrization.h"
+#include <MathUtils/Cartesian.h>
 
 namespace o2
 {
@@ -38,8 +39,8 @@ class TrackParametrizationWithError : public TrackParametrization<value_T>
 #endif
 
   using covMat_t = gpu::gpustd::array<value_t, kCovMatSize>;
-  using MatrixDSym5 = ROOT::Math::SMatrix<double, kNParams, kNParams, ROOT::Math::MatRepSym<double, kNParams>>;
-  using MatrixD5 = ROOT::Math::SMatrix<double, kNParams, kNParams, ROOT::Math::MatRepStd<double, kNParams, kNParams>>;
+  using MatrixDSym5 = o2::math_utils::SMatrix<double, kNParams, kNParams, o2::math_utils::MatRepSym<double, kNParams>>;
+  using MatrixD5 = o2::math_utils::SMatrix<double, kNParams, kNParams, o2::math_utils::MatRepStd<double, kNParams, kNParams>>;
 
   GPUd() TrackParametrizationWithError();
   GPUd() TrackParametrizationWithError(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion);
@@ -100,12 +101,12 @@ class TrackParametrizationWithError : public TrackParametrization<value_T>
   template <typename T>
   GPUd() value_t getPredictedChi2(const BaseCluster<T>& p) const;
 
-  void buildCombinedCovMatrix(const TrackParametrizationWithError& rhs, MatrixDSym5& cov) const;
-  value_t getPredictedChi2(const TrackParametrizationWithError& rhs, MatrixDSym5& covToSet) const;
+  GPUd() void buildCombinedCovMatrix(const TrackParametrizationWithError& rhs, MatrixDSym5& cov) const;
+  GPUd() value_t getPredictedChi2(const TrackParametrizationWithError& rhs, MatrixDSym5& covToSet) const;
   GPUd() value_t getPredictedChi2(const TrackParametrizationWithError& rhs) const;
   GPUd() value_t getPredictedChi2Quiet(const TrackParametrizationWithError& rhs) const;
-  bool update(const TrackParametrizationWithError& rhs, const MatrixDSym5& covInv);
-  bool update(const TrackParametrizationWithError& rhs);
+  GPUd() bool update(const TrackParametrizationWithError& rhs, const MatrixDSym5& covInv);
+  GPUd() bool update(const TrackParametrizationWithError& rhs);
 
   GPUd() bool update(const dim2_t& p, const dim3_t& cov);
   GPUd() bool update(const value_t* p, const value_t* cov);

diff --git a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx
@@ -13,6 +13,7 @@
 #include "ReconstructionDataFormats/Vertex.h"
 #include "ReconstructionDataFormats/DCA.h"
 #include <GPUCommonLogger.h>
+#include <cfloat>
 
 #ifndef GPUCA_GPUCODE_DEVICE
 #include <iostream>
@@ -754,30 +755,6 @@ GPUd() auto TrackParametrizationWithError<value_T>::getPredictedChi2Quiet(const
   return (d * (szz * d - sdz * z) + z * (sdd * z - d * sdz)) / det;
 }
 
-#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // Disable function relying on ROOT SMatrix on GPU
-
-//______________________________________________
-template <typename value_T>
-void TrackParametrizationWithError<value_T>::buildCombinedCovMatrix(const TrackParametrizationWithError<value_T>& rhs, MatrixDSym5& cov) const
-{
-  // fill combined cov.matrix (NOT inverted)
-  cov(kY, kY) = static_cast<double>(getSigmaY2()) + static_cast<double>(rhs.getSigmaY2());
-  cov(kZ, kY) = static_cast<double>(getSigmaZY()) + static_cast<double>(rhs.getSigmaZY());
-  cov(kZ, kZ) = static_cast<double>(getSigmaZ2()) + static_cast<double>(rhs.getSigmaZ2());
-  cov(kSnp, kY) = static_cast<double>(getSigmaSnpY()) + static_cast<double>(rhs.getSigmaSnpY());
-  cov(kSnp, kZ) = static_cast<double>(getSigmaSnpZ()) + static_cast<double>(rhs.getSigmaSnpZ());
-  cov(kSnp, kSnp) = static_cast<double>(getSigmaSnp2()) + static_cast<double>(rhs.getSigmaSnp2());
-  cov(kTgl, kY) = static_cast<double>(getSigmaTglY()) + static_cast<double>(rhs.getSigmaTglY());
-  cov(kTgl, kZ) = static_cast<double>(getSigmaTglZ()) + static_cast<double>(rhs.getSigmaTglZ());
-  cov(kTgl, kSnp) = static_cast<double>(getSigmaTglSnp()) + static_cast<double>(rhs.getSigmaTglSnp());
-  cov(kTgl, kTgl) = static_cast<double>(getSigmaTgl2()) + static_cast<double>(rhs.getSigmaTgl2());
-  cov(kQ2Pt, kY) = static_cast<double>(getSigma1PtY()) + static_cast<double>(rhs.getSigma1PtY());
-  cov(kQ2Pt, kZ) = static_cast<double>(getSigma1PtZ()) + static_cast<double>(rhs.getSigma1PtZ());
-  cov(kQ2Pt, kSnp) = static_cast<double>(getSigma1PtSnp()) + static_cast<double>(rhs.getSigma1PtSnp());
-  cov(kQ2Pt, kTgl) = static_cast<double>(getSigma1PtTgl()) + static_cast<double>(rhs.getSigma1PtTgl());
-  cov(kQ2Pt, kQ2Pt) = static_cast<double>(getSigma1Pt2()) + static_cast<double>(rhs.getSigma1Pt2());
-}
-
 //______________________________________________
 template <typename value_T>
 GPUd() auto TrackParametrizationWithError<value_T>::getPredictedChi2(const TrackParametrizationWithError<value_T>& rhs) const -> value_t
@@ -819,6 +796,28 @@ GPUd() auto TrackParametrizationWithError<value_T>::getPredictedChi2(const Track
   return chi2diag + 2. * chi2ndiag;
 }
 
+//______________________________________________
+// Writes into `cov` the element-wise sum of this track's covariance terms and
+// those of `rhs`. Each term is converted to double before the addition.
+// Exactly 15 (row, col) entries are assigned; any other element of `cov` is
+// left untouched. MatrixDSym5 uses a MatRepSym representation, so presumably
+// these 15 entries define the whole symmetric matrix - TODO confirm.
+template <typename value_T>
+GPUd() void TrackParametrizationWithError<value_T>::buildCombinedCovMatrix(const TrackParametrizationWithError<value_T>& rhs, MatrixDSym5& cov) const
+{
+  // fill combined cov.matrix (NOT inverted): no inversion happens in this function
+  cov(kY, kY) = static_cast<double>(getSigmaY2()) + static_cast<double>(rhs.getSigmaY2());
+  cov(kZ, kY) = static_cast<double>(getSigmaZY()) + static_cast<double>(rhs.getSigmaZY());
+  cov(kZ, kZ) = static_cast<double>(getSigmaZ2()) + static_cast<double>(rhs.getSigmaZ2());
+  cov(kSnp, kY) = static_cast<double>(getSigmaSnpY()) + static_cast<double>(rhs.getSigmaSnpY());
+  cov(kSnp, kZ) = static_cast<double>(getSigmaSnpZ()) + static_cast<double>(rhs.getSigmaSnpZ());
+  cov(kSnp, kSnp) = static_cast<double>(getSigmaSnp2()) + static_cast<double>(rhs.getSigmaSnp2());
+  cov(kTgl, kY) = static_cast<double>(getSigmaTglY()) + static_cast<double>(rhs.getSigmaTglY());
+  cov(kTgl, kZ) = static_cast<double>(getSigmaTglZ()) + static_cast<double>(rhs.getSigmaTglZ());
+  cov(kTgl, kSnp) = static_cast<double>(getSigmaTglSnp()) + static_cast<double>(rhs.getSigmaTglSnp());
+  cov(kTgl, kTgl) = static_cast<double>(getSigmaTgl2()) + static_cast<double>(rhs.getSigmaTgl2());
+  cov(kQ2Pt, kY) = static_cast<double>(getSigma1PtY()) + static_cast<double>(rhs.getSigma1PtY());
+  cov(kQ2Pt, kZ) = static_cast<double>(getSigma1PtZ()) + static_cast<double>(rhs.getSigma1PtZ());
+  cov(kQ2Pt, kSnp) = static_cast<double>(getSigma1PtSnp()) + static_cast<double>(rhs.getSigma1PtSnp());
+  cov(kQ2Pt, kTgl) = static_cast<double>(getSigma1PtTgl()) + static_cast<double>(rhs.getSigma1PtTgl());
+  cov(kQ2Pt, kQ2Pt) = static_cast<double>(getSigma1Pt2()) + static_cast<double>(rhs.getSigma1Pt2());
+}
+
 //______________________________________________
 template <typename value_T>
 GPUd() bool TrackParametrizationWithError<value_T>::update(const TrackParametrizationWithError<value_T>& rhs, const MatrixDSym5& covInv)
@@ -867,7 +866,7 @@ GPUd() bool TrackParametrizationWithError<value_T>::update(const TrackParametriz
   }
 
   // updated covariance: Cov0 = Cov0 - K*Cov0
-  matK *= ROOT::Math::SMatrix<double, kNParams, kNParams, ROOT::Math::MatRepStd<double, kNParams>>(matC0);
+  matK *= o2::math_utils::SMatrix<double, kNParams, kNParams, o2::math_utils::MatRepStd<double, kNParams>>(matC0);
   mC[kSigY2] -= matK(kY, kY);
   mC[kSigZY] -= matK(kZ, kY);
   mC[kSigZ2] -= matK(kZ, kZ);
@@ -901,8 +900,6 @@ GPUd() bool TrackParametrizationWithError<value_T>::update(const TrackParametriz
   return update(rhs, covI);
 }
 
-#endif
-
 //______________________________________________
 template <typename value_T>
 GPUd() bool TrackParametrizationWithError<value_T>::update(const value_t* p, const value_t* cov)
@@ -1245,6 +1242,7 @@ GPUd() void TrackParametrizationWithError<value_T>::printHexadecimal()
 
 namespace o2::track
 {
+
 #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code.
 template class TrackParametrizationWithError<float>;
 #endif

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
@@ -26,12 +26,9 @@
 
 namespace o2
 {
-// namespace gpu
-// {
-// // class GPUChainITS;
-// }
 namespace its
-{namespace gpu
+{
+namespace gpu
 {
 
 class DefaultGPUAllocator : public ExternalAllocator
@@ -59,13 +56,17 @@ class TimeFrameGPU : public TimeFrame
   void loadClustersDevice();
   void loadTrackletsDevice();
   void loadCellsDevice();
+  void loadCellsLUT();
   void loadTrackSeedsDevice();
   void loadTrackSeedsChi2Device();
   void loadRoadsDevice();
   void loadTrackSeedsDevice(std::vector<CellSeed>&);
-  void createCellNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
+  void createNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
+  void createNeighboursLUTDevice(const int, const unsigned int);
   void createTrackITSExtDevice(std::vector<CellSeed>&);
   void downloadTrackITSExtDevice(std::vector<CellSeed>&);
+  void downloadCellsNeighbours(std::vector<std::vector<std::pair<int, int>>>&, const int);
+  void downloadNeighboursLUT(std::vector<int>&, const int);
   void initDeviceChunks(const int, const int);
   template <Task task>
   size_t loadChunkData(const size_t, const size_t, const size_t);
@@ -92,17 +93,22 @@ class TimeFrameGPU : public TimeFrame
   // Hybrid
   Road<nLayers - 2>* getDeviceRoads() { return mRoadsDevice; }
   TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
+  // Returns mNeighboursLUTDevice[layer]. `layer` is not range-checked; the array holds nLayers - 3 entries.
+  int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
   gpuPair<int, int>* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
   TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
   // TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() { return mTrackingFrameInfoDeviceArray; }
   const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
   Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
   Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
   Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
+  // Returns the stored mCellsLUTDeviceArray pointer as-is (no null check).
+  int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
+  // Returns the stored mNeighboursCellLUTDeviceArray pointer as-is (no null check).
+  int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
   CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
   CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
   o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
   float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
+  // Returns mNeighboursIndexTablesDevice[layer]. `layer` is not range-checked; the array holds nLayers - 2 entries.
+  int* getNeighboursIndexTablesDevice(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
+
   void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
 
   // Host-specific getters
@@ -131,9 +137,13 @@ class TimeFrameGPU : public TimeFrame
   Cluster** mUnsortedClustersDeviceArray;
   std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
   Tracklet** mTrackletsDeviceArray;
-  std::array<int*, nLayers - 2> mCellsLookupTablesDevice;
-  int** mCellsLookupTablesDeviceArray;
+  std::array<int*, nLayers - 2> mCellsLUTDevice;
+  std::array<int*, nLayers - 3> mNeighboursLUTDevice;
+  int** mCellsLUTDeviceArray;
+  int** mNeighboursCellDeviceArray;
+  int** mNeighboursCellLUTDeviceArray;
   std::array<CellSeed*, nLayers - 2> mCellsDevice;
+  std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
   CellSeed* mTrackSeedsDevice;
   CellSeed** mCellsDeviceArray;
   std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h
@@ -49,6 +49,33 @@ GPUg() void fitTrackSeedsKernel(
   const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT);
 #endif
 } // namespace gpu
+void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
+                                int* neighboursLUTs,
+                                int** cellsLUTs,
+                                gpuPair<int, int>* cellNeighbours,
+                                int* neighboursIndexTable,
+                                const float maxChi2ClusterAttachment,
+                                const float bz,
+                                const int layerIndex,
+                                const unsigned int nCells,
+                                const unsigned int nCellsNext,
+                                const int maxCellNeighbours,
+                                const int nBlocks,
+                                const int nThreads);
+
+void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
+                                  int* neighboursLUTs,
+                                  int** cellsLUTs,
+                                  gpuPair<int, int>* cellNeighbours,
+                                  int* neighboursIndexTable,
+                                  const float maxChi2ClusterAttachment,
+                                  const float bz,
+                                  const int layerIndex,
+                                  const unsigned int nCells,
+                                  const unsigned int nCellsNext,
+                                  const int maxCellNeighbours,
+                                  const int nBlocks,
+                                  const int nThreads);
 
 void trackSeedHandler(CellSeed* trackSeeds,
                       const TrackingFrameInfo** foundTrackingFrameInfo,

diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt
@@ -13,7 +13,7 @@
 if(CUDA_ENABLED)
 find_package(CUDAToolkit)
 message(STATUS "Building ITS CUDA tracker")
-
+# -O0/-g are debugging aids: restrict them to the Debug configuration so that
+# regular builds of the CUDA tracker keep the optimization level of the build
+# type. -fPIC stays unconditional, as before.
+add_compile_options($<$<CONFIG:Debug>:-O0> $<$<CONFIG:Debug>:-g> -fPIC)
 o2_add_library(ITStrackingCUDA
                SOURCES ClusterLinesGPU.cu
                        Context.cu
@@ -32,7 +32,7 @@ o2_add_library(ITStrackingCUDA
                                      O2::SimulationDataFormat
                                      O2::ReconstructionDataFormats
                                      O2::GPUCommon
-                                     CUDA::nvToolsExt # TODO: change to CUDA::nvtx3 when CMake bump >= 3.25
+                                     CUDA::nvToolsExt
                PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider
                TARGETVARNAME targetName)