Skip to content

Commit

Permalink
Change block_size to parameter for mainRun
Browse files Browse the repository at this point in the history
  • Loading branch information
sbaldu committed Dec 14, 2023
1 parent 703e811 commit eb104c4
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 54 deletions.
11 changes: 7 additions & 4 deletions CLUEstering/alpaka/BindingModules/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,10 @@ def choose_kernel(self,
raise ValueError("Invalid kernel. The allowed choices for the"
+ " kernels are: flat, exp, gaus and custom.")

def run_clue(self, backend: str = "cpu serial", verbose: bool = False) -> None:
def run_clue(self,
backend: str = "cpu serial",
block_size: int = 1024,
verbose: bool = False) -> None:
"""
Executes the CLUE clustering algorithm.
Expand Down Expand Up @@ -537,15 +540,15 @@ def run_clue(self, backend: str = "cpu serial", verbose: bool = False) -> None:
if backend == "cpu serial":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
elif backend == "cpu tbb":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
elif backend == "gpu cuda":
cluster_id_is_seed = gpu_cuda.mainRun(self.dc_, float(self.rhoc), self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
# cluster_id_is_seed = cpu_tbb.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
# self.clust_data.coords, self.clust_data.weight,
# self.kernel, self.clust_data.n_dim)
Expand Down
26 changes: 15 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size = 1024) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -41,7 +42,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -81,7 +82,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size = 1024) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -102,7 +104,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -142,7 +144,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -163,7 +166,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -207,7 +210,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -217,7 +221,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -227,9 +232,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");

m.def("set_blocksize", CLUEAlgoAlpaka::setBlockSize, "set_blocksize");
}
}; // namespace alpaka_serial_sync
24 changes: 15 additions & 9 deletions CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -41,7 +42,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -81,7 +82,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -102,7 +104,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -142,7 +144,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -163,7 +166,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -221,7 +224,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -231,7 +235,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -241,7 +246,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");

/* m.def("mainRun", &mainRun, "mainRun"); */
Expand Down
24 changes: 15 additions & 9 deletions CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
std::vector<Device> devices = alpaka::getDevs<Platform>();

auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);
Expand All @@ -47,7 +48,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -87,7 +88,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -108,7 +110,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -148,7 +150,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -169,7 +172,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -213,7 +216,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -223,7 +227,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -233,7 +238,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
}
};  // namespace alpaka_cuda_async
29 changes: 14 additions & 15 deletions CLUEstering/alpaka/CLUE/CLUEAlgoAlpaka.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
VecArray<int32_t, max_seeds>* m_seeds;
VecArray<int32_t, max_followers>* m_followers;

void setBlockSize(std::size_t blockSize) { blockSize_ = blockSize; }

template <typename KernelType>
std::vector<std::vector<int>> make_clusters(Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_);
Queue queue_,
size_t block_size);

private:
float dc_;
Expand All @@ -52,8 +51,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
// average number of points found in a tile
int pointsPerTile_;

std::size_t blockSize_ = 1024;

/* domain_t<Ndim> m_domains; */

// Buffers
Expand All @@ -66,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

// Private methods
void init_device(Queue queue_);
void setup(const Points<Ndim>& h_points, PointsAlpaka<Ndim>& d_points, Queue queue_);
void setup(const Points<Ndim>& h_points, PointsAlpaka<Ndim>& d_points, Queue queue_, size_t block_size);

// Construction of the tiles
void calculate_tile_size(TilesAlpaka<Ndim>& h_tiles, const Points<Ndim>& h_points);
Expand Down Expand Up @@ -107,7 +104,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
template <typename TAcc, uint8_t Ndim>
void CLUEAlgoAlpaka<TAcc, Ndim>::setup(const Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
Queue queue_) {
Queue queue_,
size_t block_size) {
// Create temporary tiles object
TilesAlpaka<Ndim> temp;
calculate_tile_size(temp, h_points);
Expand All @@ -122,8 +120,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
alpaka::memset(queue_, (*d_seeds), 0x00);

// Define the working division
Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, blockSize_);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, blockSize_);
Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(working_div, KernelResetFollowers{}, m_followers, h_points.n));
Expand All @@ -135,11 +133,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
std::vector<std::vector<int>> CLUEAlgoAlpaka<TAcc, Ndim>::make_clusters(Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_) {
setup(h_points, d_points, queue_);
Queue queue_,
size_t block_size) {
setup(h_points, d_points, queue_, block_size);

const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, blockSize_);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, blockSize_);
const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(
working_div, KernelFillTiles(), d_points.view(), m_tiles, h_points.n));
Expand Down Expand Up @@ -174,8 +173,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
h_points.n));

// We change the working division when assigning the clusters
const Idx grid_size_seeds = cms::alpakatools::divide_up_by(max_seeds, blockSize_);
auto working_div_seeds = cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, blockSize_);
const Idx grid_size_seeds = cms::alpakatools::divide_up_by(max_seeds, block_size);
auto working_div_seeds = cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, block_size);
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(
Expand Down
Loading

0 comments on commit eb104c4

Please sign in to comment.