Skip to content

Commit

Permalink
Change block_size to parameter for mainRun
Browse files Browse the repository at this point in the history
  • Loading branch information
sbaldu committed Dec 14, 2023
1 parent 703e811 commit eb104c4
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 54 deletions.
11 changes: 7 additions & 4 deletions CLUEstering/alpaka/BindingModules/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,10 @@ def choose_kernel(self,
raise ValueError("Invalid kernel. The allowed choices for the"
+ " kernels are: flat, exp, gaus and custom.")

def run_clue(self, backend: str = "cpu serial", verbose: bool = False) -> None:
def run_clue(self,
backend: str = "cpu serial",
block_size: int = 1024,
verbose: bool = False) -> None:
"""
Executes the CLUE clustering algorithm.
Expand Down Expand Up @@ -537,15 +540,15 @@ def run_clue(self, backend: str = "cpu serial", verbose: bool = False) -> None:
if backend == "cpu serial":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
elif backend == "cpu tbb":
cluster_id_is_seed = cpu_serial.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
elif backend == "gpu cuda":
cluster_id_is_seed = gpu_cuda.mainRun(self.dc_, float(self.rhoc), self.outlier, self.ppbin,
self.clust_data.coords, self.clust_data.weight,
self.kernel, self.clust_data.n_dim)
self.kernel, self.clust_data.n_dim, block_size)
# cluster_id_is_seed = cpu_tbb.mainRun(self.dc_, self.rhoc, self.outlier, self.ppbin,
# self.clust_data.coords, self.clust_data.weight,
# self.kernel, self.clust_data.n_dim)
Expand Down
26 changes: 15 additions & 11 deletions CLUEstering/alpaka/BindingModules/binding_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size = 1024) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -41,7 +42,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -81,7 +82,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size = 1024) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -102,7 +104,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -142,7 +144,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -163,7 +166,7 @@ namespace alpaka_serial_sync {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -207,7 +210,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -217,7 +221,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -227,9 +232,8 @@ namespace alpaka_serial_sync {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");

m.def("set_blocksize", CLUEAlgoAlpaka::setBlockSize, "set_blocksize");
}
}; // namespace alpaka_serial_sync
24 changes: 15 additions & 9 deletions CLUEstering/alpaka/BindingModules/binding_cpu_tbb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -41,7 +42,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -81,7 +82,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -102,7 +104,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -142,7 +144,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -163,7 +166,7 @@ namespace alpaka_tbb_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_); */
Expand Down Expand Up @@ -221,7 +224,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -231,7 +235,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -241,7 +246,8 @@ namespace alpaka_tbb_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");

/* m.def("mainRun", &mainRun, "mainRun"); */
Expand Down
24 changes: 15 additions & 9 deletions CLUEstering/alpaka/BindingModules/binding_gpu_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const FlatKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
std::vector<Device> devices = alpaka::getDevs<Platform>();

auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);
Expand All @@ -47,7 +48,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -87,7 +88,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const ExponentialKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -108,7 +110,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -148,7 +150,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>& coords,
const std::vector<float>& weights,
const GaussianKernel& kernel,
int Ndim) {
int Ndim,
size_t block_size) {
auto const dev_acc = alpaka::getDevByIdx<Acc1D>(0u);

// Create the queue
Expand All @@ -169,7 +172,7 @@ namespace alpaka_cuda_async {
/* return run1(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
break;
[[likely]] case (2) :
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_);
return run2(dc, rhoc, outlier, pPBin, coords, weights, kernel, queue_, block_size);
break;
[[likely]] case (3) :
/* return run3(dc, rhoc, outlier, pPBin, coords, weights, queue_); */
Expand Down Expand Up @@ -213,7 +216,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const FlatKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -223,7 +227,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const ExponentialKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
m.def("mainRun",
pybind11::overload_cast<float,
Expand All @@ -233,7 +238,8 @@ namespace alpaka_cuda_async {
const std::vector<std::vector<float>>&,
const std::vector<float>&,
const GaussianKernel&,
int>(&mainRun),
int,
size_t>(&mainRun),
"mainRun");
}
};  // namespace alpaka_cuda_async
29 changes: 14 additions & 15 deletions CLUEstering/alpaka/CLUE/CLUEAlgoAlpaka.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
VecArray<int32_t, max_seeds>* m_seeds;
VecArray<int32_t, max_followers>* m_followers;

void setBlockSize(std::size_t blockSize) { blockSize_ = blockSize; }

template <typename KernelType>
std::vector<std::vector<int>> make_clusters(Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_);
Queue queue_,
size_t block_size);

private:
float dc_;
Expand All @@ -52,8 +51,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
// average number of points found in a tile
int pointsPerTile_;

std::size_t blockSize_ = 1024;

/* domain_t<Ndim> m_domains; */

// Buffers
Expand All @@ -66,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

// Private methods
void init_device(Queue queue_);
void setup(const Points<Ndim>& h_points, PointsAlpaka<Ndim>& d_points, Queue queue_);
void setup(const Points<Ndim>& h_points, PointsAlpaka<Ndim>& d_points, Queue queue_, size_t block_size);

// Construction of the tiles
void calculate_tile_size(TilesAlpaka<Ndim>& h_tiles, const Points<Ndim>& h_points);
Expand Down Expand Up @@ -107,7 +104,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
template <typename TAcc, uint8_t Ndim>
void CLUEAlgoAlpaka<TAcc, Ndim>::setup(const Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
Queue queue_) {
Queue queue_,
size_t block_size) {
// Create temporary tiles object
TilesAlpaka<Ndim> temp;
calculate_tile_size(temp, h_points);
Expand All @@ -122,8 +120,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
alpaka::memset(queue_, (*d_seeds), 0x00);

// Define the working division
Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, blockSize_);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, blockSize_);
Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(working_div, KernelResetFollowers{}, m_followers, h_points.n));
Expand All @@ -135,11 +133,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
std::vector<std::vector<int>> CLUEAlgoAlpaka<TAcc, Ndim>::make_clusters(Points<Ndim>& h_points,
PointsAlpaka<Ndim>& d_points,
const KernelType& kernel,
Queue queue_) {
setup(h_points, d_points, queue_);
Queue queue_,
size_t block_size) {
setup(h_points, d_points, queue_, block_size);

const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, blockSize_);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, blockSize_);
const Idx grid_size = cms::alpakatools::divide_up_by(h_points.n, block_size);
auto working_div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
alpaka::enqueue(queue_,
alpaka::createTaskKernel<Acc1D>(
working_div, KernelFillTiles(), d_points.view(), m_tiles, h_points.n));
Expand Down Expand Up @@ -174,8 +173,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
h_points.n));

// We change the working division when assigning the clusters
const Idx grid_size_seeds = cms::alpakatools::divide_up_by(max_seeds, blockSize_);
auto working_div_seeds = cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, blockSize_);
const Idx grid_size_seeds = cms::alpakatools::divide_up_by(max_seeds, block_size);
auto working_div_seeds = cms::alpakatools::make_workdiv<Acc1D>(grid_size_seeds, block_size);
alpaka::enqueue(
queue_,
alpaka::createTaskKernel<Acc1D>(
Expand Down
Loading

0 comments on commit eb104c4

Please sign in to comment.