Skip to content

Commit

Permalink
Set maximum required alignment to 16 bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeepyjack committed Sep 18, 2024
1 parent 936f8fe commit d2f04ff
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion include/cuco/detail/bloom_filter/bloom_filter_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
#include <cuco/detail/utils.hpp>
#include <cuco/utility/cuda_thread_scope.cuh>

// TODO #include <cuda/std/algorithm> once available
#include <cub/device/device_for.cuh>
#include <cuda/atomic>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/array>
#include <cuda/std/bit>
#include <cuda/std/tuple>
Expand Down Expand Up @@ -277,9 +279,14 @@ class bloom_filter_impl {
words_ + index, min(sizeof(word_type) * NumWords, required_alignment())));
}

/**
 * @brief Upper bound, in bytes, on the width of a single vectorized load.
 *
 * @return 16 (128 bits), the width of an LDG128 load, which is the widest load we can perform
 */
__host__ __device__ static constexpr size_t max_vec_bytes() noexcept
{
  // LDG128 moves 128 bits at once; expressed in bytes that is 16.
  constexpr size_t ldg128_width_bytes = 128 / 8;
  return ldg128_width_bytes;
}

/**
 * @brief Alignment, in bytes, required of the filter's word storage.
 *
 * The requirement is the size of one filter block
 * (`sizeof(word_type) * words_per_block`), capped at `max_vec_bytes()`.
 * A single load never spans more than `max_vec_bytes()` bytes, so demanding
 * a stricter alignment than that buys nothing; and a block smaller than the
 * cap must not be asked to be aligned beyond its own size.
 *
 * NOTE(review): an earlier revision combined the two values with
 * `cuda::std::max`, which turns the 16-byte value into a lower bound rather
 * than the intended upper bound; confirm no caller relied on that.
 *
 * @return `min(sizeof(word_type) * words_per_block, max_vec_bytes())`
 */
__host__ __device__ static constexpr size_t required_alignment() noexcept
{
  size_t const block_bytes = sizeof(word_type) * words_per_block;
  size_t const cap_bytes   = max_vec_bytes();
  // Clamp with a plain conditional so no additional algorithm header is needed.
  return block_bytes < cap_bytes ? block_bytes : cap_bytes;
}

word_type* words_;
Expand Down

0 comments on commit d2f04ff

Please sign in to comment.