Merge remote-tracking branch 'upstream/dev' into bloom-filter-new
sleeepyjack committed Sep 18, 2024
2 parents 5b3fe40 + 5602381 commit f53cfad
Showing 15 changed files with 565 additions and 158 deletions.
16 changes: 4 additions & 12 deletions include/cuco/detail/probing_scheme/probing_scheme_impl.inl
@@ -95,7 +95,7 @@ __host__ __device__ constexpr linear_probing<CGSize, Hash>::linear_probing(Hash

template <int32_t CGSize, typename Hash>
template <typename NewHash>
__host__ __device__ constexpr auto linear_probing<CGSize, Hash>::with_hash_function(
__host__ __device__ constexpr auto linear_probing<CGSize, Hash>::rebind_hash_function(
NewHash const& hash) const noexcept
{
return linear_probing<cg_size, NewHash>{hash};
@@ -143,28 +143,20 @@ __host__ __device__ constexpr double_hashing<CGSize, Hash1, Hash2>::double_hashi

template <int32_t CGSize, typename Hash1, typename Hash2>
__host__ __device__ constexpr double_hashing<CGSize, Hash1, Hash2>::double_hashing(
cuco::pair<Hash1, Hash2> const& hash)
cuda::std::tuple<Hash1, Hash2> const& hash)
: hash1_{hash.first}, hash2_{hash.second}
{
}

template <int32_t CGSize, typename Hash1, typename Hash2>
template <typename NewHash1, typename NewHash2>
__host__ __device__ constexpr auto double_hashing<CGSize, Hash1, Hash2>::with_hash_function(
NewHash1 const& hash1, NewHash2 const& hash2) const noexcept
{
return double_hashing<cg_size, NewHash1, NewHash2>{hash1, hash2};
}

template <int32_t CGSize, typename Hash1, typename Hash2>
template <typename NewHash, typename Enable>
__host__ __device__ constexpr auto double_hashing<CGSize, Hash1, Hash2>::with_hash_function(
__host__ __device__ constexpr auto double_hashing<CGSize, Hash1, Hash2>::rebind_hash_function(
NewHash const& hash) const
{
static_assert(cuco::is_tuple_like<NewHash>::value,
"The given hasher must be a tuple-like object");

auto const [hash1, hash2] = cuco::pair{hash};
auto const [hash1, hash2] = cuda::std::tuple{hash};
using hash1_type = cuda::std::decay_t<decltype(hash1)>;
using hash2_type = cuda::std::decay_t<decltype(hash2)>;
return double_hashing<cg_size, hash1_type, hash2_type>{hash1, hash2};
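The hunk above renames `with_hash_function` to `rebind_hash_function` on both probing schemes and moves the bundled-hasher overload of `double_hashing` onto a tuple-like argument. The following is an editor's usage sketch, not part of this commit; header paths, constructor signatures, and the `murmurhash3_32`/`xxhash_32` hashers are assumed from current cuco releases.

// Sketch only (not part of this commit): rebinding the hasher(s) of existing probing schemes.
#include <cuco/hash_functions.cuh>
#include <cuco/probing_scheme.cuh>

#include <cuda/std/tuple>

using key_type = int;

void rebind_probing_schemes()
{
  // linear_probing: swap the single hasher for a different one.
  cuco::linear_probing<1, cuco::murmurhash3_32<key_type>> linear{cuco::murmurhash3_32<key_type>{}};
  auto const linear_xx = linear.rebind_hash_function(cuco::xxhash_32<key_type>{});

  // double_hashing: the rebinding overload shown above takes one tuple-like object
  // holding both replacement hashers.
  cuco::double_hashing<1, cuco::murmurhash3_32<key_type>, cuco::murmurhash3_32<key_type>> dbl{
    cuco::murmurhash3_32<key_type>{}, cuco::murmurhash3_32<key_type>{}};
  auto const dbl_xx = dbl.rebind_hash_function(
    cuda::std::tuple{cuco::xxhash_32<key_type>{}, cuco::murmurhash3_32<key_type>{}});

  (void)linear_xx;  // silence unused-variable warnings in this sketch
  (void)dbl_xx;
}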
4 changes: 2 additions & 2 deletions include/cuco/detail/static_map/kernels.cuh
@@ -206,7 +206,7 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem(
ref.probing_scheme(),
{},
storage};
auto shared_map_ref = std::move(shared_map).with(cuco::op::insert_or_apply);
auto shared_map_ref = shared_map.rebind_operators(cuco::op::insert_or_apply);
shared_map_ref.initialize(block);
block.sync();

@@ -262,4 +262,4 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_apply_shmem(
}
}
}
} // namespace cuco::static_map_ns::detail
} // namespace cuco::static_map_ns::detail
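The call-site change above swaps the consuming `std::move(shared_map).with(...)` for the copying `shared_map.rebind_operators(...)`. The same pattern applies to any ref whose operator set needs to change; below is an editor's sketch under assumed names (the kernel, its parameters, and the idea that the ref was built with `map.ref(cuco::op::find)` on a `cg_size == 1` map are illustrative, not taken from this commit).

// Sketch only (not part of this commit): deriving a ref with a different operator set in device code.
#include <cuco/static_map.cuh>

#include <cstddef>

template <typename MapRef>
__global__ void contains_kernel(MapRef find_ref,
                                typename MapRef::key_type const* keys,
                                bool* found,
                                std::size_t num_keys)
{
  // Non-destructive copy of the ref that exposes `contains` instead of `find`.
  auto const contains_ref = find_ref.rebind_operators(cuco::op::contains);

  auto const idx = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx < num_keys) { found[idx] = contains_ref.contains(keys[idx]); }
}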
77 changes: 63 additions & 14 deletions include/cuco/detail/static_map/static_map_ref.inl
@@ -296,11 +296,17 @@ template <typename Key,
typename StorageRef,
typename... Operators>
template <typename... NewOperators>
auto static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::with(
NewOperators...) && noexcept
__host__ __device__ constexpr auto
static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::with_operators(
NewOperators...) const noexcept
{
return static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, NewOperators...>{
std::move(*this)};
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
this->probing_scheme(),
{},
this->storage_ref()};
}

template <typename Key,
@@ -311,22 +317,65 @@ template <typename Key,
typename StorageRef,
typename... Operators>
template <typename... NewOperators>
__host__ __device__ auto constexpr static_map_ref<Key,
T,
Scope,
KeyEqual,
ProbingScheme,
StorageRef,
Operators...>::with_operators(NewOperators...)
const noexcept
__host__ __device__ constexpr auto
static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::rebind_operators(
NewOperators...) const noexcept
{
return static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, NewOperators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
this->impl_.probing_scheme(),
this->probing_scheme(),
{},
this->impl_.storage_ref()};
this->storage_ref()};
}

template <typename Key,
typename T,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename NewKeyEqual>
__host__ __device__ constexpr auto
static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::rebind_key_eq(
NewKeyEqual const& key_equal) const noexcept
{
return static_map_ref<Key, T, Scope, NewKeyEqual, ProbingScheme, StorageRef, Operators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
key_equal,
this->probing_scheme(),
{},
this->storage_ref()};
}

template <typename Key,
typename T,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename NewHash>
__host__ __device__ constexpr auto
static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::
rebind_hash_function(NewHash const& hash) const
{
auto const probing_scheme = this->probing_scheme().rebind_hash_function(hash);
return static_map_ref<Key,
T,
Scope,
KeyEqual,
cuda::std::decay_t<decltype(probing_scheme)>,
StorageRef,
Operators...>{cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
probing_scheme,
{},
this->storage_ref()};
}

template <typename Key,
Expand All @@ -349,7 +398,7 @@ static_map_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>
cuco::empty_value<T>{this->empty_value_sentinel()},
cuco::erased_key<Key>{this->erased_key_sentinel()},
this->key_eq(),
this->impl_.probing_scheme(),
this->probing_scheme(),
scope,
storage_ref_type{this->window_extent(), memory_to_use}};
}
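Taken together, the new `rebind_operators`, `rebind_key_eq`, and `rebind_hash_function` members each return a new ref over the same storage with exactly one policy swapped, so they can be chained. An editor's sketch follows; `custom_hash`, the `int` key type, and the assumption of a single-hasher probing scheme such as `linear_probing` are placeholders, not part of this diff.

// Sketch only (not part of this commit): swapping individual policies on an existing static_map_ref.
#include <cuco/static_map.cuh>

#include <cuda/std/functional>

#include <cstdint>

// Made-up hasher for illustration; assumes int keys.
struct custom_hash {
  __host__ __device__ std::uint32_t operator()(int key) const noexcept
  {
    return static_cast<std::uint32_t>(key) * 2654435761u;  // Knuth multiplicative hashing
  }
};

template <typename MapRef>
__device__ auto rebind_policies(MapRef const& ref)
{
  // Each call copies the ref: same slots and sentinels, one policy replaced.
  return ref.rebind_key_eq(cuda::std::equal_to<int>{})  // swap the key comparator
    .rebind_hash_function(custom_hash{})                // swap the hasher; the probing scheme type changes
    .rebind_operators(cuco::op::find);                  // swap the operator set
}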
179 changes: 174 additions & 5 deletions include/cuco/detail/static_multimap/static_multimap_ref.inl
@@ -21,6 +21,7 @@

#include <cuda/atomic>
#include <cuda/std/functional>
#include <cuda/std/utility>

#include <cooperative_groups.h>

@@ -294,11 +295,22 @@ template <typename Key,
typename StorageRef,
typename... Operators>
template <typename... NewOperators>
auto static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::with(
NewOperators...) && noexcept
__host__ __device__ auto constexpr static_multimap_ref<
Key,
T,
Scope,
KeyEqual,
ProbingScheme,
StorageRef,
Operators...>::with_operators(NewOperators...) const noexcept
{
return static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, NewOperators...>{
std::move(*this)};
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
this->probing_scheme(),
{},
impl_.storage_ref()};
}

template <typename Key,
@@ -316,15 +328,63 @@ __host__ __device__ auto constexpr static_multimap_ref<
KeyEqual,
ProbingScheme,
StorageRef,
Operators...>::with_operators(NewOperators...) const noexcept
Operators...>::rebind_operators(NewOperators...) const noexcept
{
return static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, NewOperators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
impl_.probing_scheme(),
{},
impl_.storage_ref()};
this->storage_ref()};
}

template <typename Key,
typename T,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename NewKeyEqual>
__host__ __device__ constexpr auto
static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::
rebind_key_eq(NewKeyEqual const& key_equal) const noexcept
{
return static_multimap_ref<Key, T, Scope, NewKeyEqual, ProbingScheme, StorageRef, Operators...>{
cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
key_equal,
this->probing_scheme(),
{},
this->storage_ref()};
}

template <typename Key,
typename T,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename NewHash>
__host__ __device__ constexpr auto
static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::
rebind_hash_function(NewHash const& hash) const
{
auto const probing_scheme = this->probing_scheme().rebind_hash_function(hash);
return static_multimap_ref<Key,
T,
Scope,
KeyEqual,
cuda::std::decay_t<decltype(probing_scheme)>,
StorageRef,
Operators...>{cuco::empty_key<Key>{this->empty_key_sentinel()},
cuco::empty_value<T>{this->empty_value_sentinel()},
this->key_eq(),
probing_scheme,
{},
this->storage_ref()};
}

template <typename Key,
@@ -487,6 +547,115 @@ class operator_impl<
}
};

template <typename Key,
typename T,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
class operator_impl<
op::for_each_tag,
static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>> {
using base_type = static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef>;
using ref_type =
static_multimap_ref<Key, T, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>;

static constexpr auto cg_size = base_type::cg_size;

public:
/**
* @brief Executes a callback on every element in the container with key equivalent to the probe
* key.
*
* @note Passes to the callback an un-incrementable input iterator pointing to the element
* whose key is equivalent to `key`.
*
* @tparam ProbeKey Probe key type
* @tparam CallbackOp Unary callback functor or device lambda
*
* @param key The key to search for
* @param callback_op Function to call on every element found
*/
template <class ProbeKey, class CallbackOp>
__device__ void for_each(ProbeKey const& key, CallbackOp&& callback_op) const noexcept
{
// CRTP: cast `this` to the actual ref type
auto const& ref_ = static_cast<ref_type const&>(*this);
ref_.impl_.for_each(key, cuda::std::forward<CallbackOp>(callback_op));
}

/**
* @brief Executes a callback on every element in the container with key equivalent to the probe
* key.
*
* @note Passes to the callback an un-incrementable input iterator pointing to the element
* whose key is equivalent to `key`.
*
* @note This function uses cooperative group semantics, meaning that any thread may call the
* callback if it finds a matching element. If multiple elements are found within the same group,
* each thread with a match will call the callback with its associated element.
*
* @note Synchronizing `group` within `callback_op` is undefined behavior.
*
* @tparam ProbeKey Probe key type
* @tparam CallbackOp Unary callback functor or device lambda
*
* @param group The Cooperative Group used to perform this operation
* @param key The key to search for
* @param callback_op Function to call on every element found
*/
template <class ProbeKey, class CallbackOp>
__device__ void for_each(cooperative_groups::thread_block_tile<cg_size> const& group,
ProbeKey const& key,
CallbackOp&& callback_op) const noexcept
{
// CRTP: cast `this` to the actual ref type
auto const& ref_ = static_cast<ref_type const&>(*this);
ref_.impl_.for_each(group, key, cuda::std::forward<CallbackOp>(callback_op));
}

/**
* @brief Executes a callback on every element in the container with key equivalent to the probe
* key and can additionally perform work that requires synchronizing the Cooperative Group
* performing this operation.
*
* @note Passes to the callback an un-incrementable input iterator pointing to the element
* whose key is equivalent to `key`.
*
* @note This function uses cooperative group semantics, meaning that any thread may call the
* callback if it finds a matching element. If multiple elements are found within the same group,
* each thread with a match will call the callback with its associated element.
*
* @note Synchronizing `group` within `callback_op` is undefined behavior.
*
* @note The `sync_op` function can be used to perform work that requires synchronizing threads in
* `group` in between probing steps, where the number of probing steps performed between
* synchronization points is capped by `window_size * cg_size`. The functor will be called right
* after the current probing window has been traversed.
*
* @tparam ProbeKey Probe key type
* @tparam CallbackOp Unary callback functor or device lambda
* @tparam SyncOp Functor or device lambda which accepts the current `group` object
*
* @param group The Cooperative Group used to perform this operation
* @param key The key to search for
* @param callback_op Function to call on every element found
* @param sync_op Function that is allowed to synchronize `group` in between probing windows
*/
template <class ProbeKey, class CallbackOp, class SyncOp>
__device__ void for_each(cooperative_groups::thread_block_tile<cg_size> const& group,
ProbeKey const& key,
CallbackOp&& callback_op,
SyncOp&& sync_op) const noexcept
{
// CRTP: cast `this` to the actual ref type
auto const& ref_ = static_cast<ref_type const&>(*this);
ref_.impl_.for_each(
group, key, cuda::std::forward<CallbackOp>(callback_op), cuda::std::forward<SyncOp>(sync_op));
}
};
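An editor's usage sketch for the scalar `for_each` overload documented above, counting the values mapped to each probe key; the kernel, its parameters, and the assumption that the ref was created with `cuco::op::for_each` are illustrative only.

// Sketch only (not part of this commit): per-thread for_each over a static_multimap_ref
// that exposes the for_each operator, e.g. one obtained via multimap.ref(cuco::op::for_each).
#include <cuco/static_multimap.cuh>

#include <cstddef>

template <typename MultimapRef>
__global__ void count_matches_kernel(MultimapRef ref,
                                     typename MultimapRef::key_type const* keys,
                                     int* counts,
                                     std::size_t num_keys)
{
  auto const idx = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx >= num_keys) { return; }

  int count = 0;
  // The callback receives an un-incrementable iterator to one matching slot per invocation.
  ref.for_each(keys[idx], [&](auto const /* slot_it */) { ++count; });
  counts[idx] = count;
}

The cooperative-group overloads follow the same pattern with the tile passed first; the variant taking `sync_op` additionally lets the whole tile synchronize after each traversed probing window.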

template <typename Key,
typename T,
cuda::thread_scope Scope,