-
Notifications
You must be signed in to change notification settings - Fork 90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Factory method for cuco::static_set #297
base: dev
Are you sure you want to change the base?
Changes from all commits
462821f
a9dbc74
cf82721
91b5cb1
4285475
d74fb77
cc637dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <type_traits> | ||
|
||
namespace cuco::experimental::detail { | ||
|
||
/**
 * @brief Base class of the public extent class.
 *
 * This class only exposes the size type as `value_type` and should not be used directly.
 *
 * @tparam SizeType Size type; must be an integral type (enforced at compile time)
 */
template <typename SizeType>
class extent_base {
  // Reject non-integral size types early with a readable diagnostic.
  static_assert(std::is_integral_v<SizeType>, "SizeType must be integral.");

 public:
  using value_type = SizeType;  ///< Extent value type
};
} // namespace cuco::experimental::detail |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
#include <cuco/detail/static_set/functors.cuh> | ||
#include <cuco/detail/static_set/kernels.cuh> | ||
#include <cuco/detail/storage/counter_storage.cuh> | ||
#include <cuco/detail/traits.hpp> | ||
#include <cuco/detail/tuning.cuh> | ||
#include <cuco/detail/utils.hpp> | ||
#include <cuco/operator.hpp> | ||
|
@@ -39,18 +40,19 @@ namespace cuco { | |
namespace experimental { | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::static_set( | ||
constexpr static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::static_set( | ||
Extent capacity, | ||
empty_key<Key> empty_key_sentinel, | ||
KeyEqual pred, | ||
ProbingScheme const& probing_scheme, | ||
Allocator const& alloc, | ||
[[maybe_unused]] Storage const& storage, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like this. We're using There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have a solution in mind that eliminates the |
||
cuda_stream_ref stream) | ||
: empty_key_sentinel_{empty_key_sentinel}, | ||
predicate_{pred}, | ||
|
@@ -62,15 +64,15 @@ constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Sto | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt> | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::insert( | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::insert( | ||
InputIt first, InputIt last, cuda_stream_ref stream) | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -92,14 +94,14 @@ static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::ins | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::insert_async( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::insert_async( | ||
InputIt first, InputIt last, cuda_stream_ref stream) | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -116,15 +118,15 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage> | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename StencilIt, typename Predicate> | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::insert_if( | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::insert_if( | ||
InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -145,14 +147,14 @@ static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::ins | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename StencilIt, typename Predicate> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::insert_if_async( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::insert_if_async( | ||
InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda_stream_ref stream) | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -168,29 +170,29 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage> | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename OutputIt> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::contains( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::contains( | ||
InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const | ||
{ | ||
contains_async(first, last, output_begin, stream); | ||
stream.synchronize(); | ||
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename OutputIt> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::contains_async( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::contains_async( | ||
InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -212,29 +214,29 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage> | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename OutputIt> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::find( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::find( | ||
InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const | ||
{ | ||
find_async(first, last, output_begin, stream); | ||
stream.synchronize(); | ||
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename InputIt, typename OutputIt> | ||
void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::find_async( | ||
void static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::find_async( | ||
InputIt first, InputIt last, OutputIt output_begin, cuda_stream_ref stream) const | ||
{ | ||
auto const num_keys = cuco::detail::distance(first, last); | ||
|
@@ -250,14 +252,14 @@ void static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage> | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename OutputIt> | ||
OutputIt static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve_all( | ||
OutputIt static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve_all( | ||
OutputIt output_begin, cuda_stream_ref stream) const | ||
{ | ||
auto begin = thrust::make_transform_iterator(thrust::counting_iterator<size_type>(0), | ||
|
@@ -296,14 +298,14 @@ OutputIt static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stor | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::size( | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::size_type | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::size( | ||
cuda_stream_ref stream) const | ||
{ | ||
auto counter = detail::counter_storage<size_type, thread_scope, allocator_type>{allocator_}; | ||
|
@@ -323,47 +325,68 @@ static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::siz | |
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
constexpr auto | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::capacity() | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::capacity() | ||
const noexcept | ||
{ | ||
return storage_.capacity(); | ||
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
constexpr static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::key_type | ||
static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::empty_key_sentinel() | ||
constexpr static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::key_type | ||
static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::empty_key_sentinel() | ||
const noexcept | ||
{ | ||
return empty_key_sentinel_; | ||
} | ||
|
||
template <class Key, | ||
class Extent, | ||
cuda::thread_scope Scope, | ||
class Extent, | ||
class KeyEqual, | ||
class ProbingScheme, | ||
class Allocator, | ||
class Storage> | ||
template <typename... Operators> | ||
auto static_set<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::ref( | ||
auto static_set<Key, Scope, Extent, KeyEqual, ProbingScheme, Allocator, Storage>::ref( | ||
Operators...) const noexcept | ||
{ | ||
static_assert(sizeof...(Operators), "No operators specified"); | ||
return ref_type<Operators...>{ | ||
cuco::empty_key<key_type>(empty_key_sentinel_), predicate_, probing_scheme_, storage_.ref()}; | ||
} | ||
|
||
template <class Key, cuda::thread_scope Scope, class... Args> | ||
constexpr auto make_static_set(Args&&... args) | ||
{ | ||
// TODO don't repeat defaults | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, that's also not so elegant. Currently, we need to mirror every default tparam from using default_set = static_set<Key>;
using default_allocator = typename default_set::allocator_type;
... |
||
return static_set<Key, Scope>{ | ||
detail::find_arg<detail::is_extent>(std::forward<Args>(args)...), // required parameter | ||
detail::find_arg<empty_key<Key>>(std::forward<Args>(args)...), // required parameter | ||
detail::find_arg<detail::key_equal_traits<Key>::template is_equal_functor_t>( | ||
std::forward<Args>(args)..., thrust::equal_to<Key>{}), | ||
detail::find_arg<detail::is_probing_scheme>(std::forward<Args>(args)..., | ||
double_hashing<4, // CG size | ||
cuco::murmurhash3_32<Key>, | ||
cuco::murmurhash3_32<Key>>{}), | ||
detail::find_arg<detail::is_allocator>(std::forward<Args>(args)..., | ||
cuco::cuda_allocator<std::byte>{}), | ||
detail::find_arg<detail::is_storage>(std::forward<Args>(args)..., | ||
cuco::experimental::aow_storage<1>{}), | ||
detail::find_arg<cuda_stream_ref>(std::forward<Args>(args)..., cuda_stream_ref{})}; | ||
} | ||
|
||
} // namespace experimental | ||
} // namespace cuco |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using the factory doesn't save you any characters (at least until we remove the `experimental` namespace). However, not having to remember the exact order of arguments is a huge win in my UI book.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume size and empty sentinel are necessary and all others are optional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep. Also, for the tparams, only the `Key` is necessary.