Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate static map examples #407

Merged
merged 8 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
`cuco::static_map` is a fixed-size hash table using open addressing with linear probing. See the Doxygen documentation in `static_map.cuh` for more detailed information.

#### Examples:
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/T49P85Mnd))
- [Device-view APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_view_example.cu) (see [live example in godbolt](https://godbolt.org/z/dh8bMn3G1))
- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/7djKevK6e))
- [Key histogram](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/count_by_key_example.cu) (see [live example in godbolt](https://godbolt.org/z/vecGeYM48))
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/7jK9od6bx))
- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/W338MePdW))
- [Custom data types, key equality operators and hash functions](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/custom_type_example.cu) (see [live example in godbolt](https://godbolt.org/z/YYb1WE9od))
- [Key histogram](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_map/count_by_key_example.cu) (see [live example in godbolt](https://godbolt.org/z/6rz7MYoMe))

### `static_multimap`

Expand Down
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ConfigureExample(STATIC_SET_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/stati
ConfigureExample(STATIC_SET_DEVICE_REF_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_set/device_ref_example.cu")
ConfigureExample(STATIC_SET_DEVICE_SUBSETS_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_set/device_subsets_example.cu")
ConfigureExample(STATIC_MAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/host_bulk_example.cu")
ConfigureExample(STATIC_MAP_DEVICE_SIDE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/device_view_example.cu")
ConfigureExample(STATIC_MAP_DEVICE_SIDE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/device_ref_example.cu")
ConfigureExample(STATIC_MAP_CUSTOM_TYPE_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/custom_type_example.cu")
ConfigureExample(STATIC_MAP_COUNT_BY_KEY_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_map/count_by_key_example.cu")
ConfigureExample(STATIC_MULTIMAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/static_multimap/host_bulk_example.cu")
46 changes: 25 additions & 21 deletions examples/static_map/count_by_key_example.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* the context of a count-by-key operation, i.e. for a histogram over keys.
*
* Individual operations like a single insert or find can be performed in device code via the
* static_map "device_view" types.
* "static_map_ref" types.
*
* @note This example is for demonstration purposes only. It is not intended to show the most
* performant way to do the example algorithm.
Expand All @@ -47,17 +47,17 @@
* @brief Inserts keys and counts how often they occur in the input sequence.
*
* @tparam BlockSize CUDA block size
* @tparam Map Type of the map returned from static_map::get_device_mutable_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
* @tparam UniqueIter Output iterator whose value_type is convertible to uint64_t
*
* @param[in] map_view View of the map into which inserts will be performed
* @param[in] map_ref Reference of the map into which inserts will be performed
* @param[in] key_begin The beginning of the range of keys to insert
* @param[in] num_keys The total number of keys and values
* @param[out] num_unique_keys The total number of distinct keys inserted
*/
template <int64_t BlockSize, typename Map, typename KeyIter, typename UniqueIter>
__global__ void count_by_key(Map map_view,
__global__ void count_by_key(Map map_ref,
KeyIter keys,
uint64_t num_keys,
UniqueIter num_unique_keys)
Expand All @@ -71,13 +71,14 @@ __global__ void count_by_key(Map map_view,
uint64_t thread_unique_keys = 0;
while (idx < num_keys) {
// insert key into the map with a count of 1
auto [slot, is_new_key] = map_view.insert_and_find({keys[idx], 1});
auto [slot, is_new_key] = map_ref.insert_and_find(cuco::pair{keys[idx], 1});
sleeepyjack marked this conversation as resolved.
Show resolved Hide resolved
if (is_new_key) {
// first occurrence of the key
thread_unique_keys++;
} else {
// key is already in the map -> increment count
slot->second.fetch_add(1, cuda::memory_order_relaxed);
auto ref = cuda::atomic_ref<uint32_t, cuda::thread_scope_device>{slot->second};
ref.fetch_add(1, cuda::memory_order_relaxed);
}
idx += loop_stride;
}
Expand All @@ -101,7 +102,7 @@ int main(void)
// Empty slots are represented by reserved "sentinel" values. These values should be selected such
// that they never occur in your input data.
Key constexpr empty_key_sentinel = static_cast<Key>(-1);
Count constexpr empty_value_sentinel = static_cast<Key>(-1);
Count constexpr empty_value_sentinel = static_cast<Count>(-1);

// Number of keys to be inserted
auto constexpr num_keys = 50'000;
Expand All @@ -125,34 +126,37 @@ int main(void)
// Compute capacity based on a 50% load factor
auto constexpr load_factor = 0.5;

// If the number of unique keys is known in advance, we can use it to calculate the map capacity
std::size_t const capacity = std::ceil((num_keys / key_duplicates) / load_factor);
// If we can't give an estimated upper bound on the number of unique keys
// we conservatively assume each key in the input is distinct
// std::size_t const capacity = std::ceil(num_keys / load_factor);
// If the number of elements is known in advance, we can use it to calculate the map capacity
std::size_t const num_elements = num_keys / key_duplicates;

// Constructs a map with "capacity" slots.
cuco::static_map<Key, Count> map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
// Constructs a map with number of elements and desired load factor.
auto map = cuco::experimental::static_map{
num_elements,
load_factor,
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
thrust::equal_to<Key>{},
sleeepyjack marked this conversation as resolved.
Show resolved Hide resolved
cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{}};

// Get a non-owning, mutable view of the map that allows inserts to pass by value into the kernel
auto device_insert_view = map.get_device_mutable_view();
// Get a non-owning, mutable reference of the map that allows `insert_and_find` operation to pass
// by value into the kernel
auto map_ref = map.ref(cuco::experimental::op::insert_and_find);

auto constexpr block_size = 256;
auto const grid_size = (num_keys + block_size - 1) / block_size;
count_by_key<block_size><<<grid_size, block_size>>>(
device_insert_view, insert_keys.begin(), num_keys, num_unique_keys.data());
count_by_key<block_size>
<<<grid_size, block_size>>>(map_ref, insert_keys.begin(), num_keys, num_unique_keys.data());

// Retrieve contents of all the non-empty slots in the map
thrust::device_vector<Key> result_keys(num_unique_keys[0]);
thrust::device_vector<Count> result_counts(num_unique_keys[0]);
map.retrieve_all(result_keys.begin(), result_counts.begin());

// Check if the number of result keys is correct
auto num_keys_check = num_unique_keys[0] == (num_keys / key_duplicates);
auto const num_keys_check = num_unique_keys[0] == (num_keys / key_duplicates);

// Iterate over all result counts and verify that they are correct
auto counts_check = thrust::all_of(
auto const counts_check = thrust::all_of(
result_counts.begin(), result_counts.end(), [] __host__ __device__(Count const count) {
return count == key_duplicates;
});
Expand Down
53 changes: 17 additions & 36 deletions examples/static_map/custom_type_example.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,32 +25,13 @@
#include <cuda/functional>

// User-defined key type
#if !defined(CUCO_HAS_INDEPENDENT_THREADS)
sleeepyjack marked this conversation as resolved.
Show resolved Hide resolved
struct custom_key_type {
int32_t a;
int32_t b;

__host__ __device__ custom_key_type() {}
__host__ __device__ custom_key_type(int32_t x) : a{x}, b{x} {}
};
#else
// Key type larger than 8B only supported for sm_70 and up
struct custom_key_type {
int32_t a;
int32_t b;
int32_t c;

__host__ __device__ custom_key_type() {}
__host__ __device__ custom_key_type(int32_t x) : a{x}, b{x}, c{x} {}

// Device equality operator is mandatory due to libcudacxx bug:
// https://github.com/NVIDIA/libcudacxx/issues/223
__device__ bool operator==(custom_key_type const& other) const
{
return a == other.a and b == other.b and c == other.c;
}
};
#endif

// User-defined value type
struct custom_value_type {
Expand All @@ -63,17 +44,12 @@ struct custom_value_type {

// User-defined device hash callable
struct custom_hash {
template <typename key_type>
__device__ uint32_t operator()(key_type k)
{
return k.a;
};
__device__ uint32_t operator()(custom_key_type const& k) const noexcept { return k.a; };
};

// User-defined device key equal callable
struct custom_key_equals {
template <typename key_type>
__device__ bool operator()(key_type const& lhs, key_type const& rhs)
struct custom_key_equal {
__device__ bool operator()(custom_key_type const& lhs, custom_key_type const& rhs) const noexcept
{
return lhs.a == rhs.a;
}
Expand All @@ -91,15 +67,20 @@ int main(void)
auto pairs_begin = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0),
cuda::proclaim_return_type<cuco::pair<custom_key_type, custom_value_type>>(
[] __device__(auto i) { return cuco::make_pair(custom_key_type{i}, custom_value_type{i}); }));
[] __device__(auto i) {
return cuco::pair{custom_key_type{i}, custom_value_type{i}};
}));

// Construct a map with 100,000 slots using the given empty key/value sentinels. Note the
// capacity is chosen knowing we will insert 80,000 keys, for an load factor of 80%.
cuco::static_map<custom_key_type, custom_value_type> map{
100'000, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
auto map = cuco::experimental::static_map{cuco::experimental::extent<std::size_t, 100'000>{},
sleeepyjack marked this conversation as resolved.
Show resolved Hide resolved
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
custom_key_equal{},
cuco::experimental::linear_probing<1, custom_hash>{}};

// Inserts 80,000 pairs into the map by using the custom hasher and custom equality callable
map.insert(pairs_begin, pairs_begin + num_pairs, custom_hash{}, custom_key_equals{});
map.insert(pairs_begin, pairs_begin + num_pairs);

// Reproduce inserted keys
auto insert_keys =
Expand All @@ -111,14 +92,14 @@ int main(void)

// Determine if all the inserted keys can be found by using the same hasher and equality
// function as `insert`. If a key `insert_keys[i]` doesn't exist, `contained[i] == false`.
map.contains(
insert_keys, insert_keys + num_pairs, contained.begin(), custom_hash{}, custom_key_equals{});
map.contains(insert_keys, insert_keys + num_pairs, contained.begin());
// This will fail due to inconsistent hash and key equal.
// map.contains(insert_keys, insert_keys + num_pairs, contained.begin());

// All inserted keys are contained
assert(
thrust::all_of(contained.begin(), contained.end(), [] __device__(auto const& b) { return b; }));
auto const all_contained =
thrust::all_of(contained.begin(), contained.end(), [] __device__(auto const& b) { return b; });
if (all_contained) { std::cout << "Success! Found all values.\n"; }

return 0;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,12 +29,11 @@
#include <limits>

/**
* @file device_view_example.cu
* @file device_ref_example.cu
* @brief Demonstrates usage of the device side APIs for individual operations like insert/find.
*
* Individual operations like a single insert or find can be performed in device code via the
* static_map "device_view" types. Note that concurrent insert and find are not supported, and
* therefore there are separate view types for insert and find to help prevent undefined behavior.
* "static_map_ref" types.
*
* @note This example is for demonstration purposes only. It is not intended to show the most
* performant way to do the example algorithm.
Expand All @@ -44,12 +43,12 @@
/**
* @brief Inserts keys that pass the specified predicated into the map.
*
* @tparam Map Type of the map returned from static_map::get_device_mutable_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
* @tparam ValueIter Input iterator whose value_type is convertible to Map::mapped_type
* @tparam Predicate Unary predicate
*
* @param[in] map_view View of the map into which inserts will be performed
* @param[in] map_ref Reference of the map into which inserts will be performed
* @param[in] key_begin The beginning of the range of keys to insert
* @param[in] value_begin The beginning of the range of values associated with each key to insert
* @param[in] num_keys The total number of keys and values
Expand All @@ -58,7 +57,7 @@
* @param[out] num_inserted The total number of keys successfully inserted
*/
template <typename Map, typename KeyIter, typename ValueIter, typename Predicate>
__global__ void filtered_insert(Map map_view,
__global__ void filtered_insert(Map map_ref,
KeyIter key_begin,
ValueIter value_begin,
std::size_t num_keys,
Expand All @@ -71,9 +70,9 @@ __global__ void filtered_insert(Map map_view,
while (tid < num_keys) {
// Only insert keys that pass the predicate
if (pred(key_begin[tid])) {
// device_mutable_view::insert returns `true` if it is the first time the given key was
// Map::insert returns `true` if it is the first time the given key was
// inserted and `false` if the key already existed
if (map_view.insert({key_begin[tid], value_begin[tid]})) {
if (map_ref.insert(cuco::pair{key_begin[tid], value_begin[tid]})) {
++counter; // Count number of successfully inserted keys
}
}
Expand All @@ -87,25 +86,26 @@ __global__ void filtered_insert(Map map_view,
/**
* @brief For keys that have a match in the map, increments their corresponding value by one.
*
* @tparam Map Type of the map returned from static_map::get_device_view
* @tparam Map Type of the map device reference
* @tparam KeyIter Input iterator whose value_type convertible to Map::key_type
*
* @param map_view View of the map into which queries will be performed
* @param map_ref Reference of the map into which queries will be performed
* @param key_begin The beginning of the range of keys to query
* @param num_keys The total number of keys
*/
template <typename Map, typename KeyIter>
__global__ void increment_values(Map map_view, KeyIter key_begin, std::size_t num_keys)
__global__ void increment_values(Map map_ref, KeyIter key_begin, std::size_t num_keys)
{
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
while (tid < num_keys) {
// If the key exists in the map, find returns an iterator to the specified key. Otherwise it
// returns map.end()
auto found = map_view.find(key_begin[tid]);
if (found != map_view.end()) {
auto found = map_ref.find(key_begin[tid]);
if (found != map_ref.end()) {
// If the key exists, atomically increment the associated value
// The value type of the iterator is pair<cuda::atomic<Key>, cuda::atomic<Value>>
found->second.fetch_add(1, cuda::memory_order_relaxed);
auto ref =
cuda::atomic_ref<typename Map::mapped_type, cuda::thread_scope_device>{found->second};
ref.fetch_add(1, cuda::memory_order_relaxed);
}
tid += gridDim.x * blockDim.x;
}
Expand Down Expand Up @@ -135,11 +135,16 @@ int main(void)
std::size_t const capacity = std::ceil(num_keys / load_factor);

// Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
cuco::static_map<Key, Value> map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};
auto map = cuco::experimental::static_map{
capacity,
cuco::empty_key{empty_key_sentinel},
cuco::empty_value{empty_value_sentinel},
thrust::equal_to<Key>{},
cuco::experimental::linear_probing<1, cuco::default_hash_function<Key>>{}};

// Get a non-owning, mutable view of the map that allows inserts to pass by value into the kernel
auto device_insert_view = map.get_device_mutable_view();
// Get a non-owning, mutable reference of the map that allows inserts to pass by value into the
// kernel
auto insert_ref = map.ref(cuco::experimental::op::insert);

// Predicate will only insert even keys
auto is_even = [] __device__(auto key) { return (key % 2) == 0; };
Expand All @@ -149,7 +154,7 @@ int main(void)

auto constexpr block_size = 256;
auto const grid_size = (num_keys + block_size - 1) / block_size;
filtered_insert<<<grid_size, block_size>>>(device_insert_view,
filtered_insert<<<grid_size, block_size>>>(insert_ref,
insert_keys.begin(),
insert_values.begin(),
num_keys,
Expand All @@ -158,10 +163,11 @@ int main(void)

std::cout << "Number of keys inserted: " << num_inserted[0] << std::endl;

// Get a non-owning view of the map that allows find operations to pass by value into the kernel
auto device_find_view = map.get_device_view();
// Get a non-owning reference of the map that allows find operations to pass by value into the
// kernel
auto find_ref = map.ref(cuco::experimental::op::find);

increment_values<<<grid_size, block_size>>>(device_find_view, insert_keys.begin(), num_keys);
increment_values<<<grid_size, block_size>>>(find_ref, insert_keys.begin(), num_keys);

// Retrieve contents of all the non-empty slots in the map
thrust::device_vector<Key> contained_keys(num_inserted[0]);
Expand Down
4 changes: 2 additions & 2 deletions examples/static_map/host_bulk_example.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,7 +54,7 @@ int main(void)
std::size_t const capacity = std::ceil(num_keys / load_factor);

// Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
cuco::static_map<Key, Value> map{
auto map = cuco::experimental::static_map{
capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};

// Create a sequence of keys and values {{0,0}, {1,1}, ... {i,i}}
Expand Down
Loading
Loading