From c9c115e7b6e74a1ed5d1f2d0f9426be5b3788e6a Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 2 Oct 2024 11:28:14 -0700 Subject: [PATCH 1/2] Restore the deprecated `with_key_eq` to prevent breaking changes in libcudf. (#607) Unblock https://github.com/rapidsai/cudf/pull/16967 This PR restores the deprecated `with_key_eq` to avoid a breaking change in libcudf. To be reverted once libcudf is migrated to use the new `rebind_key_eq`. --- include/cuco/detail/static_set/static_set_ref.inl | 14 ++++++++++++++ include/cuco/static_set_ref.cuh | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/cuco/detail/static_set/static_set_ref.inl b/include/cuco/detail/static_set/static_set_ref.inl index a70df3d76..0683848e6 100644 --- a/include/cuco/detail/static_set/static_set_ref.inl +++ b/include/cuco/detail/static_set/static_set_ref.inl @@ -298,6 +298,20 @@ static_set_ref::r this->storage_ref()}; } +template +template +__host__ __device__ constexpr auto +static_set_ref::with_key_eq( + NewKeyEqual const& key_equal) const noexcept +{ + return this->rebind_key_eq(key_equal); +} + template + [[nodiscard]] __host__ __device__ constexpr auto with_key_eq( + NewKeyEqual const& key_equal) const noexcept; + /** * @brief Makes a copy of the current device reference with the given hasher * From de9d8c84e91a337fd970dca9f82f766c93a87ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20J=C3=BCnger?= Date: Thu, 3 Oct 2024 02:51:59 +0200 Subject: [PATCH 2/2] CUPTI metrics for bloom_filter benchmarks (#609) --- benchmarks/bloom_filter/add_bench.cu | 17 ++++++++++------- benchmarks/bloom_filter/contains_bench.cu | 17 ++++++++++------- benchmarks/bloom_filter/defaults.hpp | 1 - 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/benchmarks/bloom_filter/add_bench.cu b/benchmarks/bloom_filter/add_bench.cu index b60768d0b..00e2de775 100644 --- a/benchmarks/bloom_filter/add_bench.cu +++ b/benchmarks/bloom_filter/add_bench.cu @@ -49,7 +49,7 @@ void bloom_filter_add(nvbench::state& state, auto const num_keys = state.get_int64("NumInputs"); auto const filter_size_mb = state.get_int64("FilterSizeMB"); - auto const pattern_bits = state.get_int64("PatternBits"); + auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock); try { auto const policy = policy_type{static_cast(pattern_bits)}; @@ -70,6 +70,12 @@ void bloom_filter_add(nvbench::state& state, filter_type filter{num_sub_filters, {}, {static_cast(pattern_bits)}}; + state.collect_dram_throughput(); + state.collect_l1_hit_rates(); + state.collect_l2_hit_rates(); + state.collect_loads_efficiency(); + state.collect_stores_efficiency(); + add_fpr_summary(state, filter); state.exec([&](nvbench::launch& launch) { @@ -87,8 +93,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); + .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE); NVBENCH_BENCH_TYPES(bloom_filter_add, NVBENCH_TYPE_AXES(nvbench::type_list, @@ -100,8 +105,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); + .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}); NVBENCH_BENCH_TYPES(bloom_filter_add, NVBENCH_TYPE_AXES(nvbench::type_list, @@ -113,5 +117,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_add, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); + .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}); diff --git a/benchmarks/bloom_filter/contains_bench.cu b/benchmarks/bloom_filter/contains_bench.cu index 8b9abcb68..67ba80d95 100644 --- a/benchmarks/bloom_filter/contains_bench.cu +++ b/benchmarks/bloom_filter/contains_bench.cu @@ -51,7 +51,7 @@ void bloom_filter_contains( auto const num_keys = state.get_int64("NumInputs"); auto const filter_size_mb = state.get_int64("FilterSizeMB"); - auto const pattern_bits = state.get_int64("PatternBits"); + auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock); try { auto const policy = policy_type{static_cast(pattern_bits)}; @@ -73,6 +73,12 @@ void bloom_filter_contains( filter_type filter{num_sub_filters, {}, {static_cast(pattern_bits)}}; + state.collect_dram_throughput(); + state.collect_l1_hit_rates(); + state.collect_l2_hit_rates(); + state.collect_loads_efficiency(); + state.collect_stores_efficiency(); + add_fpr_summary(state, filter); filter.add(keys.begin(), keys.end()); @@ -92,8 +98,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); + .add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE); NVBENCH_BENCH_TYPES(bloom_filter_contains, NVBENCH_TYPE_AXES(nvbench::type_list, @@ -105,8 +110,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); + .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}); NVBENCH_BENCH_TYPES(bloom_filter_contains, NVBENCH_TYPE_AXES(nvbench::type_list, @@ -118,5 +122,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains, .set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"}) .set_max_noise(defaults::MAX_NOISE) .add_int64_axis("NumInputs", {defaults::BF_N}) - .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}) - .add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS}); \ No newline at end of file + .add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB}); \ No newline at end of file diff --git a/benchmarks/bloom_filter/defaults.hpp b/benchmarks/bloom_filter/defaults.hpp index 40c0883e2..67f3cf6ff 100644 --- a/benchmarks/bloom_filter/defaults.hpp +++ b/benchmarks/bloom_filter/defaults.hpp @@ -33,7 +33,6 @@ using BF_WORD = nvbench::uint32_t; static constexpr auto BF_N = 400'000'000; static constexpr auto BF_SIZE_MB = 2'000; static constexpr auto BF_WORDS_PER_BLOCK = 8; -static constexpr auto BF_PATTERN_BITS = BF_WORDS_PER_BLOCK; auto const BF_SIZE_MB_RANGE_CACHE = std::vector{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};