Skip to content

Commit

Permalink
Remove unneeded pair-iterator benchmark (#16511)
Browse files Browse the repository at this point in the history
Removes the pair-iterator benchmark logic. The remaining benchmarks use the null-replacement-iterator which uses the libcudf pair-iterator internally. There is no need for benchmarking this unique iterator pattern that is not used by libcudf.

The `cpp/benchmarks/iterator/iterator.cu` file failed to compile with gcc 12 because the sum-reduce function cannot resolve adding `thrust::pair` objects together, likely due to some recent changes in CCCL. Regardless, adding `thrust::pair` objects is not something we need to benchmark. The existing benchmark already measures libcudf's usage of the internal pair-iterator correctly.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: #16511
  • Loading branch information
davidwendt authored Aug 13, 2024
1 parent 419fb99 commit 3a791cb
Showing 1 changed file with 0 additions and 77 deletions.
77 changes: 0 additions & 77 deletions cpp/benchmarks/iterator/iterator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/pair.h>
#include <thrust/reduce.h>

#include <random>
Expand Down Expand Up @@ -161,68 +160,6 @@ void BM_iterator(benchmark::State& state)
sizeof(TypeParam));
}

/**
 * @brief Adds two (value, flag) pairs so that a pair iterator can be sum-reduced.
 *
 * Each `first` is multiplied by its own `second` flag before the two products are
 * added, so an operand whose flag is `false` has its value multiplied by 0. The
 * flag of the result is built from the sum of both input flags, which converts to
 * `true` when at least one input flag is `true`.
 *
 * @tparam T Type of the value stored in `first`
 * @param lhs Left-hand (value, flag) pair
 * @param rhs Right-hand (value, flag) pair
 * @return Pair holding the flag-masked sum and the combined flag
 */
template <typename T>
__device__ thrust::pair<T, bool> operator+(thrust::pair<T, bool> lhs, thrust::pair<T, bool> rhs)
{
  // Multiplying by the bool flag masks out the value when the flag is false.
  auto const masked_total = lhs.first * lhs.second + rhs.first * rhs.second;
  // bool + bool promotes to int; it is converted back to bool when stored in the pair.
  auto const flag_total = lhs.second + rhs.second;
  return thrust::pair<T, bool>{masked_total, flag_total};
}
// -----------------------------------------------------------------------------
/**
 * @brief Reduces a column through its pair iterator using the file's `reduce_by_cub` helper.
 *
 * Creates a device view of `col`, obtains `pair_begin<T, has_null>()` from it and hands
 * that iterator, the row count and a `{0, false}` initial value to `reduce_by_cub`,
 * which receives `result.begin()` as its output location.
 *
 * @tparam T        Element type read from the column
 * @tparam has_null Forwarded as the second template argument of `pair_begin`
 * @param col    Column to reduce
 * @param result Device buffer whose first element is passed as the reduction output
 */
template <typename T, bool has_null>
void pair_iterator_bench_cub(cudf::column_view& col,
                             rmm::device_uvector<thrust::pair<T, bool>>& result)
{
  // The device view must stay alive while the iterator created from it is in use.
  auto device_view = cudf::column_device_view::create(col);
  auto pair_first  = device_view->pair_begin<T, has_null>();

  int row_count = col.size();
  thrust::pair<T, bool> seed{0, false};

  reduce_by_cub(result.begin(), pair_first, row_count, seed);
}

/**
 * @brief Reduces a column through its pair iterator using `thrust::reduce`.
 *
 * Creates a device view of `col`, builds the `[pair_begin, pair_begin + col.size())`
 * range and sum-reduces it on the device with `cudf::DeviceSum`, starting from a
 * `{0, false}` initial value.
 *
 * Note: the value returned by `thrust::reduce` is discarded and `result` is never
 * touched in this function.
 *
 * @tparam T        Element type read from the column
 * @tparam has_null Forwarded as the second template argument of `pair_begin`
 * @param col    Column to reduce
 * @param result Unused here; mirrors the signature of `pair_iterator_bench_cub`
 */
template <typename T, bool has_null>
void pair_iterator_bench_thrust(cudf::column_view& col,
                                rmm::device_uvector<thrust::pair<T, bool>>& result)
{
  thrust::pair<T, bool> seed{0, false};

  // The device view must stay alive while the iterators created from it are in use.
  auto device_view = cudf::column_device_view::create(col);
  auto pair_first  = device_view->pair_begin<T, has_null>();
  auto pair_last   = pair_first + col.size();

  thrust::reduce(thrust::device, pair_first, pair_last, seed, cudf::DeviceSum{});
}

/**
 * @brief Benchmark body that sum-reduces a column through its pair iterator.
 *
 * Builds two columns of `state.range(0)` rows holding the values 0, 1, 2, ...: one
 * without a validity iterator and one whose validity is `row % 2 == 0`. On every
 * benchmark iteration the second column is reduced with either the CUB or the
 * Thrust helper while a `cuda_event_timer` is in scope, and finally the number of
 * processed bytes is reported as `iterations * column_size * sizeof(TypeParam)`.
 *
 * NOTE(review): both helpers are instantiated with `has_null = false` although they
 * are given the column built with a validity iterator, and the column built without
 * one is never reduced. Confirm whether `has_null = true` was intended.
 *
 * @tparam TypeParam     Element type of the benchmarked columns
 * @tparam cub_or_thrust `true` runs `pair_iterator_bench_cub`, `false` runs
 *                       `pair_iterator_bench_thrust`
 * @param state Google Benchmark state; `range(0)` supplies the column size
 */
template <class TypeParam, bool cub_or_thrust>
void BM_pair_iterator(benchmark::State& state)
{
  using T = TypeParam;

  auto const column_size = static_cast<cudf::size_type>(state.range(0));

  // Row values 0, 1, 2, ... and a validity pattern that marks even rows as valid.
  auto row_values = thrust::counting_iterator<cudf::size_type>(0);
  auto row_valids = thrust::make_transform_iterator(
    row_values, [](cudf::size_type row) { return row % 2 == 0; });

  cudf::test::fixed_width_column_wrapper<T> plain_wrapper(row_values, row_values + column_size);
  cudf::test::fixed_width_column_wrapper<T> nullable_wrapper(
    row_values, row_values + column_size, row_valids);
  cudf::column_view plain_view    = plain_wrapper;
  cudf::column_view nullable_view = nullable_wrapper;

  // Single zero-initialized output element shared by all iterations.
  auto dev_result = cudf::detail::make_zeroed_device_uvector_sync<thrust::pair<T, bool>>(
    1, cudf::get_default_stream(), rmm::mr::get_current_device_resource());

  for (auto _ : state) {
    cuda_event_timer timer(state, true);  // flush_l2_cache = true, stream = 0
    if (!cub_or_thrust) {
      // Thrust path, fed by the pair iterator of the column built with validity.
      pair_iterator_bench_thrust<T, false>(nullable_view, dev_result);
    } else {
      // CUB path, fed by the pair iterator of the column built with validity.
      pair_iterator_bench_cub<T, false>(nullable_view, dev_result);
    }
  }

  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * column_size *
                          sizeof(TypeParam));
}

#define ITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust, raw_or_iterator) \
BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \
{ \
Expand All @@ -238,17 +175,3 @@ ITER_BM_BENCHMARK_DEFINE(double_cub_raw, double, true, true);
ITER_BM_BENCHMARK_DEFINE(double_cub_iter, double, true, false);
ITER_BM_BENCHMARK_DEFINE(double_thrust_raw, double, false, true);
ITER_BM_BENCHMARK_DEFINE(double_thrust_iter, double, false, false);

// Defines and registers one pair-iterator benchmark on the `Iterator` fixture.
//   name          - benchmark name passed to BENCHMARK_DEFINE_F / BENCHMARK_REGISTER_F
//   type          - element type forwarded to BM_pair_iterator
//   cub_or_thrust - forwarded to BM_pair_iterator to select the CUB (true) or Thrust (false) path
// The benchmark is registered with a range of 1000 to 10000000 and a range
// multiplier of 10, uses manually reported time, and reports in milliseconds.
#define PAIRITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust) \
BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \
{ \
BM_pair_iterator<type, cub_or_thrust>(state); \
} \
BENCHMARK_REGISTER_F(Iterator, name) \
->RangeMultiplier(10) \
->Range(1000, 10000000) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Pair-iterator benchmarks over `double`: one with cub_or_thrust = true (CUB path)
// and one with cub_or_thrust = false (Thrust path).
PAIRITER_BM_BENCHMARK_DEFINE(double_cub_pair, double, true);
PAIRITER_BM_BENCHMARK_DEFINE(double_thrust_pair, double, false);

0 comments on commit 3a791cb

Please sign in to comment.