From 66f43e40d4aa4f8be9a846b46265729dd0f3eef4 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Fri, 22 Sep 2023 12:21:03 +0200 Subject: [PATCH 1/9] Add `bytes_per_second` to transpose benchmark This patch relates to #13735. --- cpp/benchmarks/transpose/transpose.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 2f41bda4b88..b9ec5055451 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -40,6 +40,16 @@ static void BM_transpose(benchmark::State& state) cuda_event_timer raii(state, true); auto output = cudf::transpose(input); } + + // Collect memory statistics. + auto const bytes_read = input.num_columns() * input.num_rows() * (sizeof(int32_t)); + auto const bytes_written = bytes_read; + // Account for nullability in input and output. + auto const null_bytes = + 2 * input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()); + + state.SetBytesProcessed(static_cast(state.iterations()) * + (bytes_read + bytes_written + null_bytes)); } class Transpose : public cudf::benchmark {}; From d2676b542629714cd109afd13c4e46e405e6e766 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Fri, 22 Sep 2023 16:46:58 +0200 Subject: [PATCH 2/9] Run clang-format on transpose benchmark --- cpp/benchmarks/transpose/transpose.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index b9ec5055451..da602bd9783 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -42,7 +42,7 @@ static void BM_transpose(benchmark::State& state) } // Collect memory statistics. 
- auto const bytes_read = input.num_columns() * input.num_rows() * (sizeof(int32_t)); + auto const bytes_read = input.num_columns() * input.num_rows() * (sizeof(int32_t)); auto const bytes_written = bytes_read; // Account for nullability in input and output. auto const null_bytes = @@ -54,12 +54,15 @@ static void BM_transpose(benchmark::State& state) class Transpose : public cudf::benchmark {}; -#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) { BM_transpose(state); } \ - BENCHMARK_REGISTER_F(Transpose, name) \ - ->RangeMultiplier(4) \ - ->Range(4, 4 << 13) \ - ->UseManualTime() \ +#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) \ + { \ + BM_transpose(state); \ + } \ + BENCHMARK_REGISTER_F(Transpose, name) \ + ->RangeMultiplier(4) \ + ->Range(4, 4 << 13) \ + ->UseManualTime() \ ->Unit(benchmark::kMillisecond); TRANSPOSE_BM_BENCHMARK_DEFINE(transpose_simple); From f594a81a754b77e1264809a3aa2e560e5dc82d6a Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 27 Sep 2023 08:21:25 +0200 Subject: [PATCH 3/9] Refactor column type in transpose benchmark --- cpp/benchmarks/transpose/transpose.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index da602bd9783..8fbc20c9758 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -27,10 +27,12 @@ static void BM_transpose(benchmark::State& state) { auto count = state.range(0); + + constexpr auto column_type = cudf::type_id::INT32; auto int_column_generator = thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { return cudf::make_numeric_column( - cudf::data_type{cudf::type_id::INT32}, count, cudf::mask_state::ALL_VALID); + column_type, count, cudf::mask_state::ALL_VALID); }); auto input_table = 
cudf::table(std::vector(int_column_generator, int_column_generator + count)); @@ -42,7 +44,7 @@ static void BM_transpose(benchmark::State& state) } // Collect memory statistics. - auto const bytes_read = input.num_columns() * input.num_rows() * (sizeof(int32_t)); + auto const bytes_read = input.num_columns() * input.num_rows() * cudf::size_of(column_type); auto const bytes_written = bytes_read; // Account for nullability in input and output. auto const null_bytes = From 60845b7615f8570a76a4c57117f61ae1732bbcea Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 27 Sep 2023 09:33:19 +0200 Subject: [PATCH 4/9] Fix transpose benchmark type issue --- cpp/benchmarks/transpose/transpose.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 8fbc20c9758..50021e15713 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,8 +28,7 @@ static void BM_transpose(benchmark::State& state) { auto count = state.range(0); - - constexpr auto column_type = cudf::type_id::INT32; + constexpr auto column_type = cudf::data_type{cudf::type_id::INT32}; auto int_column_generator = thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { return cudf::make_numeric_column( From e9280cc506fb335442a13cb10a26ddabab7fa9ba Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Thu, 28 Sep 2023 15:35:11 +0200 Subject: [PATCH 5/9] Change way column type is handled in transpose bench --- cpp/benchmarks/transpose/transpose.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 50021e15713..1c0cf86d4af 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -28,11 +28,11 @@ static void 
BM_transpose(benchmark::State& state) { auto count = state.range(0); - constexpr auto column_type = cudf::data_type{cudf::type_id::INT32}; + constexpr auto column_type_id = cudf::type_id::INT32; auto int_column_generator = thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { return cudf::make_numeric_column( - column_type, count, cudf::mask_state::ALL_VALID); + cudf::data_type{column_type_id}, count, cudf::mask_state::ALL_VALID); }); auto input_table = cudf::table(std::vector(int_column_generator, int_column_generator + count)); @@ -44,7 +44,7 @@ static void BM_transpose(benchmark::State& state) } // Collect memory statistics. - auto const bytes_read = input.num_columns() * input.num_rows() * cudf::size_of(column_type); + auto const bytes_read = input.num_columns() * input.num_rows() * sizeof(cudf::id_to_type); auto const bytes_written = bytes_read; // Account for nullability in input and output. auto const null_bytes = From c50a5ef9f0ffaa857269b682f986dea967f92cc9 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Sat, 30 Sep 2023 14:07:05 +0200 Subject: [PATCH 6/9] Fix code style in transpose benchmark --- cpp/benchmarks/transpose/transpose.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 1c0cf86d4af..bee9a14fd26 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -27,7 +27,7 @@ static void BM_transpose(benchmark::State& state) { - auto count = state.range(0); + auto count = state.range(0); constexpr auto column_type_id = cudf::type_id::INT32; auto int_column_generator = thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { @@ -44,7 +44,8 @@ static void BM_transpose(benchmark::State& state) } // Collect memory statistics. 
- auto const bytes_read = input.num_columns() * input.num_rows() * sizeof(cudf::id_to_type); + auto const bytes_read = + input.num_columns() * input.num_rows() * sizeof(cudf::id_to_type); auto const bytes_written = bytes_read; // Account for nullability in input and output. auto const null_bytes = @@ -56,15 +57,12 @@ static void BM_transpose(benchmark::State& state) class Transpose : public cudf::benchmark {}; -#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) \ - { \ - BM_transpose(state); \ - } \ - BENCHMARK_REGISTER_F(Transpose, name) \ - ->RangeMultiplier(4) \ - ->Range(4, 4 << 13) \ - ->UseManualTime() \ +#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) { BM_transpose(state); } \ + BENCHMARK_REGISTER_F(Transpose, name) \ + ->RangeMultiplier(4) \ + ->Range(4, 4 << 13) \ + ->UseManualTime() \ ->Unit(benchmark::kMillisecond); TRANSPOSE_BM_BENCHMARK_DEFINE(transpose_simple); From 8083756df8f69d005c144b437fc4d34523cb5495 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Sun, 1 Oct 2023 14:35:43 +0200 Subject: [PATCH 7/9] Avoid potential integer overflow in transpose benchmark. --- cpp/benchmarks/transpose/transpose.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index bee9a14fd26..f9dc76370e2 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -44,8 +44,8 @@ static void BM_transpose(benchmark::State& state) } // Collect memory statistics. - auto const bytes_read = - input.num_columns() * input.num_rows() * sizeof(cudf::id_to_type); + auto const bytes_read = static_cast(input.num_columns()) * input.num_rows() * + sizeof(cudf::id_to_type); auto const bytes_written = bytes_read; // Account for nullability in input and output. 
auto const null_bytes = From e6d9f247d5ad1323b1893df59f2016c1c7cbb408 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Mon, 2 Oct 2023 10:43:55 +0200 Subject: [PATCH 8/9] Update cpp/benchmarks/transpose/transpose.cpp Co-authored-by: Lawrence Mitchell --- cpp/benchmarks/transpose/transpose.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index f9dc76370e2..0676d3f63d9 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -49,7 +49,7 @@ static void BM_transpose(benchmark::State& state) auto const bytes_written = bytes_read; // Account for nullability in input and output. auto const null_bytes = - 2 * input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()); + 2 * static_cast<uint64_t>(input.num_columns()) * cudf::bitmask_allocation_size_bytes(input.num_rows()); state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * (bytes_read + bytes_written + null_bytes)); From 82cbcf9752d9a1564bc483708ff3a4cd41bc6fca Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Tue, 3 Oct 2023 08:34:41 +0200 Subject: [PATCH 9/9] Fix code style in transpose benchmark --- cpp/benchmarks/transpose/transpose.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/transpose/transpose.cpp b/cpp/benchmarks/transpose/transpose.cpp index 0676d3f63d9..c2737325462 100644 --- a/cpp/benchmarks/transpose/transpose.cpp +++ b/cpp/benchmarks/transpose/transpose.cpp @@ -48,8 +48,8 @@ static void BM_transpose(benchmark::State& state) sizeof(cudf::id_to_type<column_type_id>); auto const bytes_written = bytes_read; // Account for nullability in input and output.
- auto const null_bytes = - 2 * static_cast<uint64_t>(input.num_columns()) * cudf::bitmask_allocation_size_bytes(input.num_rows()); + auto const null_bytes = 2 * static_cast<uint64_t>(input.num_columns()) * + cudf::bitmask_allocation_size_bytes(input.num_rows()); state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * (bytes_read + bytes_written + null_bytes));