From 0664c8b3ff71e614487273a67ac5ccf261202a29 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Thu, 24 Aug 2023 16:01:51 +0200 Subject: [PATCH 1/7] Add `bytes_per_second` to shift benchmark This patch relates to #13735. --- cpp/benchmarks/copying/shift.cu | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 460100a8fe9..04fe4f081db 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -68,6 +68,17 @@ static void BM_shift(benchmark::State& state) cuda_event_timer raii(state, true); auto output = cudf::shift(input, offset, *fill); } + + auto const elems_read = (size - offset); + auto const elems_written = use_validity ? (size - offset) : size; + auto const bytes_read = elems_read * sizeof(int); + auto const bytes_written = elems_written * sizeof(int); + + // null bytes for input and output + auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; + + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * + (bytes_written + bytes_read + null_bytes)); } class Shift : public cudf::benchmark {}; From f2f73925e9bcfb3c0270601cb55b66061679df38 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Sun, 10 Sep 2023 17:27:01 +0200 Subject: [PATCH 2/7] Extend comments in shift benchmark --- cpp/benchmarks/copying/shift.cu | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 04fe4f081db..7a80e0d598d 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -70,11 +70,13 @@ static void BM_shift(benchmark::State& state) } auto const elems_read = (size - offset); - auto const elems_written = use_validity ?
(size - offset) : size; auto const bytes_read = elems_read * sizeof(int); - auto const bytes_written = elems_written * sizeof(int); - // null bytes for input and output + // If 'use_validity' is false, the fill value is a number, and the entire column + // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' + // is true, only the elements that can be shifted are written, along with the full null bitmask. + auto const elems_written = use_validity ? (size - offset) : size; + auto const bytes_written = elems_written * sizeof(int); auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * From 6148cb2c7d502c524999081f15ba352316b2bbb0 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Mon, 18 Sep 2023 10:33:47 +0200 Subject: [PATCH 3/7] Fix code style in shift benchmark --- cpp/benchmarks/copying/shift.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 7a80e0d598d..f7f1858bd5a 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -69,15 +69,15 @@ static void BM_shift(benchmark::State& state) auto output = cudf::shift(input, offset, *fill); } - auto const elems_read = (size - offset); - auto const bytes_read = elems_read * sizeof(int); + auto const elems_read = (size - offset); + auto const bytes_read = elems_read * sizeof(int); // If 'use_validity' is false, the fill value is a number, and the entire column // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' // is true, only the elements that can be shifted are written, along with the full null bitmask. auto const elems_written = use_validity ? (size - offset) : size; auto const bytes_written = elems_written * sizeof(int); - auto const null_bytes = use_validity ?
2 * cudf::bitmask_allocation_size_bytes(size) : 0; + auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * (bytes_written + bytes_read + null_bytes)); From 59d5022597ddbc9b429ea716c0dfe12d2a7437d8 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 27 Sep 2023 10:27:50 +0200 Subject: [PATCH 4/7] Refactor column type in shift benchmark --- cpp/benchmarks/copying/shift.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index f7f1858bd5a..44625f30acb 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -56,8 +56,9 @@ static void BM_shift(benchmark::State& state) cudf::size_type size = state.range(0); cudf::size_type offset = size * (static_cast<double>(shift_factor) / 100.0); + auto constexpr column_type_id = cudf::type_to_id<int>(); auto const input_table = - create_sequence_table({cudf::type_to_id<int>()}, + create_sequence_table({column_type_id}, row_count{size}, use_validity ? std::optional{1.0} : std::nullopt); cudf::column_view input{input_table->get_column(0)}; @@ -76,7 +77,7 @@ static void BM_shift(benchmark::State& state) // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' // is true, only the elements that can be shifted are written, along with the full null bitmask. auto const elems_written = use_validity ? (size - offset) : size; - auto const bytes_written = elems_written * sizeof(int); + auto const bytes_written = elems_written * cudf::size_of(cudf::data_type{column_type_id}); auto const null_bytes = use_validity ?
2 * cudf::bitmask_allocation_size_bytes(size) : 0; state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * From 6590a05f45f934aa54e6bffb3f2f93598ac1474d Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Thu, 28 Sep 2023 16:39:27 +0200 Subject: [PATCH 5/7] Change way column type is handled in shift bench --- cpp/benchmarks/copying/shift.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 44625f30acb..c27da752425 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -77,7 +77,7 @@ static void BM_shift(benchmark::State& state) // (excluding the null bitmask) needs to be written. On the other hand, if 'use_validity' // is true, only the elements that can be shifted are written, along with the full null bitmask. auto const elems_written = use_validity ? (size - offset) : size; - auto const bytes_written = elems_written * cudf::size_of(cudf::data_type{column_type_id}); + auto const bytes_written = elems_written * sizeof(cudf::id_to_type<column_type_id>); auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * From 174e64905792b3af8cf529e7201ae562ccf2cb66 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Tue, 3 Oct 2023 08:30:26 +0200 Subject: [PATCH 6/7] Fix code style in shift benchmark --- cpp/benchmarks/copying/shift.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index c27da752425..4f50adcb246 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -57,10 +57,8 @@ static void BM_shift(benchmark::State& state) cudf::size_type offset = size * (static_cast<double>(shift_factor) / 100.0); auto constexpr column_type_id = cudf::type_to_id<int>(); - auto const input_table = - create_sequence_table({column_type_id}, - row_count{size}, - use_validity ?
std::optional{1.0} : std::nullopt); + auto const input_table = create_sequence_table( + {column_type_id}, row_count{size}, use_validity ? std::optional{1.0} : std::nullopt); cudf::column_view input{input_table->get_column(0)}; auto fill = use_validity ? make_scalar<int>() : make_scalar<int>(777); From b7ddb9c35340d254b782c05d5bb06d42451dc5bb Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Wed, 4 Oct 2023 16:34:02 +0200 Subject: [PATCH 7/7] Consolidate type usage of shift benchmark --- cpp/benchmarks/copying/shift.cu | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index 4f50adcb246..e1169e3bcd6 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -56,12 +56,14 @@ static void BM_shift(benchmark::State& state) cudf::size_type size = state.range(0); cudf::size_type offset = size * (static_cast<double>(shift_factor) / 100.0); - auto constexpr column_type_id = cudf::type_to_id<int>(); - auto const input_table = create_sequence_table( + auto constexpr column_type_id = cudf::type_id::INT32; + using column_type = cudf::id_to_type<column_type_id>; + + auto const input_table = create_sequence_table( {column_type_id}, row_count{size}, use_validity ? std::optional{1.0} : std::nullopt); cudf::column_view input{input_table->get_column(0)}; - auto fill = use_validity ? make_scalar<int>() : make_scalar<int>(777); + auto fill = use_validity ? make_scalar<column_type>() : make_scalar<column_type>(777); for (auto _ : state) { cuda_event_timer raii(state, true); @@ -69,13 +71,13 @@ static void BM_shift(benchmark::State& state) } auto const elems_read = (size - offset); - auto const bytes_read = elems_read * sizeof(int); + auto const bytes_read = elems_read * sizeof(column_type); // If 'use_validity' is false, the fill value is a number, and the entire column // (excluding the null bitmask) needs to be written.
On the other hand, if 'use_validity' // is true, only the elements that can be shifted are written, along with the full null bitmask. auto const elems_written = use_validity ? (size - offset) : size; - auto const bytes_written = elems_written * sizeof(cudf::id_to_type<column_type_id>); + auto const bytes_written = elems_written * sizeof(column_type); auto const null_bytes = use_validity ? 2 * cudf::bitmask_allocation_size_bytes(size) : 0; state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *