Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move strings/numeric convert benchmarks to nvbench #17255

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,6 @@ ConfigureBench(
STRINGS_BENCH
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cu
string/factory.cu
string/filter.cpp
Expand All @@ -375,6 +373,8 @@ ConfigureNVBench(
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy_if_else.cpp
string/copy_range.cpp
string/count.cpp
Expand Down
111 changes: 33 additions & 78 deletions cpp/benchmarks/string/convert_fixed_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,93 +16,48 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_fixed_point.hpp>
#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
{
auto result =
create_random_column(cudf::type_id::FLOAT32, row_count{static_cast<cudf::size_type>(rows)});
return cudf::strings::from_floats(result->view());
}

} // anonymous namespace

class StringsToFixedPoint : public cudf::benchmark {};

template <typename fixed_point_type>
void convert_to_fixed_point(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};

for (auto _ : state) {
cuda_event_timer raii(state, true);
auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype);
}
using Types = nvbench::type_list<numeric::decimal32, numeric::decimal64>;

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
}

class StringsFromFixedPoint : public cudf::benchmark {};
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32");
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64");

template <typename fixed_point_type>
void convert_from_fixed_point(benchmark::State& state)
template <typename DataType>
void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list<DataType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
auto const fp_col =
cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype);

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
results = cudf::strings::from_fixed_point(fp_col->view());
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

auto const data_type = cudf::data_type{cudf::type_to_id<DataType>(), numeric::scale_type{-2}};
auto const fp_col = create_random_column(data_type.id(), row_count{num_rows});

auto const strings_col = cudf::strings::from_fixed_point(fp_col->view());
auto const sv = cudf::strings_column_view(strings_col->view());

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

if (from_num) {
state.add_global_memory_reads<int8_t>(num_rows * cudf::size_of(data_type));
state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); });
} else {
state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
state.add_global_memory_writes<int8_t>(num_rows * cudf::size_of(data_type));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); });
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * cudf::size_of(dtype)));
}

#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \
{ \
convert_to_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \
{ \
convert_from_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32);
CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64);

CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32);
CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64);
NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types))
.set_name("fixed_point")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
138 changes: 44 additions & 94 deletions cpp/benchmarks/string/convert_numerics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,117 +16,67 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

template <typename NumericType>
std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
{
return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
}
namespace {

template <typename NumericType>
std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
std::unique_ptr<cudf::column> get_strings_column(cudf::column_view const& nv)
{
auto const numerics_col = get_numerics_column<NumericType>(rows);
if constexpr (std::is_floating_point_v<NumericType>) {
return cudf::strings::from_floats(numerics_col->view());
return cudf::strings::from_floats(nv);
} else {
return cudf::strings::from_integers(numerics_col->view());
}
}
} // anonymous namespace

class StringsToNumeric : public cudf::benchmark {};

template <typename NumericType>
void convert_to_number(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const strings_col = get_strings_column<NumericType>(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const col_type = cudf::type_to_id<NumericType>();

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>) {
cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
} else {
cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
}
return cudf::strings::from_integers(nv);
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
}
} // namespace

class StringsFromNumeric : public cudf::benchmark {};
using Types = nvbench::type_list<float, double, int32_t, int64_t, uint8_t, uint16_t>;

template <typename NumericType>
void convert_from_number(benchmark::State& state)
void bench_convert_number(nvbench::state& state, nvbench::type_list<NumericType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const numerics_col = get_numerics_column<NumericType>(rows);
auto const numerics_view = numerics_col->view();

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>)
results = cudf::strings::from_floats(numerics_view);
else
results = cudf::strings::from_integers(numerics_view);
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

auto const data_type = cudf::data_type(cudf::type_to_id<NumericType>());
auto const num_col = create_random_column(data_type.id(), row_count{num_rows});

auto const strings_col = get_strings_column<NumericType>(num_col->view());
auto const sv = cudf::strings_column_view(strings_col->view());

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

if (from_num) {
state.add_global_memory_reads<NumericType>(num_rows);
state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
if constexpr (std::is_floating_point_v<NumericType>) {
cudf::strings::to_floats(sv, data_type);
} else {
cudf::strings::to_integers(sv, data_type);
}
});
} else {
state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
state.add_global_memory_writes<NumericType>(num_rows);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
if constexpr (std::is_floating_point_v<NumericType>)
cudf::strings::from_floats(num_col->view());
else
cudf::strings::from_integers(num_col->view());
});
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * sizeof(NumericType)));
}

#define CONVERT_TO_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
{ \
convert_to_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
{ \
convert_from_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);

CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
.set_name("numeric")
.set_type_axes_names({"NumericType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
Loading