Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.12' into improve-dis…
Browse files Browse the repository at this point in the history
…tinct-inner-join
  • Loading branch information
PointKernel committed Nov 8, 2024
2 parents d62b33f + 3c5f787 commit 49bcd34
Show file tree
Hide file tree
Showing 43 changed files with 1,403 additions and 780 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ dependencies:
- polars>=1.11,<1.13
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<18.0.0a0
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pytest-benchmark
- pytest-cases>=3.8.2
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ dependencies:
- pandoc
- polars>=1.11,<1.13
- pre-commit
- pyarrow>=14.0.0,<18.0.0a0
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pynvjitlink>=0.0.0a0
- pytest-benchmark
Expand Down
13 changes: 4 additions & 9 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,8 @@ ConfigureNVBench(
# ##################################################################################################
# * strings benchmark -------------------------------------------------------------------
ConfigureBench(
STRINGS_BENCH
string/convert_datetime.cpp
string/convert_durations.cpp
string/factory.cu
string/filter.cpp
string/repeat_strings.cpp
string/replace.cpp
string/translate.cpp
string/url_decode.cu
STRINGS_BENCH string/factory.cu string/filter.cpp string/repeat_strings.cpp string/replace.cpp
string/translate.cpp string/url_decode.cu
)

ConfigureNVBench(
Expand All @@ -372,6 +365,8 @@ ConfigureNVBench(
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cpp
Expand Down
37 changes: 20 additions & 17 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/filling.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -540,7 +542,7 @@ struct string_generator {
// range 32-127 is ASCII; 127-136 will be multi-byte UTF-8
{
}
__device__ void operator()(thrust::tuple<cudf::size_type, cudf::size_type> str_begin_end)
__device__ void operator()(thrust::tuple<int64_t, int64_t> str_begin_end)
{
auto begin = thrust::get<0>(str_begin_end);
auto end = thrust::get<1>(str_begin_end);
Expand Down Expand Up @@ -569,6 +571,9 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
distribution_params<bool>{1. - profile.get_null_probability().value_or(0)});
auto lengths = len_dist(engine, num_rows + 1);
auto null_mask = valid_dist(engine, num_rows + 1);
auto stream = cudf::get_default_stream();
auto mr = cudf::get_current_device_resource_ref();

thrust::transform_if(
thrust::device,
lengths.begin(),
Expand All @@ -580,28 +585,26 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
auto valid_lengths = thrust::make_transform_iterator(
thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())),
valid_or_zero{});
rmm::device_uvector<cudf::size_type> offsets(num_rows + 1, cudf::get_default_stream());
thrust::exclusive_scan(
thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin());
// offsets are ready.
auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1);

// offsets are created as INT32 or INT64 as appropriate
auto [offsets, chars_length] = cudf::strings::detail::make_offsets_child_column(
valid_lengths, valid_lengths + num_rows, stream, mr);
// use the offsetalator to normalize the offset values for use by the string_generator
auto offsets_itr = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
rmm::device_uvector<char> chars(chars_length, cudf::get_default_stream());
thrust::for_each_n(thrust::device,
thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
thrust::make_zip_iterator(offsets_itr, offsets_itr + 1),
num_rows,
string_generator{chars.data(), engine});

auto [result_bitmask, null_count] =
cudf::detail::valid_if(null_mask.begin(),
null_mask.end() - 1,
thrust::identity<bool>{},
cudf::get_default_stream(),
cudf::get_current_device_resource_ref());
profile.get_null_probability().has_value()
? cudf::detail::valid_if(
null_mask.begin(), null_mask.end() - 1, thrust::identity<bool>{}, stream, mr)
: std::pair{rmm::device_buffer{}, 0};

return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
chars.release(),
null_count,
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
num_rows, std::move(offsets), chars.release(), null_count, std::move(result_bitmask));
}

/**
Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/nvbench_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ enum class data_type : int32_t {
INTEGRAL = static_cast<int32_t>(type_group_id::INTEGRAL),
INTEGRAL_SIGNED = static_cast<int32_t>(type_group_id::INTEGRAL_SIGNED),
FLOAT = static_cast<int32_t>(type_group_id::FLOATING_POINT),
BOOL8 = static_cast<int32_t>(cudf::type_id::BOOL8),
DECIMAL = static_cast<int32_t>(type_group_id::FIXED_POINT),
TIMESTAMP = static_cast<int32_t>(type_group_id::TIMESTAMP),
DURATION = static_cast<int32_t>(type_group_id::DURATION),
Expand All @@ -44,6 +45,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
case data_type::INTEGRAL: return "INTEGRAL";
case data_type::INTEGRAL_SIGNED: return "INTEGRAL_SIGNED";
case data_type::FLOAT: return "FLOAT";
case data_type::BOOL8: return "BOOL8";
case data_type::DECIMAL: return "DECIMAL";
case data_type::TIMESTAMP: return "TIMESTAMP";
case data_type::DURATION: return "DURATION";
Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ void BM_parquet_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::BOOL8),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
Expand Down Expand Up @@ -298,6 +299,7 @@ void BM_parquet_read_wide_tables_mixed(nvbench::state& state)

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::FLOAT,
data_type::BOOL8,
data_type::DECIMAL,
data_type::TIMESTAMP,
data_type::DURATION,
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/io/parquet/parquet_reader_options.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -66,6 +66,7 @@ void BM_parquet_read_options(nvbench::state& state,
auto const data_types =
dtypes_for_column_selection(get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::BOOL8),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
Expand Down
3 changes: 3 additions & 0 deletions cpp/benchmarks/io/parquet/parquet_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ void BM_parq_write_io_compression(
{
auto const data_types = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::BOOL8),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
Expand Down Expand Up @@ -143,6 +144,7 @@ void BM_parq_write_varying_options(

auto const data_types = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::BOOL8),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
Expand Down Expand Up @@ -181,6 +183,7 @@ void BM_parq_write_varying_options(

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::FLOAT,
data_type::BOOL8,
data_type::DECIMAL,
data_type::TIMESTAMP,
data_type::DURATION,
Expand Down
87 changes: 42 additions & 45 deletions cpp/benchmarks/string/convert_datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,59 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/wrappers/timestamps.hpp>

class StringDateTime : public cudf::benchmark {};
#include <nvbench/nvbench.cuh>

enum class direction { to, from };
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_D, "cudf::timestamp_D", "cudf::timestamp_D");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_s, "cudf::timestamp_s", "cudf::timestamp_s");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_us, "cudf::timestamp_us", "cudf::timestamp_us");
NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ns, "cudf::timestamp_ns", "cudf::timestamp_ns");

template <class TypeParam>
void BM_convert_datetime(benchmark::State& state, direction dir)
using Types = nvbench::type_list<cudf::timestamp_D,
cudf::timestamp_s,
cudf::timestamp_ms,
cudf::timestamp_us,
cudf::timestamp_ns>;

template <class DataType>
void bench_convert_datetime(nvbench::state& state, nvbench::type_list<DataType>)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));
auto const data_type = cudf::data_type(cudf::type_to_id<TypeParam>());
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_ts = state.get_string("dir") == "from";

auto const column = create_random_column(data_type.id(), row_count{n_rows});
cudf::column_view input(column->view());
auto const data_type = cudf::data_type(cudf::type_to_id<DataType>());
auto const ts_col = create_random_column(data_type.id(), row_count{num_rows});

auto source = dir == direction::to ? cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S")
: make_empty_column(cudf::data_type{cudf::type_id::STRING});
cudf::strings_column_view source_string(source->view());
auto format = std::string{"%Y-%m-%d %H:%M:%S"};
auto s_col = cudf::strings::from_timestamps(ts_col->view(), format);
auto sv = cudf::strings_column_view(s_col->view());

for (auto _ : state) {
cuda_event_timer raii(state, true);
if (dir == direction::to)
cudf::strings::to_timestamps(source_string, data_type, "%Y-%m-%d %H:%M:%S");
else
cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S");
}
auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

auto const bytes = dir == direction::to ? source_string.chars_size(cudf::get_default_stream())
: n_rows * sizeof(TypeParam);
state.SetBytesProcessed(state.iterations() * bytes);
if (from_ts) {
state.add_global_memory_reads<DataType>(num_rows);
state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::from_timestamps(ts_col->view(), format);
});
} else {
state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
state.add_global_memory_writes<DataType>(num_rows);
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::to_timestamps(sv, data_type, format);
});
}
}

#define STR_BENCHMARK_DEFINE(name, type, dir) \
BENCHMARK_DEFINE_F(StringDateTime, name)(::benchmark::State & state) \
{ \
BM_convert_datetime<type>(state, dir); \
} \
BENCHMARK_REGISTER_F(StringDateTime, name) \
->RangeMultiplier(1 << 5) \
->Range(1 << 10, 1 << 25) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

STR_BENCHMARK_DEFINE(from_days, cudf::timestamp_D, direction::from);
STR_BENCHMARK_DEFINE(from_seconds, cudf::timestamp_s, direction::from);
STR_BENCHMARK_DEFINE(from_mseconds, cudf::timestamp_ms, direction::from);
STR_BENCHMARK_DEFINE(from_useconds, cudf::timestamp_us, direction::from);
STR_BENCHMARK_DEFINE(from_nseconds, cudf::timestamp_ns, direction::from);

STR_BENCHMARK_DEFINE(to_days, cudf::timestamp_D, direction::to);
STR_BENCHMARK_DEFINE(to_seconds, cudf::timestamp_s, direction::to);
STR_BENCHMARK_DEFINE(to_mseconds, cudf::timestamp_ms, direction::to);
STR_BENCHMARK_DEFINE(to_useconds, cudf::timestamp_us, direction::to);
STR_BENCHMARK_DEFINE(to_nseconds, cudf::timestamp_ns, direction::to);
NVBENCH_BENCH_TYPES(bench_convert_datetime, NVBENCH_TYPE_AXES(Types))
.set_name("datetime")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
Loading

0 comments on commit 49bcd34

Please sign in to comment.