Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into pylibcudf-cs
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 authored Nov 7, 2024
2 parents 635595a + e29e0ab commit 74c0045
Show file tree
Hide file tree
Showing 75 changed files with 4,281 additions and 3,333 deletions.
1 change: 1 addition & 0 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
1 change: 1 addition & 0 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
mkdir -p ${package_dir}/final_dist
python -m auditwheel repair \
--exclude libnvcomp.so.4 \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
1 change: 1 addition & 0 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
--exclude libkvikio.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0
- cuda-python>=12.0,<13.0a0,<=12.6.0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
{% else %}
- cuda-cudart
- libcufile # [linux64]
Expand All @@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.0,<13.0a0,<=12.6.0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
{% else %}
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.0,<13.0a0,<=12.6.0
{% endif %}
- nvtx >=0.2.1
- packaging
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,6 @@ ConfigureBench(
STRINGS_BENCH
string/convert_datetime.cpp
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cu
string/factory.cu
string/filter.cpp
Expand All @@ -375,6 +373,8 @@ ConfigureNVBench(
string/char_types.cpp
string/combine.cpp
string/contains.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy_if_else.cpp
string/copy_range.cpp
string/count.cpp
Expand Down
111 changes: 33 additions & 78 deletions cpp/benchmarks/string/convert_fixed_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,93 +16,48 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_fixed_point.hpp>
#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
{
auto result =
create_random_column(cudf::type_id::FLOAT32, row_count{static_cast<cudf::size_type>(rows)});
return cudf::strings::from_floats(result->view());
}

} // anonymous namespace

class StringsToFixedPoint : public cudf::benchmark {};

template <typename fixed_point_type>
void convert_to_fixed_point(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};

for (auto _ : state) {
cuda_event_timer raii(state, true);
auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype);
}
using Types = nvbench::type_list<numeric::decimal32, numeric::decimal64>;

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
}

class StringsFromFixedPoint : public cudf::benchmark {};
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32");
NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64");

template <typename fixed_point_type>
void convert_from_fixed_point(benchmark::State& state)
template <typename DataType>
void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list<DataType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));
auto const strings_col = get_strings_column(rows);
auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
auto const fp_col =
cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype);

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
results = cudf::strings::from_fixed_point(fp_col->view());
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

auto const data_type = cudf::data_type{cudf::type_to_id<DataType>(), numeric::scale_type{-2}};
auto const fp_col = create_random_column(data_type.id(), row_count{num_rows});

auto const strings_col = cudf::strings::from_fixed_point(fp_col->view());
auto const sv = cudf::strings_column_view(strings_col->view());

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// Dispatch on the "dir" axis. Each branch must time the conversion that matches
// its own memory accounting; otherwise the reported throughput for "to"/"from"
// describes the opposite operation.
if (from_num) {
  // "from": fixed-point column -> strings. Reads the numeric data, writes chars.
  state.add_global_memory_reads<int8_t>(num_rows * cudf::size_of(data_type));
  state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); });
} else {
  // "to": strings -> fixed-point column. Reads chars, writes the numeric data.
  state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
  state.add_global_memory_writes<int8_t>(num_rows * cudf::size_of(data_type));
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); });
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * cudf::size_of(dtype)));
}

#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \
{ \
convert_to_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \
BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \
{ \
convert_from_fixed_point<fixed_point_type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \
->RangeMultiplier(4) \
->Range(1 << 12, 1 << 24) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32);
CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64);

CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32);
CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64);
NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types))
.set_name("fixed_point")
.set_type_axes_names({"DataType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
138 changes: 44 additions & 94 deletions cpp/benchmarks/string/convert_numerics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,117 +16,67 @@

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/types.hpp>

namespace {
#include <nvbench/nvbench.cuh>

template <typename NumericType>
std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
{
return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
}
namespace {

template <typename NumericType>
std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
std::unique_ptr<cudf::column> get_strings_column(cudf::column_view const& nv)
{
auto const numerics_col = get_numerics_column<NumericType>(rows);
if constexpr (std::is_floating_point_v<NumericType>) {
return cudf::strings::from_floats(numerics_col->view());
return cudf::strings::from_floats(nv);
} else {
return cudf::strings::from_integers(numerics_col->view());
}
}
} // anonymous namespace

class StringsToNumeric : public cudf::benchmark {};

template <typename NumericType>
void convert_to_number(benchmark::State& state)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const strings_col = get_strings_column<NumericType>(rows);
auto const strings_view = cudf::strings_column_view(strings_col->view());
auto const col_type = cudf::type_to_id<NumericType>();

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>) {
cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
} else {
cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
}
return cudf::strings::from_integers(nv);
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
}
} // namespace

class StringsFromNumeric : public cudf::benchmark {};
using Types = nvbench::type_list<float, double, int32_t, int64_t, uint8_t, uint16_t>;

template <typename NumericType>
void convert_from_number(benchmark::State& state)
void bench_convert_number(nvbench::state& state, nvbench::type_list<NumericType>)
{
auto const rows = static_cast<cudf::size_type>(state.range(0));

auto const numerics_col = get_numerics_column<NumericType>(rows);
auto const numerics_view = numerics_col->view();

std::unique_ptr<cudf::column> results = nullptr;

for (auto _ : state) {
cuda_event_timer raii(state, true);
if constexpr (std::is_floating_point_v<NumericType>)
results = cudf::strings::from_floats(numerics_view);
else
results = cudf::strings::from_integers(numerics_view);
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const from_num = state.get_string("dir") == "from";

auto const data_type = cudf::data_type(cudf::type_to_id<NumericType>());
auto const num_col = create_random_column(data_type.id(), row_count{num_rows});

auto const strings_col = get_strings_column<NumericType>(num_col->view());
auto const sv = cudf::strings_column_view(strings_col->view());

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

// Dispatch on the "dir" axis. Each branch must time the conversion that matches
// its own memory accounting; otherwise the reported throughput for "to"/"from"
// describes the opposite operation.
if (from_num) {
  // "from": numeric column -> strings. Reads NumericType elements, writes chars.
  state.add_global_memory_reads<NumericType>(num_rows);
  state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    if constexpr (std::is_floating_point_v<NumericType>) {
      cudf::strings::from_floats(num_col->view());
    } else {
      cudf::strings::from_integers(num_col->view());
    }
  });
} else {
  // "to": strings -> numeric column. Reads chars, writes NumericType elements.
  state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
  state.add_global_memory_writes<NumericType>(num_rows);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    if constexpr (std::is_floating_point_v<NumericType>) {
      cudf::strings::to_floats(sv, data_type);
    } else {
      cudf::strings::to_integers(sv, data_type);
    }
  });
}

// bytes_processed = bytes_input + bytes_output
state.SetBytesProcessed(
state.iterations() *
(cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
rows * sizeof(NumericType)));
}

#define CONVERT_TO_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
{ \
convert_to_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsToNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

#define CONVERT_FROM_NUMERICS_BD(name, type) \
BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
{ \
convert_from_number<type>(state); \
} \
BENCHMARK_REGISTER_F(StringsFromNumeric, name) \
->RangeMultiplier(4) \
->Range(1 << 10, 1 << 17) \
->UseManualTime() \
->Unit(benchmark::kMicrosecond);

CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);

CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
.set_name("numeric")
.set_type_axes_names({"NumericType"})
.add_string_axis("dir", {"to", "from"})
.add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_kvikio.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -16,7 +16,7 @@
function(find_and_configure_kvikio VERSION)

rapids_cpm_find(
KvikIO ${VERSION}
kvikio ${VERSION}
GLOBAL_TARGETS kvikio::kvikio
CPM_ARGS
GIT_REPOSITORY https://github.com/rapidsai/kvikio.git
Expand Down
Loading

0 comments on commit 74c0045

Please sign in to comment.