Merge branch 'branch-24.12' into pylibcudf-cs

rapidsai · Nov 7, 2024 · 74c0045 · 74c0045
2 parents 635595a + e29e0ab
commit 74c0045
Show file tree

Hide file tree

Showing 75 changed files with 4,281 additions and 3,333 deletions.
diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
@@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
 python -m auditwheel repair \
     --exclude libcudf.so \
     --exclude libnvcomp.so \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 

diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh
@@ -33,6 +33,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 mkdir -p ${package_dir}/final_dist
 python -m auditwheel repair \
     --exclude libnvcomp.so.4 \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 

diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh
@@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
 python -m auditwheel repair \
     --exclude libcudf.so \
     --exclude libnvcomp.so \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cramjam
 - cubinlinker
 - cuda-nvtx=11.8
-- cuda-python>=11.7.1,<12.0a0
+- cuda-python>=11.7.1,<12.0a0,<=11.8.3
 - cuda-sanitizer-api=11.8.86
 - cuda-version=11.8
 - cudatoolkit

diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -21,7 +21,7 @@ dependencies:
 - cuda-nvcc
 - cuda-nvrtc-dev
 - cuda-nvtx-dev
-- cuda-python>=12.0,<13.0a0
+- cuda-python>=12.0,<13.0a0,<=12.6.0
 - cuda-sanitizer-api
 - cuda-version=12.5
 - cupy>=12.0.0

diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
@@ -91,7 +91,7 @@ requirements:
     - cudatoolkit
     - ptxcompiler >=0.7.0
     - cubinlinker  # CUDA enhanced compatibility.
-    - cuda-python >=11.7.1,<12.0a0
+    - cuda-python >=11.7.1,<12.0a0,<=11.8.3
     {% else %}
     - cuda-cudart
     - libcufile  # [linux64]
@@ -100,7 +100,7 @@ requirements:
     # TODO: Add nvjitlink here
     # xref: https://github.com/rapidsai/cudf/issues/12822
     - cuda-nvrtc
-    - cuda-python >=12.0,<13.0a0
+    - cuda-python >=12.0,<13.0a0,<=12.6.0
     - pynvjitlink
     {% endif %}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
@@ -83,9 +83,9 @@ requirements:
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
     {% if cuda_major == "11" %}
-    - cuda-python >=11.7.1,<12.0a0
+    - cuda-python >=11.7.1,<12.0a0,<=11.8.3
     {% else %}
-    - cuda-python >=12.0,<13.0a0
+    - cuda-python >=12.0,<13.0a0,<=12.6.0
     {% endif %}
     - nvtx >=0.2.1
     - packaging

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -358,8 +358,6 @@ ConfigureBench(
   STRINGS_BENCH
   string/convert_datetime.cpp
   string/convert_durations.cpp
-  string/convert_fixed_point.cpp
-  string/convert_numerics.cpp
   string/copy.cu
   string/factory.cu
   string/filter.cpp
@@ -375,6 +373,8 @@ ConfigureNVBench(
   string/char_types.cpp
   string/combine.cpp
   string/contains.cpp
+  string/convert_fixed_point.cpp
+  string/convert_numerics.cpp
   string/copy_if_else.cpp
   string/copy_range.cpp
   string/count.cpp

diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp
@@ -16,93 +16,48 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/strings/convert/convert_fixed_point.hpp>
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/types.hpp>
 
-namespace {
+#include <nvbench/nvbench.cuh>
 
-std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
-{
-  auto result =
-    create_random_column(cudf::type_id::FLOAT32, row_count{static_cast<cudf::size_type>(rows)});
-  return cudf::strings::from_floats(result->view());
-}
-
-}  // anonymous namespace
-
-class StringsToFixedPoint : public cudf::benchmark {};
-
-template <typename fixed_point_type>
-void convert_to_fixed_point(benchmark::State& state)
-{
-  auto const rows         = static_cast<cudf::size_type>(state.range(0));
-  auto const strings_col  = get_strings_column(rows);
-  auto const strings_view = cudf::strings_column_view(strings_col->view());
-  auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype);
-  }
+using Types = nvbench::type_list<numeric::decimal32, numeric::decimal64>;
 
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
-}
-
-class StringsFromFixedPoint : public cudf::benchmark {};
+NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32");
+NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64");
 
-template <typename fixed_point_type>
-void convert_from_fixed_point(benchmark::State& state)
+template <typename DataType>
+void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  auto const rows        = static_cast<cudf::size_type>(state.range(0));
-  auto const strings_col = get_strings_column(rows);
-  auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
-  auto const fp_col =
-    cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype);
-
-  std::unique_ptr<cudf::column> results = nullptr;
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    results = cudf::strings::from_fixed_point(fp_col->view());
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const from_num = state.get_string("dir") == "from";
+
+  auto const data_type = cudf::data_type{cudf::type_to_id<DataType>(), numeric::scale_type{-2}};
+  auto const fp_col    = create_random_column(data_type.id(), row_count{num_rows});
+
+  auto const strings_col = cudf::strings::from_fixed_point(fp_col->view());
+  auto const sv          = cudf::strings_column_view(strings_col->view());
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+
+  if (from_num) {  // dir == "from": fixed-point -> strings (reads numbers, writes chars)
+    state.add_global_memory_reads<int8_t>(num_rows * cudf::size_of(data_type));
+    state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); });
+  } else {  // dir == "to": strings -> fixed-point (reads chars, writes numbers)
+    state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
+    state.add_global_memory_writes<int8_t>(num_rows * cudf::size_of(data_type));
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); });
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
-     rows * cudf::size_of(dtype)));
 }
 
-#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type)                  \
-  BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \
-  {                                                                         \
-    convert_to_fixed_point<fixed_point_type>(state);                        \
-  }                                                                         \
-  BENCHMARK_REGISTER_F(StringsToFixedPoint, name)                           \
-    ->RangeMultiplier(4)                                                    \
-    ->Range(1 << 12, 1 << 24)                                               \
-    ->UseManualTime()                                                       \
-    ->Unit(benchmark::kMicrosecond);
-
-#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type)                  \
-  BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \
-  {                                                                           \
-    convert_from_fixed_point<fixed_point_type>(state);                        \
-  }                                                                           \
-  BENCHMARK_REGISTER_F(StringsFromFixedPoint, name)                           \
-    ->RangeMultiplier(4)                                                      \
-    ->Range(1 << 12, 1 << 24)                                                 \
-    ->UseManualTime()                                                         \
-    ->Unit(benchmark::kMicrosecond);
-
-CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32);
-CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64);
-
-CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32);
-CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64);
+NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types))
+  .set_name("fixed_point")
+  .set_type_axes_names({"DataType"})
+  .add_string_axis("dir", {"to", "from"})
+  .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp
@@ -16,117 +16,67 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/strings/convert/convert_integers.hpp>
 #include <cudf/types.hpp>
 
-namespace {
+#include <nvbench/nvbench.cuh>
 
-template <typename NumericType>
-std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
-{
-  return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
-}
+namespace {
 
 template <typename NumericType>
-std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
+std::unique_ptr<cudf::column> get_strings_column(cudf::column_view const& nv)
 {
-  auto const numerics_col = get_numerics_column<NumericType>(rows);
   if constexpr (std::is_floating_point_v<NumericType>) {
-    return cudf::strings::from_floats(numerics_col->view());
+    return cudf::strings::from_floats(nv);
   } else {
-    return cudf::strings::from_integers(numerics_col->view());
-  }
-}
-}  // anonymous namespace
-
-class StringsToNumeric : public cudf::benchmark {};
-
-template <typename NumericType>
-void convert_to_number(benchmark::State& state)
-{
-  auto const rows = static_cast<cudf::size_type>(state.range(0));
-
-  auto const strings_col  = get_strings_column<NumericType>(rows);
-  auto const strings_view = cudf::strings_column_view(strings_col->view());
-  auto const col_type     = cudf::type_to_id<NumericType>();
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    if constexpr (std::is_floating_point_v<NumericType>) {
-      cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
-    } else {
-      cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
-    }
+    return cudf::strings::from_integers(nv);
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
 }
+}  // namespace
 
-class StringsFromNumeric : public cudf::benchmark {};
+using Types = nvbench::type_list<float, double, int32_t, int64_t, uint8_t, uint16_t>;
 
 template <typename NumericType>
-void convert_from_number(benchmark::State& state)
+void bench_convert_number(nvbench::state& state, nvbench::type_list<NumericType>)
 {
-  auto const rows = static_cast<cudf::size_type>(state.range(0));
-
-  auto const numerics_col  = get_numerics_column<NumericType>(rows);
-  auto const numerics_view = numerics_col->view();
-
-  std::unique_ptr<cudf::column> results = nullptr;
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    if constexpr (std::is_floating_point_v<NumericType>)
-      results = cudf::strings::from_floats(numerics_view);
-    else
-      results = cudf::strings::from_integers(numerics_view);
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const from_num = state.get_string("dir") == "from";
+
+  auto const data_type = cudf::data_type(cudf::type_to_id<NumericType>());
+  auto const num_col   = create_random_column(data_type.id(), row_count{num_rows});
+
+  auto const strings_col = get_strings_column<NumericType>(num_col->view());
+  auto const sv          = cudf::strings_column_view(strings_col->view());
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+
+  if (from_num) {  // dir == "from": numbers -> strings (reads numbers, writes chars)
+    state.add_global_memory_reads<NumericType>(num_rows);
+    state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      if constexpr (std::is_floating_point_v<NumericType>)
+        cudf::strings::from_floats(num_col->view());
+      else
+        cudf::strings::from_integers(num_col->view());
+    });
+  } else {  // dir == "to": strings -> numbers (reads chars, writes numbers)
+    state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
+    state.add_global_memory_writes<NumericType>(num_rows);
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      if constexpr (std::is_floating_point_v<NumericType>) {
+        cudf::strings::to_floats(sv, data_type);
+      } else {
+        cudf::strings::to_integers(sv, data_type);
+      }
+    });
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
-     rows * sizeof(NumericType)));
 }
 
-#define CONVERT_TO_NUMERICS_BD(name, type)                               \
-  BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
-  {                                                                      \
-    convert_to_number<type>(state);                                      \
-  }                                                                      \
-  BENCHMARK_REGISTER_F(StringsToNumeric, name)                           \
-    ->RangeMultiplier(4)                                                 \
-    ->Range(1 << 10, 1 << 17)                                            \
-    ->UseManualTime()                                                    \
-    ->Unit(benchmark::kMicrosecond);
-
-#define CONVERT_FROM_NUMERICS_BD(name, type)                               \
-  BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
-  {                                                                        \
-    convert_from_number<type>(state);                                      \
-  }                                                                        \
-  BENCHMARK_REGISTER_F(StringsFromNumeric, name)                           \
-    ->RangeMultiplier(4)                                                   \
-    ->Range(1 << 10, 1 << 17)                                              \
-    ->UseManualTime()                                                      \
-    ->Unit(benchmark::kMicrosecond);
-
-CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
-CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
-CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
-CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
-CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
-CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);
-
-CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
-CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
-CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
+NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
+  .set_name("numeric")
+  .set_type_axes_names({"NumericType"})
+  .add_string_axis("dir", {"to", "from"})
+  .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
diff --git a/cpp/cmake/thirdparty/get_kvikio.cmake b/cpp/cmake/thirdparty/get_kvikio.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 function(find_and_configure_kvikio VERSION)
 
   rapids_cpm_find(
-    KvikIO ${VERSION}
+    kvikio ${VERSION}
     GLOBAL_TARGETS kvikio::kvikio
     CPM_ARGS
     GIT_REPOSITORY https://github.com/rapidsai/kvikio.git