From 06b3f83b3e7f1b1364973be34f58fac4caf773f3 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 Nov 2024 16:54:28 -0500 Subject: [PATCH 1/7] Disallow cuda-python 12.6.1 and 11.8.4 (#17253) Due to a bug in cuda-python we must disallow cuda-python 12.6.1 and 11.8.4. This PR disallows those versions. It also silences new cuda-python deprecation warnings so that our test suite passes. See https://github.com/rapidsai/build-planning/issues/116 for more information. --------- Co-authored-by: James Lamb --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- conda/recipes/pylibcudf/meta.yaml | 4 ++-- dependencies.yaml | 8 ++++---- python/cudf/pyproject.toml | 4 +++- python/cudf_kafka/pyproject.toml | 4 +++- python/cudf_polars/pyproject.toml | 4 +++- python/custreamz/pyproject.toml | 2 ++ python/dask_cudf/pyproject.toml | 2 ++ python/pylibcudf/pyproject.toml | 4 +++- 11 files changed, 26 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 9d9fec97731..ace55a15c09 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,7 +19,7 @@ dependencies: - cramjam - cubinlinker - cuda-nvtx=11.8 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,!=11.8.4 - cuda-sanitizer-api=11.8.86 - cuda-version=11.8 - cudatoolkit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 19e3eafd641..d20db44497e 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,!=12.6.1 - cuda-sanitizer-api - cuda-version=12.5 - cupy>=12.0.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 2c254415318..6debcb281b1 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -91,7 +91,7 @@ requirements: - cudatoolkit - ptxcompiler >=0.7.0 - cubinlinker # CUDA enhanced compatibility. - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,!=11.8.4 {% else %} - cuda-cudart - libcufile # [linux64] @@ -100,7 +100,7 @@ requirements: # TODO: Add nvjitlink here # xref: https://github.com/rapidsai/cudf/issues/12822 - cuda-nvrtc - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,!=12.6.1 - pynvjitlink {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 3d965f30986..92ca495f972 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -83,9 +83,9 @@ requirements: - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0 + - cuda-python >=11.7.1,<12.0a0,!=11.8.4 {% else %} - - cuda-python >=12.0,<13.0a0 + - cuda-python >=12.0,<13.0a0,!=12.6.1 {% endif %} - nvtx >=0.2.1 - packaging diff --git a/dependencies.yaml b/dependencies.yaml index 90255ca674c..cc31619c217 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -658,10 +658,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0,!=12.6.1 - matrix: {cuda: "11.*"} packages: &run_pylibcudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0,!=11.8.4 - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: @@ -684,10 +684,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0,!=12.6.1 - matrix: {cuda: "11.*"} packages: &run_cudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0,!=11.8.4 - {matrix: null, packages: *run_cudf_packages_all_cu11} - output_types: conda matrices: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index b6105c17b3e..53f22a11e6b 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -20,7 +20,7 @@ requires-python = ">=3.10" dependencies = [ "cachetools", "cubinlinker", - "cuda-python>=11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,!=11.8.4", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "libcudf==24.12.*,>=0.0.0a0", @@ -90,6 +90,8 @@ filterwarnings = [ "error", "ignore:::.*xdist.*", "ignore:::.*pytest.*", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore", # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 667cd7b1db8..ec0bc0eb22b 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -51,7 +51,9 @@ rapids = ["rmm", "cudf", "dask_cudf"] addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ - "error" + "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index a2c62ef9460..2e75dff5c9e 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -53,7 +53,9 @@ version = {file = "cudf_polars/VERSION"} addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ - "error" + "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", ] xfail_strict = true diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index a8ab05a3922..d3baf3bf4d2 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -85,6 +85,8 @@ addopts = "--tb=native --strict-config --strict-markers" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", "ignore:unclosed =11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,!=11.8.4", "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", @@ -74,6 +74,8 @@ addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib" empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", + # https://github.com/rapidsai/build-planning/issues/116 + "ignore:.*cuda..* module is deprecated.*:DeprecationWarning", "ignore:::.*xdist.*", "ignore:::.*pytest.*" ] From 57900dee500a1a051393dea438d32d94ecd4de61 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 7 Nov 2024 02:47:47 +0100 Subject: [PATCH 2/7] KvikIO shared library (#17239) Update cudf to use the new KvikIO shared library: https://github.com/rapidsai/kvikio/pull/527 #### Tasks - [x] Wait for the [KvikIO shared library PR](https://github.com/rapidsai/kvikio/pull/527) to be merged. - [x] Revert the use of the [KvikIO shared library](https://github.com/rapidsai/kvikio/pull/527) in CI: https://github.com/rapidsai/cudf/commit/2d8eeafe4959357a17f6ad488811837e0a07ba65. Authors: - Mads R. B. Kristensen (https://github.com/madsbk) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/17239 --- ci/build_wheel_cudf.sh | 1 + ci/build_wheel_libcudf.sh | 1 + ci/build_wheel_pylibcudf.sh | 1 + dependencies.yaml | 1 + python/libcudf/libcudf/load.py | 11 +++++++++++ python/libcudf/pyproject.toml | 1 + 6 files changed, 16 insertions(+) diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index fef4416a366..ae4eb0d5c66 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ --exclude libnvcomp.so \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index b3d6778ea04..aabd3814a24 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -33,6 +33,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" mkdir -p ${package_dir}/final_dist python -m auditwheel repair \ --exclude libnvcomp.so.4 \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 839d98846fe..c4a89f20f5f 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ --exclude libnvcomp.so \ + --exclude libkvikio.so \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* diff --git a/dependencies.yaml b/dependencies.yaml index cc31619c217..41ac6ce1808 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -177,6 +177,7 @@ files: extras: table: project includes: + - depends_on_libkvikio - depends_on_nvcomp py_build_pylibcudf: output: pyproject diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py index ba134710868..bf27ecfa7f5 100644 --- a/python/libcudf/libcudf/load.py +++ b/python/libcudf/libcudf/load.py @@ -18,6 +18,17 @@ def load_library(): + try: + # libkvikio must be loaded before libcudf because libcudf references its symbols + import libkvikio + + libkvikio.load_library() + except ModuleNotFoundError: + # libcudf's runtime dependency on libkvikio may be satisfied by a natively + # installed library or a conda package, in which case the import will fail and + # we assume the library is discoverable on system paths. + pass + # Dynamically load libcudf.so. Prefer a system library if one is present to # avoid clobbering symbols that other packages might expect, but if no # other library is present use the one in the wheel. diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index c6d9ae56467..62726bb0df4 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ "Environment :: GPU :: NVIDIA CUDA", ] dependencies = [ + "libkvikio==24.12.*,>=0.0.0a0", "nvidia-nvcomp==4.1.0.6", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 29484cb87a417e2e36c8f3b6cd2ec961abec3156 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 7 Nov 2024 00:51:59 -0600 Subject: [PATCH 3/7] Put a ceiling on cuda-python (#17264) Follow-up to #17253 Contributes to https://github.com/rapidsai/build-planning/issues/116 That PR used `!=` requirements to skip a particular version of `cuda-python` that `cudf` and `pylibcudf` were incompatible with. A newer version of `cuda-python` (12.6.2 for CUDA 12, 11.8.5 for CUDA 11) was just released, and it also causes some build issues for RAPIDS libraries: https://github.com/rapidsai/cuvs/pull/445#issuecomment-2461146449 To unblock CI across RAPIDS, this proposes **temporarily** switching to ceilings on the `cuda-python` dependency here. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17264 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- conda/recipes/pylibcudf/meta.yaml | 4 ++-- dependencies.yaml | 8 ++++---- python/cudf/pyproject.toml | 2 +- python/pylibcudf/pyproject.toml | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ace55a15c09..8a64ebf40c5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,7 +19,7 @@ dependencies: - cramjam - cubinlinker - cuda-nvtx=11.8 -- cuda-python>=11.7.1,<12.0a0,!=11.8.4 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-sanitizer-api=11.8.86 - cuda-version=11.8 - cudatoolkit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index d20db44497e..5f779c3170f 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cuda-nvcc - cuda-nvrtc-dev - cuda-nvtx-dev -- cuda-python>=12.0,<13.0a0,!=12.6.1 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-sanitizer-api - cuda-version=12.5 - cupy>=12.0.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6debcb281b1..2aafcae072d 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -91,7 +91,7 @@ requirements: - cudatoolkit - ptxcompiler >=0.7.0 - cubinlinker # CUDA enhanced compatibility. - - cuda-python >=11.7.1,<12.0a0,!=11.8.4 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - cuda-cudart - libcufile # [linux64] @@ -100,7 +100,7 @@ requirements: # TODO: Add nvjitlink here # xref: https://github.com/rapidsai/cudf/issues/12822 - cuda-nvrtc - - cuda-python >=12.0,<13.0a0,!=12.6.1 + - cuda-python >=12.0,<13.0a0,<=12.6.0 - pynvjitlink {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 92ca495f972..ec3fcd59c62 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -83,9 +83,9 @@ requirements: - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,!=11.8.4 + - cuda-python >=11.7.1,<12.0a0,<=11.8.3 {% else %} - - cuda-python >=12.0,<13.0a0,!=12.6.1 + - cuda-python >=12.0,<13.0a0,<=12.6.0 {% endif %} - nvtx >=0.2.1 - packaging diff --git a/dependencies.yaml b/dependencies.yaml index 41ac6ce1808..4c6aefe996f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -659,10 +659,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0,!=12.6.1 + - cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: {cuda: "11.*"} packages: &run_pylibcudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0,!=11.8.4 + - cuda-python>=11.7.1,<12.0a0,<=11.8.3 - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: @@ -685,10 +685,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - cuda-python>=12.0,<13.0a0,!=12.6.1 + - cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: {cuda: "11.*"} packages: &run_cudf_packages_all_cu11 - - cuda-python>=11.7.1,<12.0a0,!=11.8.4 + - cuda-python>=11.7.1,<12.0a0,<=11.8.3 - {matrix: null, packages: *run_cudf_packages_all_cu11} - output_types: conda matrices: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 53f22a11e6b..1eadceaaccd 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -20,7 +20,7 @@ requires-python = ">=3.10" dependencies = [ "cachetools", "cubinlinker", - "cuda-python>=11.7.1,<12.0a0,!=11.8.4", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "libcudf==24.12.*,>=0.0.0a0", diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index e8052dfba4c..b2cec80f484 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -18,7 +18,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cuda-python>=11.7.1,<12.0a0,!=11.8.4", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", From bbd3b43719545754e9a1f6b204aad5b143f48419 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 7 Nov 2024 01:57:47 -0800 Subject: [PATCH 4/7] Fix the example in documentation for `get_dremel_data()` (#17242) Closes #11396. Fixes the example in the documentation of `get_dremel_data()` Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - David Wendt (https://github.com/davidwendt) - Vukasin Milovanovic (https://github.com/vuule) - Mike Wilson (https://github.com/hyperbolic2346) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/17242 --- cpp/include/cudf/lists/detail/dremel.hpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 96ee30dd261..f45da8e8d8d 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -58,7 +58,7 @@ struct dremel_data { }; /** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column + * @brief Get the dremel offsets, repetition levels, and definition levels for a LIST column * * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale @@ -74,7 +74,7 @@ struct dremel_data { * * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da - * https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet + * https://blog.x.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet * * The remainder of this documentation assumes familiarity with the Dremel concepts. * @@ -102,16 +102,17 @@ struct dremel_data { * ``` * We can represent it in cudf format with two level of offsets like this: * ``` - * Level 0 offsets = {0, 0, 3, 5, 6} + * Level 0 offsets = {0, 0, 3, 4} * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * The desired result of this function is the repetition and definition level values that - * correspond to the data values: + * This function returns the dremel offsets, repetition levels, and definition level + * values that correspond to the data values: * ``` - * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} - * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } + * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} + * dremel_offsets = { 0, 1, 7, 8} + * def_levels = { 0, 1, 2, 2, 2, 2, 2, 1 } + * rep_levels = { 0, 0, 1, 2, 2, 1, 2, 0 } * ``` * * Since repetition and definition levels arrays contain a value for each empty list, the size of From e29e0ab477f4a541752a578f8769d8dd816ffbe8 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 7 Nov 2024 06:14:58 -0500 Subject: [PATCH 5/7] Move strings/numeric convert benchmarks to nvbench (#17255) Moves the `cpp/benchmarks/string/convert_numerics.cpp` and `cpp/benchmarks/string/convert_fixed_point.cpp` benchmark implementations from google-bench to nvbench. Authors: - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17255 --- cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/string/convert_fixed_point.cpp | 111 +++++--------- cpp/benchmarks/string/convert_numerics.cpp | 138 ++++++------------ 3 files changed, 79 insertions(+), 174 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 68781889c53..bdc360c082b 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -358,8 +358,6 @@ ConfigureBench( STRINGS_BENCH string/convert_datetime.cpp string/convert_durations.cpp - string/convert_fixed_point.cpp - string/convert_numerics.cpp string/copy.cu string/factory.cu string/filter.cpp @@ -375,6 +373,8 @@ ConfigureNVBench( string/char_types.cpp string/combine.cpp string/contains.cpp + string/convert_fixed_point.cpp + string/convert_numerics.cpp string/copy_if_else.cpp string/copy_range.cpp string/count.cpp diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index e5bd794e405..97e114c0795 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -16,93 +16,48 @@ #include #include -#include #include #include #include -namespace { +#include -std::unique_ptr get_strings_column(cudf::size_type rows) -{ - auto result = - create_random_column(cudf::type_id::FLOAT32, row_count{static_cast(rows)}); - return cudf::strings::from_floats(result->view()); -} - -} // anonymous namespace - -class StringsToFixedPoint : public cudf::benchmark {}; - -template -void convert_to_fixed_point(benchmark::State& state) -{ - auto const rows = static_cast(state.range(0)); - auto const strings_col = get_strings_column(rows); - auto const strings_view = cudf::strings_column_view(strings_col->view()); - auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype); - } +using Types = nvbench::type_list; - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype))); -} - -class StringsFromFixedPoint : public cudf::benchmark {}; +NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32"); +NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64"); -template -void convert_from_fixed_point(benchmark::State& state) +template +void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list) { - auto const rows = static_cast(state.range(0)); - auto const strings_col = get_strings_column(rows); - auto const dtype = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; - auto const fp_col = - cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype); - - std::unique_ptr results = nullptr; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - results = cudf::strings::from_fixed_point(fp_col->view()); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const from_num = state.get_string("dir") == "from"; + + auto const data_type = cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}; + auto const fp_col = create_random_column(data_type.id(), row_count{num_rows}); + + auto const strings_col = cudf::strings::from_fixed_point(fp_col->view()); + auto const sv = cudf::strings_column_view(strings_col->view()); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (from_num) { + state.add_global_memory_reads(num_rows * cudf::size_of(data_type)); + state.add_global_memory_writes(sv.chars_size(stream)); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); }); + } else { + state.add_global_memory_reads(sv.chars_size(stream)); + state.add_global_memory_writes(num_rows * cudf::size_of(data_type)); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); }); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + - rows * cudf::size_of(dtype))); } -#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type) \ - BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \ - { \ - convert_to_fixed_point(state); \ - } \ - BENCHMARK_REGISTER_F(StringsToFixedPoint, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 12, 1 << 24) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type) \ - BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \ - { \ - convert_from_fixed_point(state); \ - } \ - BENCHMARK_REGISTER_F(StringsFromFixedPoint, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 12, 1 << 24) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32); -CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64); - -CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32); -CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64); +NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types)) + .set_name("fixed_point") + .set_type_axes_names({"DataType"}) + .add_string_axis("dir", {"to", "from"}) + .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22}); diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index 8f875c5c80f..e1f650dd6cd 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -16,117 +16,67 @@ #include #include -#include #include #include #include -namespace { +#include -template -std::unique_ptr get_numerics_column(cudf::size_type rows) -{ - return create_random_column(cudf::type_to_id(), row_count{rows}); -} +namespace { template -std::unique_ptr get_strings_column(cudf::size_type rows) +std::unique_ptr get_strings_column(cudf::column_view const& nv) { - auto const numerics_col = get_numerics_column(rows); if constexpr (std::is_floating_point_v) { - return cudf::strings::from_floats(numerics_col->view()); + return cudf::strings::from_floats(nv); } else { - return cudf::strings::from_integers(numerics_col->view()); - } -} -} // anonymous namespace - -class StringsToNumeric : public cudf::benchmark {}; - -template -void convert_to_number(benchmark::State& state) -{ - auto const rows = static_cast(state.range(0)); - - auto const strings_col = get_strings_column(rows); - auto const strings_view = cudf::strings_column_view(strings_col->view()); - auto const col_type = cudf::type_to_id(); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - if constexpr (std::is_floating_point_v) { - cudf::strings::to_floats(strings_view, cudf::data_type{col_type}); - } else { - cudf::strings::to_integers(strings_view, cudf::data_type{col_type}); - } + return cudf::strings::from_integers(nv); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType))); } +} // namespace -class StringsFromNumeric : public cudf::benchmark {}; +using Types = nvbench::type_list; template -void convert_from_number(benchmark::State& state) +void bench_convert_number(nvbench::state& state, nvbench::type_list) { - auto const rows = static_cast(state.range(0)); - - auto const numerics_col = get_numerics_column(rows); - auto const numerics_view = numerics_col->view(); - - std::unique_ptr results = nullptr; - - for (auto _ : state) { - cuda_event_timer raii(state, true); - if constexpr (std::is_floating_point_v) - results = cudf::strings::from_floats(numerics_view); - else - results = cudf::strings::from_integers(numerics_view); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const from_num = state.get_string("dir") == "from"; + + auto const data_type = cudf::data_type(cudf::type_to_id()); + auto const num_col = create_random_column(data_type.id(), row_count{num_rows}); + + auto const strings_col = get_strings_column(num_col->view()); + auto const sv = cudf::strings_column_view(strings_col->view()); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + + if (from_num) { + state.add_global_memory_reads(num_rows); + state.add_global_memory_writes(sv.chars_size(stream)); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if constexpr (std::is_floating_point_v) { + cudf::strings::to_floats(sv, data_type); + } else { + cudf::strings::to_integers(sv, data_type); + } + }); + } else { + state.add_global_memory_reads(sv.chars_size(stream)); + state.add_global_memory_writes(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + if constexpr (std::is_floating_point_v) + cudf::strings::from_floats(num_col->view()); + else + cudf::strings::from_integers(num_col->view()); + }); } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) + - rows * sizeof(NumericType))); } -#define CONVERT_TO_NUMERICS_BD(name, type) \ - BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \ - { \ - convert_to_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringsToNumeric, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define CONVERT_FROM_NUMERICS_BD(name, type) \ - BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \ - { \ - convert_from_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringsFromNumeric, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -CONVERT_TO_NUMERICS_BD(strings_to_float32, float); -CONVERT_TO_NUMERICS_BD(strings_to_float64, double); -CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t); -CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t); -CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t); -CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t); - -CONVERT_FROM_NUMERICS_BD(strings_from_float32, float); -CONVERT_FROM_NUMERICS_BD(strings_from_float64, double); -CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t); -CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t); -CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t); -CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t); +NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types)) + .set_name("numeric") + .set_type_axes_names({"NumericType"}) + .add_string_axis("dir", {"to", "from"}) + .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22}); From 4cbc15aaf61a64e21a6eae0c5edf66ddf73f3f14 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Thu, 7 Nov 2024 14:05:35 +0000 Subject: [PATCH 6/7] Added ast tree to simplify expression lifetime management (#17156) This merge request follows up on https://github.com/rapidsai/cudf/issues/10744. It attempts to simplify managing expressions by adding a class called an ast tree. The ast tree manages and holds related expressions together. When the tree is destroyed, all the expressions are also destroyed. Ideally we would use a bump allocator for allocating the expressions instead of `std::vector>`. We'd also ideally use a `cuda::std::inplace_vector` for storing the operands of the `operation` class, but that's in a newer version of CCCL. Authors: - Basit Ayantunde (https://github.com/lamarrr) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/17156 --- .../cudf/ast/detail/expression_parser.hpp | 6 +- cpp/include/cudf/ast/expressions.hpp | 100 +++++++++++++++++- cpp/src/ast/expression_parser.cpp | 2 +- cpp/src/ast/expressions.cpp | 24 +++-- cpp/src/io/parquet/predicate_pushdown.cpp | 7 +- cpp/src/io/parquet/reader_impl_helpers.hpp | 2 +- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/ast/ast_tree_tests.cpp | 79 ++++++++++++++ cpp/tests/ast/transform_tests.cpp | 5 +- 9 files changed, 207 insertions(+), 20 deletions(-) create mode 100644 cpp/tests/ast/ast_tree_tests.cpp diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index f4cce8e6da6..b5973d0ace9 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -19,6 +19,10 @@ #include #include #include +#include +#include + +#include #include #include @@ -296,7 +300,7 @@ class expression_parser { * @return The indices of the operands stored in the data references. */ std::vector visit_operands( - std::vector> operands); + cudf::host_span const> operands); /** * @brief Add a data reference to the internal list. diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 4299ee5f20f..bcc9ad1b391 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -22,6 +22,8 @@ #include #include +#include +#include namespace CUDF_EXPORT cudf { namespace ast { @@ -478,7 +480,7 @@ class operation : public expression { * * @return Vector of operands */ - [[nodiscard]] std::vector> get_operands() const + [[nodiscard]] std::vector> const& get_operands() const { return operands; } @@ -506,8 +508,8 @@ class operation : public expression { }; private: - ast_operator const op; - std::vector> const operands; + ast_operator op; + std::vector> operands; }; /** @@ -552,6 +554,98 @@ class column_name_reference : public expression { std::string column_name; }; +/** + * @brief An AST expression tree. It owns and contains multiple dependent expressions. All the + * expressions are destroyed when the tree is destructed. + */ +class tree { + public: + /** + * @brief construct an empty ast tree + */ + tree() = default; + + /** + * @brief Moves the ast tree + */ + tree(tree&&) = default; + + /** + * @brief move-assigns the AST tree + * @returns a reference to the move-assigned tree + */ + tree& operator=(tree&&) = default; + + ~tree() = default; + + // the tree is not copyable + tree(tree const&) = delete; + tree& operator=(tree const&) = delete; + + /** + * @brief Add an expression to the AST tree + * @param args Arguments to use to construct the ast expression + * @returns a reference to the added expression + */ + template + Expr const& emplace(Args&&... args) + { + static_assert(std::is_base_of_v); + auto expr = std::make_shared(std::forward(args)...); + Expr const& expr_ref = *expr; + expressions.emplace_back(std::static_pointer_cast(std::move(expr))); + return expr_ref; + } + + /** + * @brief Add an expression to the AST tree + * @param expr AST expression to be added + * @returns a reference to the added expression + */ + template + Expr const& push(Expr expr) + { + return emplace(std::move(expr)); + } + + /** + * @brief get the first expression in the tree + * @returns the first inserted expression into the tree + */ + expression const& front() const { return *expressions.front(); } + + /** + * @brief get the last expression in the tree + * @returns the last inserted expression into the tree + */ + expression const& back() const { return *expressions.back(); } + + /** + * @brief get the number of expressions added to the tree + * @returns the number of expressions added to the tree + */ + size_t size() const { return expressions.size(); } + + /** + * @brief get the expression at an index in the tree. Index is checked. + * @param index index of expression in the ast tree + * @returns the expression at the specified index + */ + expression const& at(size_t index) { return *expressions.at(index); } + + /** + * @brief get the expression at an index in the tree. Index is unchecked. + * @param index index of expression in the ast tree + * @returns the expression at the specified index + */ + expression const& operator[](size_t index) const { return *expressions[index]; } + + private: + // TODO: use better ownership semantics, the shared_ptr here is redundant. Consider using a bump + // allocator with type-erased deleters. + std::vector> expressions; +}; + /** @} */ // end of group } // namespace ast diff --git a/cpp/src/ast/expression_parser.cpp b/cpp/src/ast/expression_parser.cpp index 5815ce33e33..d0e4c59ca54 100644 --- a/cpp/src/ast/expression_parser.cpp +++ b/cpp/src/ast/expression_parser.cpp @@ -207,7 +207,7 @@ cudf::data_type expression_parser::output_type() const } std::vector expression_parser::visit_operands( - std::vector> operands) + cudf::host_span const> operands) { auto operand_data_reference_indices = std::vector(); for (auto const& operand : operands) { diff --git a/cpp/src/ast/expressions.cpp b/cpp/src/ast/expressions.cpp index 4c2b56dd4f5..b7e4e4609cb 100644 --- a/cpp/src/ast/expressions.cpp +++ b/cpp/src/ast/expressions.cpp @@ -20,36 +20,41 @@ #include #include +#include + namespace cudf { namespace ast { -operation::operation(ast_operator op, expression const& input) : op(op), operands({input}) +operation::operation(ast_operator op, expression const& input) : op{op}, operands{input} { - if (cudf::ast::detail::ast_operator_arity(op) != 1) { - CUDF_FAIL("The provided operator is not a unary operator."); - } + CUDF_EXPECTS(cudf::ast::detail::ast_operator_arity(op) == 1, + "The provided operator is not a unary operator.", + std::invalid_argument); } operation::operation(ast_operator op, expression const& left, expression const& right) - : op(op), operands({left, right}) + : op{op}, operands{left, right} { - if (cudf::ast::detail::ast_operator_arity(op) != 2) { - CUDF_FAIL("The provided operator is not a binary operator."); - } + CUDF_EXPECTS(cudf::ast::detail::ast_operator_arity(op) == 2, + "The provided operator is not a binary operator.", + std::invalid_argument); } cudf::size_type literal::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); } + cudf::size_type column_reference::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); } + cudf::size_type operation::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); } + cudf::size_type column_name_reference::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); @@ -60,16 +65,19 @@ auto literal::accept(detail::expression_transformer& visitor) const { return visitor.visit(*this); } + auto column_reference::accept(detail::expression_transformer& visitor) const -> decltype(visitor.visit(*this)) { return visitor.visit(*this); } + auto operation::accept(detail::expression_transformer& visitor) const -> decltype(visitor.visit(*this)) { return visitor.visit(*this); } + auto column_name_reference::accept(detail::expression_transformer& visitor) const -> decltype(visitor.visit(*this)) { diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index a965f3325d5..cd3dcd2bce4 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -373,7 +374,7 @@ class stats_expression_converter : public ast::detail::expression_transformer { private: std::vector> visit_operands( - std::vector> operands) + cudf::host_span const> operands) { std::vector> transformed_operands; for (auto const& operand : operands) { @@ -553,7 +554,7 @@ std::reference_wrapper named_to_reference_converter::visi std::vector> named_to_reference_converter::visit_operands( - std::vector> operands) + cudf::host_span const> operands) { std::vector> transformed_operands; for (auto const& operand : operands) { @@ -623,7 +624,7 @@ class names_from_expression : public ast::detail::expression_transformer { } private: - void visit_operands(std::vector> operands) + void visit_operands(cudf::host_span const> operands) { for (auto const& operand : operands) { operand.get().accept(*this); diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 6487c92f48f..fd692c0cdd6 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -425,7 +425,7 @@ class named_to_reference_converter : public ast::detail::expression_transformer private: std::vector> visit_operands( - std::vector> operands); + cudf::host_span const> operands); std::unordered_map column_name_to_index; std::optional> _stats_expr; diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 23632f6fbba..e9ba58ba224 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -650,7 +650,7 @@ ConfigureTest(ENCODE_TEST encode/encode_tests.cpp) # ################################################################################################## # * ast tests ------------------------------------------------------------------------------------- -ConfigureTest(AST_TEST ast/transform_tests.cpp) +ConfigureTest(AST_TEST ast/transform_tests.cpp ast/ast_tree_tests.cpp) # ################################################################################################## # * lists tests ---------------------------------------------------------------------------------- diff --git a/cpp/tests/ast/ast_tree_tests.cpp b/cpp/tests/ast/ast_tree_tests.cpp new file mode 100644 index 00000000000..1a960c68e23 --- /dev/null +++ b/cpp/tests/ast/ast_tree_tests.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +template +using column_wrapper = cudf::test::fixed_width_column_wrapper; + +TEST(AstTreeTest, ExpressionTree) +{ + namespace ast = cudf::ast; + using op = ast::ast_operator; + using operation = ast::operation; + + // computes (y = mx + c)... and linearly interpolates them using interpolator t + auto m0_col = column_wrapper{10, 20, 50, 100}; + auto x0_col = column_wrapper{10, 5, 2, 1}; + auto c0_col = column_wrapper{100, 100, 100, 100}; + + auto m1_col = column_wrapper{10, 20, 50, 100}; + auto x1_col = column_wrapper{20, 10, 4, 2}; + auto c1_col = column_wrapper{200, 200, 200, 200}; + + auto one_scalar = cudf::numeric_scalar{1}; + auto t_scalar = cudf::numeric_scalar{0.5F}; + + auto table = cudf::table_view{{m0_col, x0_col, c0_col, m1_col, x1_col, c1_col}}; + + ast::tree tree{}; + + auto const& one = tree.push(ast::literal{one_scalar}); + auto const& t = tree.push(ast::literal{t_scalar}); + auto const& m0 = tree.push(ast::column_reference(0)); + auto const& x0 = tree.push(ast::column_reference(1)); + auto const& c0 = tree.push(ast::column_reference(2)); + auto const& m1 = tree.push(ast::column_reference(3)); + auto const& x1 = tree.push(ast::column_reference(4)); + auto const& c1 = tree.push(ast::column_reference(5)); + + // compute: y0 = m0 x0 + c0 + auto const& y0 = tree.push(operation{op::ADD, tree.push(operation{op::MUL, m0, x0}), c0}); + + // compute: y1 = m1 x1 + c1 + auto const& y1 = tree.push(operation{op::ADD, tree.push(operation{op::MUL, m1, x1}), c1}); + + // compute weighted: (1 - t) * y0 + auto const& y0_w = tree.push(operation{op::MUL, tree.push(operation{op::SUB, one, t}), y0}); + + // compute weighted: y = t * y1 + auto const& y1_w = tree.push(operation{op::MUL, t, y1}); + + // add weighted: result = lerp(y0, y1, t) = (1 - t) * y0 + t * y1 + auto result = cudf::compute_column(table, tree.push(operation{op::ADD, y0_w, y1_w})); + + auto expected = column_wrapper{300, 300, 300, 300}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index 7af88d8aa34..e28d92bb615 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -530,9 +530,10 @@ TEST_F(TransformTest, UnaryTrigonometry) TEST_F(TransformTest, ArityCheckFailure) { auto col_ref_0 = cudf::ast::column_reference(0); - EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0), cudf::logic_error); + EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0), + std::invalid_argument); EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ABS, col_ref_0, col_ref_0), - cudf::logic_error); + std::invalid_argument); } TEST_F(TransformTest, StringComparison) From e4c52ddb23b3524b665c7a97c905ed66fb341ea6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Thu, 7 Nov 2024 08:26:06 -0600 Subject: [PATCH 7/7] `cudf-polars` string/numeric casting (#17076) Depends on https://github.com/rapidsai/cudf/pull/16991 Part of https://github.com/rapidsai/cudf/issues/17060 Implements cross casting from string <-> numeric types in `cudf-polars` Authors: - https://github.com/brandon-b-miller - Matthew Murray (https://github.com/Matt711) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Muhammad Haseeb (https://github.com/mhaseeb123) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17076 --- cpp/include/cudf/utilities/traits.hpp | 24 ++++ cpp/src/utilities/traits.cpp | 13 ++ .../cudf_polars/containers/column.py | 56 ++++++++- .../cudf_polars/dsl/expressions/unary.py | 4 +- .../cudf_polars/cudf_polars/testing/plugin.py | 1 + .../cudf_polars/cudf_polars/utils/dtypes.py | 65 +++++++++- .../tests/expressions/test_casting.py | 2 +- .../tests/expressions/test_numeric_binops.py | 10 -- .../tests/expressions/test_stringfunction.py | 117 ++++++++++++++++++ python/cudf_polars/tests/utils/test_dtypes.py | 31 ++++- .../pylibcudf/libcudf/utilities/traits.pxd | 1 + .../pylibcudf/pylibcudf/tests/test_traits.py | 5 + python/pylibcudf/pylibcudf/traits.pyx | 6 + 13 files changed, 311 insertions(+), 24 deletions(-) diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index cf8413b597f..22a67ca049a 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -301,6 +301,30 @@ constexpr inline bool is_integral_not_bool() */ bool is_integral_not_bool(data_type type); +/** + * @brief Indicates whether the type `T` is a numeric type but not bool type. + * + * @tparam T The type to verify + * @return true `T` is numeric but not bool + * @return false `T` is not numeric or is bool + */ +template +constexpr inline bool is_numeric_not_bool() +{ + return cudf::is_numeric() and not std::is_same_v; +} + +/** + * @brief Indicates whether `type` is a numeric `data_type` but not BOOL8 + * + * "Numeric" types are integral/floating point types such as `INT*` or `FLOAT*`. + * + * @param type The `data_type` to verify + * @return true `type` is numeric but not bool + * @return false `type` is not numeric or is bool + */ +bool is_numeric_not_bool(data_type type); + /** * @brief Indicates whether the type `T` is a floating point type. * diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp index c1e71f5f8f9..41ee4e960b6 100644 --- a/cpp/src/utilities/traits.cpp +++ b/cpp/src/utilities/traits.cpp @@ -169,6 +169,19 @@ bool is_integral_not_bool(data_type type) return cudf::type_dispatcher(type, is_integral_not_bool_impl{}); } +struct is_numeric_not_bool_impl { + template + constexpr bool operator()() + { + return is_numeric_not_bool(); + } +}; + +bool is_numeric_not_bool(data_type type) +{ + return cudf::type_dispatcher(type, is_numeric_not_bool_impl{}); +} + struct is_floating_point_impl { template constexpr bool operator()() diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 00186098e54..93d95346a37 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -8,7 +8,18 @@ import functools from typing import TYPE_CHECKING +from polars.exceptions import InvalidOperationError + import pylibcudf as plc +from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats +from pylibcudf.strings.convert.convert_integers import ( + from_integers, + is_integer, + to_integers, +) +from pylibcudf.traits import is_floating_point + +from cudf_polars.utils.dtypes import is_order_preserving_cast if TYPE_CHECKING: from typing_extensions import Self @@ -129,11 +140,46 @@ def astype(self, dtype: plc.DataType) -> Column: This only produces a copy if the requested dtype doesn't match the current one. """ - if self.obj.type() != dtype: - return Column(plc.unary.cast(self.obj, dtype), name=self.name).sorted_like( - self - ) - return self + if self.obj.type() == dtype: + return self + + if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING: + return Column(self._handle_string_cast(dtype)) + else: + result = Column(plc.unary.cast(self.obj, dtype)) + if is_order_preserving_cast(self.obj.type(), dtype): + return result.sorted_like(self) + return result + + def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column: + if dtype.id() == plc.TypeId.STRING: + if is_floating_point(self.obj.type()): + return from_floats(self.obj) + else: + return from_integers(self.obj) + else: + if is_floating_point(dtype): + floats = is_float(self.obj) + if not plc.interop.to_arrow( + plc.reduce.reduce( + floats, + plc.aggregation.all(), + plc.DataType(plc.TypeId.BOOL8), + ) + ).as_py(): + raise InvalidOperationError("Conversion from `str` failed.") + return to_floats(self.obj, dtype) + else: + integers = is_integer(self.obj) + if not plc.interop.to_arrow( + plc.reduce.reduce( + integers, + plc.aggregation.all(), + plc.DataType(plc.TypeId.BOOL8), + ) + ).as_py(): + raise InvalidOperationError("Conversion from `str` failed.") + return to_integers(self.obj, dtype) def copy_metadata(self, from_: pl.Series, /) -> Self: """ diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py index 6f22544c050..7999ec86068 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py @@ -35,7 +35,7 @@ def __init__(self, dtype: plc.DataType, value: Expr) -> None: self.children = (value,) if not dtypes.can_cast(value.dtype, self.dtype): raise NotImplementedError( - f"Can't cast {self.dtype.id().name} to {value.dtype.id().name}" + f"Can't cast {value.dtype.id().name} to {self.dtype.id().name}" ) def do_evaluate( @@ -48,7 +48,7 @@ def do_evaluate( """Evaluate this expression given a dataframe for context.""" (child,) = self.children column = child.evaluate(df, context=context, mapping=mapping) - return Column(plc.unary.cast(column.obj, self.dtype)).sorted_like(column) + return column.astype(self.dtype) def collect_agg(self, *, depth: int) -> AggInfo: """Collect information about aggregations in groupbys.""" diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index e01ccd05527..2f95cd38c57 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -158,6 +158,7 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU", "tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU", + "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user receives is wrong", "tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv", "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception", "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match", diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 1d0479802ca..a90c283ee54 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -12,10 +12,20 @@ import polars as pl +from pylibcudf.traits import ( + is_floating_point, + is_integral_not_bool, + is_numeric_not_bool, +) + +__all__ = [ + "from_polars", + "downcast_arrow_lists", + "can_cast", + "is_order_preserving_cast", +] import pylibcudf as plc -__all__ = ["from_polars", "downcast_arrow_lists", "can_cast"] - def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType: """ @@ -62,9 +72,54 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool: True if casting is supported, False otherwise """ return ( - plc.traits.is_fixed_width(to) - and plc.traits.is_fixed_width(from_) - and plc.unary.is_supported_cast(from_, to) + ( + plc.traits.is_fixed_width(to) + and plc.traits.is_fixed_width(from_) + and plc.unary.is_supported_cast(from_, to) + ) + or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to)) + or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_)) + ) + + +def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool: + """ + Determine if a cast would preserve the order of the source data. + + Parameters + ---------- + from_ + Source datatype + to + Target datatype + + Returns + ------- + True if the cast is order-preserving, False otherwise + """ + if from_.id() == to.id(): + return True + + if is_integral_not_bool(from_) and is_integral_not_bool(to): + # True if signedness is the same and the target is larger + if plc.traits.is_unsigned(from_) == plc.traits.is_unsigned(to): + if plc.types.size_of(to) >= plc.types.size_of(from_): + return True + elif (plc.traits.is_unsigned(from_) and not plc.traits.is_unsigned(to)) and ( + plc.types.size_of(to) > plc.types.size_of(from_) + ): + # Unsigned to signed is order preserving if target is large enough + # But signed to unsigned is never order preserving due to negative values + return True + elif ( + is_floating_point(from_) + and is_floating_point(to) + and (plc.types.size_of(to) >= plc.types.size_of(from_)) + ): + # True if the target is larger + return True + return (is_integral_not_bool(from_) and is_floating_point(to)) or ( + is_floating_point(from_) and is_integral_not_bool(to) ) diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py index 3e003054338..0722a0f198a 100644 --- a/python/cudf_polars/tests/expressions/test_casting.py +++ b/python/cudf_polars/tests/expressions/test_casting.py @@ -14,7 +14,7 @@ _supported_dtypes = [(pl.Int8(), pl.Int64())] _unsupported_dtypes = [ - (pl.String(), pl.Int64()), + (pl.Datetime("ns"), pl.Int64()), ] diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py index 8f68bbc460c..fa1ec3c19e4 100644 --- a/python/cudf_polars/tests/expressions/test_numeric_binops.py +++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py @@ -8,7 +8,6 @@ from cudf_polars.testing.asserts import ( assert_gpu_result_equal, - assert_ir_translation_raises, ) dtypes = [ @@ -114,12 +113,3 @@ def test_binop_with_scalar(left_scalar, right_scalar): q = df.select(lop / rop) assert_gpu_result_equal(q) - - -def test_numeric_to_string_cast_fails(): - df = pl.DataFrame( - {"a": [1, 1, 2, 3, 3, 4, 1], "b": [None, 2, 3, 4, 5, 6, 7]} - ).lazy() - q = df.select(pl.col("a").cast(pl.String)) - - assert_ir_translation_raises(q, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 4f6850ac977..8d7d970eb07 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -40,6 +40,79 @@ def ldf(with_nulls): ) +@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64]) +def integer_type(request): + return request.param + + +@pytest.fixture(params=[pl.Float32, pl.Float64]) +def floating_type(request): + return request.param + + +@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Float32, pl.Float64]) +def numeric_type(request): + return request.param + + +@pytest.fixture +def str_to_integer_data(with_nulls): + a = ["1", "2", "3", "4", "5", "6"] + if with_nulls: + a[4] = None + return pl.LazyFrame({"a": a}) + + +@pytest.fixture +def str_to_float_data(with_nulls): + a = [ + "1.1", + "2.2", + "3.3", + "4.4", + "5.5", + "6.6", + "inf", + "+inf", + "-inf", + "Inf", + "-Inf", + "nan", + "-1.234", + "2e2", + ] + if with_nulls: + a[4] = None + return pl.LazyFrame({"a": a}) + + +@pytest.fixture +def str_from_integer_data(with_nulls, integer_type): + a = [1, 2, 3, 4, 5, 6] + if with_nulls: + a[4] = None + return pl.LazyFrame({"a": pl.Series(a, dtype=integer_type)}) + + +@pytest.fixture +def str_from_float_data(with_nulls, floating_type): + a = [ + 1.1, + 2.2, + 3.3, + 4.4, + 5.5, + 6.6, + float("inf"), + float("+inf"), + float("-inf"), + float("nan"), + ] + if with_nulls: + a[4] = None + return pl.LazyFrame({"a": pl.Series(a, dtype=floating_type)}) + + slice_cases = [ (1, 3), (0, 3), @@ -337,3 +410,47 @@ def test_unsupported_regex_raises(pattern): q = df.select(pl.col("a").str.contains(pattern, strict=True)) assert_ir_translation_raises(q, NotImplementedError) + + +def test_string_to_integer(str_to_integer_data, integer_type): + query = str_to_integer_data.select(pl.col("a").cast(integer_type)) + assert_gpu_result_equal(query) + + +def test_string_from_integer(str_from_integer_data): + query = str_from_integer_data.select(pl.col("a").cast(pl.String)) + assert_gpu_result_equal(query) + + +def test_string_to_float(str_to_float_data, floating_type): + query = str_to_float_data.select(pl.col("a").cast(floating_type)) + assert_gpu_result_equal(query) + + +def test_string_from_float(request, str_from_float_data): + if str_from_float_data.collect_schema()["a"] == pl.Float32: + # libcudf will return a string representing the precision out to + # a certain number of hardcoded decimal places. This results in + # the fractional part being thrown away which causes discrepancies + # for certain numbers. For instance, the float32 representation of + # 1.1 is 1.100000023841858. When cast to a string, this will become + # 1.100000024. But the float64 representation of 1.1 is + # 1.1000000000000000888 which will result in libcudf truncating the + # final value to 1.1. + request.applymarker(pytest.mark.xfail(reason="libcudf truncation")) + query = str_from_float_data.select(pl.col("a").cast(pl.String)) + + # libcudf reads float('inf') -> "inf" + # but polars reads float('inf') -> "Inf" + query = query.select(pl.col("a").str.to_lowercase()) + assert_gpu_result_equal(query) + + +def test_string_to_numeric_invalid(numeric_type): + df = pl.LazyFrame({"a": ["a", "b", "c"]}) + q = df.select(pl.col("a").cast(numeric_type)) + assert_collect_raises( + q, + polars_except=pl.exceptions.InvalidOperationError, + cudf_except=pl.exceptions.ComputeError, + ) diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py index bbdb4faa256..f63c2079e04 100644 --- a/python/cudf_polars/tests/utils/test_dtypes.py +++ b/python/cudf_polars/tests/utils/test_dtypes.py @@ -7,7 +7,20 @@ import polars as pl -from cudf_polars.utils.dtypes import from_polars +import pylibcudf as plc + +from cudf_polars.utils.dtypes import from_polars, is_order_preserving_cast + +INT8 = plc.DataType(plc.TypeId.INT8) +INT16 = plc.DataType(plc.TypeId.INT16) +INT32 = plc.DataType(plc.TypeId.INT32) +INT64 = plc.DataType(plc.TypeId.INT64) +UINT8 = plc.DataType(plc.TypeId.UINT8) +UINT16 = plc.DataType(plc.TypeId.UINT16) +UINT32 = plc.DataType(plc.TypeId.UINT32) +UINT64 = plc.DataType(plc.TypeId.UINT64) +FLOAT32 = plc.DataType(plc.TypeId.FLOAT32) +FLOAT64 = plc.DataType(plc.TypeId.FLOAT64) @pytest.mark.parametrize( @@ -30,3 +43,19 @@ def test_unhandled_dtype_conversion_raises(pltype): with pytest.raises(NotImplementedError): _ = from_polars(pltype) + + +def test_is_order_preserving_cast(): + assert is_order_preserving_cast(INT8, INT8) # Same type + assert is_order_preserving_cast(INT8, INT16) # Smaller type + assert is_order_preserving_cast(INT8, FLOAT32) # Int to large enough float + assert is_order_preserving_cast(UINT8, UINT16) # Unsigned to larger unsigned + assert is_order_preserving_cast(UINT8, FLOAT32) # Unsigned to large enough float + assert is_order_preserving_cast(FLOAT32, FLOAT64) # Float to larger float + assert is_order_preserving_cast(INT64, FLOAT32) # Int any float + assert is_order_preserving_cast(FLOAT32, INT32) # Float to undersized int + assert is_order_preserving_cast(FLOAT32, INT64) # float to large int + + assert not is_order_preserving_cast(INT16, INT8) # Bigger type + assert not is_order_preserving_cast(INT8, UINT8) # Different signedness + assert not is_order_preserving_cast(FLOAT64, FLOAT32) # Smaller float diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd index 69765e44274..5533530754e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd @@ -9,6 +9,7 @@ cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil: cdef bool is_relationally_comparable(data_type) cdef bool is_equality_comparable(data_type) cdef bool is_numeric(data_type) + cdef bool is_numeric_not_bool(data_type) cdef bool is_index_type(data_type) cdef bool is_unsigned(data_type) cdef bool is_integral(data_type) diff --git a/python/pylibcudf/pylibcudf/tests/test_traits.py b/python/pylibcudf/pylibcudf/tests/test_traits.py index 2570e8abd51..2c1708304eb 100644 --- a/python/pylibcudf/pylibcudf/tests/test_traits.py +++ b/python/pylibcudf/pylibcudf/tests/test_traits.py @@ -20,6 +20,11 @@ def test_is_numeric(): assert not plc.traits.is_numeric(plc.DataType(plc.TypeId.LIST)) +def test_is_numeric_not_bool(): + assert plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.FLOAT64)) + assert not plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.BOOL8)) + + def test_is_index_type(): assert plc.traits.is_index_type(plc.DataType(plc.TypeId.INT8)) assert not plc.traits.is_index_type(plc.DataType(plc.TypeId.BOOL8)) diff --git a/python/pylibcudf/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx index 5a1c67e1f6c..9c52e0ac1ab 100644 --- a/python/pylibcudf/pylibcudf/traits.pyx +++ b/python/pylibcudf/pylibcudf/traits.pyx @@ -29,6 +29,12 @@ cpdef bool is_numeric(DataType typ): """ return traits.is_numeric(typ.c_obj) +cpdef bool is_numeric_not_bool(DataType typ): + """Checks if the given data type is numeric excluding booleans. + + For details, see :cpp:func:`is_numeric_not_bool`. + """ + return traits.is_numeric_not_bool(typ.c_obj) cpdef bool is_index_type(DataType typ): """Checks if the given data type is an index type.