From 06b3f83b3e7f1b1364973be34f58fac4caf773f3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 6 Nov 2024 16:54:28 -0500
Subject: [PATCH 1/7] Disallow cuda-python 12.6.1 and 11.8.4 (#17253)

Due to a bug in cuda-python we must disallow cuda-python 12.6.1 and
11.8.4. This PR disallows those versions. It also silences new
cuda-python deprecation warnings so that our test suite passes.

See https://github.com/rapidsai/build-planning/issues/116 for more
information.

---------

Co-authored-by: James Lamb <jlamb@nvidia.com>
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 conda/recipes/cudf/meta.yaml                     | 4 ++--
 conda/recipes/pylibcudf/meta.yaml                | 4 ++--
 dependencies.yaml                                | 8 ++++----
 python/cudf/pyproject.toml                       | 4 +++-
 python/cudf_kafka/pyproject.toml                 | 4 +++-
 python/cudf_polars/pyproject.toml                | 4 +++-
 python/custreamz/pyproject.toml                  | 2 ++
 python/dask_cudf/pyproject.toml                  | 2 ++
 python/pylibcudf/pyproject.toml                  | 4 +++-
 11 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 9d9fec97731..ace55a15c09 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cramjam
 - cubinlinker
 - cuda-nvtx=11.8
-- cuda-python>=11.7.1,<12.0a0
+- cuda-python>=11.7.1,<12.0a0,!=11.8.4
 - cuda-sanitizer-api=11.8.86
 - cuda-version=11.8
 - cudatoolkit
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 19e3eafd641..d20db44497e 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -21,7 +21,7 @@ dependencies:
 - cuda-nvcc
 - cuda-nvrtc-dev
 - cuda-nvtx-dev
-- cuda-python>=12.0,<13.0a0
+- cuda-python>=12.0,<13.0a0,!=12.6.1
 - cuda-sanitizer-api
 - cuda-version=12.5
 - cupy>=12.0.0
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 2c254415318..6debcb281b1 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -91,7 +91,7 @@ requirements:
     - cudatoolkit
     - ptxcompiler >=0.7.0
     - cubinlinker  # CUDA enhanced compatibility.
-    - cuda-python >=11.7.1,<12.0a0
+    - cuda-python >=11.7.1,<12.0a0,!=11.8.4
     {% else %}
     - cuda-cudart
     - libcufile  # [linux64]
@@ -100,7 +100,7 @@ requirements:
     # TODO: Add nvjitlink here
     # xref: https://github.com/rapidsai/cudf/issues/12822
     - cuda-nvrtc
-    - cuda-python >=12.0,<13.0a0
+    - cuda-python >=12.0,<13.0a0,!=12.6.1
     - pynvjitlink
     {% endif %}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
index 3d965f30986..92ca495f972 100644
--- a/conda/recipes/pylibcudf/meta.yaml
+++ b/conda/recipes/pylibcudf/meta.yaml
@@ -83,9 +83,9 @@ requirements:
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
     {% if cuda_major == "11" %}
-    - cuda-python >=11.7.1,<12.0a0
+    - cuda-python >=11.7.1,<12.0a0,!=11.8.4
     {% else %}
-    - cuda-python >=12.0,<13.0a0
+    - cuda-python >=12.0,<13.0a0,!=12.6.1
     {% endif %}
     - nvtx >=0.2.1
     - packaging
diff --git a/dependencies.yaml b/dependencies.yaml
index 90255ca674c..cc31619c217 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -658,10 +658,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cuda-python>=12.0,<13.0a0
+              - cuda-python>=12.0,<13.0a0,!=12.6.1
           - matrix: {cuda: "11.*"}
             packages: &run_pylibcudf_packages_all_cu11
-              - cuda-python>=11.7.1,<12.0a0
+              - cuda-python>=11.7.1,<12.0a0,!=11.8.4
           - {matrix: null, packages: *run_pylibcudf_packages_all_cu11}
   run_cudf:
     common:
@@ -684,10 +684,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cuda-python>=12.0,<13.0a0
+              - cuda-python>=12.0,<13.0a0,!=12.6.1
           - matrix: {cuda: "11.*"}
             packages: &run_cudf_packages_all_cu11
-              - cuda-python>=11.7.1,<12.0a0
+              - cuda-python>=11.7.1,<12.0a0,!=11.8.4
           - {matrix: null, packages: *run_cudf_packages_all_cu11}
       - output_types: conda
         matrices:
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index b6105c17b3e..53f22a11e6b 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -20,7 +20,7 @@ requires-python = ">=3.10"
 dependencies = [
     "cachetools",
     "cubinlinker",
-    "cuda-python>=11.7.1,<12.0a0",
+    "cuda-python>=11.7.1,<12.0a0,!=11.8.4",
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
     "libcudf==24.12.*,>=0.0.0a0",
@@ -90,6 +90,8 @@ filterwarnings = [
     "error",
     "ignore:::.*xdist.*",
     "ignore:::.*pytest.*",
+    # https://github.com/rapidsai/build-planning/issues/116
+    "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
     # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
     "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore",
     # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 667cd7b1db8..ec0bc0eb22b 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -51,7 +51,9 @@ rapids = ["rmm", "cudf", "dask_cudf"]
 addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
-  "error"
+  "error",
+  # https://github.com/rapidsai/build-planning/issues/116
+  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
 ]
 xfail_strict = true
 
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index a2c62ef9460..2e75dff5c9e 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -53,7 +53,9 @@ version = {file = "cudf_polars/VERSION"}
 addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
-  "error"
+  "error",
+  # https://github.com/rapidsai/build-planning/issues/116
+  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
 ]
 xfail_strict = true
 
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index a8ab05a3922..d3baf3bf4d2 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -85,6 +85,8 @@ addopts = "--tb=native --strict-config --strict-markers"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
     "error",
+    # https://github.com/rapidsai/build-planning/issues/116
+    "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
     "ignore:unclosed <socket.socket:ResourceWarning",
     "ignore:Port .* is already in use.:UserWarning:distributed",
     # Should be fixed in the next streamz release
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index 862e8f36eaa..c7e4cbc45ea 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -87,6 +87,8 @@ empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
     "error::FutureWarning",
     "error::DeprecationWarning",
+    # https://github.com/rapidsai/build-planning/issues/116
+    "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
     # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
     "ignore:.*datetime.*utcnow.*scheduled for removal:DeprecationWarning:botocore",
     "ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. Use public APIs instead.:DeprecationWarning",
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index a80c85a1fa8..e8052dfba4c 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -18,7 +18,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "cuda-python>=11.7.1,<12.0a0",
+    "cuda-python>=11.7.1,<12.0a0,!=11.8.4",
     "libcudf==24.12.*,>=0.0.0a0",
     "nvtx>=0.2.1",
     "packaging",
@@ -74,6 +74,8 @@ addopts = "--tb=native --strict-config --strict-markers --import-mode=importlib"
 empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error",
+  # https://github.com/rapidsai/build-planning/issues/116
+  "ignore:.*cuda..* module is deprecated.*:DeprecationWarning",
   "ignore:::.*xdist.*",
   "ignore:::.*pytest.*"
 ]

From 57900dee500a1a051393dea438d32d94ecd4de61 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen" <madsbk@gmail.com>
Date: Thu, 7 Nov 2024 02:47:47 +0100
Subject: [PATCH 2/7] KvikIO shared library (#17239)

Update cudf to use the new KvikIO shared library: https://github.com/rapidsai/kvikio/pull/527

#### Tasks
- [x] Wait for the [KvikIO shared library PR](https://github.com/rapidsai/kvikio/pull/527) to be merged.
- [x] Revert the use of the [KvikIO shared library](https://github.com/rapidsai/kvikio/pull/527) in CI: https://github.com/rapidsai/cudf/commit/2d8eeafe4959357a17f6ad488811837e0a07ba65.

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cudf/pull/17239
---
 ci/build_wheel_cudf.sh         |  1 +
 ci/build_wheel_libcudf.sh      |  1 +
 ci/build_wheel_pylibcudf.sh    |  1 +
 dependencies.yaml              |  1 +
 python/libcudf/libcudf/load.py | 11 +++++++++++
 python/libcudf/pyproject.toml  |  1 +
 6 files changed, 16 insertions(+)

diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
index fef4416a366..ae4eb0d5c66 100755
--- a/ci/build_wheel_cudf.sh
+++ b/ci/build_wheel_cudf.sh
@@ -23,6 +23,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
 python -m auditwheel repair \
     --exclude libcudf.so \
     --exclude libnvcomp.so \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh
index b3d6778ea04..aabd3814a24 100755
--- a/ci/build_wheel_libcudf.sh
+++ b/ci/build_wheel_libcudf.sh
@@ -33,6 +33,7 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 mkdir -p ${package_dir}/final_dist
 python -m auditwheel repair \
     --exclude libnvcomp.so.4 \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh
index 839d98846fe..c4a89f20f5f 100755
--- a/ci/build_wheel_pylibcudf.sh
+++ b/ci/build_wheel_pylibcudf.sh
@@ -21,6 +21,7 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
 python -m auditwheel repair \
     --exclude libcudf.so \
     --exclude libnvcomp.so \
+    --exclude libkvikio.so \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
diff --git a/dependencies.yaml b/dependencies.yaml
index cc31619c217..41ac6ce1808 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -177,6 +177,7 @@ files:
     extras:
       table: project
     includes:
+      - depends_on_libkvikio
       - depends_on_nvcomp
   py_build_pylibcudf:
     output: pyproject
diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py
index ba134710868..bf27ecfa7f5 100644
--- a/python/libcudf/libcudf/load.py
+++ b/python/libcudf/libcudf/load.py
@@ -18,6 +18,17 @@
 
 
 def load_library():
+    try:
+        # libkvikio must be loaded before libcudf because libcudf references its symbols
+        import libkvikio
+
+        libkvikio.load_library()
+    except ModuleNotFoundError:
+        # libcudf's runtime dependency on libkvikio may be satisfied by a natively
+        # installed library or a conda package, in which case the import will fail and
+        # we assume the library is discoverable on system paths.
+        pass
+
     # Dynamically load libcudf.so. Prefer a system library if one is present to
     # avoid clobbering symbols that other packages might expect, but if no
     # other library is present use the one in the wheel.
diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml
index c6d9ae56467..62726bb0df4 100644
--- a/python/libcudf/pyproject.toml
+++ b/python/libcudf/pyproject.toml
@@ -38,6 +38,7 @@ classifiers = [
     "Environment :: GPU :: NVIDIA CUDA",
 ]
 dependencies = [
+    "libkvikio==24.12.*,>=0.0.0a0",
     "nvidia-nvcomp==4.1.0.6",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 

From 29484cb87a417e2e36c8f3b6cd2ec961abec3156 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 7 Nov 2024 00:51:59 -0600
Subject: [PATCH 3/7] Put a ceiling on cuda-python (#17264)

Follow-up to #17253

Contributes to https://github.com/rapidsai/build-planning/issues/116

That PR used `!=` requirements to skip a particular version of `cuda-python` that `cudf` and `pylibcudf` were incompatible with. A newer version of `cuda-python` (12.6.2 for CUDA 12, 11.8.5 for CUDA 11) was just released, and it also causes some build issues for RAPIDS libraries: https://github.com/rapidsai/cuvs/pull/445#issuecomment-2461146449

To unblock CI across RAPIDS, this proposes **temporarily** switching to ceilings on the `cuda-python` dependency here.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/17264
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 conda/recipes/cudf/meta.yaml                     | 4 ++--
 conda/recipes/pylibcudf/meta.yaml                | 4 ++--
 dependencies.yaml                                | 8 ++++----
 python/cudf/pyproject.toml                       | 2 +-
 python/pylibcudf/pyproject.toml                  | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index ace55a15c09..8a64ebf40c5 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cramjam
 - cubinlinker
 - cuda-nvtx=11.8
-- cuda-python>=11.7.1,<12.0a0,!=11.8.4
+- cuda-python>=11.7.1,<12.0a0,<=11.8.3
 - cuda-sanitizer-api=11.8.86
 - cuda-version=11.8
 - cudatoolkit
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index d20db44497e..5f779c3170f 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -21,7 +21,7 @@ dependencies:
 - cuda-nvcc
 - cuda-nvrtc-dev
 - cuda-nvtx-dev
-- cuda-python>=12.0,<13.0a0,!=12.6.1
+- cuda-python>=12.0,<13.0a0,<=12.6.0
 - cuda-sanitizer-api
 - cuda-version=12.5
 - cupy>=12.0.0
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 6debcb281b1..2aafcae072d 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -91,7 +91,7 @@ requirements:
     - cudatoolkit
     - ptxcompiler >=0.7.0
     - cubinlinker  # CUDA enhanced compatibility.
-    - cuda-python >=11.7.1,<12.0a0,!=11.8.4
+    - cuda-python >=11.7.1,<12.0a0,<=11.8.3
     {% else %}
     - cuda-cudart
     - libcufile  # [linux64]
@@ -100,7 +100,7 @@ requirements:
     # TODO: Add nvjitlink here
     # xref: https://github.com/rapidsai/cudf/issues/12822
     - cuda-nvrtc
-    - cuda-python >=12.0,<13.0a0,!=12.6.1
+    - cuda-python >=12.0,<13.0a0,<=12.6.0
     - pynvjitlink
     {% endif %}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
index 92ca495f972..ec3fcd59c62 100644
--- a/conda/recipes/pylibcudf/meta.yaml
+++ b/conda/recipes/pylibcudf/meta.yaml
@@ -83,9 +83,9 @@ requirements:
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
     {% if cuda_major == "11" %}
-    - cuda-python >=11.7.1,<12.0a0,!=11.8.4
+    - cuda-python >=11.7.1,<12.0a0,<=11.8.3
     {% else %}
-    - cuda-python >=12.0,<13.0a0,!=12.6.1
+    - cuda-python >=12.0,<13.0a0,<=12.6.0
     {% endif %}
     - nvtx >=0.2.1
     - packaging
diff --git a/dependencies.yaml b/dependencies.yaml
index 41ac6ce1808..4c6aefe996f 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -659,10 +659,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cuda-python>=12.0,<13.0a0,!=12.6.1
+              - cuda-python>=12.0,<13.0a0,<=12.6.0
           - matrix: {cuda: "11.*"}
             packages: &run_pylibcudf_packages_all_cu11
-              - cuda-python>=11.7.1,<12.0a0,!=11.8.4
+              - cuda-python>=11.7.1,<12.0a0,<=11.8.3
           - {matrix: null, packages: *run_pylibcudf_packages_all_cu11}
   run_cudf:
     common:
@@ -685,10 +685,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cuda-python>=12.0,<13.0a0,!=12.6.1
+              - cuda-python>=12.0,<13.0a0,<=12.6.0
           - matrix: {cuda: "11.*"}
             packages: &run_cudf_packages_all_cu11
-              - cuda-python>=11.7.1,<12.0a0,!=11.8.4
+              - cuda-python>=11.7.1,<12.0a0,<=11.8.3
           - {matrix: null, packages: *run_cudf_packages_all_cu11}
       - output_types: conda
         matrices:
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 53f22a11e6b..1eadceaaccd 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -20,7 +20,7 @@ requires-python = ">=3.10"
 dependencies = [
     "cachetools",
     "cubinlinker",
-    "cuda-python>=11.7.1,<12.0a0,!=11.8.4",
+    "cuda-python>=11.7.1,<12.0a0,<=11.8.3",
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
     "libcudf==24.12.*,>=0.0.0a0",
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index e8052dfba4c..b2cec80f484 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -18,7 +18,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "cuda-python>=11.7.1,<12.0a0,!=11.8.4",
+    "cuda-python>=11.7.1,<12.0a0,<=11.8.3",
     "libcudf==24.12.*,>=0.0.0a0",
     "nvtx>=0.2.1",
     "packaging",

From bbd3b43719545754e9a1f6b204aad5b143f48419 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Thu, 7 Nov 2024 01:57:47 -0800
Subject: [PATCH 4/7] Fix the example in documentation for `get_dremel_data()`
 (#17242)

Closes #11396. Fixes the example in the documentation of `get_dremel_data()`.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/17242
---
 cpp/include/cudf/lists/detail/dremel.hpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp
index 96ee30dd261..f45da8e8d8d 100644
--- a/cpp/include/cudf/lists/detail/dremel.hpp
+++ b/cpp/include/cudf/lists/detail/dremel.hpp
@@ -58,7 +58,7 @@ struct dremel_data {
 };
 
 /**
- * @brief Get the dremel offsets and repetition and definition levels for a LIST column
+ * @brief Get the dremel offsets, repetition levels, and definition levels for a LIST column
  *
  * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is
  * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale
@@ -74,7 +74,7 @@ struct dremel_data {
  *
  * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/
  * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da
- * https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet
+ * https://blog.x.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet
  *
  * The remainder of this documentation assumes familiarity with the Dremel concepts.
  *
@@ -102,16 +102,17 @@ struct dremel_data {
  * ```
  * We can represent it in cudf format with two level of offsets like this:
  * ```
- * Level 0 offsets = {0, 0, 3, 5, 6}
+ * Level 0 offsets = {0, 0, 3, 4}
  * Level 1 offsets = {0, 0, 3, 5, 5}
  * Values          = {1, 2, 3, 4, 5}
  * ```
- * The desired result of this function is the repetition and definition level values that
- * correspond to the data values:
+ * This function returns the dremel offsets, repetition levels, and definition levels
+ * that correspond to the data values:
  * ```
- * col = {[], [[], [1, 2, 3], [4, 5]], [[]]}
- * def = { 0    1,  2, 2, 2,   2, 2,     1 }
- * rep = { 0,   0,  0, 2, 2,   1, 2,     0 }
+ * col =            {[], [[], [1, 2, 3], [4, 5]], [[]]}
+ * dremel_offsets = { 0,  1,                       7, 8}
+ * def_levels     = { 0,  1,   2, 2, 2,   2, 2,    1 }
+ * rep_levels     = { 0,  0,   1, 2, 2,   1, 2,    0 }
  * ```
  *
  * Since repetition and definition levels arrays contain a value for each empty list, the size of

From e29e0ab477f4a541752a578f8769d8dd816ffbe8 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 7 Nov 2024 06:14:58 -0500
Subject: [PATCH 5/7] Move strings/numeric convert benchmarks to nvbench
 (#17255)

Moves the `cpp/benchmarks/string/convert_numerics.cpp` and `cpp/benchmarks/string/convert_fixed_point.cpp` benchmark implementations from google-bench to nvbench.

Authors:
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Shruti Shivakumar (https://github.com/shrshi)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/17255
---
 cpp/benchmarks/CMakeLists.txt                 |   4 +-
 cpp/benchmarks/string/convert_fixed_point.cpp | 111 +++++---------
 cpp/benchmarks/string/convert_numerics.cpp    | 138 ++++++------------
 3 files changed, 79 insertions(+), 174 deletions(-)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 68781889c53..bdc360c082b 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -358,8 +358,6 @@ ConfigureBench(
   STRINGS_BENCH
   string/convert_datetime.cpp
   string/convert_durations.cpp
-  string/convert_fixed_point.cpp
-  string/convert_numerics.cpp
   string/copy.cu
   string/factory.cu
   string/filter.cpp
@@ -375,6 +373,8 @@ ConfigureNVBench(
   string/char_types.cpp
   string/combine.cpp
   string/contains.cpp
+  string/convert_fixed_point.cpp
+  string/convert_numerics.cpp
   string/copy_if_else.cpp
   string/copy_range.cpp
   string/count.cpp
diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp
index e5bd794e405..97e114c0795 100644
--- a/cpp/benchmarks/string/convert_fixed_point.cpp
+++ b/cpp/benchmarks/string/convert_fixed_point.cpp
@@ -16,93 +16,48 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/strings/convert/convert_fixed_point.hpp>
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/types.hpp>
 
-namespace {
+#include <nvbench/nvbench.cuh>
 
-std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
-{
-  auto result =
-    create_random_column(cudf::type_id::FLOAT32, row_count{static_cast<cudf::size_type>(rows)});
-  return cudf::strings::from_floats(result->view());
-}
-
-}  // anonymous namespace
-
-class StringsToFixedPoint : public cudf::benchmark {};
-
-template <typename fixed_point_type>
-void convert_to_fixed_point(benchmark::State& state)
-{
-  auto const rows         = static_cast<cudf::size_type>(state.range(0));
-  auto const strings_col  = get_strings_column(rows);
-  auto const strings_view = cudf::strings_column_view(strings_col->view());
-  auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    auto volatile results = cudf::strings::to_fixed_point(strings_view, dtype);
-  }
+using Types = nvbench::type_list<numeric::decimal32, numeric::decimal64>;
 
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
-}
-
-class StringsFromFixedPoint : public cudf::benchmark {};
+NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal32, "decimal32", "decimal32");
+NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal64, "decimal64", "decimal64");
 
-template <typename fixed_point_type>
-void convert_from_fixed_point(benchmark::State& state)
+template <typename DataType>
+void bench_convert_fixed_point(nvbench::state& state, nvbench::type_list<DataType>)
 {
-  auto const rows        = static_cast<cudf::size_type>(state.range(0));
-  auto const strings_col = get_strings_column(rows);
-  auto const dtype = cudf::data_type{cudf::type_to_id<fixed_point_type>(), numeric::scale_type{-2}};
-  auto const fp_col =
-    cudf::strings::to_fixed_point(cudf::strings_column_view(strings_col->view()), dtype);
-
-  std::unique_ptr<cudf::column> results = nullptr;
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    results = cudf::strings::from_fixed_point(fp_col->view());
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const from_num = state.get_string("dir") == "from";  // true: fixed-point -> strings
+
+  auto const data_type = cudf::data_type{cudf::type_to_id<DataType>(), numeric::scale_type{-2}};
+  auto const fp_col    = create_random_column(data_type.id(), row_count{num_rows});
+
+  auto const strings_col = cudf::strings::from_fixed_point(fp_col->view());
+  auto const sv          = cudf::strings_column_view(strings_col->view());
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+
+  if (from_num) {  // dir == "from": reads fixed-point values, writes string chars
+    state.add_global_memory_reads<int8_t>(num_rows * cudf::size_of(data_type));
+    state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { cudf::strings::from_fixed_point(fp_col->view()); });
+  } else {  // dir == "to": reads string chars, writes fixed-point values
+    state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
+    state.add_global_memory_writes<int8_t>(num_rows * cudf::size_of(data_type));
+    state.exec(nvbench::exec_tag::sync,
+               [&](nvbench::launch& launch) { cudf::strings::to_fixed_point(sv, data_type); });
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
-     rows * cudf::size_of(dtype)));
 }
 
-#define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type)                  \
-  BENCHMARK_DEFINE_F(StringsToFixedPoint, name)(::benchmark::State & state) \
-  {                                                                         \
-    convert_to_fixed_point<fixed_point_type>(state);                        \
-  }                                                                         \
-  BENCHMARK_REGISTER_F(StringsToFixedPoint, name)                           \
-    ->RangeMultiplier(4)                                                    \
-    ->Range(1 << 12, 1 << 24)                                               \
-    ->UseManualTime()                                                       \
-    ->Unit(benchmark::kMicrosecond);
-
-#define CONVERT_FROM_FIXED_POINT_BMD(name, fixed_point_type)                  \
-  BENCHMARK_DEFINE_F(StringsFromFixedPoint, name)(::benchmark::State & state) \
-  {                                                                           \
-    convert_from_fixed_point<fixed_point_type>(state);                        \
-  }                                                                           \
-  BENCHMARK_REGISTER_F(StringsFromFixedPoint, name)                           \
-    ->RangeMultiplier(4)                                                      \
-    ->Range(1 << 12, 1 << 24)                                                 \
-    ->UseManualTime()                                                         \
-    ->Unit(benchmark::kMicrosecond);
-
-CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal32, numeric::decimal32);
-CONVERT_TO_FIXED_POINT_BMD(strings_to_decimal64, numeric::decimal64);
-
-CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal32, numeric::decimal32);
-CONVERT_FROM_FIXED_POINT_BMD(strings_from_decimal64, numeric::decimal64);
+NVBENCH_BENCH_TYPES(bench_convert_fixed_point, NVBENCH_TYPE_AXES(Types))
+  .set_name("fixed_point")
+  .set_type_axes_names({"DataType"})
+  .add_string_axis("dir", {"to", "from"})
+  .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});
diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp
index 8f875c5c80f..e1f650dd6cd 100644
--- a/cpp/benchmarks/string/convert_numerics.cpp
+++ b/cpp/benchmarks/string/convert_numerics.cpp
@@ -16,117 +16,67 @@
 
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/strings/convert/convert_integers.hpp>
 #include <cudf/types.hpp>
 
-namespace {
+#include <nvbench/nvbench.cuh>
 
-template <typename NumericType>
-std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
-{
-  return create_random_column(cudf::type_to_id<NumericType>(), row_count{rows});
-}
+namespace {
 
 template <typename NumericType>
-std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
+std::unique_ptr<cudf::column> get_strings_column(cudf::column_view const& nv)
 {
-  auto const numerics_col = get_numerics_column<NumericType>(rows);
   if constexpr (std::is_floating_point_v<NumericType>) {
-    return cudf::strings::from_floats(numerics_col->view());
+    return cudf::strings::from_floats(nv);
   } else {
-    return cudf::strings::from_integers(numerics_col->view());
-  }
-}
-}  // anonymous namespace
-
-class StringsToNumeric : public cudf::benchmark {};
-
-template <typename NumericType>
-void convert_to_number(benchmark::State& state)
-{
-  auto const rows = static_cast<cudf::size_type>(state.range(0));
-
-  auto const strings_col  = get_strings_column<NumericType>(rows);
-  auto const strings_view = cudf::strings_column_view(strings_col->view());
-  auto const col_type     = cudf::type_to_id<NumericType>();
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    if constexpr (std::is_floating_point_v<NumericType>) {
-      cudf::strings::to_floats(strings_view, cudf::data_type{col_type});
-    } else {
-      cudf::strings::to_integers(strings_view, cudf::data_type{col_type});
-    }
+    return cudf::strings::from_integers(nv);
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
 }
+}  // namespace
 
-class StringsFromNumeric : public cudf::benchmark {};
+using Types = nvbench::type_list<float, double, int32_t, int64_t, uint8_t, uint16_t>;
 
 template <typename NumericType>
-void convert_from_number(benchmark::State& state)
+void bench_convert_number(nvbench::state& state, nvbench::type_list<NumericType>)
 {
-  auto const rows = static_cast<cudf::size_type>(state.range(0));
-
-  auto const numerics_col  = get_numerics_column<NumericType>(rows);
-  auto const numerics_view = numerics_col->view();
-
-  std::unique_ptr<cudf::column> results = nullptr;
-
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);
-    if constexpr (std::is_floating_point_v<NumericType>)
-      results = cudf::strings::from_floats(numerics_view);
-    else
-      results = cudf::strings::from_integers(numerics_view);
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const from_num = state.get_string("dir") == "from";
+
+  auto const data_type = cudf::data_type(cudf::type_to_id<NumericType>());
+  auto const num_col   = create_random_column(data_type.id(), row_count{num_rows});
+
+  auto const strings_col = get_strings_column<NumericType>(num_col->view());
+  auto const sv          = cudf::strings_column_view(strings_col->view());
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+
+  if (from_num) {  // "from": numeric -> string (reads numerics, writes chars)
+    state.add_global_memory_reads<NumericType>(num_rows);
+    state.add_global_memory_writes<int8_t>(sv.chars_size(stream));
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      if constexpr (std::is_floating_point_v<NumericType>) {
+        cudf::strings::from_floats(num_col->view());
+      } else {
+        cudf::strings::from_integers(num_col->view());
+      }
+    });
+  } else {  // "to": string -> numeric (reads chars, writes numerics)
+    state.add_global_memory_reads<int8_t>(sv.chars_size(stream));
+    state.add_global_memory_writes<NumericType>(num_rows);
+    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+      if constexpr (std::is_floating_point_v<NumericType>)
+        cudf::strings::to_floats(sv, data_type);
+      else
+        cudf::strings::to_integers(sv, data_type);
+    });
   }
-
-  // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(
-    state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
-     rows * sizeof(NumericType)));
 }
 
-#define CONVERT_TO_NUMERICS_BD(name, type)                               \
-  BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
-  {                                                                      \
-    convert_to_number<type>(state);                                      \
-  }                                                                      \
-  BENCHMARK_REGISTER_F(StringsToNumeric, name)                           \
-    ->RangeMultiplier(4)                                                 \
-    ->Range(1 << 10, 1 << 17)                                            \
-    ->UseManualTime()                                                    \
-    ->Unit(benchmark::kMicrosecond);
-
-#define CONVERT_FROM_NUMERICS_BD(name, type)                               \
-  BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
-  {                                                                        \
-    convert_from_number<type>(state);                                      \
-  }                                                                        \
-  BENCHMARK_REGISTER_F(StringsFromNumeric, name)                           \
-    ->RangeMultiplier(4)                                                   \
-    ->Range(1 << 10, 1 << 17)                                              \
-    ->UseManualTime()                                                      \
-    ->Unit(benchmark::kMicrosecond);
-
-CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
-CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
-CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
-CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
-CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
-CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);
-
-CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
-CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
-CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
-CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);
+NVBENCH_BENCH_TYPES(bench_convert_number, NVBENCH_TYPE_AXES(Types))
+  .set_name("numeric")
+  .set_type_axes_names({"NumericType"})
+  .add_string_axis("dir", {"to", "from"})
+  .add_int64_axis("num_rows", {1 << 16, 1 << 18, 1 << 20, 1 << 22});

From 4cbc15aaf61a64e21a6eae0c5edf66ddf73f3f14 Mon Sep 17 00:00:00 2001
From: Basit Ayantunde <rlamarrr@gmail.com>
Date: Thu, 7 Nov 2024 14:05:35 +0000
Subject: [PATCH 6/7] Added ast tree to simplify expression lifetime management
 (#17156)

This merge request follows up on https://github.com/rapidsai/cudf/issues/10744.
It attempts to simplify managing expressions by adding a class called `ast::tree`. The tree owns and holds related expressions together; when the tree is destroyed, all of its expressions are destroyed with it. Ideally we would use a bump allocator for allocating the expressions instead of the current `std::vector<std::shared_ptr<expression>>`.

We'd also ideally use a `cuda::std::inplace_vector` for storing the operands of the `operation` class, but that's in a newer version of CCCL.

Authors:
  - Basit Ayantunde (https://github.com/lamarrr)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Lawrence Mitchell (https://github.com/wence-)
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/17156
---
 .../cudf/ast/detail/expression_parser.hpp     |   6 +-
 cpp/include/cudf/ast/expressions.hpp          | 100 +++++++++++++++++-
 cpp/src/ast/expression_parser.cpp             |   2 +-
 cpp/src/ast/expressions.cpp                   |  24 +++--
 cpp/src/io/parquet/predicate_pushdown.cpp     |   7 +-
 cpp/src/io/parquet/reader_impl_helpers.hpp    |   2 +-
 cpp/tests/CMakeLists.txt                      |   2 +-
 cpp/tests/ast/ast_tree_tests.cpp              |  79 ++++++++++++++
 cpp/tests/ast/transform_tests.cpp             |   5 +-
 9 files changed, 207 insertions(+), 20 deletions(-)
 create mode 100644 cpp/tests/ast/ast_tree_tests.cpp

diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp
index f4cce8e6da6..b5973d0ace9 100644
--- a/cpp/include/cudf/ast/detail/expression_parser.hpp
+++ b/cpp/include/cudf/ast/detail/expression_parser.hpp
@@ -19,6 +19,10 @@
 #include <cudf/ast/expressions.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
+#include <cudf/utilities/memory_resource.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <thrust/scan.h>
 
 #include <functional>
 #include <numeric>
@@ -296,7 +300,7 @@ class expression_parser {
    * @return The indices of the operands stored in the data references.
    */
   std::vector<cudf::size_type> visit_operands(
-    std::vector<std::reference_wrapper<expression const>> operands);
+    cudf::host_span<std::reference_wrapper<cudf::ast::expression const> const> operands);
 
   /**
    * @brief Add a data reference to the internal list.
diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp
index 4299ee5f20f..bcc9ad1b391 100644
--- a/cpp/include/cudf/ast/expressions.hpp
+++ b/cpp/include/cudf/ast/expressions.hpp
@@ -22,6 +22,8 @@
 #include <cudf/utilities/error.hpp>
 
 #include <cstdint>
+#include <memory>
+#include <vector>
 
 namespace CUDF_EXPORT cudf {
 namespace ast {
@@ -478,7 +480,7 @@ class operation : public expression {
    *
    * @return Vector of operands
    */
-  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> get_operands() const
+  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
   {
     return operands;
   }
@@ -506,8 +508,8 @@ class operation : public expression {
   };
 
  private:
-  ast_operator const op;
-  std::vector<std::reference_wrapper<expression const>> const operands;
+  ast_operator op;
+  std::vector<std::reference_wrapper<expression const>> operands;
 };
 
 /**
@@ -552,6 +554,98 @@ class column_name_reference : public expression {
   std::string column_name;
 };
 
+/**
+ * @brief An AST expression tree. It owns and contains multiple dependent expressions. All the
+ * expressions are destroyed when the tree is destructed.
+ */
+class tree {
+ public:
+  /**
+   * @brief construct an empty ast tree
+   */
+  tree() = default;
+
+  /**
+   * @brief Moves the ast tree
+   */
+  tree(tree&&) = default;
+
+  /**
+   * @brief move-assigns the AST tree
+   * @returns a reference to the move-assigned tree
+   */
+  tree& operator=(tree&&) = default;
+
+  ~tree() = default;
+
+  // the tree is not copyable
+  tree(tree const&)            = delete;
+  tree& operator=(tree const&) = delete;
+
+  /**
+   * @brief Add an expression to the AST tree
+   * @param args Arguments to use to construct the ast expression
+   * @returns a reference to the added expression
+   */
+  template <typename Expr, typename... Args>
+  Expr const& emplace(Args&&... args)
+  {
+    static_assert(std::is_base_of_v<expression, Expr>);
+    auto expr            = std::make_shared<Expr>(std::forward<Args>(args)...);
+    Expr const& expr_ref = *expr;
+    expressions.emplace_back(std::static_pointer_cast<expression>(std::move(expr)));
+    return expr_ref;
+  }
+
+  /**
+   * @brief Add an expression to the AST tree
+   * @param expr AST expression to be added
+   * @returns a reference to the added expression
+   */
+  template <typename Expr>
+  Expr const& push(Expr expr)
+  {
+    return emplace<Expr>(std::move(expr));
+  }
+
+  /**
+   * @brief get the first expression in the tree
+   * @returns the first inserted expression into the tree (the tree must not be empty)
+   */
+  expression const& front() const { return *expressions.front(); }
+
+  /**
+   * @brief get the last expression in the tree
+   * @returns the last inserted expression into the tree (the tree must not be empty)
+   */
+  expression const& back() const { return *expressions.back(); }
+
+  /**
+   * @brief get the number of expressions added to the tree
+   * @returns the number of expressions added to the tree
+   */
+  size_t size() const { return expressions.size(); }
+
+  /**
+   * @brief get the expression at an index in the tree. Index is checked.
+   * @param index index of expression in the ast tree
+   * @returns the expression at the specified index
+   */
+  expression const& at(size_t index) const { return *expressions.at(index); }
+
+  /**
+   * @brief get the expression at an index in the tree. Index is unchecked.
+   * @param index index of expression in the ast tree
+   * @returns the expression at the specified index
+   */
+  expression const& operator[](size_t index) const { return *expressions[index]; }
+
+ private:
+  // TODO: use better ownership semantics, the shared_ptr here is redundant. Consider using a bump
+  // allocator with type-erased deleters.
+  std::vector<std::shared_ptr<expression>> expressions;
+};
+
 /** @} */  // end of group
 }  // namespace ast
 
diff --git a/cpp/src/ast/expression_parser.cpp b/cpp/src/ast/expression_parser.cpp
index 5815ce33e33..d0e4c59ca54 100644
--- a/cpp/src/ast/expression_parser.cpp
+++ b/cpp/src/ast/expression_parser.cpp
@@ -207,7 +207,7 @@ cudf::data_type expression_parser::output_type() const
 }
 
 std::vector<cudf::size_type> expression_parser::visit_operands(
-  std::vector<std::reference_wrapper<expression const>> operands)
+  cudf::host_span<std::reference_wrapper<expression const> const> operands)
 {
   auto operand_data_reference_indices = std::vector<cudf::size_type>();
   for (auto const& operand : operands) {
diff --git a/cpp/src/ast/expressions.cpp b/cpp/src/ast/expressions.cpp
index 4c2b56dd4f5..b7e4e4609cb 100644
--- a/cpp/src/ast/expressions.cpp
+++ b/cpp/src/ast/expressions.cpp
@@ -20,36 +20,41 @@
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
 
+#include <stdexcept>
+
 namespace cudf {
 namespace ast {
 
-operation::operation(ast_operator op, expression const& input) : op(op), operands({input})
+operation::operation(ast_operator op, expression const& input) : op{op}, operands{input}
 {
-  if (cudf::ast::detail::ast_operator_arity(op) != 1) {
-    CUDF_FAIL("The provided operator is not a unary operator.");
-  }
+  CUDF_EXPECTS(cudf::ast::detail::ast_operator_arity(op) == 1,
+               "The provided operator is not a unary operator.",
+               std::invalid_argument);
 }
 
 operation::operation(ast_operator op, expression const& left, expression const& right)
-  : op(op), operands({left, right})
+  : op{op}, operands{left, right}
 {
-  if (cudf::ast::detail::ast_operator_arity(op) != 2) {
-    CUDF_FAIL("The provided operator is not a binary operator.");
-  }
+  CUDF_EXPECTS(cudf::ast::detail::ast_operator_arity(op) == 2,
+               "The provided operator is not a binary operator.",
+               std::invalid_argument);
 }
 
 cudf::size_type literal::accept(detail::expression_parser& visitor) const
 {
   return visitor.visit(*this);
 }
+
 cudf::size_type column_reference::accept(detail::expression_parser& visitor) const
 {
   return visitor.visit(*this);
 }
+
 cudf::size_type operation::accept(detail::expression_parser& visitor) const
 {
   return visitor.visit(*this);
 }
+
 cudf::size_type column_name_reference::accept(detail::expression_parser& visitor) const
 {
   return visitor.visit(*this);
@@ -60,16 +65,19 @@ auto literal::accept(detail::expression_transformer& visitor) const
 {
   return visitor.visit(*this);
 }
+
 auto column_reference::accept(detail::expression_transformer& visitor) const
   -> decltype(visitor.visit(*this))
 {
   return visitor.visit(*this);
 }
+
 auto operation::accept(detail::expression_transformer& visitor) const
   -> decltype(visitor.visit(*this))
 {
   return visitor.visit(*this);
 }
+
 auto column_name_reference::accept(detail::expression_transformer& visitor) const
   -> decltype(visitor.visit(*this))
 {
diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp
index a965f3325d5..cd3dcd2bce4 100644
--- a/cpp/src/io/parquet/predicate_pushdown.cpp
+++ b/cpp/src/io/parquet/predicate_pushdown.cpp
@@ -25,6 +25,7 @@
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/memory_resource.hpp>
+#include <cudf/utilities/span.hpp>
 #include <cudf/utilities/traits.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
@@ -373,7 +374,7 @@ class stats_expression_converter : public ast::detail::expression_transformer {
 
  private:
   std::vector<std::reference_wrapper<ast::expression const>> visit_operands(
-    std::vector<std::reference_wrapper<ast::expression const>> operands)
+    cudf::host_span<std::reference_wrapper<ast::expression const> const> operands)
   {
     std::vector<std::reference_wrapper<ast::expression const>> transformed_operands;
     for (auto const& operand : operands) {
@@ -553,7 +554,7 @@ std::reference_wrapper<ast::expression const> named_to_reference_converter::visi
 
 std::vector<std::reference_wrapper<ast::expression const>>
 named_to_reference_converter::visit_operands(
-  std::vector<std::reference_wrapper<ast::expression const>> operands)
+  cudf::host_span<std::reference_wrapper<ast::expression const> const> operands)
 {
   std::vector<std::reference_wrapper<ast::expression const>> transformed_operands;
   for (auto const& operand : operands) {
@@ -623,7 +624,7 @@ class names_from_expression : public ast::detail::expression_transformer {
   }
 
  private:
-  void visit_operands(std::vector<std::reference_wrapper<ast::expression const>> operands)
+  void visit_operands(cudf::host_span<std::reference_wrapper<ast::expression const> const> operands)
   {
     for (auto const& operand : operands) {
       operand.get().accept(*this);
diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp
index 6487c92f48f..fd692c0cdd6 100644
--- a/cpp/src/io/parquet/reader_impl_helpers.hpp
+++ b/cpp/src/io/parquet/reader_impl_helpers.hpp
@@ -425,7 +425,7 @@ class named_to_reference_converter : public ast::detail::expression_transformer
 
  private:
   std::vector<std::reference_wrapper<ast::expression const>> visit_operands(
-    std::vector<std::reference_wrapper<ast::expression const>> operands);
+    cudf::host_span<std::reference_wrapper<ast::expression const> const> operands);
 
   std::unordered_map<std::string, size_type> column_name_to_index;
   std::optional<std::reference_wrapper<ast::expression const>> _stats_expr;
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 23632f6fbba..e9ba58ba224 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -650,7 +650,7 @@ ConfigureTest(ENCODE_TEST encode/encode_tests.cpp)
 
 # ##################################################################################################
 # * ast tests -------------------------------------------------------------------------------------
-ConfigureTest(AST_TEST ast/transform_tests.cpp)
+ConfigureTest(AST_TEST ast/transform_tests.cpp ast/ast_tree_tests.cpp)
 
 # ##################################################################################################
 # * lists tests ----------------------------------------------------------------------------------
diff --git a/cpp/tests/ast/ast_tree_tests.cpp b/cpp/tests/ast/ast_tree_tests.cpp
new file mode 100644
index 00000000000..1a960c68e23
--- /dev/null
+++ b/cpp/tests/ast/ast_tree_tests.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/testing_main.hpp>
+
+#include <cudf/ast/expressions.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/transform.hpp>
+#include <cudf/types.hpp>
+
+template <typename T>
+using column_wrapper = cudf::test::fixed_width_column_wrapper<T>;
+
+TEST(AstTreeTest, ExpressionTree)
+{
+  namespace ast   = cudf::ast;
+  using op        = ast::ast_operator;
+  using operation = ast::operation;
+
+  // computes (y = mx + c)... and linearly interpolates them using interpolator t
+  auto m0_col = column_wrapper<float>{10, 20, 50, 100};
+  auto x0_col = column_wrapper<float>{10, 5, 2, 1};
+  auto c0_col = column_wrapper<float>{100, 100, 100, 100};
+
+  auto m1_col = column_wrapper<float>{10, 20, 50, 100};
+  auto x1_col = column_wrapper<float>{20, 10, 4, 2};
+  auto c1_col = column_wrapper<float>{200, 200, 200, 200};
+
+  auto one_scalar = cudf::numeric_scalar<float>{1};
+  auto t_scalar   = cudf::numeric_scalar<float>{0.5F};
+
+  auto table = cudf::table_view{{m0_col, x0_col, c0_col, m1_col, x1_col, c1_col}};
+
+  ast::tree tree{};
+
+  auto const& one = tree.push(ast::literal{one_scalar});
+  auto const& t   = tree.push(ast::literal{t_scalar});
+  auto const& m0  = tree.push(ast::column_reference(0));
+  auto const& x0  = tree.push(ast::column_reference(1));
+  auto const& c0  = tree.push(ast::column_reference(2));
+  auto const& m1  = tree.push(ast::column_reference(3));
+  auto const& x1  = tree.push(ast::column_reference(4));
+  auto const& c1  = tree.push(ast::column_reference(5));
+
+  // compute: y0 = m0 x0 + c0
+  auto const& y0 = tree.push(operation{op::ADD, tree.push(operation{op::MUL, m0, x0}), c0});
+
+  // compute: y1 = m1 x1 + c1
+  auto const& y1 = tree.push(operation{op::ADD, tree.push(operation{op::MUL, m1, x1}), c1});
+
+  // compute weighted: (1 - t) * y0
+  auto const& y0_w = tree.push(operation{op::MUL, tree.push(operation{op::SUB, one, t}), y0});
+
+  // compute weighted: y = t * y1
+  auto const& y1_w = tree.push(operation{op::MUL, t, y1});
+
+  // add weighted: result = lerp(y0, y1, t) = (1 - t) * y0 + t * y1
+  auto result = cudf::compute_column(table, tree.push(operation{op::ADD, y0_w, y1_w}));
+
+  auto expected = column_wrapper<float>{300, 300, 300, 300};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp
index 7af88d8aa34..e28d92bb615 100644
--- a/cpp/tests/ast/transform_tests.cpp
+++ b/cpp/tests/ast/transform_tests.cpp
@@ -530,9 +530,10 @@ TEST_F(TransformTest, UnaryTrigonometry)
 TEST_F(TransformTest, ArityCheckFailure)
 {
   auto col_ref_0 = cudf::ast::column_reference(0);
-  EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0), cudf::logic_error);
+  EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0),
+               std::invalid_argument);
   EXPECT_THROW(cudf::ast::operation(cudf::ast::ast_operator::ABS, col_ref_0, col_ref_0),
-               cudf::logic_error);
+               std::invalid_argument);
 }
 
 TEST_F(TransformTest, StringComparison)

From e4c52ddb23b3524b665c7a97c905ed66fb341ea6 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 7 Nov 2024 08:26:06 -0600
Subject: [PATCH 7/7] `cudf-polars` string/numeric casting (#17076)

Depends on https://github.com/rapidsai/cudf/pull/16991
Part of https://github.com/rapidsai/cudf/issues/17060

Implements cross casting from string <-> numeric types in `cudf-polars`

Authors:
  - https://github.com/brandon-b-miller
  - Matthew Murray (https://github.com/Matt711)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Matthew Murray (https://github.com/Matt711)

URL: https://github.com/rapidsai/cudf/pull/17076
---
 cpp/include/cudf/utilities/traits.hpp         |  24 ++++
 cpp/src/utilities/traits.cpp                  |  13 ++
 .../cudf_polars/containers/column.py          |  56 ++++++++-
 .../cudf_polars/dsl/expressions/unary.py      |   4 +-
 .../cudf_polars/cudf_polars/testing/plugin.py |   1 +
 .../cudf_polars/cudf_polars/utils/dtypes.py   |  65 +++++++++-
 .../tests/expressions/test_casting.py         |   2 +-
 .../tests/expressions/test_numeric_binops.py  |  10 --
 .../tests/expressions/test_stringfunction.py  | 117 ++++++++++++++++++
 python/cudf_polars/tests/utils/test_dtypes.py |  31 ++++-
 .../pylibcudf/libcudf/utilities/traits.pxd    |   1 +
 .../pylibcudf/pylibcudf/tests/test_traits.py  |   5 +
 python/pylibcudf/pylibcudf/traits.pyx         |   6 +
 13 files changed, 311 insertions(+), 24 deletions(-)

diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index cf8413b597f..22a67ca049a 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -301,6 +301,30 @@ constexpr inline bool is_integral_not_bool()
  */
 bool is_integral_not_bool(data_type type);
 
+/**
+ * @brief Indicates whether the type `T` is a numeric type but not bool type.
+ *
+ * @tparam T  The type to verify
+ * @return true `T` is numeric but not bool
+ * @return false  `T` is not numeric or is bool
+ */
+template <typename T>
+constexpr inline bool is_numeric_not_bool()
+{
+  return cudf::is_numeric<T>() and not std::is_same_v<T, bool>;
+}
+
+/**
+ * @brief Indicates whether `type` is a numeric `data_type` but not BOOL8
+ *
+ * "Numeric" types are integral/floating point types such as `INT*` or `FLOAT*`.
+ *
+ * @param type The `data_type` to verify
+ * @return true `type` is numeric but not bool
+ * @return false `type` is not numeric or is bool
+ */
+bool is_numeric_not_bool(data_type type);
+
 /**
  * @brief Indicates whether the type `T` is a floating point type.
  *
diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp
index c1e71f5f8f9..41ee4e960b6 100644
--- a/cpp/src/utilities/traits.cpp
+++ b/cpp/src/utilities/traits.cpp
@@ -169,6 +169,19 @@ bool is_integral_not_bool(data_type type)
   return cudf::type_dispatcher(type, is_integral_not_bool_impl{});
 }
 
+struct is_numeric_not_bool_impl {
+  template <typename T>
+  constexpr bool operator()()
+  {
+    return is_numeric_not_bool<T>();
+  }
+};
+
+bool is_numeric_not_bool(data_type type)
+{
+  return cudf::type_dispatcher(type, is_numeric_not_bool_impl{});
+}
+
 struct is_floating_point_impl {
   template <typename T>
   constexpr bool operator()()
diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index 00186098e54..93d95346a37 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -8,7 +8,18 @@
 import functools
 from typing import TYPE_CHECKING
 
+from polars.exceptions import InvalidOperationError
+
 import pylibcudf as plc
+from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
+from pylibcudf.strings.convert.convert_integers import (
+    from_integers,
+    is_integer,
+    to_integers,
+)
+from pylibcudf.traits import is_floating_point
+
+from cudf_polars.utils.dtypes import is_order_preserving_cast
 
 if TYPE_CHECKING:
     from typing_extensions import Self
@@ -129,11 +140,46 @@ def astype(self, dtype: plc.DataType) -> Column:
         This only produces a copy if the requested dtype doesn't match
         the current one.
         """
-        if self.obj.type() != dtype:
-            return Column(plc.unary.cast(self.obj, dtype), name=self.name).sorted_like(
-                self
-            )
-        return self
+        if self.obj.type() == dtype:
+            return self
+
+        if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
+            return Column(self._handle_string_cast(dtype), name=self.name)
+        else:
+            result = Column(plc.unary.cast(self.obj, dtype), name=self.name)
+            if is_order_preserving_cast(self.obj.type(), dtype):
+                return result.sorted_like(self)
+            return result
+
+    def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
+        if dtype.id() == plc.TypeId.STRING:
+            if is_floating_point(self.obj.type()):
+                return from_floats(self.obj)
+            else:
+                return from_integers(self.obj)
+        else:
+            if is_floating_point(dtype):
+                floats = is_float(self.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        floats,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                return to_floats(self.obj, dtype)
+            else:
+                integers = is_integer(self.obj)
+                if not plc.interop.to_arrow(
+                    plc.reduce.reduce(
+                        integers,
+                        plc.aggregation.all(),
+                        plc.DataType(plc.TypeId.BOOL8),
+                    )
+                ).as_py():
+                    raise InvalidOperationError("Conversion from `str` failed.")
+                return to_integers(self.obj, dtype)
 
     def copy_metadata(self, from_: pl.Series, /) -> Self:
         """
diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
index 6f22544c050..7999ec86068 100644
--- a/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
+++ b/python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -35,7 +35,7 @@ def __init__(self, dtype: plc.DataType, value: Expr) -> None:
         self.children = (value,)
         if not dtypes.can_cast(value.dtype, self.dtype):
             raise NotImplementedError(
-                f"Can't cast {self.dtype.id().name} to {value.dtype.id().name}"
+                f"Can't cast {value.dtype.id().name} to {self.dtype.id().name}"
             )
 
     def do_evaluate(
@@ -48,7 +48,7 @@ def do_evaluate(
         """Evaluate this expression given a dataframe for context."""
         (child,) = self.children
         column = child.evaluate(df, context=context, mapping=mapping)
-        return Column(plc.unary.cast(column.obj, self.dtype)).sorted_like(column)
+        return column.astype(self.dtype)
 
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index e01ccd05527..2f95cd38c57 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -158,6 +158,7 @@ def pytest_configure(config: pytest.Config) -> None:
     "tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU",
     "tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU",
+    "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user receives is wrong",
     "tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv",
     "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception",
     "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match",
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 1d0479802ca..a90c283ee54 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -12,10 +12,20 @@
 
 import polars as pl
 
+from pylibcudf.traits import (
+    is_floating_point,
+    is_integral_not_bool,
+    is_numeric_not_bool,
+)
+
+__all__ = [
+    "from_polars",
+    "downcast_arrow_lists",
+    "can_cast",
+    "is_order_preserving_cast",
+]
 import pylibcudf as plc
 
-__all__ = ["from_polars", "downcast_arrow_lists", "can_cast"]
-
 
 def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType:
     """
@@ -62,9 +72,54 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     True if casting is supported, False otherwise
     """
     return (
-        plc.traits.is_fixed_width(to)
-        and plc.traits.is_fixed_width(from_)
-        and plc.unary.is_supported_cast(from_, to)
+        (
+            plc.traits.is_fixed_width(to)
+            and plc.traits.is_fixed_width(from_)
+            and plc.unary.is_supported_cast(from_, to)
+        )
+        or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
+        or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_))
+    )
+
+
+def is_order_preserving_cast(from_: plc.DataType, to: plc.DataType) -> bool:
+    """
+    Determine if a cast would preserve the order of the source data.
+
+    Parameters
+    ----------
+    from_
+        Source datatype
+    to
+        Target datatype
+
+    Returns
+    -------
+    True if the cast is order-preserving, False otherwise
+    """
+    if from_.id() == to.id():
+        return True
+
+    if is_integral_not_bool(from_) and is_integral_not_bool(to):
+        # True if signedness is the same and the target is at least as wide
+        if plc.traits.is_unsigned(from_) == plc.traits.is_unsigned(to):
+            if plc.types.size_of(to) >= plc.types.size_of(from_):
+                return True
+        elif (plc.traits.is_unsigned(from_) and not plc.traits.is_unsigned(to)) and (
+            plc.types.size_of(to) > plc.types.size_of(from_)
+        ):
+            # Unsigned to signed is order preserving if target is strictly wider
+            # But signed to unsigned is never order preserving due to negative values
+            return True
+    elif (
+        is_floating_point(from_)
+        and is_floating_point(to)
+        and (plc.types.size_of(to) >= plc.types.size_of(from_))
+    ):
+        # True if the target float is at least as wide
+        return True
+    return (is_integral_not_bool(from_) and is_floating_point(to)) or (
+        is_floating_point(from_) and is_integral_not_bool(to)
     )
 
 
diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py
index 3e003054338..0722a0f198a 100644
--- a/python/cudf_polars/tests/expressions/test_casting.py
+++ b/python/cudf_polars/tests/expressions/test_casting.py
@@ -14,7 +14,7 @@
 _supported_dtypes = [(pl.Int8(), pl.Int64())]
 
 _unsupported_dtypes = [
-    (pl.String(), pl.Int64()),
+    (pl.Datetime("ns"), pl.Int64()),
 ]
 
 
diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py
index 8f68bbc460c..fa1ec3c19e4 100644
--- a/python/cudf_polars/tests/expressions/test_numeric_binops.py
+++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py
@@ -8,7 +8,6 @@
 
 from cudf_polars.testing.asserts import (
     assert_gpu_result_equal,
-    assert_ir_translation_raises,
 )
 
 dtypes = [
@@ -114,12 +113,3 @@ def test_binop_with_scalar(left_scalar, right_scalar):
     q = df.select(lop / rop)
 
     assert_gpu_result_equal(q)
-
-
-def test_numeric_to_string_cast_fails():
-    df = pl.DataFrame(
-        {"a": [1, 1, 2, 3, 3, 4, 1], "b": [None, 2, 3, 4, 5, 6, 7]}
-    ).lazy()
-    q = df.select(pl.col("a").cast(pl.String))
-
-    assert_ir_translation_raises(q, NotImplementedError)
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 4f6850ac977..8d7d970eb07 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -40,6 +40,79 @@ def ldf(with_nulls):
     )
 
 
+@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64])
+def integer_type(request):
+    return request.param
+
+
+@pytest.fixture(params=[pl.Float32, pl.Float64])
+def floating_type(request):
+    return request.param
+
+
+@pytest.fixture(params=[pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Float32, pl.Float64])
+def numeric_type(request):
+    return request.param
+
+
+@pytest.fixture
+def str_to_integer_data(with_nulls):
+    a = ["1", "2", "3", "4", "5", "6"]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": a})
+
+
+@pytest.fixture
+def str_to_float_data(with_nulls):
+    a = [
+        "1.1",
+        "2.2",
+        "3.3",
+        "4.4",
+        "5.5",
+        "6.6",
+        "inf",
+        "+inf",
+        "-inf",
+        "Inf",
+        "-Inf",
+        "nan",
+        "-1.234",
+        "2e2",
+    ]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": a})
+
+
+@pytest.fixture
+def str_from_integer_data(with_nulls, integer_type):
+    a = [1, 2, 3, 4, 5, 6]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": pl.Series(a, dtype=integer_type)})
+
+
+@pytest.fixture
+def str_from_float_data(with_nulls, floating_type):
+    a = [
+        1.1,
+        2.2,
+        3.3,
+        4.4,
+        5.5,
+        6.6,
+        float("inf"),
+        float("+inf"),
+        float("-inf"),
+        float("nan"),
+    ]
+    if with_nulls:
+        a[4] = None
+    return pl.LazyFrame({"a": pl.Series(a, dtype=floating_type)})
+
+
 slice_cases = [
     (1, 3),
     (0, 3),
@@ -337,3 +410,47 @@ def test_unsupported_regex_raises(pattern):
 
     q = df.select(pl.col("a").str.contains(pattern, strict=True))
     assert_ir_translation_raises(q, NotImplementedError)
+
+
+def test_string_to_integer(str_to_integer_data, integer_type):
+    query = str_to_integer_data.select(pl.col("a").cast(integer_type))
+    assert_gpu_result_equal(query)
+
+
+def test_string_from_integer(str_from_integer_data):
+    query = str_from_integer_data.select(pl.col("a").cast(pl.String))
+    assert_gpu_result_equal(query)
+
+
+def test_string_to_float(str_to_float_data, floating_type):
+    query = str_to_float_data.select(pl.col("a").cast(floating_type))
+    assert_gpu_result_equal(query)
+
+
+def test_string_from_float(request, str_from_float_data):
+    if str_from_float_data.collect_schema()["a"] == pl.Float32:
+        # libcudf formats floats to a fixed, hardcoded number of decimal
+        # places and discards any digits beyond that, which causes
+        # discrepancies for certain numbers. For instance, the float32
+        # representation of 1.1 is 1.100000023841858, which libcudf
+        # renders as 1.100000024. The float64 representation of 1.1 is
+        # 1.1000000000000000888, whose extra digits fall past the cutoff,
+        # so libcudf renders it as 1.1. Hence only the Float32 case is
+        # marked as an expected failure.
+        request.applymarker(pytest.mark.xfail(reason="libcudf truncation"))
+    query = str_from_float_data.select(pl.col("a").cast(pl.String))
+
+    # libcudf renders float('inf') as "inf"
+    # but polars renders float('inf') as "Inf", so compare in lowercase
+    query = query.select(pl.col("a").str.to_lowercase())
+    assert_gpu_result_equal(query)
+
+
+def test_string_to_numeric_invalid(numeric_type):
+    df = pl.LazyFrame({"a": ["a", "b", "c"]})
+    q = df.select(pl.col("a").cast(numeric_type))
+    assert_collect_raises(
+        q,
+        polars_except=pl.exceptions.InvalidOperationError,
+        cudf_except=pl.exceptions.ComputeError,
+    )
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
index bbdb4faa256..f63c2079e04 100644
--- a/python/cudf_polars/tests/utils/test_dtypes.py
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -7,7 +7,20 @@
 
 import polars as pl
 
-from cudf_polars.utils.dtypes import from_polars
+import pylibcudf as plc
+
+from cudf_polars.utils.dtypes import from_polars, is_order_preserving_cast
+
+INT8 = plc.DataType(plc.TypeId.INT8)
+INT16 = plc.DataType(plc.TypeId.INT16)
+INT32 = plc.DataType(plc.TypeId.INT32)
+INT64 = plc.DataType(plc.TypeId.INT64)
+UINT8 = plc.DataType(plc.TypeId.UINT8)
+UINT16 = plc.DataType(plc.TypeId.UINT16)
+UINT32 = plc.DataType(plc.TypeId.UINT32)
+UINT64 = plc.DataType(plc.TypeId.UINT64)
+FLOAT32 = plc.DataType(plc.TypeId.FLOAT32)
+FLOAT64 = plc.DataType(plc.TypeId.FLOAT64)
 
 
 @pytest.mark.parametrize(
@@ -30,3 +43,19 @@
 def test_unhandled_dtype_conversion_raises(pltype):
     with pytest.raises(NotImplementedError):
         _ = from_polars(pltype)
+
+
+def test_is_order_preserving_cast():
+    assert is_order_preserving_cast(INT8, INT8)  # Same type
+    assert is_order_preserving_cast(INT8, INT16)  # Widening to a larger signed int
+    assert is_order_preserving_cast(INT8, FLOAT32)  # Int to large enough float
+    assert is_order_preserving_cast(UINT8, UINT16)  # Unsigned to larger unsigned
+    assert is_order_preserving_cast(UINT8, FLOAT32)  # Unsigned to large enough float
+    assert is_order_preserving_cast(FLOAT32, FLOAT64)  # Float to larger float
+    assert is_order_preserving_cast(INT64, FLOAT32)  # Int to any float
+    assert is_order_preserving_cast(FLOAT32, INT32)  # Float to undersized int
+    assert is_order_preserving_cast(FLOAT32, INT64)  # Float to large int
+
+    assert not is_order_preserving_cast(INT16, INT8)  # Narrowing to a smaller int
+    assert not is_order_preserving_cast(INT8, UINT8)  # Signed to unsigned
+    assert not is_order_preserving_cast(FLOAT64, FLOAT32)  # Smaller float
diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
index 69765e44274..5533530754e 100644
--- a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
@@ -9,6 +9,7 @@ cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
     cdef bool is_relationally_comparable(data_type)
     cdef bool is_equality_comparable(data_type)
     cdef bool is_numeric(data_type)
+    cdef bool is_numeric_not_bool(data_type)
     cdef bool is_index_type(data_type)
     cdef bool is_unsigned(data_type)
     cdef bool is_integral(data_type)
diff --git a/python/pylibcudf/pylibcudf/tests/test_traits.py b/python/pylibcudf/pylibcudf/tests/test_traits.py
index 2570e8abd51..2c1708304eb 100644
--- a/python/pylibcudf/pylibcudf/tests/test_traits.py
+++ b/python/pylibcudf/pylibcudf/tests/test_traits.py
@@ -20,6 +20,11 @@ def test_is_numeric():
     assert not plc.traits.is_numeric(plc.DataType(plc.TypeId.LIST))
 
 
+def test_is_numeric_not_bool():
+    assert plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.FLOAT64))
+    assert not plc.traits.is_numeric_not_bool(plc.DataType(plc.TypeId.BOOL8))
+
+
 def test_is_index_type():
     assert plc.traits.is_index_type(plc.DataType(plc.TypeId.INT8))
     assert not plc.traits.is_index_type(plc.DataType(plc.TypeId.BOOL8))
diff --git a/python/pylibcudf/pylibcudf/traits.pyx b/python/pylibcudf/pylibcudf/traits.pyx
index 5a1c67e1f6c..9c52e0ac1ab 100644
--- a/python/pylibcudf/pylibcudf/traits.pyx
+++ b/python/pylibcudf/pylibcudf/traits.pyx
@@ -29,6 +29,12 @@ cpdef bool is_numeric(DataType typ):
     """
     return traits.is_numeric(typ.c_obj)
 
+cpdef bool is_numeric_not_bool(DataType typ):
+    """Checks if the given data type is numeric excluding booleans.
+
+    For details, see :cpp:func:`is_numeric_not_bool`.
+    """
+    return traits.is_numeric_not_bool(typ.c_obj)
 
 cpdef bool is_index_type(DataType typ):
     """Checks if the given data type is an index type.