From 95f0a33e377eff36bf2b20f25748489dbdb5e5b2 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 19 Sep 2024 11:46:22 -0400 Subject: [PATCH 1/8] DOC v24.12 Updates [skip ci] --- .github/workflows/build.yaml | 10 +++---- .github/workflows/pr.yaml | 12 ++++----- .github/workflows/test.yaml | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- .../all_cuda-114_arch-x86_64.yaml | 14 +++++----- .../all_cuda-118_arch-x86_64.yaml | 14 +++++----- .../all_cuda-125_arch-x86_64.yaml | 14 +++++----- dependencies.yaml | 26 +++++++++---------- docs/source/explicit_comms.rst | 2 +- pyproject.toml | 10 +++---- 11 files changed, 54 insertions(+), 54 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 67bbd027..3d097bcd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -72,7 +72,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 76014652..0e20bdaf 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,26 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 
with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -46,7 +46,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request # Package is pure Python and only ever requires one build. diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1a0e7d87..631a6173 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/VERSION b/VERSION index 7c7ba044..af28c42b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 42103004..7850211e 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -23,7 +23,7 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ dask-cuda -export RAPIDS_VERSION_NUMBER="24.10" +export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" diff --git a/conda/environments/all_cuda-114_arch-x86_64.yaml b/conda/environments/all_cuda-114_arch-x86_64.yaml index 3cfd9cb2..3c327ff0 100644 --- a/conda/environments/all_cuda-114_arch-x86_64.yaml +++ b/conda/environments/all_cuda-114_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.4 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-114_arch-x86_64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b7b99751..3931f3bf 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.8 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- 
rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 652a8f0c..760ae971 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -11,10 +11,10 @@ dependencies: - cuda-nvcc-impl - cuda-nvrtc - cuda-version=12.5 -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -26,13 +26,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 9e6b3a10..59ac8c01 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -158,7 +158,7 @@ dependencies: - numpy>=1.23,<3.0a0 - pandas>=1.3 - pynvml>=11.0.0,<11.5 - - rapids-dask-dependency==24.10.*,>=0.0.0a0 + - rapids-dask-dependency==24.12.*,>=0.0.0a0 - zict>=2.0.0 test_python: common: @@ -168,13 +168,13 @@ dependencies: - pytest-cov - output_types: [conda] packages: - - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 - - &dask_cudf_unsuffixed dask-cudf==24.10.*,>=0.0.0a0 - - distributed-ucxx==0.40.*,>=0.0.0a0 - - &kvikio_unsuffixed kvikio==24.10.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==24.12.*,>=0.0.0a0 + - distributed-ucxx==0.41.*,>=0.0.0a0 + - &kvikio_unsuffixed kvikio==24.12.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - ucx-proc=*=gpu - - ucxx==0.40.*,>=0.0.0a0 + - ucxx==0.41.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -194,16 +194,16 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.10.*,>=0.0.0a0 - - dask-cudf-cu12==24.10.*,>=0.0.0a0 - - ucx-py-cu12==0.40.*,>=0.0.0a0 + - cudf-cu12==24.12.*,>=0.0.0a0 + - dask-cudf-cu12==24.12.*,>=0.0.0a0 + - ucx-py-cu12==0.41.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.10.*,>=0.0.0a0 - - dask-cudf-cu11==24.10.*,>=0.0.0a0 - - ucx-py-cu11==0.40.*,>=0.0.0a0 + - cudf-cu11==24.12.*,>=0.0.0a0 + - dask-cudf-cu11==24.12.*,>=0.0.0a0 + - ucx-py-cu11==0.41.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed diff --git a/docs/source/explicit_comms.rst b/docs/source/explicit_comms.rst index af317056..db621977 100644 --- a/docs/source/explicit_comms.rst +++ b/docs/source/explicit_comms.rst @@ -14,4 +14,4 @@ Usage In order to use explicit-comms in Dask/Distributed automatically, simply define the environment variable ``DASK_EXPLICIT_COMMS=True`` or setting the ``"explicit-comms"`` key in the `Dask configuration `_. 
-It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. +It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. diff --git a/pyproject.toml b/pyproject.toml index 730225ad..fcf57276 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "numpy>=1.23,<3.0a0", "pandas>=1.3", "pynvml>=11.0.0,<11.5", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", "zict>=2.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -50,12 +50,12 @@ docs = [ "sphinx-rtd-theme>=0.5.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. test = [ - "cudf==24.10.*,>=0.0.0a0", - "dask-cudf==24.10.*,>=0.0.0a0", - "kvikio==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", + "dask-cudf==24.12.*,>=0.0.0a0", + "kvikio==24.12.*,>=0.0.0a0", "pytest", "pytest-cov", - "ucx-py==0.40.*,>=0.0.0a0", + "ucx-py==0.41.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From 45924dfc91cf9b2ec9ad7070a05161cb83d17777 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 9 Oct 2024 09:39:12 -0400 Subject: [PATCH 2/8] Update Changelog [skip ci] --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37c58851..f8c992fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,32 @@ +# dask-cuda 24.10.00 (9 Oct 2024) + +## 🚨 Breaking Changes + +- Replace cuDF (de)serializer with cuDF spill-aware (de)serializer ([#1369](https://github.com/rapidsai/dask-cuda/pull/1369)) [@pentschev](https://github.com/pentschev) + +## 📖 Documentation + +- Fix typo in spilling documentation ([#1384](https://github.com/rapidsai/dask-cuda/pull/1384)) [@rjzamora](https://github.com/rjzamora) +- Add notes on cudf spilling to docs ([#1383](https://github.com/rapidsai/dask-cuda/pull/1383)) [@rjzamora](https://github.com/rjzamora) + +## 🚀 New Features + +- [Benchmark] Add parquet read benchmark ([#1371](https://github.com/rapidsai/dask-cuda/pull/1371)) [@rjzamora](https://github.com/rjzamora) +- Replace cuDF (de)serializer with cuDF spill-aware (de)serializer ([#1369](https://github.com/rapidsai/dask-cuda/pull/1369)) [@pentschev](https://github.com/pentschev) + +## 🛠️ Improvements + +- Update update-version.sh to use packaging lib ([#1387](https://github.com/rapidsai/dask-cuda/pull/1387)) [@AyodeAwe](https://github.com/AyodeAwe) +- Use CI workflow branch 'branch-24.10' again ([#1386](https://github.com/rapidsai/dask-cuda/pull/1386)) [@jameslamb](https://github.com/jameslamb) +- Update to flake8 7.1.1. 
([#1385](https://github.com/rapidsai/dask-cuda/pull/1385)) [@bdice](https://github.com/bdice) - enable Python 3.12 tests on PRs ([#1382](https://github.com/rapidsai/dask-cuda/pull/1382)) [@jameslamb](https://github.com/jameslamb) - Add support for Python 3.12 ([#1380](https://github.com/rapidsai/dask-cuda/pull/1380)) [@jameslamb](https://github.com/jameslamb) - Update rapidsai/pre-commit-hooks ([#1379](https://github.com/rapidsai/dask-cuda/pull/1379)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) - Drop Python 3.9 support ([#1377](https://github.com/rapidsai/dask-cuda/pull/1377)) [@jameslamb](https://github.com/jameslamb) - Remove NumPy <2 pin ([#1375](https://github.com/rapidsai/dask-cuda/pull/1375)) [@seberg](https://github.com/seberg) - Update pre-commit hooks ([#1373](https://github.com/rapidsai/dask-cuda/pull/1373)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) - Merge branch-24.08 into branch-24.10 ([#1368](https://github.com/rapidsai/dask-cuda/pull/1368)) [@jameslamb](https://github.com/jameslamb) + # dask-cuda 24.08.00 (7 Aug 2024) ## 🐛 Bug Fixes From 93a1ee23a43563f33fba8a5a8761c03ccef25a1c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 9 Oct 2024 18:12:23 +0200 Subject: [PATCH 3/8] Limit output of pytest durations (#1393) The durations output was previously increased to show all tests, which helped us debug timeouts. Those timeouts have not been a concern lately, so limiting the report to the 50 longest-running tests keeps log lengths down; we may remove the option entirely if it remains unneeded. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/dask-cuda/pull/1393 --- ci/test_python.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 78330a40..32c0d940 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -52,7 +52,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ timeout 60m pytest \ -vv \ - --durations=0 \ + --durations=50 \ --capture=no \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda.xml" \ @@ -73,7 +73,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ timeout 30m pytest \ -vv \ - --durations=0 \ + --durations=50 \ --capture=no \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda-legacy.xml" \ From f775d883c1149b00a462a041cf6589f9081aa4fb Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 10 Oct 2024 12:59:31 -0500 Subject: [PATCH 4/8] make conda installs in CI stricter (#1395) Contributes to https://github.com/rapidsai/build-planning/issues/106 Proposes specifying the RAPIDS version in `conda install` calls that install CI artifacts, to reduce the risk of CI jobs picking up artifacts from other releases. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/dask-cuda/pull/1395 --- ci/build_docs.sh | 7 ++++--- ci/release/update-version.sh | 1 - ci/test_python.sh | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 7850211e..58da36c7 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -5,6 +5,8 @@ set -euo pipefail rapids-logger "Create test conda environment" . 
/opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -21,9 +23,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ - dask-cuda + "dask-cuda=${RAPIDS_VERSION}" -export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" @@ -33,4 +34,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/dask-cuda/"html mv _html/* "${RAPIDS_DOCS_DIR}/dask-cuda/html" popd -rapids-upload-docs +RAPIDS_VERSION_NUMBER="$(rapids-version-major-minor)" rapids-upload-docs diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 2dbe504c..b229d280 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -68,7 +68,6 @@ done for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done -sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh # Docs referencing source code find docs/source/ -type f -name *.rst -print0 | while IFS= read -r -d '' filename; do diff --git a/ci/test_python.sh b/ci/test_python.sh index 32c0d940..33914172 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -5,6 +5,8 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -29,7 +31,7 @@ rapids-print-env rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ - dask-cuda + "dask-cuda=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi From 8d88006a6a064165e8408dcb9c288059c6f98a7f Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Sat, 12 Oct 2024 13:51:38 -0600 Subject: [PATCH 5/8] Enable Pytorch to share same memory pool as RMM via cli (#1392) This PR closes: https://github.com/rapidsai/dask-cuda/issues/1281 Usage example: ``` from dask_cuda import LocalCUDACluster from dask.distributed import Client cluster = LocalCUDACluster(rmm_allocator_external_lib_list=["torch", "cupy"]) client = Client(cluster) ``` Verify it is working: ``` def get_torch_allocator(): import torch return torch.cuda.get_allocator_backend() client.run(get_torch_allocator) ``` ``` {'tcp://127.0.0.1:37167': 'pluggable', 'tcp://127.0.0.1:38749': 'pluggable', 'tcp://127.0.0.1:43109': 'pluggable', 'tcp://127.0.0.1:44259': 'pluggable', 'tcp://127.0.0.1:44953': 'pluggable', 'tcp://127.0.0.1:45087': 'pluggable', 'tcp://127.0.0.1:45623': 'pluggable', 'tcp://127.0.0.1:45847': 'pluggable'} ``` Without it, the allocator backend is `native`. Context: This helps NeMo-Curator achieve more stable use of PyTorch together with dask-cuda. CC: @pentschev
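The same setting is also exposed on the command line through the new `--set-rmm-allocator-for-libs` option added in this PR. A minimal sketch, assuming the `dask cuda worker` entry point and a scheduler already running at the placeholder address below:

```
# Placeholder scheduler address; the option accepts a comma-separated subset of "torch,cupy".
dask cuda worker tcp://127.0.0.1:8786 --set-rmm-allocator-for-libs "torch,cupy"
```

Workers started this way configure RMM as the allocator for the listed libraries at startup, mirroring the `rmm_allocator_external_lib_list` argument shown above.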
Authors: - Vibhu Jawa (https://github.com/VibhuJawa) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1392 --- dask_cuda/cli.py | 14 ++++++- dask_cuda/cuda_worker.py | 2 + dask_cuda/local_cuda_cluster.py | 22 +++++++++++ dask_cuda/plugins.py | 67 +++++++++++++++++++++++++++++++++ dask_cuda/utils.py | 11 ++++++ 5 files changed, 115 insertions(+), 1 deletion(-) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index a8c6d972..8101f020 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -13,7 +13,7 @@ from distributed.utils import import_term from .cuda_worker import CUDAWorker -from .utils import print_cluster_config +from .utils import CommaSeparatedChoice, print_cluster_config logger = logging.getLogger(__name__) @@ -164,6 +164,16 @@ def cuda(): incompatible with RMM pools and managed memory, trying to enable both will result in failure.""", ) +@click.option( + "--set-rmm-allocator-for-libs", + "rmm_allocator_external_lib_list", + type=CommaSeparatedChoice(["cupy", "torch"]), + default=None, + show_default=True, + help=""" + Set RMM as the allocator for external libraries. Provide a comma-separated + list of libraries to set, e.g., "torch,cupy".""", +) @click.option( "--rmm-release-threshold", default=None, @@ -351,6 +361,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, @@ -425,6 +436,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index 3e03ed29..30c14450 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -47,6 +47,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -231,6 +232,7 @@ def del_pid_file(): release_threshold=rmm_release_threshold, log_directory=rmm_log_directory, track_allocations=rmm_track_allocations, + external_lib_list=rmm_allocator_external_lib_list, ), PreImport(pre_import), CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index c037223b..7a24df43 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster): The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also incompatible with RMM pools and managed memory. Trying to enable both will result in an exception. + rmm_allocator_external_lib_list: str, list or None, default None + List of external libraries for which to set RMM as the allocator. + Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string + (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``). + If ``None``, no external libraries will use RMM as their allocator. rmm_release_threshold: int, str or None, default None When ``rmm.async is True`` and the pool size grows beyond this value, unused memory held by the pool will be released at the next synchronization point. 
@@ -231,6 +236,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -265,6 +271,19 @@ def __init__( n_workers = len(CUDA_VISIBLE_DEVICES) if n_workers < 1: raise ValueError("Number of workers cannot be less than 1.") + + if rmm_allocator_external_lib_list is not None: + if isinstance(rmm_allocator_external_lib_list, str): + rmm_allocator_external_lib_list = [ + v.strip() for v in rmm_allocator_external_lib_list.split(",") + ] + elif not isinstance(rmm_allocator_external_lib_list, list): + raise ValueError( + "rmm_allocator_external_lib_list must be either a comma-separated " + "string or a list of strings. Examples: 'torch,cupy' " + "or ['torch', 'cupy']" + ) + # Set nthreads=1 when parsing mem_limit since it only depends on n_workers logger = logging.getLogger(__name__) self.memory_limit = parse_memory_limit( @@ -284,6 +303,8 @@ def __init__( self.rmm_managed_memory = rmm_managed_memory self.rmm_async = rmm_async self.rmm_release_threshold = rmm_release_threshold + self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list + if rmm_pool_size is not None or rmm_managed_memory or rmm_async: try: import rmm # noqa F401 @@ -437,6 +458,7 @@ def new_worker_spec(self): release_threshold=self.rmm_release_threshold, log_directory=self.rmm_log_directory, track_allocations=self.rmm_track_allocations, + external_lib_list=self.rmm_allocator_external_lib_list, ), PreImport(self.pre_import), CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 122f93ff..cd1928af 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -1,5 +1,6 @@ import importlib import os +from typing import Callable, Dict from distributed import WorkerPlugin @@ -39,6 +40,7 @@ def __init__( release_threshold, log_directory, track_allocations, + external_lib_list, ): if initial_pool_size is None and maximum_pool_size is not None: raise ValueError( @@ -61,6 +63,7 @@ def __init__( self.logging = log_directory is not None self.log_directory = log_directory self.rmm_track_allocations = track_allocations + self.external_lib_list = external_lib_list def setup(self, worker=None): if self.initial_pool_size is not None: @@ -123,6 +126,70 @@ def setup(self, worker=None): mr = rmm.mr.get_current_device_resource() rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr)) + if self.external_lib_list is not None: + for lib in self.external_lib_list: + enable_rmm_memory_for_library(lib) + + +def enable_rmm_memory_for_library(lib_name: str) -> None: + """Enable RMM memory pool support for a specified third-party library. + + This function allows the given library to utilize RMM's memory pool if it supports + integration with RMM. The library name is passed as a string argument, and if the + library is compatible, its memory allocator will be configured to use RMM. + + Parameters + ---------- + lib_name : str + The name of the third-party library to enable RMM memory pool support for. + Supported libraries are "cupy" and "torch". + + Raises + ------ + ValueError + If the library name is not supported or does not have RMM integration. + ImportError + If the required library is not installed. 
+ """ + + # Mapping of supported libraries to their respective setup functions + setup_functions: Dict[str, Callable[[], None]] = { + "torch": _setup_rmm_for_torch, + "cupy": _setup_rmm_for_cupy, + } + + if lib_name not in setup_functions: + supported_libs = ", ".join(setup_functions.keys()) + raise ValueError( + f"The library '{lib_name}' is not supported for RMM integration. " + f"Supported libraries are: {supported_libs}." + ) + + # Call the setup function for the specified library + setup_functions[lib_name]() + + +def _setup_rmm_for_torch() -> None: + try: + import torch + except ImportError as e: + raise ImportError("PyTorch is not installed.") from e + + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + +def _setup_rmm_for_cupy() -> None: + try: + import cupy + except ImportError as e: + raise ImportError("CuPy is not installed.") from e + + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + class PreImport(WorkerPlugin): def __init__(self, libraries): diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py index ff4dbbae..74596fe2 100644 --- a/dask_cuda/utils.py +++ b/dask_cuda/utils.py @@ -9,6 +9,7 @@ from multiprocessing import cpu_count from typing import Optional +import click import numpy as np import pynvml import toolz @@ -764,3 +765,13 @@ def get_rmm_memory_resource_stack(mr) -> list: if isinstance(mr, rmm.mr.StatisticsResourceAdaptor): return mr.allocation_counts["current_bytes"] return None + + +class CommaSeparatedChoice(click.Choice): + def convert(self, value, param, ctx): + values = [v.strip() for v in value.split(",")] + for v in values: + if v not in self.choices: + choices_str = ", ".join(f"'{c}'" for c in self.choices) + self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})") + return values From dfcd399171cdaca93155fe7a1f47812db63c780c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 14 Oct 2024 19:06:16 +0200 Subject: [PATCH 6/8] Reenable UCXX in CI (#1396) UCXX CI tests had previously been disabled due to instabilities (see https://github.com/rapidsai/dask-cuda/pull/1270#issuecomment-1806295358). UCXX should now be much more resilient, so we reenable those tests in preparation for the permanent migration to UCXX. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/dask-cuda/pull/1396 --- ci/test_python.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 33914172..18dd88cf 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -52,7 +52,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 60m pytest \ +timeout 90m pytest \ -vv \ --durations=50 \ --capture=no \ @@ -62,7 +62,7 @@ timeout 60m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage.xml" \ --cov-report=term \ - tests -k "not ucxx" + tests popd rapids-logger "pytest explicit-comms (legacy dd)" @@ -73,7 +73,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 30m pytest \ +timeout 60m pytest \ -vv \ --durations=50 \ --capture=no \ @@ -83,7 +83,7 @@ timeout 30m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage-legacy.xml" \ --cov-report=term \ - tests/test_explicit_comms.py -k "not ucxx" + tests/test_explicit_comms.py popd rapids-logger "Run local benchmark (dask-expr)" From 0f78f5d23029313ecb3647faca6c28933b52d130 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 22 Oct 2024 23:39:51 +0200 Subject: [PATCH 7/8] Ignore legacy Dask dataframe warnings (#1397) Ignore the legacy Dask DataFrame warnings stating that the implementation will soon be removed, introduced in https://github.com/dask/dask/pull/11437. The warning is only raised when `DASK_DATAFRAME__QUERY_PLANNING=False` is set. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/dask-cuda/pull/1397 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fcf57276..2266fb5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,9 @@ filterwarnings = [ # is enabled in both dask-cudf and dask-cuda. # See: https://github.com/rapidsai/dask-cuda/issues/1311 "ignore:Dask DataFrame implementation is deprecated:DeprecationWarning", + # Dask now loudly throws warnings: https://github.com/dask/dask/pull/11437 + # When the legacy implementation is removed we can remove this warning and stop running pytests with `DASK_DATAFRAME__QUERY_PLANNING=False` + "ignore:The legacy Dask DataFrame implementation is deprecated and will be removed in a future version.*:FutureWarning", ] [tool.rapids-build-backend] From 4639a968bcbf9837085be5c8df40ef27d00bf009 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 23 Oct 2024 14:12:46 -0500 Subject: [PATCH 8/8] remove unnecessary cmake and sccache configuration (#1400) Contributes to https://github.com/rapidsai/build-planning/issues/108 This is a pure Python project, so it doesn't need configuration for CMake or `sccache`. This proposes removing them to simplify build scripts a bit. It also proposes updating the `rapids-dependency-file-generator` pre-commit hook to its latest version, something I'm trying to roll out across RAPIDS as part of https://github.com/rapidsai/build-planning/issues/108. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/dask-cuda/pull/1400 --- .pre-commit-config.yaml | 2 +- ci/build_python.sh | 4 ---- ci/build_wheel.sh | 3 +-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4707492a..a2202df3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: hooks: - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/ci/build_python.sh b/ci/build_python.sh index 48cece32..c12a0dde 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -5,12 +5,8 @@ set -euo pipefail rapids-configure-conda-channels -source rapids-configure-sccache - source rapids-date-string -export CMAKE_GENERATOR=Ninja - rapids-print-env rapids-generate-version > ./VERSION diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 828972dc..91c57231 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,11 +3,10 @@ set -euo pipefail -source rapids-configure-sccache source rapids-date-string rapids-generate-version > ./VERSION -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check RAPIDS_PY_WHEEL_NAME="dask-cuda" rapids-upload-wheels-to-s3 dist