From 95f0a33e377eff36bf2b20f25748489dbdb5e5b2 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Thu, 19 Sep 2024 11:46:22 -0400 Subject: [PATCH 1/8] DOC v24.12 Updates [skip ci] --- .github/workflows/build.yaml | 10 +++---- .github/workflows/pr.yaml | 12 ++++----- .github/workflows/test.yaml | 2 +- VERSION | 2 +- ci/build_docs.sh | 2 +- .../all_cuda-114_arch-x86_64.yaml | 14 +++++----- .../all_cuda-118_arch-x86_64.yaml | 14 +++++----- .../all_cuda-125_arch-x86_64.yaml | 14 +++++----- dependencies.yaml | 26 +++++++++---------- docs/source/explicit_comms.rst | 2 +- pyproject.toml | 10 +++---- 11 files changed, 54 insertions(+), 54 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 67bbd027..3d097bcd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: conda-python-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: if: github.ref_type == 'branch' needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: upload-conda: needs: [conda-python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -72,7 +72,7 @@ jobs: wheel-publish: needs: wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 76014652..0e20bdaf 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,26 +18,26 @@ jobs: - docs-build - wheel-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 conda-python-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 
with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -46,7 +46,7 @@ jobs: run_script: "ci/build_docs.sh" wheel-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request # Package is pure Python and only ever requires one build. diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1a0e7d87..631a6173 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/VERSION b/VERSION index 7c7ba044..af28c42b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 42103004..7850211e 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -23,7 +23,7 @@ rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ dask-cuda -export RAPIDS_VERSION_NUMBER="24.10" +export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" diff --git a/conda/environments/all_cuda-114_arch-x86_64.yaml b/conda/environments/all_cuda-114_arch-x86_64.yaml index 3cfd9cb2..3c327ff0 100644 --- a/conda/environments/all_cuda-114_arch-x86_64.yaml +++ b/conda/environments/all_cuda-114_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.4 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-114_arch-x86_64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b7b99751..3931f3bf 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,10 +10,10 @@ dependencies: - click >=8.1 - cuda-version=11.8 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -25,13 +25,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- 
rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 652a8f0c..760ae971 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -11,10 +11,10 @@ dependencies: - cuda-nvcc-impl - cuda-nvrtc - cuda-version=12.5 -- cudf==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 -- distributed-ucxx==0.40.*,>=0.0.0a0 -- kvikio==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 +- distributed-ucxx==0.41.*,>=0.0.0a0 +- kvikio==24.12.*,>=0.0.0a0 - numactl-devel-cos7-x86_64 - numba>=0.57 - numpy>=1.23,<3.0a0 @@ -26,13 +26,13 @@ dependencies: - pytest-cov - python>=3.10,<3.13 - rapids-build-backend>=0.3.0,<0.4.0dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - setuptools>=64.0.0 - sphinx - sphinx-click>=2.7.1 - sphinx-rtd-theme>=0.5.1 - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 -- ucxx==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 +- ucxx==0.41.*,>=0.0.0a0 - zict>=2.0.0 name: all_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 9e6b3a10..59ac8c01 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -158,7 +158,7 @@ dependencies: - numpy>=1.23,<3.0a0 - pandas>=1.3 - pynvml>=11.0.0,<11.5 - - rapids-dask-dependency==24.10.*,>=0.0.0a0 + - rapids-dask-dependency==24.12.*,>=0.0.0a0 - zict>=2.0.0 test_python: common: @@ -168,13 +168,13 @@ dependencies: - pytest-cov - output_types: [conda] packages: - - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 - - &dask_cudf_unsuffixed dask-cudf==24.10.*,>=0.0.0a0 - - distributed-ucxx==0.40.*,>=0.0.0a0 - - &kvikio_unsuffixed kvikio==24.10.*,>=0.0.0a0 - - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==24.12.*,>=0.0.0a0 + - distributed-ucxx==0.41.*,>=0.0.0a0 + - &kvikio_unsuffixed kvikio==24.12.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - ucx-proc=*=gpu - - ucxx==0.40.*,>=0.0.0a0 + - ucxx==0.41.*,>=0.0.0a0 specific: - output_types: conda matrices: @@ -194,16 +194,16 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.10.*,>=0.0.0a0 - - dask-cudf-cu12==24.10.*,>=0.0.0a0 - - ucx-py-cu12==0.40.*,>=0.0.0a0 + - cudf-cu12==24.12.*,>=0.0.0a0 + - dask-cudf-cu12==24.12.*,>=0.0.0a0 + - ucx-py-cu12==0.41.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.10.*,>=0.0.0a0 - - dask-cudf-cu11==24.10.*,>=0.0.0a0 - - ucx-py-cu11==0.40.*,>=0.0.0a0 + - cudf-cu11==24.12.*,>=0.0.0a0 + - dask-cudf-cu11==24.12.*,>=0.0.0a0 + - ucx-py-cu11==0.41.*,>=0.0.0a0 - matrix: packages: - *cudf_unsuffixed diff --git a/docs/source/explicit_comms.rst b/docs/source/explicit_comms.rst index af317056..db621977 100644 --- a/docs/source/explicit_comms.rst +++ b/docs/source/explicit_comms.rst @@ -14,4 +14,4 @@ Usage In order to use explicit-comms in Dask/Distributed automatically, simply define the environment variable ``DASK_EXPLICIT_COMMS=True`` or setting the ``"explicit-comms"`` key in the `Dask configuration `_. 
-It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. +It is also possible to use explicit-comms in tasks manually, see the `API <../api/#explicit-comms>`_ and our `implementation of shuffle `_ for guidance. diff --git a/pyproject.toml b/pyproject.toml index 730225ad..fcf57276 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "numpy>=1.23,<3.0a0", "pandas>=1.3", "pynvml>=11.0.0,<11.5", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", "zict>=2.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -50,12 +50,12 @@ docs = [ "sphinx-rtd-theme>=0.5.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. test = [ - "cudf==24.10.*,>=0.0.0a0", - "dask-cudf==24.10.*,>=0.0.0a0", - "kvikio==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", + "dask-cudf==24.12.*,>=0.0.0a0", + "kvikio==24.12.*,>=0.0.0a0", "pytest", "pytest-cov", - "ucx-py==0.40.*,>=0.0.0a0", + "ucx-py==0.41.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From 45924dfc91cf9b2ec9ad7070a05161cb83d17777 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 9 Oct 2024 09:39:12 -0400 Subject: [PATCH 2/8] Update Changelog [skip ci] --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37c58851..f8c992fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,32 @@ +# dask-cuda 24.10.00 (9 Oct 2024) + +## 🚨 Breaking Changes + +- Replace cuDF (de)serializer with cuDF spill-aware (de)serializer ([#1369](https://github.com/rapidsai/dask-cuda/pull/1369)) [@pentschev](https://github.com/pentschev) + +## 📖 Documentation + +- Fix typo in spilling documentation ([#1384](https://github.com/rapidsai/dask-cuda/pull/1384)) [@rjzamora](https://github.com/rjzamora) +- Add notes on cudf spilling to docs ([#1383](https://github.com/rapidsai/dask-cuda/pull/1383)) [@rjzamora](https://github.com/rjzamora) + +## 🚀 New Features + +- [Benchmark] Add parquet read benchmark ([#1371](https://github.com/rapidsai/dask-cuda/pull/1371)) [@rjzamora](https://github.com/rjzamora) +- Replace cuDF (de)serializer with cuDF spill-aware (de)serializer ([#1369](https://github.com/rapidsai/dask-cuda/pull/1369)) [@pentschev](https://github.com/pentschev) + +## 🛠️ Improvements + +- Update update-version.sh to use packaging lib ([#1387](https://github.com/rapidsai/dask-cuda/pull/1387)) [@AyodeAwe](https://github.com/AyodeAwe) +- Use CI workflow branch 'branch-24.10' again ([#1386](https://github.com/rapidsai/dask-cuda/pull/1386)) [@jameslamb](https://github.com/jameslamb) +- Update to flake8 7.1.1. 
([#1385](https://github.com/rapidsai/dask-cuda/pull/1385)) [@bdice](https://github.com/bdice) - enable Python 3.12 tests on PRs ([#1382](https://github.com/rapidsai/dask-cuda/pull/1382)) [@jameslamb](https://github.com/jameslamb) - Add support for Python 3.12 ([#1380](https://github.com/rapidsai/dask-cuda/pull/1380)) [@jameslamb](https://github.com/jameslamb) - Update rapidsai/pre-commit-hooks ([#1379](https://github.com/rapidsai/dask-cuda/pull/1379)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) - Drop Python 3.9 support ([#1377](https://github.com/rapidsai/dask-cuda/pull/1377)) [@jameslamb](https://github.com/jameslamb) - Remove NumPy <2 pin ([#1375](https://github.com/rapidsai/dask-cuda/pull/1375)) [@seberg](https://github.com/seberg) - Update pre-commit hooks ([#1373](https://github.com/rapidsai/dask-cuda/pull/1373)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) - Merge branch-24.08 into branch-24.10 ([#1368](https://github.com/rapidsai/dask-cuda/pull/1368)) [@jameslamb](https://github.com/jameslamb) + # dask-cuda 24.08.00 (7 Aug 2024) ## 🐛 Bug Fixes From 93a1ee23a43563f33fba8a5a8761c03ccef25a1c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 9 Oct 2024 18:12:23 +0200 Subject: [PATCH 3/8] Limit output of pytest durations (#1393) The durations output was previously increased to show all tests, which helped us debug timeouts. Those timeouts have not been a concern lately, so limiting the report to the 50 longest-running tests keeps log lengths down; we may remove the option entirely if it remains unneeded. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/dask-cuda/pull/1393 --- ci/test_python.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 78330a40..32c0d940 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -52,7 +52,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ timeout 60m pytest \ -vv \ - --durations=0 \ + --durations=50 \ --capture=no \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda.xml" \ @@ -73,7 +73,7 @@ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ timeout 30m pytest \ -vv \ - --durations=0 \ + --durations=50 \ --capture=no \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cuda-legacy.xml" \ From f775d883c1149b00a462a041cf6589f9081aa4fb Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 10 Oct 2024 12:59:31 -0500 Subject: [PATCH 4/8] make conda installs in CI stricter (#1395) Contributes to https://github.com/rapidsai/build-planning/issues/106 Proposes specifying the RAPIDS version in `conda install` calls that install CI artifacts, to reduce the risk of CI jobs picking up artifacts from other releases. Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/dask-cuda/pull/1395 --- ci/build_docs.sh | 7 ++++--- ci/release/update-version.sh | 1 - ci/test_python.sh | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 7850211e..58da36c7 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -5,6 +5,8 @@ set -euo pipefail rapids-logger "Create test conda environment" . 
/opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-dependency-file-generator \ --output conda \ --file-key docs \ @@ -21,9 +23,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ - dask-cuda + "dask-cuda=${RAPIDS_VERSION}" -export RAPIDS_VERSION_NUMBER="24.12" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build Python docs" @@ -33,4 +34,4 @@ mkdir -p "${RAPIDS_DOCS_DIR}/dask-cuda/"html mv _html/* "${RAPIDS_DOCS_DIR}/dask-cuda/html" popd -rapids-upload-docs +RAPIDS_VERSION_NUMBER="$(rapids-version-major-minor)" rapids-upload-docs diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 2dbe504c..b229d280 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -68,7 +68,6 @@ done for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done -sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh # Docs referencing source code find docs/source/ -type f -name *.rst -print0 | while IFS= read -r -d '' filename; do diff --git a/ci/test_python.sh b/ci/test_python.sh index 32c0d940..33914172 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -5,6 +5,8 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION="$(rapids-version)" + rapids-logger "Generate Python testing dependencies" rapids-dependency-file-generator \ --output conda \ @@ -29,7 +31,7 @@ rapids-print-env rapids-mamba-retry install \ --channel "${PYTHON_CHANNEL}" \ - dask-cuda + "dask-cuda=${RAPIDS_VERSION}" rapids-logger "Check GPU usage" nvidia-smi From 8d88006a6a064165e8408dcb9c288059c6f98a7f Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Sat, 12 Oct 2024 13:51:38 -0600 Subject: [PATCH 5/8] Enable Pytorch to share same memory pool as RMM via cli (#1392) This PR closes: https://github.com/rapidsai/dask-cuda/issues/1281 Usage example: ``` from dask_cuda import LocalCUDACluster from dask.distributed import Client cluster = LocalCUDACluster(rmm_allocator_external_lib_list=["torch", "cupy"]) client = Client(cluster) ``` Verify it is working: ``` def get_torch_allocator(): import torch return torch.cuda.get_allocator_backend() client.run(get_torch_allocator) ``` ``` {'tcp://127.0.0.1:37167': 'pluggable', 'tcp://127.0.0.1:38749': 'pluggable', 'tcp://127.0.0.1:43109': 'pluggable', 'tcp://127.0.0.1:44259': 'pluggable', 'tcp://127.0.0.1:44953': 'pluggable', 'tcp://127.0.0.1:45087': 'pluggable', 'tcp://127.0.0.1:45623': 'pluggable', 'tcp://127.0.0.1:45847': 'pluggable'} ``` Without it, the allocator backend is `native`. Context: This helps NeMo-Curator achieve more stable use of PyTorch together with dask-cuda. CC: @pentschev
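The same setting is also exposed on the command line through the new `--set-rmm-allocator-for-libs` option added in this PR. A minimal sketch, assuming the `dask cuda worker` entry point and a scheduler already running at the placeholder address below:

```
# Placeholder scheduler address; the option accepts a comma-separated subset of "torch,cupy".
dask cuda worker tcp://127.0.0.1:8786 --set-rmm-allocator-for-libs "torch,cupy"
```

Workers started this way configure RMM as the allocator for the listed libraries at startup, mirroring the `rmm_allocator_external_lib_list` argument shown above.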
Authors: - Vibhu Jawa (https://github.com/VibhuJawa) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/dask-cuda/pull/1392 --- dask_cuda/cli.py | 14 ++++++- dask_cuda/cuda_worker.py | 2 + dask_cuda/local_cuda_cluster.py | 22 +++++++++++ dask_cuda/plugins.py | 67 +++++++++++++++++++++++++++++++++ dask_cuda/utils.py | 11 ++++++ 5 files changed, 115 insertions(+), 1 deletion(-) diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py index a8c6d972..8101f020 100644 --- a/dask_cuda/cli.py +++ b/dask_cuda/cli.py @@ -13,7 +13,7 @@ from distributed.utils import import_term from .cuda_worker import CUDAWorker -from .utils import print_cluster_config +from .utils import CommaSeparatedChoice, print_cluster_config logger = logging.getLogger(__name__) @@ -164,6 +164,16 @@ def cuda(): incompatible with RMM pools and managed memory, trying to enable both will result in failure.""", ) +@click.option( + "--set-rmm-allocator-for-libs", + "rmm_allocator_external_lib_list", + type=CommaSeparatedChoice(["cupy", "torch"]), + default=None, + show_default=True, + help=""" + Set RMM as the allocator for external libraries. Provide a comma-separated + list of libraries to set, e.g., "torch,cupy".""", +) @click.option( "--rmm-release-threshold", default=None, @@ -351,6 +361,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, @@ -425,6 +436,7 @@ def worker( rmm_maximum_pool_size, rmm_managed_memory, rmm_async, + rmm_allocator_external_lib_list, rmm_release_threshold, rmm_log_directory, rmm_track_allocations, diff --git a/dask_cuda/cuda_worker.py b/dask_cuda/cuda_worker.py index 3e03ed29..30c14450 100644 --- a/dask_cuda/cuda_worker.py +++ b/dask_cuda/cuda_worker.py @@ -47,6 +47,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -231,6 +232,7 @@ def del_pid_file(): release_threshold=rmm_release_threshold, log_directory=rmm_log_directory, track_allocations=rmm_track_allocations, + external_lib_list=rmm_allocator_external_lib_list, ), PreImport(pre_import), CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats), diff --git a/dask_cuda/local_cuda_cluster.py b/dask_cuda/local_cuda_cluster.py index c037223b..7a24df43 100644 --- a/dask_cuda/local_cuda_cluster.py +++ b/dask_cuda/local_cuda_cluster.py @@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster): The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also incompatible with RMM pools and managed memory. Trying to enable both will result in an exception. + rmm_allocator_external_lib_list: str, list or None, default None + List of external libraries for which to set RMM as the allocator. + Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string + (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``). + If ``None``, no external libraries will use RMM as their allocator. rmm_release_threshold: int, str or None, default None When ``rmm.async is True`` and the pool size grows beyond this value, unused memory held by the pool will be released at the next synchronization point. 
@@ -231,6 +236,7 @@ def __init__( rmm_maximum_pool_size=None, rmm_managed_memory=False, rmm_async=False, + rmm_allocator_external_lib_list=None, rmm_release_threshold=None, rmm_log_directory=None, rmm_track_allocations=False, @@ -265,6 +271,19 @@ def __init__( n_workers = len(CUDA_VISIBLE_DEVICES) if n_workers < 1: raise ValueError("Number of workers cannot be less than 1.") + + if rmm_allocator_external_lib_list is not None: + if isinstance(rmm_allocator_external_lib_list, str): + rmm_allocator_external_lib_list = [ + v.strip() for v in rmm_allocator_external_lib_list.split(",") + ] + elif not isinstance(rmm_allocator_external_lib_list, list): + raise ValueError( + "rmm_allocator_external_lib_list must be either a comma-separated " + "string or a list of strings. Examples: 'torch,cupy' " + "or ['torch', 'cupy']" + ) + # Set nthreads=1 when parsing mem_limit since it only depends on n_workers logger = logging.getLogger(__name__) self.memory_limit = parse_memory_limit( @@ -284,6 +303,8 @@ def __init__( self.rmm_managed_memory = rmm_managed_memory self.rmm_async = rmm_async self.rmm_release_threshold = rmm_release_threshold + self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list + if rmm_pool_size is not None or rmm_managed_memory or rmm_async: try: import rmm # noqa F401 @@ -437,6 +458,7 @@ def new_worker_spec(self): release_threshold=self.rmm_release_threshold, log_directory=self.rmm_log_directory, track_allocations=self.rmm_track_allocations, + external_lib_list=self.rmm_allocator_external_lib_list, ), PreImport(self.pre_import), CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats), diff --git a/dask_cuda/plugins.py b/dask_cuda/plugins.py index 122f93ff..cd1928af 100644 --- a/dask_cuda/plugins.py +++ b/dask_cuda/plugins.py @@ -1,5 +1,6 @@ import importlib import os +from typing import Callable, Dict from distributed import WorkerPlugin @@ -39,6 +40,7 @@ def __init__( release_threshold, log_directory, track_allocations, + external_lib_list, ): if initial_pool_size is None and maximum_pool_size is not None: raise ValueError( @@ -61,6 +63,7 @@ def __init__( self.logging = log_directory is not None self.log_directory = log_directory self.rmm_track_allocations = track_allocations + self.external_lib_list = external_lib_list def setup(self, worker=None): if self.initial_pool_size is not None: @@ -123,6 +126,70 @@ def setup(self, worker=None): mr = rmm.mr.get_current_device_resource() rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr)) + if self.external_lib_list is not None: + for lib in self.external_lib_list: + enable_rmm_memory_for_library(lib) + + +def enable_rmm_memory_for_library(lib_name: str) -> None: + """Enable RMM memory pool support for a specified third-party library. + + This function allows the given library to utilize RMM's memory pool if it supports + integration with RMM. The library name is passed as a string argument, and if the + library is compatible, its memory allocator will be configured to use RMM. + + Parameters + ---------- + lib_name : str + The name of the third-party library to enable RMM memory pool support for. + Supported libraries are "cupy" and "torch". + + Raises + ------ + ValueError + If the library name is not supported or does not have RMM integration. + ImportError + If the required library is not installed. 
+ """ + + # Mapping of supported libraries to their respective setup functions + setup_functions: Dict[str, Callable[[], None]] = { + "torch": _setup_rmm_for_torch, + "cupy": _setup_rmm_for_cupy, + } + + if lib_name not in setup_functions: + supported_libs = ", ".join(setup_functions.keys()) + raise ValueError( + f"The library '{lib_name}' is not supported for RMM integration. " + f"Supported libraries are: {supported_libs}." + ) + + # Call the setup function for the specified library + setup_functions[lib_name]() + + +def _setup_rmm_for_torch() -> None: + try: + import torch + except ImportError as e: + raise ImportError("PyTorch is not installed.") from e + + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + +def _setup_rmm_for_cupy() -> None: + try: + import cupy + except ImportError as e: + raise ImportError("CuPy is not installed.") from e + + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + class PreImport(WorkerPlugin): def __init__(self, libraries): diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py index ff4dbbae..74596fe2 100644 --- a/dask_cuda/utils.py +++ b/dask_cuda/utils.py @@ -9,6 +9,7 @@ from multiprocessing import cpu_count from typing import Optional +import click import numpy as np import pynvml import toolz @@ -764,3 +765,13 @@ def get_rmm_memory_resource_stack(mr) -> list: if isinstance(mr, rmm.mr.StatisticsResourceAdaptor): return mr.allocation_counts["current_bytes"] return None + + +class CommaSeparatedChoice(click.Choice): + def convert(self, value, param, ctx): + values = [v.strip() for v in value.split(",")] + for v in values: + if v not in self.choices: + choices_str = ", ".join(f"'{c}'" for c in self.choices) + self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})") + return values From dfcd399171cdaca93155fe7a1f47812db63c780c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Mon, 14 Oct 2024 19:06:16 +0200 Subject: [PATCH 6/8] Reenable UCXX in CI (#1396) UCXX CI tests had previously been disabled due to instabilities (see https://github.com/rapidsai/dask-cuda/pull/1270#issuecomment-1806295358). UCXX should now be much more resilient, so we reenable those tests in preparation for the permanent migration to UCXX. 
Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/dask-cuda/pull/1396 --- ci/test_python.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 33914172..18dd88cf 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -52,7 +52,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 60m pytest \ +timeout 90m pytest \ -vv \ --durations=50 \ --capture=no \ @@ -62,7 +62,7 @@ timeout 60m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage.xml" \ --cov-report=term \ - tests -k "not ucxx" + tests popd rapids-logger "pytest explicit-comms (legacy dd)" @@ -73,7 +73,7 @@ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT=20 \ UCXPY_IFNAME=eth0 \ UCX_WARN_UNUSED_ENV_VARS=n \ UCX_MEMTYPE_CACHE=n \ -timeout 30m pytest \ +timeout 60m pytest \ -vv \ --durations=50 \ --capture=no \ @@ -83,7 +83,7 @@ timeout 30m pytest \ --cov=dask_cuda \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cuda-coverage-legacy.xml" \ --cov-report=term \ - tests/test_explicit_comms.py -k "not ucxx" + tests/test_explicit_comms.py popd rapids-logger "Run local benchmark (dask-expr)" From 0f78f5d23029313ecb3647faca6c28933b52d130 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 22 Oct 2024 23:39:51 +0200 Subject: [PATCH 7/8] Ignore legacy Dask dataframe warnings (#1397) Ignore the legacy Dask DataFrame warnings stating that the implementation will soon be removed, introduced in https://github.com/dask/dask/pull/11437. The warning is only raised when `DASK_DATAFRAME__QUERY_PLANNING=False` is set. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/dask-cuda/pull/1397 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fcf57276..2266fb5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,9 @@ filterwarnings = [ # is enabled in both dask-cudf and dask-cuda. # See: https://github.com/rapidsai/dask-cuda/issues/1311 "ignore:Dask DataFrame implementation is deprecated:DeprecationWarning", + # Dask now loudly throws warnings: https://github.com/dask/dask/pull/11437 + # When the legacy implementation is removed we can remove this warning and stop running pytests with `DASK_DATAFRAME__QUERY_PLANNING=False` + "ignore:The legacy Dask DataFrame implementation is deprecated and will be removed in a future version.*:FutureWarning", ] [tool.rapids-build-backend] From 4639a968bcbf9837085be5c8df40ef27d00bf009 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 23 Oct 2024 14:12:46 -0500 Subject: [PATCH 8/8] remove unnecessary cmake and sccache configuration (#1400) Contributes to https://github.com/rapidsai/build-planning/issues/108 This is a pure Python project, so it doesn't need configuration for CMake or `sccache`. This proposes removing them to simplify build scripts a bit. It also proposes updating the `rapids-dependency-file-generator` pre-commit hook to its latest version, something I'm trying to roll out across RAPIDS as part of https://github.com/rapidsai/build-planning/issues/108. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/dask-cuda/pull/1400 --- .pre-commit-config.yaml | 2 +- ci/build_python.sh | 4 ---- ci/build_wheel.sh | 3 +-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4707492a..a2202df3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: hooks: - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.13.11 + rev: v1.16.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/ci/build_python.sh b/ci/build_python.sh index 48cece32..c12a0dde 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -5,12 +5,8 @@ set -euo pipefail rapids-configure-conda-channels -source rapids-configure-sccache - source rapids-date-string -export CMAKE_GENERATOR=Ninja - rapids-print-env rapids-generate-version > ./VERSION diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 828972dc..91c57231 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -3,11 +3,10 @@ set -euo pipefail -source rapids-configure-sccache source rapids-date-string rapids-generate-version > ./VERSION -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check RAPIDS_PY_WHEEL_NAME="dask-cuda" rapids-upload-wheels-to-s3 dist