Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into mukernels_bools
Browse files Browse the repository at this point in the history
  • Loading branch information
pmattione-nvidia committed Oct 29, 2024
2 parents c9154ef + eeb4d27 commit 86ade66
Show file tree
Hide file tree
Showing 440 changed files with 1,732 additions and 1,526 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1"

ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
ENV HISTFILE="/home/coder/.cache/._bash_history"
ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
17 changes: 17 additions & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: "Auto Assign PR"

on:
pull_request_target:
types:
- opened
- reopened
- synchronize

jobs:
add_assignees:
runs-on: ubuntu-latest
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
github_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
1 change: 1 addition & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: "Pull Request Labeler"

on:
- pull_request_target

Expand Down
14 changes: 2 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ repos:
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: |
(?x)^(^python/cudf_polars/.*)
types_or: [python, cython, pyi]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
hooks:
Expand Down Expand Up @@ -150,6 +139,7 @@ repos:
rev: v0.4.8
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
Expand All @@ -165,7 +155,7 @@ repos:
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
rev: v1.16.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
Expand Down
5 changes: 3 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ conduct. More information can be found at:
8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
Fix if needed.
9. Wait for other developers to review your code and update code as needed.
Changes to any C++ files require at least 2 approvals from the cudf-cpp-codeowners before merging.
10. Once reviewed and approved, a RAPIDS developer will merge your pull request.

If you are unsure about anything, don't hesitate to comment on issues and ask for clarification!
Expand Down Expand Up @@ -293,8 +294,8 @@ In order to run doxygen as a linter on C++/CUDA code, run
./ci/checks/doxygen.sh
```

Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/),
[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/).
Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/)
with its various rules for Black-like formatting or Isort.

cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes,
Expand Down
4 changes: 4 additions & 0 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ rapids-print-env

rapids-logger "Begin cpp build"

sccache --zero-stats

# With boa installed conda build forward to boa
RAPIDS_PACKAGE_VERSION=$(rapids-generate-version) rapids-conda-retry mambabuild \
conda/recipes/libcudf

sccache --show-adv-stats

rapids-upload-conda-to-s3 cpp
10 changes: 10 additions & 0 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ rapids-logger "Begin py build"

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

sccache --zero-stats

# TODO: Remove `--no-test` flag once importing on a CPU
# node works correctly
# With boa installed conda build forwards to the boa builder
Expand All @@ -28,12 +30,18 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibcudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf

sccache --show-adv-stats
sccache --zero-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
Expand All @@ -46,6 +54,8 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/cudf_kafka

sccache --show-adv-stats

RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
Expand Down
15 changes: 13 additions & 2 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

set -euo pipefail

package_dir=$1
package_name=$1
package_dir=$2

source rapids-configure-sccache
source rapids-date-string
Expand All @@ -12,4 +13,14 @@ rapids-generate-version > ./VERSION

cd "${package_dir}"

python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
sccache --zero-stats

rapids-logger "Building '${package_name}' wheel"
python -m pip wheel \
-w dist \
-v \
--no-deps \
--disable-pip-version-check \
.

sccache --show-adv-stats
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf
echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf_polars"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh cudf-polars ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/dask_cudf"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
24 changes: 22 additions & 2 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,30 @@

set -euo pipefail

package_name="libcudf"
package_dir="python/libcudf"

rapids-logger "Generating build requirements"

rapids-dependency-file-generator \
--output requirements \
--file-key "py_build_${package_name}" \
--file-key "py_rapids_build_${package_name}" \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \
| tee /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
-v \
--prefer-binary \
-r /tmp/requirements-build.txt

# build with '--no-build-isolation', for better sccache hit rate
# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735)
export PIP_NO_BUILD_ISOLATION=0

export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh "${package_name}" "${package_dir}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

Expand All @@ -16,4 +36,4 @@ python -m auditwheel repair \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
4 changes: 2 additions & 2 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt
export PIP_CONSTRAINT="/tmp/constraints.txt"

./ci/build_wheel.sh ${package_dir}
./ci/build_wheel.sh pylibcudf ${package_dir}

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libnvcomp.so \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- polars>=1.11,<1.12
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<18.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- polars>=1.11,<1.12
- pre-commit
- pyarrow>=14.0.0,<18.0.0a0
- pydata-sphinx-theme!=0.14.2
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.8,<1.9
- polars >=1.11,<1.12
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ add_library(
src/filling/sequence.cu
src/groupby/groupby.cu
src/groupby/hash/compute_groupby.cu
src/groupby/hash/compute_mapping_indices.cu
src/groupby/hash/compute_mapping_indices_null.cu
src/groupby/hash/compute_single_pass_aggs.cu
src/groupby/hash/create_sparse_results_table.cu
src/groupby/hash/flatten_single_pass_aggs.cpp
Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,11 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)
ConfigureBench(TEXT_BENCH text/subword.cpp)

ConfigureNVBench(
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/ngrams.cpp
text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp
)

# ##################################################################################################
Expand Down
Loading

0 comments on commit 86ade66

Please sign in to comment.