# add experimental checkpoint logic #70
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Tests

# Triggers: every push and pull request, a twice-daily cron schedule,
# and manual runs via workflow_dispatch.
on:
  push:
  pull_request:
  schedule:
    # Minute 0 of hours 6 and 18, every day
    - cron: "0 6,18 * * *"
  workflow_dispatch:
# When this workflow is queued, automatically cancel any previous running
# or pending jobs from the same branch.
# The group name is keyed on github.ref, so runs on different refs do not
# cancel each other.
concurrency:
  group: tests-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Runs the pytest suite across the OS / environment / label / partition matrix
  # and uploads reports, coverage and cluster dumps.
  test:
    # Do not run the schedule job on forks
    if: github.repository == 'dask/distributed' || github.event_name != 'schedule'
    runs-on: ${{ matrix.os }}
    # If you change the pattern of this name, please adjust the scripts/test_report.py parsing accordingly
    name: ${{ matrix.os }}-${{ matrix.environment }}-${{ matrix.label }}-${{ matrix.partition }}
    timeout-minutes: 120

    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        environment: [mindeps, "3.9", "3.10", "3.11", "3.12"]
        label: [queue]
        extra_packages: [null]
        # Cherry-pick test modules to split the overall runtime roughly in half
        partition: [ci1, not ci1]

        exclude:
          # MacOS CI does not have any hosts available; run it on 3.12 only
          - os: macos-latest
            environment: mindeps
          - os: macos-latest
            environment: "3.9"
          - os: macos-latest
            environment: "3.10"
          - os: macos-latest
            environment: "3.11"
          - os: windows-latest
            environment: mindeps

        include:
          # Set distributed.scheduler.worker-saturation: .inf
          - os: ubuntu-latest
            environment: "3.9"
            label: no_queue
            partition: "ci1"
          - os: ubuntu-latest
            environment: "3.9"
            label: no_queue
            partition: "not ci1"

          # dask.array P2P shuffle
          - os: ubuntu-latest
            environment: mindeps
            label: numpy
            extra_packages: [numpy=1.21]
            partition: "ci1"
          - os: ubuntu-latest
            environment: mindeps
            label: numpy
            extra_packages: [numpy=1.21]
            partition: "not ci1"

          # dask.dataframe P2P shuffle
          - os: ubuntu-latest
            environment: mindeps
            label: pandas
            extra_packages: [numpy=1.21, pandas=1.3, pyarrow=7, pyarrow-hotfix]
            partition: "ci1"
          - os: ubuntu-latest
            environment: mindeps
            label: pandas
            extra_packages: [numpy=1.21, pandas=1.3, pyarrow=7, pyarrow-hotfix]
            partition: "not ci1"

          - os: ubuntu-latest
            environment: mindeps
            label: memray
            extra_packages: [memray]
            partition: "extra_packages"

        # Uncomment to stress-test the test suite for random failures.
        # Must also change `export TEST_ID` in first step below.
        # This will take a LONG time and delay all PRs across the whole github.com/dask!
        # To avoid hamstringing other people, change 'on: [push, pull_request]' above
        # to just 'on: [push]'; this way the stress test will run exclusively in your
        # branch (https://github.com/<your name>/distributed/actions).
        # run: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    env:
      CONDA_FILE: continuous_integration/environment-${{ matrix.environment }}.yaml

    steps:
      # Builds TEST_ID from the matrix values (first space stripped from the
      # partition) and exports it to later steps through $GITHUB_ENV.
      - name: Set $TEST_ID
        run: |
          export PARTITION_LABEL=$( echo "${{ matrix.partition }}" | sed "s/ //" )
          export TEST_ID="${{ matrix.os }}-${{ matrix.environment }}-${{ matrix.label }}-$PARTITION_LABEL"
          # Switch to this version for stress-test:
          # export TEST_ID="$TEST_ID-${{ matrix.run }}"
          echo "TEST_ID: $TEST_ID"
          echo "TEST_ID=$TEST_ID" >> $GITHUB_ENV
        shell: bash

      - name: Checkout source
        # NOTE(review): the exact pinned version was unreadable in the source
        # this file was recovered from; floating major tag used - re-pin as needed.
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Conda Environment
        # NOTE(review): exact pinned version was unreadable; re-pin as needed.
        uses: conda-incubator/setup-miniconda@v3
        with:
          # NOTE(review): Mambaforge appears to be deprecated upstream in favour
          # of Miniforge3 - confirm this variant is still downloadable.
          miniforge-variant: Mambaforge
          miniforge-version: latest
          condarc-file: continuous_integration/condarc
          use-mamba: true
          activate-environment: dask-distributed

      - name: Show conda options
        shell: bash -l {0}
        run: conda config --show

      - name: Check if caching is enabled
        # NOTE(review): exact pinned version was unreadable; re-pin as needed.
        uses: xarray-contrib/ci-trigger@v1
        id: skip-caching
        with:
          keyword: "[skip-caching]"

      - name: Get Date
        if: |
          (
            steps.skip-caching.outputs.trigger-found != 'true'
            && !(github.event_name == 'pull_request'
                 && contains(github.event.pull_request.labels.*.name, 'skip-caching'))
          )
        id: get-date
        # Step output is written through $GITHUB_OUTPUT; the `::set-output`
        # workflow command is deprecated.
        run: echo "today=$(/bin/date -u '+%Y%m%d')" >> "$GITHUB_OUTPUT"
        shell: bash

      - name: Cache Conda env
        # Same guard as "Get Date": when caching is skipped via the PR label the
        # date output is empty, so the cache key below would be malformed.
        if: |
          (
            steps.skip-caching.outputs.trigger-found != 'true'
            && !(github.event_name == 'pull_request'
                 && contains(github.event.pull_request.labels.*.name, 'skip-caching'))
          )
        uses: actions/cache@v4
        with:
          path: ${{ env.CONDA }}/envs
          key: conda-${{ matrix.os }}-${{ steps.get-date.outputs.today }}-${{ hashFiles(env.CONDA_FILE) }}-${{ env.CACHE_NUMBER }}
        env:
          # Increase this value to reset cache if
          # continuous_integration/environment-${{ matrix.environment }}.yaml has not
          # changed. See also same variable in .pre-commit-config.yaml
          CACHE_NUMBER: 0
        id: cache

      # Runs when caching is skipped (commit keyword or PR label) or on a cache miss.
      - name: Update environment
        run: mamba env update -n dask-distributed -f ${{ env.CONDA_FILE }}
        if: |
          (
            steps.skip-caching.outputs.trigger-found == 'true'
            || (github.event_name == 'pull_request'
                && contains(github.event.pull_request.labels.*.name, 'skip-caching'))
            || steps.cache.outputs.cache-hit != 'true'
          )

      - name: Install
        shell: bash -l {0}
        run: |
          python -m pip install --no-deps -e .

      # Only matrix entries that define a non-null extra_packages list run this.
      - name: Extra Installs
        if: ${{ matrix.extra_packages }}
        shell: bash -l {0}
        run: mamba install -y ${{ join(matrix.extra_packages, ' ') }}

      - name: mamba list
        shell: bash -l {0}
        run: mamba list

      - name: mamba env export
        shell: bash -l {0}
        run: |
          echo -e "--\n--Conda Environment (re-create this with \`mamba env create --name <name> -f <output_file>\`)\n--"
          mamba env export | grep -E -v '^prefix:.*$'

      - name: Setup SSH
        shell: bash -l {0}
        # FIXME no SSH available on Windows
        # https://github.com/dask/distributed/issues/4509
        if: ${{ matrix.os != 'windows-latest' }}
        run: bash continuous_integration/scripts/setup_ssh.sh

      - name: Reconfigure pytest-timeout
        shell: bash -l {0}
        # No SIGALRM available on Windows
        if: ${{ matrix.os != 'windows-latest' }}
        run: sed -i.bak 's/timeout_method = "thread"/timeout_method = "signal"/' pyproject.toml

      - name: Disable IPv6
        shell: bash -l {0}
        # FIXME ipv6-related failures on Ubuntu and MacOS github actions CI
        # https://github.com/dask/distributed/issues/4514
        if: ${{ matrix.os != 'windows-latest' }}
        run: echo "DISABLE_IPV6=1" >> $GITHUB_ENV

      - name: Set up dask env for job queuing
        shell: bash -l {0}
        if: ${{ matrix.label == 'no_queue' }}
        run: echo "DASK_DISTRIBUTED__SCHEDULER__WORKER_SATURATION=inf" >> $GITHUB_ENV

      - name: Print host info
        # host_info.py imports numpy, which isn't a direct dependency of distributed
        if: matrix.environment != 'mindeps'
        shell: bash -l {0}
        run: |
          python continuous_integration/scripts/host_info.py

      - name: Test
        id: run_tests
        shell: bash -l {0}
        env:
          PYTHONFAULTHANDLER: 1
          MINDEPS: ${{ matrix.environment == 'mindeps' }}
        run: |
          source continuous_integration/scripts/set_ulimit.sh
          set -o pipefail
          mkdir reports

          pytest distributed \
            -m "not avoid_ci and ${{ matrix.partition }}" --runslow \
            --leaks=fds,processes,threads \
            --junitxml reports/pytest.xml -o junit_suite_name=$TEST_ID \
            --cov=distributed --cov-report=xml \
          | tee reports/stdout

      - name: Generate junit XML report in case of pytest-timeout
        if: ${{ failure() }}
        shell: bash -l {0}
        run: |
          if [ ! -e reports/pytest.xml ]
          then
            # This should only ever happen on Windows.
            # On Linux and MacOS, pytest-timeout kills off the individual tests
            # See (reconfigure pytest-timeout above)
            python continuous_integration/scripts/parse_stdout.py < reports/stdout > reports/pytest.xml
          fi

      # - name: Debug with tmate on failure
      #   if: ${{ failure() }}
      #   uses: mxschmitt/action-tmate@v3

      # https://community.codecov.com/t/files-missing-from-report/3902/7
      # The coverage file being created records filenames at the distributed/ directory
      # as opposed to root. This is causing filename mismatches in Codecov.
      # This step edits `coverage.xml` in-file by adding `distributed` to all filenames.
      - name: Prepare coverage report
        if: >
          always() &&
          (steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure') &&
          matrix.os != 'windows-latest'
        shell: bash -l {0}
        run: sed -i'' -e 's/filename="/filename="distributed\//g' coverage.xml

      # Do not upload coverage reports for cron jobs
      - name: Coverage
        if: >
          always() &&
          (steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure') &&
          github.event_name != 'schedule'
        uses: codecov/codecov-action@v3
        with:
          name: ${{ env.TEST_ID }}
          # See https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954
          token: ${{ secrets.CODECOV_TOKEN }}

      - name: Upload test results
        # ensure this runs even if pytest fails
        if: >
          always() &&
          (steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.TEST_ID }}
          path: reports

      - name: Upload gen_cluster dumps for failed tests
        # ensure this runs even if pytest fails
        if: >
          always() &&
          (steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.TEST_ID }}_cluster_dumps
          path: test_cluster_dump
          if-no-files-found: ignore

  # Publish an artifact for the event; used by publish-test-results.yaml
  event_file:
    # Do not run the schedule job on forks
    if: github.repository == 'dask/distributed' || github.event_name != 'schedule'
    name: "Event File"
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: Event File
          path: ${{ github.event_path }}