Skip to content

add experimental checkpoint logic #70

add experimental checkpoint logic

add experimental checkpoint logic #70

Workflow file for this run

name: Tests
on:
push:
pull_request:
schedule:
- cron: "0 6,18 * * *"
workflow_dispatch:
# When this workflow is queued, automatically cancel any previous running
# or pending jobs from the same branch
concurrency:
group: tests-${{ github.ref }}
cancel-in-progress: true
jobs:
test:
# Do not run the schedule job on forks
if: github.repository == 'dask/distributed' || github.event_name != 'schedule'
runs-on: ${{ matrix.os }}
# If you change the pattern of this name, please adjust the scripts/test_report.py parsing accordingly
name: ${{ matrix.os }}-${{ matrix.environment }}-${{ matrix.label }}-${{ matrix.partition }}
timeout-minutes: 120
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
environment: [mindeps, "3.9", "3.10", "3.11", "3.12"]
label: [queue]
extra_packages: [null]
# Cherry-pick test modules to split the overall runtime roughly in half
partition: [ci1, not ci1]
exclude:
# MacOS CI does not have any hosts available; run it on 3.12 only
- os: macos-latest
environment: mindeps
- os: macos-latest
environment: "3.9"
- os: macos-latest
environment: "3.10"
- os: macos-latest
environment: "3.11"
- os: windows-latest
environment: mindeps
include:
# Set distributed.scheduler.worker-saturation: .inf
- os: ubuntu-latest
environment: "3.9"
label: no_queue
partition: "ci1"
- os: ubuntu-latest
environment: "3.9"
label: no_queue
partition: "not ci1"
# dask.array P2P shuffle
- os: ubuntu-latest
environment: mindeps
label: numpy
extra_packages: [numpy=1.21]
partition: "ci1"
- os: ubuntu-latest
environment: mindeps
label: numpy
extra_packages: [numpy=1.21]
partition: "not ci1"
# dask.dataframe P2P shuffle
- os: ubuntu-latest
environment: mindeps
label: pandas
extra_packages: [numpy=1.21, pandas=1.3, pyarrow=7, pyarrow-hotfix]
partition: "ci1"
- os: ubuntu-latest
environment: mindeps
label: pandas
extra_packages: [numpy=1.21, pandas=1.3, pyarrow=7, pyarrow-hotfix]
partition: "not ci1"
- os: ubuntu-latest
environment: mindeps
label: memray
extra_packages: [memray]
partition: "extra_packages"
# Uncomment to stress-test the test suite for random failures.
# Must also change `export TEST_ID` in first step below.
# This will take a LONG time and delay all PRs across the whole github.com/dask!
# To avoid hamstringing other people, change 'on: [push, pull_request]' above
# to just 'on: [push]'; this way the stress test will run exclusively in your
# branch (https://github.com/<your name>/distributed/actions).
# run: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
env:
CONDA_FILE: continuous_integration/environment-${{ matrix.environment }}.yaml
steps:
- name: Set $TEST_ID
run: |
export PARTITION_LABEL=$( echo "${{ matrix.partition }}" | sed "s/ //" )
export TEST_ID="${{ matrix.os }}-${{ matrix.environment }}-${{ matrix.label }}-$PARTITION_LABEL"
# Switch to this version for stress-test:
# export TEST_ID="$TEST_ID-${{ matrix.run }}"
echo "TEST_ID: $TEST_ID"
echo "TEST_ID=$TEST_ID" >> $GITHUB_ENV
shell: bash
- name: Checkout source
uses: actions/[email protected]
with:
fetch-depth: 0
- name: Setup Conda Environment
uses: conda-incubator/[email protected]
with:
miniforge-variant: Mambaforge
miniforge-version: latest
condarc-file: continuous_integration/condarc
use-mamba: true
activate-environment: dask-distributed
- name: Show conda options
shell: bash -l {0}
run: conda config --show
- name: Check if caching is enabled
uses: xarray-contrib/[email protected]
id: skip-caching
with:
keyword: "[skip-caching]"
- name: Get Date
if: |
(
steps.skip-caching.outputs.trigger-found != 'true'
&& !(github.event_name == 'pull_request'
&& contains(github.event.pull_request.labels.*.name, 'skip-caching'))
)
id: get-date
run: echo "::set-output name=today::$(/bin/date -u '+%Y%m%d')"
shell: bash
- name: Cache Conda env
if: steps.skip-caching.outputs.trigger-found != 'true'
uses: actions/cache@v4
with:
path: ${{ env.CONDA }}/envs
key: conda-${{ matrix.os }}-${{ steps.get-date.outputs.today }}-${{ hashFiles(env.CONDA_FILE) }}-${{ env.CACHE_NUMBER }}
env:
# Increase this value to reset cache if
# continuous_integration/environment-${{ matrix.environment }}.yaml has not
# changed. See also same variable in .pre-commit-config.yaml
CACHE_NUMBER: 0
id: cache
- name: Update environment
run: mamba env update -n dask-distributed -f ${{ env.CONDA_FILE }}
if: |
(
steps.skip-caching.outputs.trigger-found == 'true'
|| (github.event_name == 'pull_request'
&& contains(github.event.pull_request.labels.*.name, 'skip-caching'))
|| steps.cache.outputs.cache-hit != 'true'
)
- name: Install
shell: bash -l {0}
run: |
python -m pip install --no-deps -e .
- name: Extra Installs
if: ${{ matrix.extra_packages }}
shell: bash -l {0}
run: mamba install -y ${{ join(matrix.extra_packages, ' ') }}
- name: mamba list
shell: bash -l {0}
run: mamba list
- name: mamba env export
shell: bash -l {0}
run: |
echo -e "--\n--Conda Environment (re-create this with \`mamba env create --name <name> -f <output_file>\`)\n--"
mamba env export | grep -E -v '^prefix:.*$'
- name: Setup SSH
shell: bash -l {0}
# FIXME no SSH available on Windows
# https://github.com/dask/distributed/issues/4509
if: ${{ matrix.os != 'windows-latest' }}
run: bash continuous_integration/scripts/setup_ssh.sh
- name: Reconfigure pytest-timeout
shell: bash -l {0}
# No SIGALRM available on Windows
if: ${{ matrix.os != 'windows-latest' }}
run: sed -i.bak 's/timeout_method = "thread"/timeout_method = "signal"/' pyproject.toml
- name: Disable IPv6
shell: bash -l {0}
# FIXME ipv6-related failures on Ubuntu and MacOS github actions CI
# https://github.com/dask/distributed/issues/4514
if: ${{ matrix.os != 'windows-latest' }}
run: echo "DISABLE_IPV6=1" >> $GITHUB_ENV
- name: Set up dask env for job queuing
shell: bash -l {0}
if: ${{ matrix.label == 'no_queue' }}
run: echo "DASK_DISTRIBUTED__SCHEDULER__WORKER_SATURATION=inf" >> $GITHUB_ENV
- name: Print host info
# host_info.py imports numpy, which isn't a direct dependency of distributed
if: matrix.environment != 'mindeps'
shell: bash -l {0}
run: |
python continuous_integration/scripts/host_info.py
- name: Test
id: run_tests
shell: bash -l {0}
env:
PYTHONFAULTHANDLER: 1
MINDEPS: ${{ matrix.environment == 'mindeps' }}
run: |
source continuous_integration/scripts/set_ulimit.sh
set -o pipefail
mkdir reports
pytest distributed \
-m "not avoid_ci and ${{ matrix.partition }}" --runslow \
--leaks=fds,processes,threads \
--junitxml reports/pytest.xml -o junit_suite_name=$TEST_ID \
--cov=distributed --cov-report=xml \
| tee reports/stdout
- name: Generate junit XML report in case of pytest-timeout
if: ${{ failure() }}
shell: bash -l {0}
run: |
if [ ! -e reports/pytest.xml ]
then
# This should only ever happen on Windows.
# On Linux and MacOS, pytest-timeout kills off the individual tests
# See (reconfigure pytest-timeout above)
python continuous_integration/scripts/parse_stdout.py < reports/stdout > reports/pytest.xml
fi
# - name: Debug with tmate on failure
# if: ${{ failure() }}
# uses: mxschmitt/action-tmate@v3
# https://community.codecov.com/t/files-missing-from-report/3902/7
# The coverage file being created records filenames at the distributed/ directory
# as opposed to root. This is causing filename mismatches in Codecov.
# This step edits `coverage.xml` in-file by adding `distributed` to all filenames.
- name: Prepare coverage report
if: >
always() &&
(steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure') &&
matrix.os != 'windows-latest'
shell: bash -l {0}
run: sed -i'' -e 's/filename="/filename="distributed\//g' coverage.xml
# Do not upload coverage reports for cron jobs
- name: Coverage
if: >
always() &&
(steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure') &&
github.event_name != 'schedule'
uses: codecov/codecov-action@v3
with:
name: ${{ env.TEST_ID }}
# See https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954
token: ${{ secrets.CODECOV_TOKEN }}
- name: Upload test results
# ensure this runs even if pytest fails
if: >
always() &&
(steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
uses: actions/upload-artifact@v4
with:
name: ${{ env.TEST_ID }}
path: reports
- name: Upload gen_cluster dumps for failed tests
# ensure this runs even if pytest fails
if: >
always() &&
(steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
uses: actions/upload-artifact@v4
with:
name: ${{ env.TEST_ID }}_cluster_dumps
path: test_cluster_dump
if-no-files-found: ignore
# Publish an artifact for the event; used by publish-test-results.yaml
event_file:
# Do not run the schedule job on forks
if: github.repository == 'dask/distributed' || github.event_name != 'schedule'
name: "Event File"
runs-on: ubuntu-latest
steps:
- name: Upload
uses: actions/upload-artifact@v4
with:
name: Event File
path: ${{ github.event_path }}