From 7ab19bd22ccfd2b742b458b593c52ec3e3fad17c Mon Sep 17 00:00:00 2001
From: Brett
Date: Tue, 3 Oct 2023 12:23:01 -0400
Subject: [PATCH] change webbpsf data caching in github CI

download webbpsf data once per week to update a cache
``webbpsf-`` that can be used as a ``cache-restore-keys``
entry for the cache in other ci jobs. This can also be triggered
when a PR is specifically labeled to update webbpsf data.

Update other ``roman_ci.yml`` to use most recent ``webbpsf-``
to look up hash to then construct a combined cache key (for the
combined crds/webbpsf data cache) using the webbpsf data
as a restore key to initialize the cache with the pre-fetched
webbpsf data.

``roman_ci_cron.yml`` will need similar updates.
---
Review notes (text between the "---" line and the first diff header is not
part of the commit):
- The From: header above carries no e-mail address; `git am` may need one
  restored before it accepts this patch (`git apply` does not read it).
- webbpsf_data.yml: the job-level `if` tests for a `push` event, but `on:`
  declares only schedule, pull_request and workflow_dispatch, so that clause
  cannot match as written.
- webbpsf_data.yml: the `data_hash` step creates a relative `tmp/data`
  directory, but only the tarball under `tmp/` is used afterwards; the
  archive is extracted into DATA_PATH instead.
- roman_ci.yml: `hash` is written to $GITHUB_OUTPUT before the empty-HASH
  check, so a failed lookup records an empty output and then exits 1.

 .github/workflows/roman_ci.yml     | 34 ++++++++++----------
 .github/workflows/webbpsf_data.yml | 51 ++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 18 deletions(-)
 create mode 100644 .github/workflows/webbpsf_data.yml

diff --git a/.github/workflows/roman_ci.yml b/.github/workflows/roman_ci.yml
index d85cf38a5..371e148c8 100644
--- a/.github/workflows/roman_ci.yml
+++ b/.github/workflows/roman_ci.yml
@@ -26,8 +26,11 @@ jobs:
     env:
       OBSERVATORY: roman
       CRDS_SERVER_URL: https://roman-crds.stsci.edu
+      # CRDS_PATH here should match DATA_PATH in webbpsf_data.yml
       CRDS_PATH: /tmp/data
+      GH_TOKEN: ${{ github.token }}
     outputs:
+      # TODO simplify data_path and crds_path as they are the same
       data_path: ${{ steps.data.outputs.path }}
       webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
       data_hash: ${{ steps.data_hash.outputs.hash }}
@@ -36,6 +39,8 @@ jobs:
       crds_server: ${{ steps.crds_server.outputs.url }}
     steps:
       # crds:
+      - id: data
+        run: echo "path=/tmp/data" >> $GITHUB_OUTPUT
       - id: crds_context
         run: >
           echo "pmap=$(
@@ -49,25 +54,17 @@ jobs:
       - id: crds_server
         run: echo "url=${{ env.CRDS_SERVER_URL }}" >> $GITHUB_OUTPUT
       # webbpsf:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-      - id: data
-        run: |
-          echo "path=/tmp/data" >> $GITHUB_OUTPUT
-          echo "webbpsf_url=https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz" >> $GITHUB_OUTPUT
-      - run: |
-          mkdir -p tmp/data/
-          mkdir -p ${{ steps.data.outputs.path }}
-      - run: wget ${{ steps.data.outputs.webbpsf_url }} -O tmp/minimal-webbpsf-data.tar.gz
-      - run: tar -xzvf tmp/minimal-webbpsf-data.tar.gz -C tmp/data/
       - id: data_hash
-        run: echo "hash=${{ hashFiles( 'tmp/data' ) }}" >> $GITHUB_OUTPUT
-      - run: mv tmp/data/* ${{ steps.data.outputs.path }}
-      - uses: actions/cache@v3
-        with:
-          path: ${{ steps.data.outputs.path }}
-          key: data-${{ steps.data_hash.outputs.hash }}-${{ steps.crds_context.outputs.pmap }}
+        run: |
+          # use actions/gh-actions-cache to allow filtering by key
+          gh extension install actions/gh-actions-cache
+
+          RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
+          echo "RECENT=$RECENT"
+          HASH=$(echo $RECENT | cut -d '-' -f 2)
+          echo "HASH=$HASH"
+          echo "hash=$HASH" >> $GITHUB_OUTPUT
+          if [ "$HASH" == '' ]; then exit 1; fi
       - id: webbpsf_path
         run: echo "path=${{ steps.data.outputs.path }}/webbpsf-data" >> $GITHUB_OUTPUT
   check:
@@ -93,6 +90,7 @@ jobs:
         DD_GIT_BRANCH: ${{ github.ref_name }}
       cache-path: ${{ needs.data.outputs.data_path }}
       cache-key: data-${{ needs.data.outputs.data_hash }}-${{ needs.data.outputs.crds_context }}
+      cache-restore-keys: webbpsf-${{ needs.data.outputs.data_hash }}
       envs: |
         - linux: py39-oldestdeps-cov
           pytest-results-summary: true
diff --git a/.github/workflows/webbpsf_data.yml b/.github/workflows/webbpsf_data.yml
new file mode 100644
index 000000000..06b102388
--- /dev/null
+++ b/.github/workflows/webbpsf_data.yml
@@ -0,0 +1,51 @@
+name: check/update webbpsf cache
+
+on:
+  schedule:
+    - cron: "42 4 * * 3"
+  pull_request:
+    # We also want this workflow triggered if the `update webbpsf data` label is
+    # added or present when PR is updated
+    types:
+      - opened
+      - reopened
+      - labeled
+      - unlabeled
+      - synchronize
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  webbpsf-data:
+    if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
+    name: fetch, check, and possibly update webbpsf data cache
+    runs-on: ubuntu-latest
+    env:
+      # DATA_PATH here should match CRDS_PATH in roman_ci.yml and roman_ci_cron.yml
+      DATA_PATH: /tmp/data
+      WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
+    outputs:
+      path: ${{ steps.cache_path.outputs.path }}
+      hash: ${{ steps.data_hash.outputs.hash }}
+    steps:
+      - id: cache_path
+        run: |
+          echo "path=${{ env.DATA_PATH }}" >> $GITHUB_OUTPUT
+      - id: data_hash
+        run: |
+          mkdir -p tmp/data
+          wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz
+          echo "hash=$( shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> $GITHUB_OUTPUT
+      - id: cache_check
+        uses: actions/cache@v3
+        with:
+          path: ${{ steps.cache_path.outputs.path }}
+          key: webbpsf-${{ steps.data_hash.outputs.hash }}
+      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
+        name: Initialize cache
+        run: |
+          mkdir -p ${{ steps.cache_path.outputs.path }}
+          tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ steps.cache_path.outputs.path }}