Skip to content

download and cache data #22

download and cache data

download and cache data #22

Workflow file for this run

# Downloads STPSF dataset and caches it to GitHub cache,
# making a new cache for a new data version
# the cache key is in the form `stpsf-data-mini-1.3.0`,
# `stpsf-data-full-1.2.6`, etc.
#
# To provide your own test workflow with the STPSF dataset,
# you should use this workflow to set up a cache in your repository,
# and then use `retrieve_cache.yml` to retrieve that cache so you
# don't have to download the entire dataset every time.
#
# to set up a cache of STPSF data in your own repository,
# create a workflow like the following:
#
# # .github/workflows/download_stpsf.yml
# name: download and cache STPSF data
# on:
# schedule:
# - cron: "0 0 * * 0"
# jobs:
# download_stpsf:
# uses: spacetelescope/stpsf/.github/workflows/download_data.yml@beda656c80a0254e6f80649d9c9c49235634522f # v1.4.0
# with:
# minimal: true
name: download and cache data
on:
workflow_call:
inputs:
url:
description: URL to gzip file
type: string
required: false
default: https://stsci.box.com/shared/static/3hzmbarac5yxjt6x7gn17vz02k7c8z1d.gz
minimal:
description: dataset is minimal (as opposed to full)
type: boolean
required: false
default: true
outputs:
version:
value: ${{ jobs.download.outputs.version }}
cache_path:
value: ${{ jobs.download.outputs.cache_path }}
cache_key:
value: ${{ jobs.download.outputs.cache_key }}
workflow_dispatch:
inputs:
url:
description: URL to gzip file
type: string
required: false
default: https://stsci.box.com/shared/static/3hzmbarac5yxjt6x7gn17vz02k7c8z1d.gz
minimal:
description: dataset is minimal (as opposed to full)
type: boolean
required: false
default: true
schedule:
- cron: "0 0 * * 0"
release:
push:
branches:
- develop
env:
FULL_DATA_URL: https://stsci.box.com/shared/static/kqfolg2bfzqc4mjkgmujo06d3iaymahv.gz
MINIMAL_DATA_URL: https://stsci.box.com/shared/static/3hzmbarac5yxjt6x7gn17vz02k7c8z1d.gz
jobs:
download:
name: download and cache STPSF data
runs-on: ubuntu-latest
steps:
- run: wget ${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call') && inputs.url || env.MINIMAL_DATA_URL }} -O ${{ runner.temp }}/stpsf-data.tar.gz
- run: mkdir ${{ runner.temp }}/data/
- run: tar -xzvf ${{ runner.temp }}/stpsf-data.tar.gz -C ${{ runner.temp }}/data/
- id: cache_path
run: echo cache_path=${{ runner.temp }}/data/ >> $GITHUB_OUTPUT
- id: version
run: echo "version=$(cat ${{ steps.cache_path.outputs.cache_path }}/stpsf-data/version.txt)" >> $GITHUB_OUTPUT
- id: cache_key
run: echo "cache_key=stpsf-data-${{ (github.event_name == 'schedule' || github.event_name == 'release') && 'mini' || inputs.minimal && 'mini' || 'full' }}-${{ steps.version.outputs.version }}" >> $GITHUB_OUTPUT
- uses: actions/cache/save@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ${{ runner.temp }}/data/
key: ${{ steps.cache_key.outputs.cache_key }}
outputs:
version: ${{ steps.version.outputs.version }}
cache_path: ${{ steps.cache_path.outputs.cache_path }}
cache_key: ${{ steps.cache_key.outputs.cache_key }}