# Workflow file captured from GitHub Actions run for PR #8811:
# "Run ferc_to_sqlite and pudl_etl independent of integration tests"
---
name: tox-pytest
on: [pull_request, workflow_dispatch]

# Cancel any still-running workflow for the same ref when a new run starts,
# so force-pushes don't pile up redundant CI jobs.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Shared workspace locations used by every job/step below.
env:
  PUDL_OUTPUT: /home/runner/pudl-work/output/
  PUDL_INPUT: /home/runner/pudl-work/data/
  DAGSTER_HOME: /home/runner/pudl-work/dagster_home/
jobs:
  # NOTE(review): ci-static and ci-unit are currently disabled (commented out)
  # while CI is restructured around the ETL-first ci-integration job below;
  # they are kept here for reference.
  # ci-static:
  #   runs-on: ubuntu-latest
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       tox-env:
  #         - linters
  #         - docs
  #   defaults:
  #     run:
  #       shell: bash -l {0}
  #   steps:
  #     - uses: actions/checkout@v3
  #       with:
  #         fetch-depth: 2
  #     - name: Install Conda environment using mamba
  #       uses: mamba-org/provision-with-micromamba@v16
  #       with:
  #         environment-file: test/test-environment.yml
  #         cache-env: true
  #         channels: conda-forge,defaults
  #         channel-priority: strict
  #     - name: Log environment details
  #       run: |
  #         conda info
  #         conda list
  #         conda config --show-sources
  #         conda config --show
  #         printenv | sort
  #     - name: Build ${{ matrix.tox-env }} with Tox
  #       run: |
  #         tox -e ${{ matrix.tox-env }}
  #     - name: Upload coverage
  #       uses: actions/upload-artifact@v3
  #       if: ${{ matrix.tox-env == 'docs' }}
  #       with:
  #         name: coverage-docs
  #         path: coverage.xml
  # ci-unit:
  #   runs-on: ubuntu-latest
  #   strategy:
  #     fail-fast: false
  #   defaults:
  #     run:
  #       shell: bash -l {0}
  #   steps:
  #     - uses: actions/checkout@v3
  #       with:
  #         fetch-depth: 2
  #     - name: Install Conda environment using mamba
  #       uses: mamba-org/provision-with-micromamba@v16
  #       with:
  #         environment-file: test/test-environment.yml
  #         cache-env: true
  #         channels: conda-forge,defaults
  #         channel-priority: strict
  #     - name: Log environment details
  #       run: |
  #         conda info
  #         conda list
  #         conda config --show-sources
  #         conda config --show
  #         printenv | sort
  #     - name: Log SQLite3 version
  #       run: |
  #         which sqlite3
  #         sqlite3 --version
  #     - name: Run unit tests with Tox
  #       run: |
  #         tox -e unit -- --durations 0
  #     - name: Upload coverage
  #       uses: actions/upload-artifact@v3
  #       with:
  #         name: coverage-unit
  #         path: coverage.xml

  # TODO(rousik): the following could simply run docker
  # container mounted to specific directory.
  # TODO(rousik): run this on larger runners, see dev for
  # how to do that.
  ci-integration:
    runs-on:
      group: large-runner-group
      labels: ubuntu-22.04-4core
    permissions:
      contents: read
      # id-token write is required for google-github-actions/auth (OIDC).
      id-token: write
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      fail-fast: false
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      # TODO(rousik): setup-python may be redundant with micromamba
      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install Conda environment using mamba
        uses: mamba-org/setup-micromamba@v1
        with:
          environment-file: test/test-environment.yml
          cache-environment: true
          condarc: |
            channels:
              - conda-forge
              - defaults
            channel_priority: strict
      - name: Install PUDL and dependencies
        run: pip install .
      # Extract every Zenodo DOI referenced by the datastore module into a
      # sorted file; its hash keys the datastore cache, so the cache is
      # invalidated whenever any archived dataset version changes.
      - name: Compile Zenodo datastore DOIs for cache invalidation
        run: |
          grep -e '.*10\.\(5281\|5072\)/zenodo\..*' src/pudl/workspace/datastore.py \
            | sed -e 's/",*$//g' | sed -e 's/^.*"//g' | sort > datastore-dois.txt
      - name: Restore Zenodo datastore from cache if possible
        uses: actions/cache@v3
        id: cache-zenodo-datastore
        with:
          path: ${{ env.PUDL_INPUT }}
          key: zenodo-datastore-${{ hashFiles('datastore-dois.txt') }}
      - name: Make input, output and dagster dirs
        run: mkdir -p ${{ env.PUDL_OUTPUT }} ${{ env.PUDL_INPUT }} ${{ env.DAGSTER_HOME }}
      # Best-effort GCP auth (continue-on-error) so forks without access to
      # the workload identity pool can still run the rest of the job.
      - name: Set default GCP credentials
        id: gcloud-auth
        continue-on-error: true
        uses: "google-github-actions/auth@v1"
        with:
          workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
          service_account: "tox-pytest-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"
      - name: Prepare for ETL execution
        run: alembic upgrade head
      - name: Run ferc_to_sqlite
        run: |
          ferc_to_sqlite --gcs-cache-path=gs://zenodo-cache.catalyst.coop \
            src/pudl/package_data/settings/etl_fast.yml
      - name: Run pudl_etl
        run: |
          pudl_etl --gcs-cache-path=gs://zenodo-cache.catalyst.coop \
            src/pudl/package_data/settings/etl_fast.yml
      # Runs the validation test suite against the databases produced by the
      # two ETL steps above (--live-dbs reuses them instead of rebuilding).
      - name: Run integration tests
        run: tox -e validate -- --live-dbs
# ci-integration:
# runs-on: ubuntu-latest
# needs:
# - ci-integration-etl
# permissions:
# contents: read
# id-token: write
# strategy:
# fail-fast: false
# defaults:
# run:
# shell: bash -l {0}
# steps:
# - uses: actions/checkout@v3
# with:
# fetch-depth: 2
#
# - name: Install Conda environment using mamba
# uses: mamba-org/provision-with-micromamba@v16
# with:
# environment-file: test/test-environment.yml
# cache-env: true
# channels: conda-forge,defaults
# channel-priority: strict
#
# - name: Log environment details
# run: |
# conda info
# conda list
# conda config --show-sources
# conda config --show
# printenv | sort
#
# - name: Log SQLite3 version
# run: |
# which sqlite3
# sqlite3 --version
#
# - name: Compile Zenodo datastore DOIs for cache invalidation
# run:
# grep -e '.*10\.\(5281\|5072\)/zenodo\..*' src/pudl/workspace/datastore.py
# | sed -e 's/",*$//g' | sed -e 's/^.*"//g' | sort > datastore-dois.txt
#
# - name: Restore Zenodo datastore from cache if possible
# uses: actions/cache@v3
# id: cache-zenodo-datastore
# with:
# path: ${{ env.PUDL_INPUT }}
# key: zenodo-datastore-${{ hashFiles('datastore-dois.txt') }}
#
# - name: Make input, output and dagster dirs
# run: mkdir -p ${{ env.PUDL_OUTPUT }} ${{ env.PUDL_INPUT}} ${{ env.DAGSTER_HOME }}
#
# - name: List workspace contents
# run: find /home/runner/pudl-work
#
# - name: Set default GCP credentials
# id: gcloud-auth
# continue-on-error: true
# uses: "google-github-actions/auth@v1"
# with:
# workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
# service_account: "tox-pytest-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"
#
# - name: Run integration tests, trying to use GCS cache if possible
# run: |
# tox -e integration -- --gcs-cache-path=gs://zenodo-cache.catalyst.coop --durations 0
#
# - name: Upload coverage
# uses: actions/upload-artifact@v3
# with:
# name: coverage-integration
# path: coverage.xml
#
# - name: Log post-test Zenodo datastore contents
# run: find ${{ env.PUDL_INPUT }}
# ci-coverage:
# runs-on: ubuntu-latest
# needs:
# - ci-unit
# - ci-integration
# - ci-static
# steps:
# - uses: actions/checkout@v3
# - name: Download coverage
# id: download-unit
# uses: actions/download-artifact@v3
# with:
# path: coverage
# - name: List downloaded files
# run: |
# ls -R
# - name: Upload test coverage report to CodeCov
# uses: codecov/codecov-action@v3
# with:
# directory: coverage
# ci-notify:
# runs-on: ubuntu-latest
# if: ${{ always() }}
# needs:
# - ci-unit
# - ci-integration
# steps:
# - name: Inform the Codemonkeys
# uses: 8398a7/action-slack@v3
# continue-on-error: true
# with:
# status: custom
# fields: workflow,job,commit,repo,ref,author,took
# custom_payload: |
# {
# username: 'action-slack',
# icon_emoji: ':octocat:',
# attachments: [{
# color: '${{ needs.ci-test.result }}' === 'success' ? 'good' : '${{ needs.ci-test.result }}' === 'failure' ? 'danger' : 'warning',
# text: `${process.env.AS_REPO}@${process.env.AS_REF}\n ${process.env.AS_WORKFLOW} (${process.env.AS_COMMIT})\n by ${process.env.AS_AUTHOR}\n Status: ${{ needs.ci-test.result }}`,
# }]
# }
# env:
# GITHUB_TOKEN: ${{ github.token }} # required
# SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} # required
# MATRIX_CONTEXT: ${{ toJson(matrix) }} # required