From 7ca2750c25511886ad9f6c7a3cee7b2aad7bf75c Mon Sep 17 00:00:00 2001 From: e-belfer Date: Mon, 17 Jun 2024 17:14:48 -0400 Subject: [PATCH 01/34] Add validation failures to slackbot --- scripts/make_slack_notification_message.py | 85 ++++++++++++++++------ 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py index b9349184..a1affae5 100755 --- a/scripts/make_slack_notification_message.py +++ b/scripts/make_slack_notification_message.py @@ -29,6 +29,56 @@ def _parse_args(): return parser.parse_args() +def _format_failures(summary: dict) -> list[dict]: + name = summary["dataset_name"] + url = summary["record_url"] + + test_failures = defaultdict(list) + for validation_test in summary["validation_tests"]: + if (not validation_test["success"]) and ( + validation_test["required_for_run_success"] + ): + test_failures[validation_test["name"]].append(validation_test["notes"]) + + if test_failures: + failures = f"```\n{json.dumps(test_failures, indent=2)}\n```" + else: + return None + + max_len = 3000 + text = f"<{url}|*{name}*>\n{failures}"[:max_len] + return [ + { + "type": "section", + "text": {"type": "mrkdwn", "text": text}, + }, + ] + + +def _format_summary(summary: dict) -> list[dict]: + name = summary["dataset_name"] + url = summary["record_url"] + if any(test["success"] for test in summary["validation_tests"]): + return None # Don't report on file changes if any test failed. + + if file_changes := summary["file_changes"]: + abridged_changes = defaultdict(list) + for change in file_changes: + abridged_changes[change["diff_type"]].append(change["name"]) + changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```" + else: + changes = "No changes." + + max_len = 3000 + text = f"<{url}|*{name}*>\n{changes}"[:max_len] + return [ + { + "type": "section", + "text": {"type": "mrkdwn", "text": text}, + }, + ] + + def main(summary_files: list[Path]) -> None: """Format summary files for Slack perusal.""" summaries = [] @@ -36,34 +86,24 @@ def main(summary_files: list[Path]) -> None: with summary_file.open() as f: summaries.extend(json.loads(f.read())) - def format_summary(summary: dict) -> list[dict]: - name = summary["dataset_name"] - url = summary["record_url"] - if file_changes := summary["file_changes"]: - abridged_changes = defaultdict(list) - for change in file_changes: - abridged_changes[change["diff_type"]].append(change["name"]) - changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```" - else: - changes = "No changes." - - max_len = 3000 - text = f"<{url}|*{name}*>\n{changes}"[:max_len] - return [ - { - "type": "section", - "text": {"type": "mrkdwn", "text": text}, - }, - ] + failed_blocks = list( + itertools.chain.from_iterable( + _format_failures(s) for s in summaries if _format_failures(s) is not None + ) + ) unchanged_blocks = list( itertools.chain.from_iterable( - format_summary(s) for s in summaries if not s["file_changes"] + _format_summary(s) + for s in summaries + if (not s["file_changes"]) and (_format_summary(s) is not None) ) ) changed_blocks = list( itertools.chain.from_iterable( - format_summary(s) for s in summaries if s["file_changes"] + _format_summary(s) + for s in summaries + if (s["file_changes"]) and (_format_summary(s) is not None) ) ) @@ -73,6 +113,8 @@ def header_block(text: str) -> dict: def section_block(text: str) -> dict: return {"type": "section", "text": {"type": "mrkdwn", "text": text}} + if failed_blocks: + failed_blocks = [section_block("*Validation Failures*")] + failed_blocks if changed_blocks: changed_blocks = [section_block("*Changed*")] + changed_blocks if unchanged_blocks: @@ -84,6 +126,7 @@ def section_block(text: str) -> dict: "attachments": [ { "blocks": [header_block("Archiver Run Outcomes")] + + failed_blocks + changed_blocks + unchanged_blocks, } From 13302d9d1c95f23b3a75305405b9e5a3e5dcaab4 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Mon, 17 Jun 2024 17:17:39 -0400 Subject: [PATCH 02/34] Fix skipping failed tests --- scripts/make_slack_notification_message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py index a1affae5..d8b08164 100755 --- a/scripts/make_slack_notification_message.py +++ b/scripts/make_slack_notification_message.py @@ -58,7 +58,7 @@ def _format_failures(summary: dict) -> list[dict]: def _format_summary(summary: dict) -> list[dict]: name = summary["dataset_name"] url = summary["record_url"] - if any(test["success"] for test in summary["validation_tests"]): + if any(not test["success"] for test in summary["validation_tests"]): return None # Don't report on file changes if any test failed. if file_changes := summary["file_changes"]: From 9425dd5de39519bb3c47f3deecc133695de89694 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Mon, 17 Jun 2024 17:50:32 -0400 Subject: [PATCH 03/34] Add format_message and reduce duplicated code --- scripts/make_slack_notification_message.py | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py index d8b08164..5043c59e 100755 --- a/scripts/make_slack_notification_message.py +++ b/scripts/make_slack_notification_message.py @@ -29,6 +29,18 @@ def _parse_args(): return parser.parse_args() +def _format_message( + url: str, name: str, content: str, max_len: int = 3000 +) -> list[dict]: + text = f"<{url}|*{name}*>\n{content}"[:max_len] + return [ + { + "type": "section", + "text": {"type": "mrkdwn", "text": text}, + }, + ] + + def _format_failures(summary: dict) -> list[dict]: name = summary["dataset_name"] url = summary["record_url"] @@ -45,14 +57,7 @@ def _format_failures(summary: dict) -> list[dict]: else: return None - max_len = 3000 - text = f"<{url}|*{name}*>\n{failures}"[:max_len] - return [ - { - "type": "section", - "text": {"type": "mrkdwn", "text": text}, - }, - ] + return _format_message(url=url, name=name, content=failures) def _format_summary(summary: dict) -> list[dict]: @@ -69,14 +74,7 @@ def _format_summary(summary: dict) -> list[dict]: else: changes = "No changes." - max_len = 3000 - text = f"<{url}|*{name}*>\n{changes}"[:max_len] - return [ - { - "type": "section", - "text": {"type": "mrkdwn", "text": text}, - }, - ] + return _format_message(url=url, name=name, content=changes) def main(summary_files: list[Path]) -> None: From 96109647244fe54f63a1f922ad5aceb118efcf58 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Mon, 17 Jun 2024 18:26:23 -0400 Subject: [PATCH 04/34] Test by running on some busted and non-busted archives --- .github/workflows/run-archiver.yml | 66 ------------------------------ 1 file changed, 66 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index f948907b..0e70c25e 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -14,25 +14,11 @@ jobs: strategy: matrix: dataset: - - eia176 - - eia191 - - eia757a - - eia860 - - eia861 - - eia860m - - eia923 - - eia930 - - eiaaeo - - eiawater - - eia_bulk_elec - - epacamd_eia - - ferc1 - ferc2 - ferc6 - ferc60 - ferc714 - mshamines - - nrelatb - phmsagas fail-fast: false @@ -77,58 +63,6 @@ jobs: name: run-summaries-${{ matrix.dataset }} path: ${{ matrix.dataset }}_run_summary.json - archive-run-large: - defaults: - run: - shell: bash -l {0} - strategy: - matrix: - dataset: - - epacems - fail-fast: false - runs-on: - group: large-runner-group - labels: ubuntu-22.04-4core - steps: - - uses: actions/checkout@v4 - - - name: Install Conda environment using mamba - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: environment.yml - cache-environment: true - condarc: | - channels: - - conda-forge - - defaults - channel_priority: strict - - - name: Log the conda environment - run: | - conda info - conda list - conda config --show-sources - conda config --show - printenv | sort - - - name: Run archiver for ${{ matrix.dataset }} - env: - ZENODO_SANDBOX_TOKEN_UPLOAD: ${{ secrets.ZENODO_SANDBOX_TOKEN_UPLOAD }} - ZENODO_SANDBOX_TOKEN_PUBLISH: ${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} - EPACEMS_API_KEY: ${{ secrets.EPACEMS_API_KEY }} - ZENODO_TOKEN_UPLOAD: ${{ secrets.ZENODO_TOKEN_UPLOAD }} - ZENODO_TOKEN_PUBLISH: ${{ secrets.ZENODO_TOKEN_PUBLISH }} - run: | - pudl_archiver --datasets ${{ matrix.dataset }} --summary-file ${{ matrix.dataset }}_run_summary.json - - - name: Upload run summaries - if: failure() || success() - id: upload_summaries - uses: actions/upload-artifact@v4 - with: - name: run-summaries-${{ matrix.dataset }} - path: ${{ matrix.dataset }}_run_summary.json - archive-notify: runs-on: ubuntu-latest needs: From bf58f1ce42870789e61a977d30e01dab2d57871c Mon Sep 17 00:00:00 2001 From: e-belfer Date: Mon, 17 Jun 2024 18:28:03 -0400 Subject: [PATCH 05/34] Fix issue in test run-archiver.yml --- .github/workflows/run-archiver.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 0e70c25e..9cb402be 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -67,7 +67,6 @@ jobs: runs-on: ubuntu-latest needs: - archive-run-small - - archive-run-large if: ${{ always() }} steps: - uses: actions/checkout@v4 From 68c6edf35433a38e7cee29c4fd26eb652aad329d Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 08:45:40 -0400 Subject: [PATCH 06/34] Shrink test and flatten validation test lists --- .github/workflows/run-archiver.yml | 4 ---- scripts/make_slack_notification_message.py | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 9cb402be..3bc22e32 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -15,10 +15,6 @@ jobs: matrix: dataset: - ferc2 - - ferc6 - - ferc60 - - ferc714 - - mshamines - phmsagas fail-fast: false diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py index 5043c59e..c126e02c 100755 --- a/scripts/make_slack_notification_message.py +++ b/scripts/make_slack_notification_message.py @@ -50,7 +50,9 @@ def _format_failures(summary: dict) -> list[dict]: if (not validation_test["success"]) and ( validation_test["required_for_run_success"] ): - test_failures[validation_test["name"]].append(validation_test["notes"]) + test_failures[validation_test["name"]].append( + ". ".join(validation_test["notes"]) + ) # Flatten list of lists if test_failures: failures = f"```\n{json.dumps(test_failures, indent=2)}\n```" From 83961e39ed247ffc79ff5af5037f90965afff1bc Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 09:58:49 -0400 Subject: [PATCH 07/34] Update issue template, add template creation to workflow --- .github/ISSUE_TEMPLATE/monthly-archive-update.md | 12 ++++++++---- .github/workflows/run-archiver.yml | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/monthly-archive-update.md b/.github/ISSUE_TEMPLATE/monthly-archive-update.md index 360a698d..598845f6 100644 --- a/.github/ISSUE_TEMPLATE/monthly-archive-update.md +++ b/.github/ISSUE_TEMPLATE/monthly-archive-update.md @@ -1,12 +1,16 @@ --- name: Monthly archive update about: Template for publishing monthly archives. -title: Publish archives for the month of MONTH +title: Publish {{ date | date('MMMM Do YYYY') }} archives labels: automation, zenodo -assignees: '' +assignees: e-belfer --- +# Summary of results: +See the job run results [here][1]. +[1]: {{ env.RUN_URL }} + # Review and publish archives For each of the following archives, find the run status in the Github archiver run. If validation tests pass, manually review the archive and publish. If no changes detected, delete the draft. If changes are detected, manually review the archive following the guidelines in step 3 of `README.md`, then publish the new version. Then check the box here to confirm publication status, adding a note on the status (e.g., "v1 published", "no changes detected, draft deleted"): @@ -50,8 +54,8 @@ If the validation failure is blocking (e.g., file format incorrect, whole datase For each run that failed because of another reason (e.g., underlying data changes, code failures), create an issue describing the failure and take necessary steps to resolve it. ```[tasklist] -- [ ] +- [ ] dataset ``` # Relevant logs -[Link to logs from GHA run]( PLEASE FIND THE ACTUAL LINK AND FILL IN HERE ) +[Link to logs from GHA run][1] diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 3bc22e32..399c3d83 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -89,3 +89,18 @@ jobs: payload: ${{ steps.all_summaries.outputs.SLACK_PAYLOAD }} env: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} + + make-github-issue: + runs-on: ubuntu-latest + needs: + - archive-run-small + if: ${{ always() }} + steps: + - name: Create an issue + - uses: actions/checkout@v3 + - uses: JasonEtco/create-an-issue@v2.9.2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + with: + filename: .github/ISSUE_TEMPLATE/monthly-archive-update.md From 60a76c57ec35a75c8cd7403f6666a2bbd2f2d4aa Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 10:04:42 -0400 Subject: [PATCH 08/34] Fix workflow format --- .github/workflows/run-archiver.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 399c3d83..d1178784 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -96,9 +96,9 @@ jobs: - archive-run-small if: ${{ always() }} steps: - - name: Create an issue - uses: actions/checkout@v3 - - uses: JasonEtco/create-an-issue@v2.9.2 + - name: Create an issue + uses: JasonEtco/create-an-issue@v2.9.2 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} From dd270b4d4269160468933ea837df5ee82f8697f0 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 10:16:16 -0400 Subject: [PATCH 09/34] Fix link formatting --- .github/ISSUE_TEMPLATE/monthly-archive-update.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/monthly-archive-update.md b/.github/ISSUE_TEMPLATE/monthly-archive-update.md index 598845f6..97a23a4e 100644 --- a/.github/ISSUE_TEMPLATE/monthly-archive-update.md +++ b/.github/ISSUE_TEMPLATE/monthly-archive-update.md @@ -8,8 +8,7 @@ assignees: e-belfer --- # Summary of results: -See the job run results [here][1]. -[1]: {{ env.RUN_URL }} +See the job run results [here]({{ env.RUN_URL }}). # Review and publish archives @@ -58,4 +57,4 @@ For each run that failed because of another reason (e.g., underlying data change ``` # Relevant logs -[Link to logs from GHA run][1] +[Link to logs from GHA run]({{ env.RUN_URL }}) From a2cb688c39c1bc6b2e0cb380678e5ccdd2a1f28c Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 10:28:22 -0400 Subject: [PATCH 10/34] Make slack validation failures more succinct --- scripts/make_slack_notification_message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py index c126e02c..097947aa 100755 --- a/scripts/make_slack_notification_message.py +++ b/scripts/make_slack_notification_message.py @@ -50,8 +50,8 @@ def _format_failures(summary: dict) -> list[dict]: if (not validation_test["success"]) and ( validation_test["required_for_run_success"] ): - test_failures[validation_test["name"]].append( - ". ".join(validation_test["notes"]) + test_failures = ". ".join( + [validation_test["name"], ". ".join(validation_test["notes"])] ) # Flatten list of lists if test_failures: From f304b4eb375a8393bd40eb14446ff969b9318916 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 10:50:00 -0400 Subject: [PATCH 11/34] Attempt to add dataset selection in manual run --- .github/workflows/run-archiver.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index d1178784..06c3f091 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -3,6 +3,17 @@ name: run-archiver on: workflow_dispatch: + inputs: + dataset: + description: "Comma-separated list of datasets to archive (e.g., ferc2,ferc6)." + default: "eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems" + required: false + type: string + create_github_issue: + description: "Create a Github issue from this run?" + default: true + required: true + type: boolean schedule: - cron: "21 8 1 * *" # 8:21 AM UTC, first of every month @@ -13,9 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: - - ferc2 - - phmsagas + dataset: ${{ fromJSON(format('[{0}]', inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} fail-fast: false runs-on: ubuntu-latest @@ -94,7 +103,7 @@ jobs: runs-on: ubuntu-latest needs: - archive-run-small - if: ${{ always() }} + if: ${{ github.event.inputs.create_github_issue }} steps: - uses: actions/checkout@v3 - name: Create an issue From ce9158edaa7a318256aa933a2ccab8ec91a5e806 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 10:56:37 -0400 Subject: [PATCH 12/34] Try to fix inputs --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 06c3f091..f78f9319 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -6,7 +6,7 @@ on: inputs: dataset: description: "Comma-separated list of datasets to archive (e.g., ferc2,ferc6)." - default: "eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems" + default: 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems' required: false type: string create_github_issue: From 7cb879ada868300c60f534beea9226f5e87e74d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 14:57:32 +0000 Subject: [PATCH 13/34] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index f78f9319..06c3f091 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -6,7 +6,7 @@ on: inputs: dataset: description: "Comma-separated list of datasets to archive (e.g., ferc2,ferc6)." - default: 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems' + default: "eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems" required: false type: string create_github_issue: From 3d31147396bf280a99cc6e80d27811944c59f41f Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:13:08 -0400 Subject: [PATCH 14/34] Try to fix matrix strategy --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 06c3f091..21dccc7c 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} + dataset: ${{ fromJSON(format('{{'dataset':[{0}]}}', github.event.inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} fail-fast: false runs-on: ubuntu-latest From 53c12d46053093cbbb1164975965d4d322a8c172 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:15:55 -0400 Subject: [PATCH 15/34] Try to fix matrix strategy --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 21dccc7c..d292f09f 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('{{'dataset':[{0}]}}', github.event.inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} + dataset: ${{ fromJSON(format('{{"dataset":[{0}]}}', github.event.inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} fail-fast: false runs-on: ubuntu-latest From 6242c479ed6521acd3a628d1505c5c1d34402179 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:21:17 -0400 Subject: [PATCH 16/34] Try to fix matrix strategy --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index d292f09f..91db3faf 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('{{"dataset":[{0}]}}', github.event.inputs.dataset || 'eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems')) }} + dataset: ${{ fromJSON(github.event.inputs.dataset)) }} fail-fast: false runs-on: ubuntu-latest From 8bbdf9cc8e66f9eadf6b1e9a10a6e1c3c2467a2c Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:22:17 -0400 Subject: [PATCH 17/34] Fix syntax --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 91db3faf..19b21298 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(github.event.inputs.dataset)) }} + dataset: ${{ fromJSON(github.event.inputs.dataset) }} fail-fast: false runs-on: ubuntu-latest From 6094cb48a96944039b7948d57d6d81f5ba343928 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:43:17 -0400 Subject: [PATCH 18/34] Test syntax and aditional quotes --- .github/workflows/run-archiver.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 19b21298..47645e98 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -6,8 +6,8 @@ on: inputs: dataset: description: "Comma-separated list of datasets to archive (e.g., ferc2,ferc6)." - default: "eia176,eia191,eia757a,eia860,eia860m,eia861,eia923,eia930,eiaaeo,eiawater,eia_bulk_elec,epacamd_eia,ferc1,ferc2,ferc6,ferc60,ferc714,mshamines,nrelatb,phmsagas,epacems" - required: false + default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas","epacems"' + required: true type: string create_github_issue: description: "Create a Github issue from this run?" @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(github.event.inputs.dataset) }} + dataset: ${{ fromJSON(format('[{0}]', github.events.inputs.dataset || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas","epacems"')) }} fail-fast: false runs-on: ubuntu-latest From a7622b07ff4a380b3a8fa049607612587868263f Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 11:47:33 -0400 Subject: [PATCH 19/34] Remove epacems from large, try to get filtering to work --- .github/workflows/run-archiver.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 47645e98..137f8951 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -24,7 +24,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', github.events.inputs.dataset || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas","epacems"')) }} + dataset: ${{ fromJSON(format('[{0}]', inputs.dataset || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"')) }} fail-fast: false runs-on: ubuntu-latest @@ -103,7 +103,7 @@ jobs: runs-on: ubuntu-latest needs: - archive-run-small - if: ${{ github.event.inputs.create_github_issue }} + if: ${{ inputs.create_github_issue }} steps: - uses: actions/checkout@v3 - name: Create an issue From ec4eea9343062ffcdcd7bd1aeefc5368dcfd6eee Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:12:29 -0400 Subject: [PATCH 20/34] Add back large runner --- .github/workflows/run-archiver.yml | 66 ++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 137f8951..4c203381 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -4,11 +4,16 @@ name: run-archiver on: workflow_dispatch: inputs: - dataset: - description: "Comma-separated list of datasets to archive (e.g., ferc2,ferc6)." + small_runner: + description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas","epacems"' required: true type: string + large_runner: + description: "Kick off large runners (for epacems)?" + required: true + default: false + type: boolean create_github_issue: description: "Create a Github issue from this run?" default: true @@ -24,8 +29,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', inputs.dataset || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"')) }} - + dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"')) }} fail-fast: false runs-on: ubuntu-latest steps: @@ -68,10 +72,63 @@ jobs: name: run-summaries-${{ matrix.dataset }} path: ${{ matrix.dataset }}_run_summary.json + archive-run-large: + if: inputs.large_runner + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + dataset: + - epacems + fail-fast: false + runs-on: + group: large-runner-group + labels: ubuntu-22.04-4core + steps: + - uses: actions/checkout@v4 + - name: Install Conda environment using mamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: environment.yml + cache-environment: true + condarc: | + channels: + - conda-forge + - defaults + channel_priority: strict + + - name: Log the conda environment + run: | + conda info + conda list + conda config --show-sources + conda config --show + printenv | sort + + - name: Run archiver for ${{ matrix.dataset }} + env: + ZENODO_SANDBOX_TOKEN_UPLOAD: ${{ secrets.ZENODO_SANDBOX_TOKEN_UPLOAD }} + ZENODO_SANDBOX_TOKEN_PUBLISH: ${{ secrets.ZENODO_SANDBOX_TOKEN_PUBLISH }} + EPACEMS_API_KEY: ${{ secrets.EPACEMS_API_KEY }} + ZENODO_TOKEN_UPLOAD: ${{ secrets.ZENODO_TOKEN_UPLOAD }} + ZENODO_TOKEN_PUBLISH: ${{ secrets.ZENODO_TOKEN_PUBLISH }} + run: | + pudl_archiver --datasets ${{ matrix.dataset }} --summary-file ${{ matrix.dataset }}_run_summary.json + + - name: Upload run summaries + if: failure() || success() + id: upload_summaries + uses: actions/upload-artifact@v4 + with: + name: run-summaries-${{ matrix.dataset }} + path: ${{ matrix.dataset }}_run_summary.json + archive-notify: runs-on: ubuntu-latest needs: - archive-run-small + - archive-run-large if: ${{ always() }} steps: - uses: actions/checkout@v4 @@ -103,6 +160,7 @@ jobs: runs-on: ubuntu-latest needs: - archive-run-small + - archive-run-large if: ${{ inputs.create_github_issue }} steps: - uses: actions/checkout@v3 From ad7464c50d36e2e42deeb1dfec8e43a4d230977f Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:15:32 -0400 Subject: [PATCH 21/34] Remove epacems from default small runner list --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 4c203381..4e0e5e9d 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -6,7 +6,7 @@ on: inputs: small_runner: description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' - default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas","epacems"' + default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' required: true type: string large_runner: From 58836701a37486386559a2f43961416b945dd671 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:23:59 -0400 Subject: [PATCH 22/34] Fix github issue creation --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 4e0e5e9d..88c7361d 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -161,7 +161,7 @@ jobs: needs: - archive-run-small - archive-run-large - if: ${{ inputs.create_github_issue }} + if: inputs.create_github_issue steps: - uses: actions/checkout@v3 - name: Create an issue From 71bd6277566f7feab61e3c0a8bf37730c9a02900 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:30:42 -0400 Subject: [PATCH 23/34] Deal with foolish boolean formats --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 88c7361d..f223461d 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -161,7 +161,7 @@ jobs: needs: - archive-run-small - archive-run-large - if: inputs.create_github_issue + if: inputs.create_github_issue == true || inputs.create_github_issue == "" steps: - uses: actions/checkout@v3 - name: Create an issue From 4bf7d5c1e614ab70e10659b48a9415b1a983f2ff Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:32:05 -0400 Subject: [PATCH 24/34] Appease the GHA formatting nightmare --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index f223461d..003fe565 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -161,7 +161,7 @@ jobs: needs: - archive-run-small - archive-run-large - if: inputs.create_github_issue == true || inputs.create_github_issue == "" + if: inputs.create_github_issue == true || inputs.create_github_issue == '' steps: - uses: actions/checkout@v3 - name: Create an issue From a1301a09e1efb187677317a5e6e186f19ec64128 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:41:42 -0400 Subject: [PATCH 25/34] More playing around with github issue creation --- .github/workflows/run-archiver.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 003fe565..6579e152 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -16,7 +16,7 @@ on: type: boolean create_github_issue: description: "Create a Github issue from this run?" - default: true + default: false required: true type: boolean schedule: @@ -161,7 +161,7 @@ jobs: needs: - archive-run-small - archive-run-large - if: inputs.create_github_issue == true || inputs.create_github_issue == '' + if: inputs.create_github_issue steps: - uses: actions/checkout@v3 - name: Create an issue From 48297538b12eeb088959e10f83c29f672523c82e Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:42:31 -0400 Subject: [PATCH 26/34] Even more tooling with github issue creation --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 6579e152..43dbf122 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,11 +157,11 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: + if: inputs.create_github_issue runs-on: ubuntu-latest needs: - archive-run-small - archive-run-large - if: inputs.create_github_issue steps: - uses: actions/checkout@v3 - name: Create an issue From 0d51e249b4127815805c8bb2f12fa4d233450276 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:48:05 -0400 Subject: [PATCH 27/34] Just try everything --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 43dbf122..dff36168 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,7 +157,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: inputs.create_github_issue + if: ${{ inputs.create_github_issue == 'true' || inputs.create_github_issue == true || inputs.create_github_issue == '' }} runs-on: ubuntu-latest needs: - archive-run-small From 0e015c665a3ffd83e463eac9c2dd07ca59cfe25d Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 12:55:51 -0400 Subject: [PATCH 28/34] Try different tack for boolean --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index dff36168..dd4a1716 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,7 +157,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: ${{ inputs.create_github_issue == 'true' || inputs.create_github_issue == true || inputs.create_github_issue == '' }} + if: ${{ github.events.inputs.create_github_issue != 'false' }} runs-on: ubuntu-latest needs: - archive-run-small From 6fdd34798c51d43a1656122f7b9e6d603a4fdd10 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 13:00:57 -0400 Subject: [PATCH 29/34] Try different tack for boolean --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index dd4a1716..5817f312 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,7 +157,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: ${{ github.events.inputs.create_github_issue != 'false' }} + if: inputs.create_github_issue != 'false' runs-on: ubuntu-latest needs: - archive-run-small From 1fe8ba00528361373c91ff7e6ac819183c5a3f93 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 13:02:31 -0400 Subject: [PATCH 30/34] Try false instead of false --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 5817f312..6f47624c 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,7 +157,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: inputs.create_github_issue != 'false' + if: inputs.create_github_issue != false runs-on: ubuntu-latest needs: - archive-run-small From f179acc85a397567a6f5373f5d3a440f345076cf Mon Sep 17 00:00:00 2001 From: e-belfer Date: Tue, 18 Jun 2024 13:08:47 -0400 Subject: [PATCH 31/34] Handle skips and irrational GHA format requirements --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 6f47624c..3b75fe05 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -157,7 +157,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: inputs.create_github_issue != false + if: always() && inputs.create_github_issue != false runs-on: ubuntu-latest needs: - archive-run-small From 02d0eb6635ef3164528e001a7306fba79332de7a Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 19 Jun 2024 10:23:51 -0400 Subject: [PATCH 32/34] Make scheduled run workflow more explicit, remove redundant logs in issue template --- .github/ISSUE_TEMPLATE/monthly-archive-update.md | 5 +---- .github/workflows/run-archiver.yml | 15 +++++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/monthly-archive-update.md b/.github/ISSUE_TEMPLATE/monthly-archive-update.md index 97a23a4e..798501c5 100644 --- a/.github/ISSUE_TEMPLATE/monthly-archive-update.md +++ b/.github/ISSUE_TEMPLATE/monthly-archive-update.md @@ -8,7 +8,7 @@ assignees: e-belfer --- # Summary of results: -See the job run results [here]({{ env.RUN_URL }}). +See the job run logs and results [here]({{ env.RUN_URL }}). # Review and publish archives @@ -55,6 +55,3 @@ For each run that failed because of another reason (e.g., underlying data change ```[tasklist] - [ ] dataset ``` - -# Relevant logs -[Link to logs from GHA run]({{ env.RUN_URL }}) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 3b75fe05..81c0a234 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -1,12 +1,16 @@ --- name: run-archiver +env: + SMALL_DATASETS: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' + LARGE_DATASETS: '"epacems"' # Datasets requiring a large (paid) runner + on: workflow_dispatch: inputs: small_runner: description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' - default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' + default: ${{ env.SMALL_DATASETS }} required: true type: string large_runner: @@ -29,7 +33,7 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"')) }} + dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || env.SMALL_DATASETS )) }} fail-fast: false runs-on: ubuntu-latest steps: @@ -73,14 +77,13 @@ jobs: path: ${{ matrix.dataset }}_run_summary.json archive-run-large: - if: inputs.large_runner + if: ${{ github.event_name == 'schedule' || inputs.large_runner }} defaults: run: shell: bash -l {0} strategy: matrix: - dataset: - - epacems + dataset: ${{ fromJSON(format('[{0}]', env.LARGE_DATASETS )) }} fail-fast: false runs-on: group: large-runner-group @@ -157,7 +160,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} make-github-issue: - if: always() && inputs.create_github_issue != false + if: ${{ always() && (github.event_name == 'schedule' || inputs.create_github_issue == true) }} runs-on: ubuntu-latest needs: - archive-run-small From 844ecc28906a0356016ea1b5adef1550bd7cb928 Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 19 Jun 2024 10:26:18 -0400 Subject: [PATCH 33/34] Workflow dispatch doesn't like env variables as input --- .github/workflows/run-archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 81c0a234..6bea74bf 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -10,7 +10,7 @@ on: inputs: small_runner: description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' - default: ${{ env.SMALL_DATASETS }} + default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' required: true type: string large_runner: From 140473c42f57ceb546f23579eac61903a9b7488e Mon Sep 17 00:00:00 2001 From: e-belfer Date: Wed, 19 Jun 2024 10:49:33 -0400 Subject: [PATCH 34/34] Roll back env vars due to difficult GHA behavior --- .github/workflows/run-archiver.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index 6bea74bf..735dadb2 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -1,15 +1,12 @@ --- name: run-archiver -env: - SMALL_DATASETS: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' - LARGE_DATASETS: '"epacems"' # Datasets requiring a large (paid) runner - on: workflow_dispatch: inputs: small_runner: description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' + # We can't pass env variables to the workflow_dispatch, so we manually list all small datasets here. default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' required: true type: string @@ -33,7 +30,8 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || env.SMALL_DATASETS )) }} + # Note that we can't pass global env variables to the matrix, so we manually reproduce the list of datasets here. + dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' )) }} fail-fast: false runs-on: ubuntu-latest steps: @@ -83,7 +81,8 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: ${{ fromJSON(format('[{0}]', env.LARGE_DATASETS )) }} + # Note that we can't pass global env variables to the matrix, so we manually list the datasets here. + dataset: ${{ fromJSON(format('[{0}]', '"epacems"' )) }} fail-fast: false runs-on: group: large-runner-group