diff --git a/.github/ISSUE_TEMPLATE/monthly-archive-update.md b/.github/ISSUE_TEMPLATE/monthly-archive-update.md index 360a698d..798501c5 100644 --- a/.github/ISSUE_TEMPLATE/monthly-archive-update.md +++ b/.github/ISSUE_TEMPLATE/monthly-archive-update.md @@ -1,12 +1,15 @@ --- name: Monthly archive update about: Template for publishing monthly archives. -title: Publish archives for the month of MONTH +title: Publish {{ date | date('MMMM Do YYYY') }} archives labels: automation, zenodo -assignees: '' +assignees: e-belfer --- +# Summary of results: +See the job run logs and results [here]({{ env.RUN_URL }}). + # Review and publish archives For each of the following archives, find the run status in the Github archiver run. If validation tests pass, manually review the archive and publish. If no changes detected, delete the draft. If changes are detected, manually review the archive following the guidelines in step 3 of `README.md`, then publish the new version. Then check the box here to confirm publication status, adding a note on the status (e.g., "v1 published", "no changes detected, draft deleted"): @@ -50,8 +53,5 @@ If the validation failure is blocking (e.g., file format incorrect, whole datase For each run that failed because of another reason (e.g., underlying data changes, code failures), create an issue describing the failure and take necessary steps to resolve it. ```[tasklist] -- [ ] +- [ ] dataset ``` - -# Relevant logs -[Link to logs from GHA run]( PLEASE FIND THE ACTUAL LINK AND FILL IN HERE ) diff --git a/.github/workflows/run-archiver.yml b/.github/workflows/run-archiver.yml index f948907b..735dadb2 100644 --- a/.github/workflows/run-archiver.yml +++ b/.github/workflows/run-archiver.yml @@ -3,6 +3,23 @@ name: run-archiver on: workflow_dispatch: + inputs: + small_runner: + description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").' 
+ # We can't pass env variables to the workflow_dispatch, so we manually list all small datasets here. + default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' + required: true + type: string + large_runner: + description: "Kick off large runners (for epacems)?" + required: true + default: false + type: boolean + create_github_issue: + description: "Create a Github issue from this run?" + default: false + required: true + type: boolean schedule: - cron: "21 8 1 * *" # 8:21 AM UTC, first of every month @@ -13,28 +30,8 @@ jobs: shell: bash -l {0} strategy: matrix: - dataset: - - eia176 - - eia191 - - eia757a - - eia860 - - eia861 - - eia860m - - eia923 - - eia930 - - eiaaeo - - eiawater - - eia_bulk_elec - - epacamd_eia - - ferc1 - - ferc2 - - ferc6 - - ferc60 - - ferc714 - - mshamines - - nrelatb - - phmsagas - + # Note that we can't pass global env variables to the matrix, so we manually reproduce the list of datasets here. + dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"' )) }} fail-fast: false runs-on: ubuntu-latest steps: @@ -78,20 +75,20 @@ jobs: path: ${{ matrix.dataset }}_run_summary.json archive-run-large: + if: ${{ github.event_name == 'schedule' || inputs.large_runner }} defaults: run: shell: bash -l {0} strategy: matrix: - dataset: - - epacems + # Note that we can't pass global env variables to the matrix, so we manually list the datasets here. 
+        dataset: ${{ fromJSON(format('[{0}]', '"epacems"' )) }}
       fail-fast: false
     runs-on:
       group: large-runner-group
       labels: ubuntu-22.04-4core
     steps:
       - uses: actions/checkout@v4
-
       - name: Install Conda environment using mamba
         uses: mamba-org/setup-micromamba@v1
         with:
@@ -160,3 +157,19 @@ jobs:
           payload: ${{ steps.all_summaries.outputs.SLACK_PAYLOAD }}
         env:
           SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}
+
+  make-github-issue:
+    if: ${{ always() && (github.event_name == 'schedule' || inputs.create_github_issue == true) }}
+    runs-on: ubuntu-latest
+    needs:
+      - archive-run-small
+      - archive-run-large
+    steps:
+      - uses: actions/checkout@v4
+      - name: Create an issue
+        uses: JasonEtco/create-an-issue@v2.9.2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        with:
+          filename: .github/ISSUE_TEMPLATE/monthly-archive-update.md
diff --git a/scripts/make_slack_notification_message.py b/scripts/make_slack_notification_message.py
index b9349184..097947aa 100755
--- a/scripts/make_slack_notification_message.py
+++ b/scripts/make_slack_notification_message.py
@@ -29,6 +29,68 @@ def _parse_args():
     return parser.parse_args()
 
 
+def _format_message(
+    url: str, name: str, content: str, max_len: int = 3000
+) -> list[dict]:
+    """Build a one-element list holding a Slack section block for a dataset.
+
+    The text links ``name`` to ``url``, is followed by ``content``, and is
+    truncated to ``max_len`` characters.
+    """
+    text = f"<{url}|*{name}*>\n{content}"[:max_len]
+    return [
+        {
+            "type": "section",
+            "text": {"type": "mrkdwn", "text": text},
+        },
+    ]
+
+
+def _format_failures(summary: dict) -> "list[dict] | None":
+    """Format the run-blocking validation failures of one archive run.
+
+    Returns the Slack blocks listing every failed validation test that is
+    required for run success, or None when there are none.
+    """
+    name = summary["dataset_name"]
+    url = summary["record_url"]
+
+    # Collect every blocking failure; one entry per failed test.
+    test_failures = [
+        ". ".join([validation_test["name"], *validation_test["notes"]])
+        for validation_test in summary["validation_tests"]
+        if not validation_test["success"]
+        and validation_test["required_for_run_success"]
+    ]
+    if not test_failures:
+        return None
+
+    failures = f"```\n{json.dumps(test_failures, indent=2)}\n```"
+    return _format_message(url=url, name=name, content=failures)
+
+
+def _format_summary(summary: dict) -> "list[dict] | None":
+    """Format the file changes of one archive run.
+
+    Returns None when any validation test failed, so that file changes are
+    not reported for such runs.
+    """
+    name = summary["dataset_name"]
+    url = summary["record_url"]
+    if any(not test["success"] for test in summary["validation_tests"]):
+        return None  # Don't report on file changes if any test failed.
+
+    if file_changes := summary["file_changes"]:
+        abridged_changes = defaultdict(list)
+        for change in file_changes:
+            abridged_changes[change["diff_type"]].append(change["name"])
+        changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```"
+    else:
+        changes = "No changes."
+
+    return _format_message(url=url, name=name, content=changes)
+
+
 def main(summary_files: list[Path]) -> None:
     """Format summary files for Slack perusal."""
     summaries = []
@@ -36,34 +98,24 @@ def main(summary_files: list[Path]) -> None:
         with summary_file.open() as f:
             summaries.extend(json.loads(f.read()))
 
-    def format_summary(summary: dict) -> list[dict]:
-        name = summary["dataset_name"]
-        url = summary["record_url"]
-        if file_changes := summary["file_changes"]:
-            abridged_changes = defaultdict(list)
-            for change in file_changes:
-                abridged_changes[change["diff_type"]].append(change["name"])
-            changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```"
-        else:
-            changes = "No changes."
-
-        max_len = 3000
-        text = f"<{url}|*{name}*>\n{changes}"[:max_len]
-        return [
-            {
-                "type": "section",
-                "text": {"type": "mrkdwn", "text": text},
-            },
-        ]
+    failed_blocks = list(
+        itertools.chain.from_iterable(
+            blocks for s in summaries if (blocks := _format_failures(s)) is not None
+        )
+    )
 
     unchanged_blocks = list(
         itertools.chain.from_iterable(
-            format_summary(s) for s in summaries if not s["file_changes"]
+            blocks
+            for s in summaries
+            if (not s["file_changes"]) and (blocks := _format_summary(s)) is not None
         )
     )
     changed_blocks = list(
         itertools.chain.from_iterable(
-            format_summary(s) for s in summaries if s["file_changes"]
+            blocks
+            for s in summaries
+            if s["file_changes"] and (blocks := _format_summary(s)) is not None
         )
     )
 
@@ -73,6 +125,8 @@ def header_block(text: str) -> dict:
     def section_block(text: str) -> dict:
         return {"type": "section", "text": {"type": "mrkdwn", "text": text}}
 
+    if failed_blocks:
+        failed_blocks = [section_block("*Validation Failures*")] + failed_blocks
     if changed_blocks:
         changed_blocks = [section_block("*Changed*")] + changed_blocks
     if unchanged_blocks:
@@ -84,6 +138,7 @@ def section_block(text: str) -> dict:
                 "attachments": [
                     {
                         "blocks": [header_block("Archiver Run Outcomes")]
+                        + failed_blocks
                         + changed_blocks
                         + unchanged_blocks,
                     }