MVP improvements to automated archive runs #357

Merged Jun 19, 2024. 35 commits; the diff shown below reflects changes from 32 of them.

Commits:
7ca2750  Add validation failures to slackbot (e-belfer, Jun 17, 2024)
13302d9  Fix skipping failed tests (e-belfer, Jun 17, 2024)
9425dd5  Add format_message and reduce duplicated code (e-belfer, Jun 17, 2024)
4cf7715  Merge branch 'main' into add-failures-to-slackbot (e-belfer, Jun 17, 2024)
9610964  Test by running on some busted and non-busted archives (e-belfer, Jun 17, 2024)
bf58f1c  Fix issue in test run-archiver.yml (e-belfer, Jun 17, 2024)
68c6edf  Shrink test and flatten validation test lists (e-belfer, Jun 18, 2024)
83961e3  Update issue template, add template creation to workflow (e-belfer, Jun 18, 2024)
60a76c5  Fix workflow format (e-belfer, Jun 18, 2024)
dd270b4  Fix link formatting (e-belfer, Jun 18, 2024)
a2cb688  Make slack validation failures more succinct (e-belfer, Jun 18, 2024)
f304b4e  Attempt to add dataset selection in manual run (e-belfer, Jun 18, 2024)
ce9158e  Try to fix inputs (e-belfer, Jun 18, 2024)
7cb879a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 18, 2024)
3d31147  Try to fix matrix strategy (e-belfer, Jun 18, 2024)
53c12d4  Try to fix matrix strategy (e-belfer, Jun 18, 2024)
6242c47  Try to fix matrix strategy (e-belfer, Jun 18, 2024)
8bbdf9c  Fix syntax (e-belfer, Jun 18, 2024)
6094cb4  Test syntax and additional quotes (e-belfer, Jun 18, 2024)
a7622b0  Remove epacems from large, try to get filtering to work (e-belfer, Jun 18, 2024)
ec4eea9  Add back large runner (e-belfer, Jun 18, 2024)
ad7464c  Remove epacems from default small runner list (e-belfer, Jun 18, 2024)
5883670  Fix github issue creation (e-belfer, Jun 18, 2024)
71bd627  Deal with foolish boolean formats (e-belfer, Jun 18, 2024)
4bf7d5c  Appease the GHA formatting nightmare (e-belfer, Jun 18, 2024)
a1301a0  More playing around with github issue creation (e-belfer, Jun 18, 2024)
4829753  Even more tooling with github issue creation (e-belfer, Jun 18, 2024)
0d51e24  Just try everything (e-belfer, Jun 18, 2024)
0e015c6  Try different tack for boolean (e-belfer, Jun 18, 2024)
6fdd347  Try different tack for boolean (e-belfer, Jun 18, 2024)
1fe8ba0  Try false instead of false (e-belfer, Jun 18, 2024)
f179acc  Handle skips and irrational GHA format requirements (e-belfer, Jun 18, 2024)
02d0eb6  Make scheduled run workflow more explicit, remove redundant logs in i… (e-belfer, Jun 19, 2024)
844ecc2  Workflow dispatch doesn't like env variables as input (e-belfer, Jun 19, 2024)
140473c  Roll back env vars due to difficult GHA behavior (e-belfer, Jun 19, 2024)
11 changes: 7 additions & 4 deletions .github/ISSUE_TEMPLATE/monthly-archive-update.md
@@ -1,12 +1,15 @@
---
name: Monthly archive update
about: Template for publishing monthly archives.
-title: Publish archives for the month of MONTH
+title: Publish {{ date | date('MMMM Do YYYY') }} archives
labels: automation, zenodo
-assignees: ''
+assignees: e-belfer

---

+# Summary of results:
+See the job run results [here]({{ env.RUN_URL }}).
+
# Review and publish archives

For each of the following archives, find the run status in the Github archiver run. If validation tests pass, manually review the archive and publish. If no changes are detected, delete the draft. If changes are detected, manually review the archive following the guidelines in step 3 of `README.md`, then publish the new version. Then check the box here to confirm publication status, adding a note on the status (e.g., "v1 published", "no changes detected, draft deleted"):
@@ -50,8 +53,8 @@ If the validation failure is blocking (e.g., file format incorrect, whole datase
For each run that failed for another reason (e.g., underlying data changes, code failures), create an issue describing the failure and take the necessary steps to resolve it.

```[tasklist]
-- [ ]
+- [ ] dataset
```

# Relevant logs
-[Link to logs from GHA run]( PLEASE FIND THE ACTUAL LINK AND FILL IN HERE )
+[Link to logs from GHA run]({{ env.RUN_URL }})
Review comment (Member): non-blocking: we could maybe strip this whole section if there's that summary of results section above.

57 changes: 34 additions & 23 deletions .github/workflows/run-archiver.yml
@@ -3,6 +3,22 @@ name: run-archiver

on:
  workflow_dispatch:
+    inputs:
+      small_runner:
+        description: 'Small runner: Comma-separated list of datasets to archive (e.g., "ferc2","ferc6").'
+        default: '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"'
+        required: true
+        type: string
+      large_runner:
+        description: "Kick off large runners (for epacems)?"
+        required: true
+        default: false
+        type: boolean
+      create_github_issue:
+        description: "Create a Github issue from this run?"
+        default: false
+        required: true
+        type: boolean
  schedule:
    - cron: "21 8 1 * *" # 8:21 AM UTC, first of every month

@@ -13,28 +29,7 @@ jobs:
        shell: bash -l {0}
    strategy:
      matrix:
-        dataset:
-          - eia176
-          - eia191
-          - eia757a
-          - eia860
-          - eia861
-          - eia860m
-          - eia923
-          - eia930
-          - eiaaeo
-          - eiawater
-          - eia_bulk_elec
-          - epacamd_eia
-          - ferc1
-          - ferc2
-          - ferc6
-          - ferc60
-          - ferc714
-          - mshamines
-          - nrelatb
-          - phmsagas
-
+        dataset: ${{ fromJSON(format('[{0}]', inputs.small_runner || '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923","eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1","ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"')) }}
Review comment (Member Author, e-belfer, Jun 18, 2024): If scheduled, should default to the full list.

Review comment (Member): blocking: What do you think of defining one list of "all the damn datasets" in env so we can access that everywhere we need to?

Review comment (Member Author, e-belfer): It's a bit tricky here because there should in fact be two variables - "small runner datasets" and "large runner datasets". The alternative is one variable with some kind of filtering, but I couldn't figure out how to do that neatly. But I can make a "small" and "large" dataset list.

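To make the new matrix expression concrete, here is a rough Python sketch (an illustration, not GHA's actual expression engine) of how `fromJSON(format('[{0}]', inputs.small_runner || '<default>'))` builds the dataset list; `DEFAULT_SMALL` mirrors the `small_runner` input's default string from the workflow above.

```python
import json

# Mirrors the `small_runner` input default defined in the workflow above.
DEFAULT_SMALL = (
    '"eia176","eia191","eia757a","eia860","eia860m","eia861","eia923",'
    '"eia930","eiaaeo","eiawater","eia_bulk_elec","epacamd_eia","ferc1",'
    '"ferc2","ferc6","ferc60","ferc714","mshamines","nrelatb","phmsagas"'
)


def matrix_datasets(small_runner_input: str | None) -> list[str]:
    """Approximate fromJSON(format('[{0}]', inputs.small_runner || default)).

    On a scheduled run there is no workflow_dispatch input, so the `||`
    fallback selects the full default list.
    """
    raw = small_runner_input or DEFAULT_SMALL
    return json.loads(f"[{raw}]")  # format('[{0}]', raw), then fromJSON


print(matrix_datasets('"ferc2","ferc6"'))  # manual run: ['ferc2', 'ferc6']
print(len(matrix_datasets(None)))  # scheduled run: all 20 default datasets
```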
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
@@ -78,6 +73,7 @@ jobs:
          path: ${{ matrix.dataset }}_run_summary.json

  archive-run-large:
+    if: inputs.large_runner
Review comment (Member Author, e-belfer): If set as true in workflow dispatch, or triggered by a scheduled run, this should run.

Review comment (Member): Hm, if triggered by a scheduled run, I would expect inputs.large_runner to be empty and thus archive-run-large to get skipped - am I missing something here?

Review comment (Member Author, e-belfer): My assumption was that an empty string would actually get evaluated as true, but I could be totally off-base here. I think your suggestion re: incorporating the type of run below is great and I'll incorporate it here.

Review comment (Member): I think an unset variable here would get treated as a "falsey" value: https://docs.github.com/en/actions/learn-github-actions/expressions#literals

"Note that in conditionals, falsy values (false, 0, -0, "", '', null) are coerced to false and truthy (true and other non-falsy values) are coerced to true."

And actually I bet an unset variable is actually null instead of '', now that I look at those docs.

Review comment (Member Author, e-belfer): Either way, making this more explicit seems wise.

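As a concrete rendering of the falsy-coercion point from the docs quoted above, here is a tiny Python approximation (not GHA's real evaluator) of how a bare `if:` condition treats the values in play for `inputs.large_runner`:

```python
# GHA-style truthiness, per the expressions documentation quoted above:
# false, 0, -0, "", '' and null are falsy; everything else is truthy.
def gha_truthy(value) -> bool:
    return value not in (False, 0, -0, "", None)


# workflow_dispatch with the box checked: job runs.
assert gha_truthy(True)
# workflow_dispatch with the box unchecked: job skipped.
assert not gha_truthy(False)
# Scheduled run: inputs.large_runner is unset (null), so the job is
# skipped, which is why the reviewers want a more explicit condition.
assert not gha_truthy(None)
```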
    defaults:
      run:
        shell: bash -l {0}
@@ -91,7 +87,6 @@
      labels: ubuntu-22.04-4core
    steps:
      - uses: actions/checkout@v4
-
      - name: Install Conda environment using mamba
        uses: mamba-org/setup-micromamba@v1
        with:
@@ -160,3 +155,19 @@ jobs:
          payload: ${{ steps.all_summaries.outputs.SLACK_PAYLOAD }}
        env:
          SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}

+  make-github-issue:
+    if: always() && inputs.create_github_issue != false
Review comment (Member Author, e-belfer, Jun 18, 2024): See discussion here for use of always(): actions/runner#491. Completely unhinged GHA behavior, but what can we do.

Review comment (Member): Whee! hey, if it works it works.

Is the idea behind inputs.create_github_issue != false that "" != false and then we will get the github issue in scheduled runs as well as the specified manual runs?

If so, what do you think of using github.event_name to differentiate between workflow_dispatch and scheduled runs? That is more explicitly "do X step if it's scheduled or if there's some specific workflow_dispatch input."

Review comment (Member Author, e-belfer, Jun 19, 2024): I think that's a great idea, can incorporate it here. But yes, that was my original idea.

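A hedged sketch of why this condition fires on scheduled runs: `always()` keeps the job eligible even when the jobs it needs have failed (the actions/runner#491 workaround discussed above), and the `!= false` comparison passes when the input is unset.

```python
def should_create_issue(create_github_issue) -> bool:
    """Approximate the GHA expression `inputs.create_github_issue != false`."""
    # On a scheduled run the workflow_dispatch input is unset (null/None),
    # and null != false evaluates to true, so the issue is still created.
    return create_github_issue != False  # mirrors the GHA comparison


assert should_create_issue(None)  # scheduled run: issue created
assert should_create_issue(True)  # manual run, box checked: created
assert not should_create_issue(False)  # manual run, box unchecked: skipped
```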
+    runs-on: ubuntu-latest
+    needs:
+      - archive-run-small
+      - archive-run-large
+    steps:
+      - uses: actions/checkout@v3
+      - name: Create an issue
+        uses: JasonEtco/[email protected]
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        with:
+          filename: .github/ISSUE_TEMPLATE/monthly-archive-update.md
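As a rough illustration of the data flow (the `render` helper below is hypothetical; the real action does its own {{ }}-style templating with filters such as `date`), the RUN_URL env var assembled above is what the issue template's `{{ env.RUN_URL }}` placeholders resolve to:

```python
# Hypothetical stand-in for the {{ env.* }} substitution that
# JasonEtco/create-an-issue performs on the issue template.
def render(template: str, env: dict[str, str]) -> str:
    rendered = template
    for key, value in env.items():
        rendered = rendered.replace(f"{{{{ env.{key} }}}}", value)
    return rendered


# RUN_URL mirrors the expression assembled in the workflow step above;
# OWNER/REPO and the run id are invented placeholders.
env = {"RUN_URL": "https://github.com/OWNER/REPO/actions/runs/123456789"}
body = "See the job run results [here]({{ env.RUN_URL }})."
print(render(body, env))
# -> See the job run results [here](https://github.com/OWNER/REPO/actions/runs/123456789).
```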
85 changes: 64 additions & 21 deletions scripts/make_slack_notification_message.py
@@ -29,41 +29,81 @@ def _parse_args():
    return parser.parse_args()


+def _format_message(
+    url: str, name: str, content: str, max_len: int = 3000
+) -> list[dict]:
+    text = f"<{url}|*{name}*>\n{content}"[:max_len]
+    return [
+        {
+            "type": "section",
+            "text": {"type": "mrkdwn", "text": text},
+        },
+    ]
+
+
+def _format_failures(summary: dict) -> list[dict]:
+    name = summary["dataset_name"]
+    url = summary["record_url"]
+
+    test_failures = defaultdict(list)
+    for validation_test in summary["validation_tests"]:
+        if (not validation_test["success"]) and (
+            validation_test["required_for_run_success"]
+        ):
+            test_failures = ". ".join(
+                [validation_test["name"], ". ".join(validation_test["notes"])]
+            )  # Flatten list of lists
+
+    if test_failures:
+        failures = f"```\n{json.dumps(test_failures, indent=2)}\n```"
+    else:
+        return None
+
+    return _format_message(url=url, name=name, content=failures)


+def _format_summary(summary: dict) -> list[dict]:
+    name = summary["dataset_name"]
+    url = summary["record_url"]
+    if any(not test["success"] for test in summary["validation_tests"]):
+        return None  # Don't report on file changes if any test failed.
+
+    if file_changes := summary["file_changes"]:
+        abridged_changes = defaultdict(list)
+        for change in file_changes:
+            abridged_changes[change["diff_type"]].append(change["name"])
+        changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```"
+    else:
+        changes = "No changes."
+
+    return _format_message(url=url, name=name, content=changes)


def main(summary_files: list[Path]) -> None:
    """Format summary files for Slack perusal."""
    summaries = []
    for summary_file in summary_files:
        with summary_file.open() as f:
            summaries.extend(json.loads(f.read()))

-    def format_summary(summary: dict) -> list[dict]:
-        name = summary["dataset_name"]
-        url = summary["record_url"]
-        if file_changes := summary["file_changes"]:
-            abridged_changes = defaultdict(list)
-            for change in file_changes:
-                abridged_changes[change["diff_type"]].append(change["name"])
-            changes = f"```\n{json.dumps(abridged_changes, indent=2)}\n```"
-        else:
-            changes = "No changes."
-
-        max_len = 3000
-        text = f"<{url}|*{name}*>\n{changes}"[:max_len]
-        return [
-            {
-                "type": "section",
-                "text": {"type": "mrkdwn", "text": text},
-            },
-        ]
+    failed_blocks = list(
+        itertools.chain.from_iterable(
+            _format_failures(s) for s in summaries if _format_failures(s) is not None
+        )
+    )

    unchanged_blocks = list(
        itertools.chain.from_iterable(
-            format_summary(s) for s in summaries if not s["file_changes"]
+            _format_summary(s)
+            for s in summaries
+            if (not s["file_changes"]) and (_format_summary(s) is not None)
        )
    )
    changed_blocks = list(
        itertools.chain.from_iterable(
-            format_summary(s) for s in summaries if s["file_changes"]
+            _format_summary(s)
+            for s in summaries
+            if (s["file_changes"]) and (_format_summary(s) is not None)
        )
    )

@@ -73,6 +113,8 @@ def header_block(text: str) -> dict:
    def section_block(text: str) -> dict:
        return {"type": "section", "text": {"type": "mrkdwn", "text": text}}

+    if failed_blocks:
+        failed_blocks = [section_block("*Validation Failures*")] + failed_blocks
    if changed_blocks:
        changed_blocks = [section_block("*Changed*")] + changed_blocks
    if unchanged_blocks:
@@ -84,6 +126,7 @@ def section_block(text: str) -> dict:
"attachments": [
{
"blocks": [header_block("Archiver Run Outcomes")]
+ failed_blocks
+ changed_blocks
+ unchanged_blocks,
}
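To tie the script together, here is a hedged sketch of the run-summary shape it consumes and the Slack payload it emits. The field names come from the code above; the concrete values, the test name, and the `diff_type` strings are invented for illustration, and since the `header_block` body is not shown in this diff, a plain_text header is assumed.

```python
import json

# Invented example matching the fields the script reads:
# dataset_name, record_url, validation_tests, file_changes.
example_summary = {
    "dataset_name": "eia860",
    "record_url": "https://zenodo.org/records/123456",  # placeholder URL
    "validation_tests": [
        {
            "name": "file_format_check",  # hypothetical test name
            "success": False,
            "required_for_run_success": True,
            "notes": ["unexpected file extension: .txt"],
        }
    ],
    "file_changes": [
        {"name": "eia860-2023.zip", "diff_type": "UPDATED"}  # invented values
    ],
}

# _format_failures(example_summary) yields one mrkdwn section block;
# _format_message truncates its text to 3000 characters, Slack's limit
# for a section block. The final payload nests all blocks like so:
payload = {
    "attachments": [
        {
            "blocks": [
                # assumed header_block shape; its body is not in this diff
                {
                    "type": "header",
                    "text": {"type": "plain_text", "text": "Archiver Run Outcomes"},
                },
                # ...then failed_blocks + changed_blocks + unchanged_blocks
            ]
        }
    ]
}
print(json.dumps(payload, indent=2))
```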