diff --git a/.github/workflows/overflow-test.yaml b/.github/workflows/overflow-test.yaml index bfedf83..00eafa0 100644 --- a/.github/workflows/overflow-test.yaml +++ b/.github/workflows/overflow-test.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest name: Overflow Test steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Generate Test Summary id: generate-summary @@ -23,7 +23,7 @@ jobs: - name: If there is an overflow summary, archive it if: ${{steps.generate-summary.outputs.Overflow}} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ${{steps.generate-summary.outputs.Overflow}} path: ${{steps.generate-summary.outputs.Overflow}} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7cd87f3..90ca794 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest name: Smoke Test steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Generate Test Summary id: generate-summary @@ -18,3 +18,6 @@ jobs: - name: If there are alerts, echo them if: ${{steps.generate-summary.outputs.alerts}} run: echo "${{steps.generate-summary.outputs.alerts}}" + + - name: Echo the thermometer + run: echo "${{steps.generate-summary.outputs.thermometer}}" diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index bbcceb7..dd4d2f1 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -7,10 +7,10 @@ jobs: runs-on: ubuntu-latest name: Unit test steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' diff --git a/DEVELOPERS_DEVELOPERS_DEVELOPERS.md b/DEVELOPERS_DEVELOPERS_DEVELOPERS.md index bf36689..014338d 100644 --- a/DEVELOPERS_DEVELOPERS_DEVELOPERS.md +++ b/DEVELOPERS_DEVELOPERS_DEVELOPERS.md @@ -1,8 +1,25 @@ -# Building and testing locally +# Building, 
testing, releasing The `ciclops` GitHub Action runs using a Docker container that encapsulates the Python script that does the CI test analysis. +## Releasing + +We recommend that users of Ciclops use released versions rather than `main`. +For testing, it may be convenient to [use a full SHA](#testing-within-a-calling-github-workflow). + +The procedure for cutting a release: + +1. Decide on the version number (following SemVer) +1. Update the [Release notes file](ReleaseNotes.md), following the convention + in the file, i.e. the version number included in the section, and the release + date in the first line +1. Review and merge the release notes, and create and push a new tag with the + desired version number +1. Cut a new release in [GitHub](https://github.com/cloudnative-pg/ciclops/releases/new), + choosing the new tag, and pasting the relevant content from the + Release Notes file (no need for the release date line). + ## Developing and testing You can test directly with the Python code on the `example-artifacts` directory, @@ -72,6 +89,22 @@ CIclops has the beginning of a unit test suite. You can run it with: python3 -m unittest ``` +## Testing within a calling GitHub workflow + +Even with unit tests and local tests, it's good to try Ciclops code out from a +client workflow. We can use a full-length commit SHA to test out changes, +before cutting a new release. +See the [GitHub document on using third party actions](https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-third-party-actions). 
+ +Example: +``` yaml + - name: Compute the E2E test summary + id: generate-summary + uses: cloudnative-pg/ciclops@FULL_LENGTH_COMMIT_SHA + with: + artifact_directory: test-artifacts/da +``` + ## How it works The files in this repository are needed for the Dockerfile to build and run, of diff --git a/README.md b/README.md index f0ef6e1..cd0c191 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,11 @@ watch over Continuous Integration pipelines for all eternity. ## Outputs -Two outputs might be produced: +Up to three outputs might be produced: + +- `thermometer`: this will contain stand-alone text with a color-coded list + of test metrics that can serve as an overview of the state of the test suite + on CI/CD. This is generated on every execution of Ciclops. - `alerts`: this will contain stand-alone text with systematic failures detected by CIclops. It is meant to enable further steps in the calling @@ -97,7 +101,12 @@ There are two advanced cases we want to call attention to: called `Overflow`. 2. Monitoring with chatops \ - CIclops will create a series of alerts when systematic failures are detected. + Ciclops will generate a "thermometer" on every execution, offering a + color-coded overview of the test health. This thermometer is included in + the GitHub summary, and in addition, is exported as an output in plain + text, which can be sent via chatops. + In addition, Ciclops will create a series of alerts when systematic failures + are detected. 
By "systematic", we mean cases such as: - all test combinations have failed @@ -131,6 +140,13 @@ The following snippet shows how to use these features: path: ${{steps.generate-summary.outputs.Overflow}} retention-days: 7 + - name: Get a slack message with the Ciclops thermometer + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_USERNAME: cnpg-bot + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_MESSAGE: ${{steps.generate-summary.outputs.thermometer}} + - name: If there are alerts, send them over Slack if: ${{steps.generate-summary.outputs.alerts}} uses: rtCamp/action-slack-notify@v2 diff --git a/action.yaml b/action.yaml index abb6486..3127fde 100644 --- a/action.yaml +++ b/action.yaml @@ -22,6 +22,8 @@ inputs: outputs: alerts: description: 'Any systematic failures found by CIclops' + thermometer: + description: 'A color-coded health meter' Overflow: description: 'The name of the file where the full report was written, on oveflow' runs: diff --git a/example-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json b/example-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json index 9ad974f..ab2b262 100644 --- a/example-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json +++ b/example-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", "matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git a/example-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json b/example-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json index 41628dd..fba938d 100644 --- a/example-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json +++ b/example-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", 
"matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git a/example-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json b/example-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json index d2d2d66..b7eb4d4 100644 --- a/example-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json +++ b/example-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", "matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git a/few-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json b/few-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json index 9ad974f..ab2b262 100644 --- a/few-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json +++ b/few-artifacts/id1_0b185c51a60964ecab5bb7d97458ca95fd421f325f3896ed239d5d3f.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", "matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git a/few-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json b/few-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json index 00e9065..0f3beaf 100644 --- a/few-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json +++ b/few-artifacts/id1_4902843ff6a60bc4fdb76000698c46de8e7f9763a1a0fe63f70fd5f8.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", "matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git 
a/few-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json b/few-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json index d2d2d66..b7eb4d4 100644 --- a/few-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json +++ b/few-artifacts/id1_9891d0d1caa1ec8fd0adfe622e341f84dd3d1df1cffee843e1fb84a0.json @@ -10,7 +10,7 @@ "postgres_kind": "PostgreSQL", "matrix_id": "id1", "postgres_version": "11.1", - "k8s_version": "22", + "k8s_version": "1.22", "workflow_id": 12, "repo": "my-repo", "branch": "my-branch" diff --git a/summarize_test_results.py b/summarize_test_results.py index 81d9570..2a1bc4a 100644 --- a/summarize_test_results.py +++ b/summarize_test_results.py @@ -133,6 +133,24 @@ def is_test_artifact(test_entry): return True +def compress_kubernetes_version(test_entry): + """ensure the k8s_version field contains only the minor release + of kubernetes, and that the presence or absence of an initial "v" is ignored. + Otherwise, ciclops can over-represent failure percentages and k8s releases tested + """ + k8s = test_entry["k8s_version"] + if k8s[0] == "v": + k8s = k8s[1:] + frags = k8s.split(".") + if len(frags) <= 2: + test_entry["k8s_version"] = k8s + return test_entry + else: + minor = ".".join(frags[0:2]) + test_entry["k8s_version"] = minor + return test_entry + + def combine_postgres_data(test_entry): """combines Postgres kind and version of the test artifact to a single field called `pg_version` @@ -191,7 +209,7 @@ def track_time_taken(test_results, test_times, suite_times): if duration < test_times["min"][name]: test_times["min"][name] = duration - # track suite time. + # Track test suite timings. 
# For each platform-matrix branch, track the earliest start and the latest end platform = test_results["platform"] if platform not in suite_times["start_time"]: @@ -243,7 +261,7 @@ def count_bucketed_by_code(test_results, by_failing_code): name = test_results["name"] if test_results["error"] == "" or test_results["state"] == "ignoreFailed": return - # it does not make sense to show failing code that is outside of the test + # it does not make sense to show failing code that is outside the test, # so we skip special failures if not is_normal_failure(test_results): return @@ -286,7 +304,12 @@ def count_bucketed_by_special_failures(test_results, by_special_failures): if failure not in by_special_failures["total"]: by_special_failures["total"][failure] = 0 - for key in ["tests_failed", "k8s_versions_failed", "pg_versions_failed", "platforms_failed"]: + for key in [ + "tests_failed", + "k8s_versions_failed", + "pg_versions_failed", + "platforms_failed", + ]: if failure not in by_special_failures[key]: by_special_failures[key][failure] = {} @@ -397,6 +420,7 @@ def compute_test_summary(test_dir): # skipping non-artifacts continue test_results = combine_postgres_data(parsed) + test_results = compress_kubernetes_version(test_results) total_runs = 1 + total_runs if is_failed(test_results): @@ -468,12 +492,57 @@ def compile_overview(summary): } +def metric_name(metric): + metric_type = { + "by_test": "Tests", + "by_k8s": "Kubernetes versions", + "by_postgres": "Postgres versions", + "by_platform": "Platforms", + } + return metric_type[metric] + + +def compute_systematic_failures_on_metric(summary, metric, embed=True): + """tests if there are items within the metric that have systematic failures. + For example, in the "by_test" metric, if there is a test with systematic failures. + Returns a boolean to indicate there are systematic failures, and an output string + with the failures. + + The `embed` argument controls the output. 
If True (default) it computes the full list + of alerts for the metric. If False, it will cap at 2 rows with alerts, so as not to + flood the ChatOps client. + """ + output = "" + has_systematic_failure_in_metric = False + counter = 0 + for bucket_hits in summary[metric]["failed"].items(): + bucket = bucket_hits[0] # the items() call returns (bucket, hits) pairs + failures = summary[metric]["failed"][bucket] + runs = summary[metric]["total"][bucket] + if failures == runs and failures > 1: + if not has_systematic_failure_in_metric: + output += f"{metric_name(metric)} with systematic failures:\n\n" + has_systematic_failure_in_metric = True + if counter >= 2 and not embed: + output += f"- ...and more. See full story in GH Test Summary\n" + break + else: + output += f"- {bucket}: ({failures} out of {runs} tests failed)\n" + counter += 1 + if has_systematic_failure_in_metric: + # add a newline after at the end of the list of failures before starting the + # next metric + output += f"\n" + return True, output + return False, "" + + def format_alerts(summary, embed=True, file_out=None): """print Alerts for tests that have failed systematically If the `embed` argument is true, it will produce a fragment of Markdown to be included with the action summary. - Otherwise, it will be output as plain text. + Otherwise, it will be output as plain text intended for stand-alone use. 
We want to capture: - all test combinations failed (if this happens, no more investigation needed) @@ -496,28 +565,12 @@ def format_alerts(summary, embed=True, file_out=None): print("EOF", file=file_out) return - metric_name = { - "by_test": "Tests", - "by_k8s": "Kubernetes versions", - "by_postgres": "Postgres versions", - "by_platform": "Platforms", - } - output = "" for metric in ["by_test", "by_k8s", "by_postgres", "by_platform"]: - has_failure_in_metric = False - for bucket_hits in summary[metric]["failed"].items(): - bucket = bucket_hits[0] # the items() call returns (bucket, hits) pairs - failures = summary[metric]["failed"][bucket] - runs = summary[metric]["total"][bucket] - if failures == runs and failures > 1: - if not has_failure_in_metric: - output += f"{metric_name[metric]} with systematic failures:\n\n" - has_failure_in_metric = True - has_systematic_failures = True - output += f"- {bucket}: ({failures} out of {runs} tests failed)\n" - if has_failure_in_metric: - output += f"\n" + has_alerts, out = compute_systematic_failures_on_metric(summary, metric, embed) + if has_alerts: + has_systematic_failures = True + output += out if not has_systematic_failures: return @@ -531,6 +584,70 @@ def format_alerts(summary, embed=True, file_out=None): print("EOF", file=file_out) +def compute_semaphore(success_percent, embed=True): + """create a semaphore light summarizing the success percent. + If set to `embed`, an emoji will be used. Else, a textual representation + of a Slack emoji is used. 
+ """ + if embed: + if success_percent >= 95: + return "🟢" + elif success_percent >= 60: + return "🟡" + else: + return "🔴" + else: + if success_percent >= 95: + return ":large_green_circle:" + elif success_percent >= 60: + return ":large_yellow_circle:" + else: + return ":red_circle:" + + +def compute_thermometer_on_metric(summary, metric, embed=True): + """computes a summary per item in the metric, with the success percentage + and a color coding based on said percentage + """ + + output = f"{metric_name(metric)} thermometer:\n\n" + for bucket_hits in summary[metric]["total"].items(): + bucket = bucket_hits[0] # the items() call returns (bucket, hits) pairs + failures = 0 + if bucket in summary[metric]["failed"]: + failures = summary[metric]["failed"][bucket] + runs = summary[metric]["total"][bucket] + success_percent = (1 - failures / runs) * 100 + color = compute_semaphore(success_percent, embed) + output += f"- {color} - {bucket}: {round(success_percent, 1)}% success.\t" + output += f"({failures} out of {runs} tests failed)\n" + output += f"\n" + return output + + +def format_thermometer(summary, embed=True, file_out=None): + """print thermometer with the percentage of success for a set of metrics. + e.g. per-platform and per-kubernetes + + If the `embed` argument is true, it will produce a fragment of Markdown + to be included with the action summary. + Otherwise, it will be output as plain text intended for stand-alone use. 
+ """ + + output = "" + # we only test the "by_platform" metric for the thermometer, at the moment + for metric in ["by_platform"]: + output += compute_thermometer_on_metric(summary, metric, embed) + + if embed: + print(f"## Thermometer\n", file=file_out) + print(f"{output}", end="", file=file_out) + else: + print("thermometer<") @@ -897,6 +1014,7 @@ def format_test_summary(summary, file_out=None): ], } + format_thermometer(summary, file_out=file_out) format_alerts(summary, file_out=file_out) format_overview(overview, overview_section, file_out=file_out) @@ -967,6 +1085,7 @@ def format_short_test_summary(summary, file_out=None): ], } + format_thermometer(summary, file_out=file_out) format_alerts(summary, file_out=file_out) format_overview(overview, overview_section, file_out=file_out) @@ -1005,8 +1124,8 @@ def format_short_test_summary(summary, file_out=None): format_test_summary(test_summary, file_out=f) if args.limit: print("with GITHUB_STEP_SUMMARY limit", args.limit) - bytes = os.stat(os.getenv("GITHUB_STEP_SUMMARY")).st_size - if bytes > args.limit: + summary_bytes = os.stat(os.getenv("GITHUB_STEP_SUMMARY")).st_size + if summary_bytes > args.limit: # we re-open the STEP_SUMMARY with "w" to wipe out previous content with open(os.getenv("GITHUB_STEP_SUMMARY"), "w") as f: format_short_test_summary(test_summary, file_out=f) @@ -1020,4 +1139,5 @@ def format_short_test_summary(summary, file_out=None): if os.getenv("GITHUB_OUTPUT"): print("with GITHUB_OUTPUT", os.getenv("GITHUB_OUTPUT")) with open(os.getenv("GITHUB_OUTPUT"), "a") as f: + format_thermometer(test_summary, embed=False, file_out=f) format_alerts(test_summary, embed=False, file_out=f) diff --git a/test_summary.py b/test_summary.py index 1bdaccf..128dbd3 100644 --- a/test_summary.py +++ b/test_summary.py @@ -20,41 +20,43 @@ class TestIsFailed(unittest.TestCase): + summary = summarize_test_results.compute_test_summary("few-artifacts") + def test_compute_summary(self): self.maxDiff = None - summary = 
summarize_test_results.compute_test_summary("few-artifacts") - self.assertEqual(summary["total_run"], 3) - self.assertEqual(summary["total_failed"], 1) + self.assertEqual(self.summary["total_run"], 3) + self.assertEqual(self.summary["total_failed"], 1) self.assertEqual( - summary["by_code"]["total"], - { - "/Users/myuser/repos/cloudnative-pg/tests/e2e/initdb_test.go:80": 1 - }, + self.summary["by_code"]["total"], + {"/Users/myuser/repos/cloudnative-pg/tests/e2e/initdb_test.go:80": 1}, "unexpected summary", ) self.assertEqual( - summary["by_code"]["tests"], + self.summary["by_code"]["tests"], { "/Users/myuser/repos/cloudnative-pg/tests/e2e/initdb_test.go:80": { - "InitDB settings - initdb custom post-init SQL scripts -- can find the tables created by the post-init SQL queries": True + "InitDB settings - initdb custom post-init SQL scripts -- can find the" + " tables created by the post-init SQL queries": True } }, "unexpected summary", ) self.assertEqual( - summary["by_matrix"], {"total": {"id1": 3}, "failed": {"id1": 1}} + self.summary["by_matrix"], {"total": {"id1": 3}, "failed": {"id1": 1}} ) - self.assertEqual(summary["by_k8s"], {"total": {"22": 3}, "failed": {"22": 1}}) self.assertEqual( - summary["by_platform"], {"total": {"local": 3}, "failed": {"local": 1}} + self.summary["by_k8s"], {"total": {"1.22": 3}, "failed": {"1.22": 1}} ) self.assertEqual( - summary["by_postgres"], + self.summary["by_platform"], {"total": {"local": 3}, "failed": {"local": 1}} + ) + self.assertEqual( + self.summary["by_postgres"], {"total": {"PostgreSQL-11.1": 3}, "failed": {"PostgreSQL-11.1": 1}}, ) self.assertEqual( - summary["suite_durations"], + self.summary["suite_durations"], { "end_time": { "local": {"id1": datetime.datetime(2021, 11, 29, 18, 31, 7)} @@ -65,6 +67,24 @@ def test_compute_summary(self): }, ) + def test_compute_thermometer(self): + self.maxDiff = None + thermometer = summarize_test_results.compute_thermometer_on_metric(self.summary, "by_platform") + + 
self.assertEqual( + thermometer, + "Platforms thermometer:\n\n" + "- 🟡 - local: 66.7% success.\t(1 out of 3 tests failed)\n\n" + ) + + def test_compute_systematic_failures(self): + self.maxDiff = None + + for metric in ["by_test", "by_k8s", "by_postgres", "by_platform"]: + has_alerts, out = summarize_test_results.compute_systematic_failures_on_metric(self.summary, metric) + self.assertEqual(has_alerts, False) + self.assertEqual(out, "") + if __name__ == "__main__": unittest.main()