radius-project · youngbupark · Apr 27, 2024 · Apr 28, 2024 · Apr 28, 2024 · Apr 28, 2024
@@ -0,0 +1,65 @@
+# Composite action that counts how many of the most recent completed runs of a
+# workflow failed in a row. Only runs with status 'completed' are listed, so a
+# run that is still in progress is not part of the count.
+name: "Count completed failed runs"
+description: This action counts the number of consecutive failed runs for a given workflow.
+inputs:
+  workflow_id:
+    description: 'Workflow ID to use for counting failed runs'
+    required: true
+  workflow_event:
+    description: 'Event type of the workflow runs to check (for example, schedule)'
+    default: 'schedule'
+    required: false
+  per_page:
+    description: 'Number of workflow runs to check for failures'
+    default: '10'
+    required: false
+outputs:
+  total_runs:
+    # Result returned by the github-script step below.
+    value: ${{ steps.count_failures.outputs.result }}
+    description: The number of consecutive failed runs
+runs:
+  using: "composite"
+  steps:
+    - name: Count recently failed tests
+      id: count_failures
+      uses: actions/github-script@v7
+      # Inputs are handed to the script through environment variables rather
+      # than being interpolated into the script text.
+      env:
+        WORKFLOWID: ${{ inputs.workflow_id }}
+        PERPAGE: ${{ inputs.per_page }}
+        WORKFLOWEVENT: ${{ inputs.workflow_event }}
+      with:
+        script: |
+          const workflowId = process.env.WORKFLOWID;
+          const workflowEvent = process.env.WORKFLOWEVENT;
+
+          // Environment variables are strings. Fall back to the input's default
+          // when per_page is not a positive integer instead of sending NaN to the API.
+          const DEFAULT_PER_PAGE = 10;
+          const parsedPerPage = parseInt(process.env.PERPAGE, 10);
+          const perPage = Number.isInteger(parsedPerPage) && parsedPerPage > 0 ? parsedPerPage : DEFAULT_PER_PAGE;
+          if (perPage !== parsedPerPage) {
+            core.warning(`Invalid per_page '${process.env.PERPAGE}'; using ${DEFAULT_PER_PAGE} instead.`);
+          }
+
+          console.log("workflow ID: " + workflowId + ", event: " + workflowEvent + ", per_page " + perPage)
+
+          // Fetch the completed runs of the workflow for the given event.
+          const response = await github.rest.actions.listWorkflowRuns({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            workflow_id: workflowId,
+            event: workflowEvent,
+            status: 'completed',
+            per_page: perPage
+          });
+
+          // Count `failure` conclusions until the first `success`, skipping any
+          // other conclusion such as `cancelled`.
+          // NOTE(review): assumes the API lists runs newest first - confirm.
+          let failureCount = 0;
+          for (const run of response.data.workflow_runs) {
+            console.log(`Validating the workflow run - ID: ${run.id}, Conclusion: ${run.conclusion}.`);
+
+            if (run.conclusion === 'failure') {
+              failureCount++;
+            } else if (run.conclusion === 'success') {
+              // A successful run ends the streak of failures, so stop scanning.
+              break;
+            } else {
+              // Other conclusions such as 'cancelled' neither extend nor end the streak.
+              console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`)
+            }
+          }
+
+          console.log(`Found ${failureCount} failed runs in a row.`);
+          return failureCount;
@@ -83,7 +83,7 @@ env:
  # Private Git repository where terraform module for testing is stored.
  TF_RECIPE_PRIVATE_GIT_SOURCE: "git::https://github.com/radius-project/terraform-private-modules//kubernetes-redis"
  # The minimum number of consecutive failed runs required to create an issue.
- ISSUE_CREATE_THRESHOLD: 2
+ ISSUE_CREATE_THRESHOLD: 1
 
 jobs:
  build:
@@ -716,39 +716,30 @@ jobs:
  needs: [build, tests]
  runs-on: ubuntu-latest
  if: failure() && github.event_name == 'schedule' && github.repository == 'radius-project/radius'
+ env:
+ CHECKOUT_REPO: ${{ needs.build.outputs.CHECKOUT_REPO }}
+ CHECKOUT_REF: ${{ needs.build.outputs.CHECKOUT_REF }}
  steps:
- - name: Count recently failed tests
- id: count_failures
- uses: actions/github-script@v7
+ - name: Checkout
+ uses: actions/checkout@v4
  with:
- script: |
- response = await github.rest.actions.listWorkflowRuns({
- owner: context.repo.owner,
- repo: context.repo.repo,
- workflow_id: 'functional-test.yaml',
- event: 'schedule',
- per_page: 10
- });
-
- failureCount = 1;
- for (const run of response.data.workflow_runs) {
- if (run.conclusion === 'failure') {
- failureCount++;
- } else {
- break;
- }
- }
- return failureCount;
+ repository: ${{ env.CHECKOUT_REPO }}
+ ref: ${{ env.CHECKOUT_REF }}
+ - name: Count failed runs
+ id: count_failed_runs
+ uses: ./.github/actions/count-failed-runs
+ with:
+ workflow_id: 'functional-test.yaml'
  - name: Create failure issue for failing scheduled run
  uses: actions/github-script@v7
  # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests.
- if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD
+ if: steps.count_failed_runs.outputs.total_runs >= env.ISSUE_CREATE_THRESHOLD
  with:
  github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }}
  script: |
  github.rest.issues.create({
  ...context.repo,
  title: `Scheduled functional test failed - Run ID: ${context.runId}`,
  labels: ['bug', 'test-failure'],
- body: `## Bug information \n\nThis bug is generated automatically if the scheduled functional test fails at least ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius functional test operates on a schedule of every 4 hours during weekdays and every 12 hours over the weekend. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
+ body: `## Bug information \n\nThis bug is generated automatically if the scheduled functional test fails more than ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius functional test operates on a schedule of every 4 hours during weekdays and every 12 hours over the weekend. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
  })
@@ -96,7 +96,7 @@ env:
  ACTION_LINK: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
 
  # The minimum number of consecutive failed runs required to create an issue.
- ISSUE_CREATE_THRESHOLD: 2
+ ISSUE_CREATE_THRESHOLD: 1
 
 jobs:
  build:
@@ -520,40 +520,31 @@ jobs:
  name: Report test failure
  needs: [build, tests]
  runs-on: ubuntu-latest
+ env:
+ CHECKOUT_REPO: ${{ needs.build.outputs.CHECKOUT_REPO }}
+ CHECKOUT_REF: ${{ needs.build.outputs.CHECKOUT_REF }}
  if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule'
  steps:
- - name: Count recently failed tests
- id: count_failures
- uses: actions/github-script@v7
+ - name: Checkout
+ uses: actions/checkout@v4
  with:
- script: |
- response = await github.rest.actions.listWorkflowRuns({
- owner: context.repo.owner,
- repo: context.repo.repo,
- workflow_id: 'long-running-azure.yaml',
- event: 'schedule',
- per_page: 10
- });
-
- failureCount = 1;
- for (const run of response.data.workflow_runs) {
- if (run.conclusion === 'failure') {
- failureCount++;
- } else {
- break;
- }
- }
- return failureCount;
- - name: Create failure issue for failing long running test run
+ repository: ${{ env.CHECKOUT_REPO }}
+ ref: ${{ env.CHECKOUT_REF }}
+ - name: Count failed runs
+ id: count_failed_runs
+ uses: ./.github/actions/count-failed-runs
+ with:
+ workflow_id: 'long-running-azure.yaml'
+ - name: Create failure issue for failing scheduled run
  uses: actions/github-script@v7
  # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests.
- if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD
+ if: steps.count_failed_runs.outputs.total_runs >= env.ISSUE_CREATE_THRESHOLD
  with:
  github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }}
  script: |
  github.rest.issues.create({
  ...context.repo,
  title: `Scheduled long running test failed - Run ID: ${context.runId}`,
  labels: ['bug', 'test-failure'],
- body: `## Bug information \n\nThis bug is generated automatically if the scheduled long running test fails at least ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius long running test operates on a schedule of every 2 hours everyday. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
+ body: `## Bug information \n\nThis bug is generated automatically if the scheduled long running test fails more than ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius long running test operates on a schedule of every 2 hours everyday. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
  })