From fc095c6c12582ebab10696f908a0fa839203e50c Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sat, 27 Apr 2024 09:42:47 +0900 Subject: [PATCH 01/13] do not count in_process run Signed-off-by: Young Bu Park --- .github/workflows/functional-test.yaml | 6 +++++- .github/workflows/long-running-azure.yaml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/functional-test.yaml b/.github/workflows/functional-test.yaml index bcf12e2b91..ce2ad01611 100644 --- a/.github/workflows/functional-test.yaml +++ b/.github/workflows/functional-test.yaml @@ -734,10 +734,14 @@ jobs: for (const run of response.data.workflow_runs) { if (run.conclusion === 'failure') { failureCount++; - } else { + } else if (run.conclusion === 'success') { + // If we find a successful run, we can stop scanning. break; + } else { + console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) } } + console.log(`Found ${failureCount} failed runs in a row.`); return failureCount; - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index b1f242ce30..574c70d9ad 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -539,10 +539,14 @@ jobs: for (const run of response.data.workflow_runs) { if (run.conclusion === 'failure') { failureCount++; - } else { + } else if (run.conclusion === 'success') { + // If we find a successful run, we can stop scanning. break; + } else { + console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) } } + console.log(`Found ${failureCount} failed runs in a row.`); return failureCount; - name: Create failure issue for failing long running test run uses: actions/github-script@v7 From 2ea0a81f84f835ed15008454cd9591cc04407d6f Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:05:49 +0900 Subject: [PATCH 02/13] wip Signed-off-by: Young Bu Park --- .github/workflows/build.yaml | 23 +++++++++++++++++++ .github/workflows/functional-test.yaml | 28 +++++++---------------- .github/workflows/long-running-azure.yaml | 28 ++++++++--------------- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9101d3a67c..093b249fc7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -66,6 +66,29 @@ env: IMAGE_SRC: https://github.com/radius-project/radius jobs: + report-failure: + name: Report test failure + runs-on: ubuntu-latest + steps: + - name: Check if this is the consecutive failure + id: checkprevfail + uses: actions/github-script@v7 + with: + script: | + response = await github.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'functional-test.yaml', + event: 'schedule', + status: 'completed', + per_page: 1 + }); + + const workflowRuns = response.data.workflow_runs; + return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; + - name: Create failure issue for failing scheduled run + if: steps.checkprevfail.outputs.result == 'true' + run: echo "hello" build-and-push-cli: name: Build ${{ matrix.target_os }}_${{ matrix.target_arch }} binaries runs-on: ubuntu-latest diff --git a/.github/workflows/functional-test.yaml b/.github/workflows/functional-test.yaml index ce2ad01611..d4531a71e2 100644 --- a/.github/workflows/functional-test.yaml +++ b/.github/workflows/functional-test.yaml @@ -82,8 +82,6 @@ env: FUNCTIONAL_TEST_APP_ID: 425843 # Private Git repository where terraform module for testing is stored. TF_RECIPE_PRIVATE_GIT_SOURCE: "git::https://github.com/radius-project/terraform-private-modules//kubernetes-redis" - # The number of failed tests to report. - ISSUE_CREATE_THRESHOLD: 2 jobs: build: @@ -717,8 +715,8 @@ jobs: runs-on: ubuntu-latest if: failure() && github.event_name == 'schedule' && github.repository == 'radius-project/radius' steps: - - name: Count recently failed tests - id: count_failures + - name: Check if this is the consecutive failure + id: checkprevfail uses: actions/github-script@v7 with: script: | @@ -727,26 +725,16 @@ jobs: repo: context.repo.repo, workflow_id: 'functional-test.yaml', event: 'schedule', - per_page: 10 + status: 'completed', + per_page: 1 }); - - failureCount = 1; - for (const run of response.data.workflow_runs) { - if (run.conclusion === 'failure') { - failureCount++; - } else if (run.conclusion === 'success') { - // If we find a successful run, we can stop scanning. - break; - } else { - console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) - } - } - console.log(`Found ${failureCount} failed runs in a row.`); - return failureCount; + + const workflowRuns = response.data.workflow_runs; + return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD + if: steps.checkprevfail.outputs.result == 'true' with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index 574c70d9ad..2d43510dfb 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -522,36 +522,26 @@ jobs: runs-on: ubuntu-latest if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule' steps: - - name: Count recently failed tests - id: count_failures + - name: Check if this is the consecutive failure + id: checkprevfail uses: actions/github-script@v7 with: script: | response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: 'long-running-azure.yaml', + workflow_id: 'functional-test.yaml', event: 'schedule', - per_page: 10 + status: 'completed', + per_page: 1 }); - - failureCount = 1; - for (const run of response.data.workflow_runs) { - if (run.conclusion === 'failure') { - failureCount++; - } else if (run.conclusion === 'success') { - // If we find a successful run, we can stop scanning. - break; - } else { - console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) - } - } - console.log(`Found ${failureCount} failed runs in a row.`); - return failureCount; + + const workflowRuns = response.data.workflow_runs; + return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; - name: Create failure issue for failing long running test run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD + if: steps.checkprevfail.outputs.result == 'true' with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | From 012d7246665a9feb90a7c7695a0d2874f4631a86 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:08:26 +0900 Subject: [PATCH 03/13] wip Signed-off-by: Young Bu Park --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 093b249fc7..4ced1689dc 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -85,7 +85,7 @@ jobs: }); const workflowRuns = response.data.workflow_runs; - return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; + return workflowRuns && workflowRuns.length > 0 && workflowRuns[0].conclusion === 'failure'; - name: Create failure issue for failing scheduled run if: steps.checkprevfail.outputs.result == 'true' run: echo "hello" From 54582c7f81a1d4e510f381d3880bc5c90cb30f9f Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:11:33 +0900 Subject: [PATCH 04/13] revert Signed-off-by: Young Bu Park --- .github/workflows/build.yaml | 23 -------------- .github/workflows/functional-test.yaml | 27 ++++++++++++----- .github/workflows/long-running-azure.yaml | 37 ++++++++++++----------- 3 files changed, 39 insertions(+), 48 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4ced1689dc..9101d3a67c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -66,29 +66,6 @@ env: IMAGE_SRC: https://github.com/radius-project/radius jobs: - report-failure: - name: Report test failure - runs-on: ubuntu-latest - steps: - - name: Check if this is the consecutive failure - id: checkprevfail - uses: actions/github-script@v7 - with: - script: | - response = await github.rest.actions.listWorkflowRuns({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'functional-test.yaml', - event: 'schedule', - status: 'completed', - per_page: 1 - }); - - const workflowRuns = response.data.workflow_runs; - return workflowRuns && workflowRuns.length > 0 && workflowRuns[0].conclusion === 'failure'; - - name: Create failure issue for failing scheduled run - if: steps.checkprevfail.outputs.result == 'true' - run: echo "hello" build-and-push-cli: name: Build ${{ matrix.target_os }}_${{ matrix.target_arch }} binaries runs-on: ubuntu-latest diff --git a/.github/workflows/functional-test.yaml b/.github/workflows/functional-test.yaml index d4531a71e2..7359bf8272 100644 --- a/.github/workflows/functional-test.yaml +++ b/.github/workflows/functional-test.yaml @@ -82,6 +82,8 @@ env: FUNCTIONAL_TEST_APP_ID: 425843 # Private Git repository where terraform module for testing is stored. TF_RECIPE_PRIVATE_GIT_SOURCE: "git::https://github.com/radius-project/terraform-private-modules//kubernetes-redis" + # The number of failed tests to report. + ISSUE_CREATE_THRESHOLD: 2 jobs: build: @@ -715,8 +717,8 @@ jobs: runs-on: ubuntu-latest if: failure() && github.event_name == 'schedule' && github.repository == 'radius-project/radius' steps: - - name: Check if this is the consecutive failure - id: checkprevfail + - name: Count recently failed tests + id: count_failures uses: actions/github-script@v7 with: script: | @@ -726,15 +728,26 @@ jobs: workflow_id: 'functional-test.yaml', event: 'schedule', status: 'completed', - per_page: 1 + per_page: 10 }); - - const workflowRuns = response.data.workflow_runs; - return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; + + failureCount = 1; + for (const run of response.data.workflow_runs) { + if (run.conclusion === 'failure') { + failureCount++; + } else if (run.conclusion === 'success') { + // If we find a successful run, we can stop scanning. + break; + } else { + console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) + } + } + console.log(`Found ${failureCount} failed runs in a row.`); + return failureCount; - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.checkprevfail.outputs.result == 'true' + if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index 2d43510dfb..7b046c95a5 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -78,9 +78,6 @@ env: # The valid radius build time window in seconds to rebuild radius. 24 hours = 24 * 60 * 60 = 86400 VALID_RADIUS_BUILD_WINDOW: 86400 - # The functional test GitHub app id - FUNCTIONAL_TEST_APP_ID: 425843 - # The AKS cluster name AKS_CLUSTER_NAME: "radlrtest00-aks" # The resource group for AKS_CLUSTER_NAME resource. @@ -310,12 +307,6 @@ jobs: RAD_CLI_ARTIFACT_NAME: ${{ needs.build.outputs.RAD_CLI_ARTIFACT_NAME }} BICEP_RECIPE_TAG_VERSION: ${{ needs.build.outputs.REL_VERSION }} steps: - - name: Get GitHub app token - uses: tibdex/github-app-token@v2 - id: get_installation_token - with: - app_id: ${{ env.FUNCTIONAL_TEST_APP_ID }} - private_key: ${{ secrets.FUNCTIONAL_TEST_APP_PRIVATE_KEY }} - name: Checkout uses: actions/checkout@v4 with: @@ -474,7 +465,6 @@ jobs: DOCKER_REGISTRY: ${{ env.CONTAINER_REGISTRY }} BICEP_RECIPE_REGISTRY: ${{ env.BICEP_RECIPE_REGISTRY }} BICEP_RECIPE_TAG_VERSION: ${{ env.BICEP_RECIPE_TAG_VERSION }} - GH_TOKEN: ${{ steps.get_installation_token.outputs.token }} - name: Collect Pod details if: always() run: | @@ -522,26 +512,37 @@ jobs: runs-on: ubuntu-latest if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule' steps: - - name: Check if this is the consecutive failure - id: checkprevfail + - name: Count recently failed tests + id: count_failures uses: actions/github-script@v7 with: script: | response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: 'functional-test.yaml', + workflow_id: 'long-running-azure.yaml', event: 'schedule', status: 'completed', - per_page: 1 + per_page: 10 }); - - const workflowRuns = response.data.workflow_runs; - return len(workflowRuns) > 0 && workflowRuns[0].conclusion === 'failure'; + + failureCount = 1; + for (const run of response.data.workflow_runs) { + if (run.conclusion === 'failure') { + failureCount++; + } else if (run.conclusion === 'success') { + // If we find a successful run, we can stop scanning. + break; + } else { + console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) + } + } + console.log(`Found ${failureCount} failed runs in a row.`); + return failureCount; - name: Create failure issue for failing long running test run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.checkprevfail.outputs.result == 'true' + if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | From d6ee198737fee88c0ae7c0f0894488f0ebf4d8f2 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:45:49 +0900 Subject: [PATCH 05/13] use composite pattern Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 54 +++++++++++++++++++ .github/workflows/build.yaml | 12 +++++ .github/workflows/functional-test.yaml | 36 +++---------- .github/workflows/long-running-azure.yaml | 38 +++---------- 4 files changed, 81 insertions(+), 59 deletions(-) create mode 100644 .github/actions/count-failed-runs/action.yaml diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml new file mode 100644 index 0000000000..275136bb67 --- /dev/null +++ b/.github/actions/count-failed-runs/action.yaml @@ -0,0 +1,54 @@ +name: "Count failed runs" +description: This is to count the number of consecutive failed runs. +inputs: + workflow_id: + description: 'Workflow ID to use for counting failed runs' + required: true + max_workflow_runs: + description: 'Maximum number of workflow runs to check' + default: '10' + required: false + workflow_event: + description: 'Maximum number of workflow runs to check' + default: 'schedule' + required: false +outputs: + total_runs: + value: ${{ steps.count_failures.outputs.result }} + description: The number of consecutive failed runs +runs: + using: "composite" + steps: + - name: Count recently failed tests + id: count_failures + uses: actions/github-script@v7 + with: + script: | + // Fetch actions runs to scan the recent failure conclusion runs. + response = await github.rest.actions.listWorkflowRuns({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: context.inputs.workflow_id, + event: context.inputs.workflow_event, + status: 'completed', + per_page: context.inputs.max_workflow_runs + }); + + console.log(response); + + // Scan `failure` conclusion runs to find the consecutive failures while + // skipping the other conclusions, such as 'cancelled`. + failureCount = 0; + for (const run of response.data.workflow_runs) { + if (run.conclusion === 'failure') { + failureCount++; + } else if (run.conclusion === 'success') { + // If we find a successful run, we can stop scanning. + break; + } else { + // Skipping the other conclusions such as 'cancelled'. + console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) + } + } + console.log(`Found ${failureCount} failed runs in a row.`); + return failureCount; diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9101d3a67c..a4792ce317 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -66,6 +66,18 @@ env: IMAGE_SRC: https://github.com/radius-project/radius jobs: + report-failure: + name: Report test failure + runs-on: ubuntu-latest + steps: + - name: Count failed runs + id: count_failed_runs + uses: ./.github/actions/count-failed-runs + with: + workflow_id: 'functional-test.yaml' + - name: hello + if: steps.count_failed_runs.outputs.total_runs >= 2 + run: echo "hello" build-and-push-cli: name: Build ${{ matrix.target_os }}_${{ matrix.target_arch }} binaries runs-on: ubuntu-latest diff --git a/.github/workflows/functional-test.yaml b/.github/workflows/functional-test.yaml index 7359bf8272..5d1c17b32f 100644 --- a/.github/workflows/functional-test.yaml +++ b/.github/workflows/functional-test.yaml @@ -83,7 +83,7 @@ env: # Private Git repository where terraform module for testing is stored. TF_RECIPE_PRIVATE_GIT_SOURCE: "git::https://github.com/radius-project/terraform-private-modules//kubernetes-redis" # The number of failed tests to report. - ISSUE_CREATE_THRESHOLD: 2 + ISSUE_CREATE_THRESHOLD: 1 jobs: build: @@ -717,37 +717,15 @@ jobs: runs-on: ubuntu-latest if: failure() && github.event_name == 'schedule' && github.repository == 'radius-project/radius' steps: - - name: Count recently failed tests - id: count_failures - uses: actions/github-script@v7 + - name: Count failed runs + id: count_failed_runs + uses: ./.github/actions/count-failed-runs with: - script: | - response = await github.rest.actions.listWorkflowRuns({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'functional-test.yaml', - event: 'schedule', - status: 'completed', - per_page: 10 - }); - - failureCount = 1; - for (const run of response.data.workflow_runs) { - if (run.conclusion === 'failure') { - failureCount++; - } else if (run.conclusion === 'success') { - // If we find a successful run, we can stop scanning. - break; - } else { - console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) - } - } - console.log(`Found ${failureCount} failed runs in a row.`); - return failureCount; + workflow_id: 'functional-test.yaml' - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD + if: steps.count_failed_runs.outputs.total_runs >= env.ISSUE_CREATE_THRESHOLD with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | @@ -755,5 +733,5 @@ jobs: ...context.repo, title: `Scheduled functional test failed - Run ID: ${context.runId}`, labels: ['bug', 'test-failure'], - body: `## Bug information \n\nThis bug is generated automatically if the scheduled functional test fails at least ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius functional test operates on a schedule of every 4 hours during weekdays and every 12 hours over the weekend. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).` + body: `## Bug information \n\nThis bug is generated automatically if the scheduled functional test fails more than ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius functional test operates on a schedule of every 4 hours during weekdays and every 12 hours over the weekend. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).` }) \ No newline at end of file diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index 7b046c95a5..c4a3cff09a 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -93,7 +93,7 @@ env: ACTION_LINK: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" # The number of failed tests to report. - ISSUE_CREATE_THRESHOLD: 2 + ISSUE_CREATE_THRESHOLD: 1 jobs: build: @@ -512,37 +512,15 @@ jobs: runs-on: ubuntu-latest if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule' steps: - - name: Count recently failed tests - id: count_failures - uses: actions/github-script@v7 + - name: Count failed runs + id: count_failed_runs + uses: ./.github/actions/count-failed-runs with: - script: | - response = await github.rest.actions.listWorkflowRuns({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'long-running-azure.yaml', - event: 'schedule', - status: 'completed', - per_page: 10 - }); - - failureCount = 1; - for (const run of response.data.workflow_runs) { - if (run.conclusion === 'failure') { - failureCount++; - } else if (run.conclusion === 'success') { - // If we find a successful run, we can stop scanning. - break; - } else { - console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) - } - } - console.log(`Found ${failureCount} failed runs in a row.`); - return failureCount; - - name: Create failure issue for failing long running test run + workflow_id: 'long-running-azure-test.yaml' + - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. - if: steps.count_failures.outputs.result >= env.ISSUE_CREATE_THRESHOLD + if: steps.count_failed_runs.outputs.total_runs >= env.ISSUE_CREATE_THRESHOLD with: github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }} script: | @@ -550,5 +528,5 @@ jobs: ...context.repo, title: `Scheduled long running test failed - Run ID: ${context.runId}`, labels: ['bug', 'test-failure'], - body: `## Bug information \n\nThis bug is generated automatically if the scheduled long running test fails at least ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius long running test operates on a schedule of every 2 hours everyday. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).` + body: `## Bug information \n\nThis bug is generated automatically if the scheduled long running test fails more than ${process.env.ISSUE_CREATE_THRESHOLD} times in a row. The Radius long running test operates on a schedule of every 2 hours everyday. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).` }) From d0cdf40f04be891673a49f926836b846daf5cb52 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:48:51 +0900 Subject: [PATCH 06/13] wip Signed-off-by: Young Bu Park --- .github/workflows/build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a4792ce317..f5b9de0da5 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -70,6 +70,8 @@ jobs: name: Report test failure runs-on: ubuntu-latest steps: + - name: Check out repo + uses: actions/checkout@v4 - name: Count failed runs id: count_failed_runs uses: ./.github/actions/count-failed-runs From b147b9cc0646492db247ce194d8ccf2e8277c4df Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 14:58:33 +0900 Subject: [PATCH 07/13] wip Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml index 275136bb67..1833aee480 100644 --- a/.github/actions/count-failed-runs/action.yaml +++ b/.github/actions/count-failed-runs/action.yaml @@ -28,10 +28,10 @@ runs: response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: context.inputs.workflow_id, - event: context.inputs.workflow_event, + workflow_id: core.getInput('workflow_id'), + event: core.getInput('workflow_event'), status: 'completed', - per_page: context.inputs.max_workflow_runs + per_page: core.getInput('max_workflow_runs') }); console.log(response); From 79c1000ffa65511062a12a0e778185dc91ad90f9 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 15:05:30 +0900 Subject: [PATCH 08/13] wip Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml index 1833aee480..69a7b6d2db 100644 --- a/.github/actions/count-failed-runs/action.yaml +++ b/.github/actions/count-failed-runs/action.yaml @@ -24,14 +24,15 @@ runs: uses: actions/github-script@v7 with: script: | + console.log(context.inputs) // Fetch actions runs to scan the recent failure conclusion runs. response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: core.getInput('workflow_id'), - event: core.getInput('workflow_event'), + workflow_id: context.inputs.workflow_id, + event: context.inputs.workflow_event, status: 'completed', - per_page: core.getInput('max_workflow_runs') + per_page: context.inputs.max_workflow_runs }); console.log(response); From 7e2a07a7adb99ff76fcbdf7fabb457f48b14e457 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 15:52:29 +0900 Subject: [PATCH 09/13] wip Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml index 69a7b6d2db..909f423f3f 100644 --- a/.github/actions/count-failed-runs/action.yaml +++ b/.github/actions/count-failed-runs/action.yaml @@ -24,15 +24,18 @@ runs: uses: actions/github-script@v7 with: script: | - console.log(context.inputs) + inputs = context.payload.inputs; + + console.log(inputs) + // Fetch actions runs to scan the recent failure conclusion runs. response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: context.inputs.workflow_id, - event: context.inputs.workflow_event, + workflow_id: inputs.workflow_id, + event: inputs.workflow_event, status: 'completed', - per_page: context.inputs.max_workflow_runs + per_page: inputs.max_workflow_runs }); console.log(response); From 7cfc2d035bb536780854bfa23fec6e07708ad5f6 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 16:00:13 +0900 Subject: [PATCH 10/13] wip Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml index 909f423f3f..5f3b5ab66f 100644 --- a/.github/actions/count-failed-runs/action.yaml +++ b/.github/actions/count-failed-runs/action.yaml @@ -4,7 +4,7 @@ inputs: workflow_id: description: 'Workflow ID to use for counting failed runs' required: true - max_workflow_runs: + per_page: description: 'Maximum number of workflow runs to check' default: '10' required: false @@ -22,20 +22,26 @@ runs: - name: Count recently failed tests id: count_failures uses: actions/github-script@v7 + env: + WORKFLOWID: ${{ inputs.workflow_id }} + PERPAGE: ${{ inputs.per_page }} + WORKFLOWEVENT: ${{ inputs.workflow_event }} with: script: | - inputs = context.payload.inputs; + workflowid = process.env.WORKFLOWID; + perpage = parseInt(process.env.PERPAGE, 10); + workflowevent = process.env.WORKFLOWEVENT; - console.log(inputs) + console.log("workflow ID: " + workflowid + ", event: " + workflowevent + ", per_page " + perpage) // Fetch actions runs to scan the recent failure conclusion runs. response = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, - workflow_id: inputs.workflow_id, - event: inputs.workflow_event, + workflow_id: workflowid, + event: workflowevent, status: 'completed', - per_page: inputs.max_workflow_runs + per_page: perpage }); console.log(response); From 4c4ee622bbd566c56af6a14bff23606c5f55bcde Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 16:13:26 +0900 Subject: [PATCH 11/13] test Signed-off-by: Young Bu Park --- .github/actions/count-failed-runs/action.yaml | 17 +++++++++-------- .github/workflows/build.yaml | 6 ++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/actions/count-failed-runs/action.yaml b/.github/actions/count-failed-runs/action.yaml index 5f3b5ab66f..351ba9ad2d 100644 --- a/.github/actions/count-failed-runs/action.yaml +++ b/.github/actions/count-failed-runs/action.yaml @@ -1,17 +1,17 @@ -name: "Count failed runs" -description: This is to count the number of consecutive failed runs. +name: "Count completed failed runs" +description: This actions counts the number of consecutive failed runs for a given workflow. inputs: workflow_id: description: 'Workflow ID to use for counting failed runs' required: true - per_page: - description: 'Maximum number of workflow runs to check' - default: '10' - required: false workflow_event: description: 'Maximum number of workflow runs to check' default: 'schedule' required: false + per_page: + description: 'Number of workflow runs to check for failures' + default: '10' + required: false outputs: total_runs: value: ${{ steps.count_failures.outputs.result }} @@ -44,12 +44,12 @@ runs: per_page: perpage }); - console.log(response); - // Scan `failure` conclusion runs to find the consecutive failures while // skipping the other conclusions, such as 'cancelled`. failureCount = 0; for (const run of response.data.workflow_runs) { + console.log(`Validating the workflow run - ID: ${run.id}, Conclusion: ${run.conclusion}.`); + if (run.conclusion === 'failure') { failureCount++; } else if (run.conclusion === 'success') { @@ -60,5 +60,6 @@ runs: console.log(`Skipping run ${run.id} with conclusion ${run.conclusion}.`) } } + console.log(`Found ${failureCount} failed runs in a row.`); return failureCount; diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f5b9de0da5..032da0e0bb 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -77,8 +77,10 @@ jobs: uses: ./.github/actions/count-failed-runs with: workflow_id: 'functional-test.yaml' - - name: hello - if: steps.count_failed_runs.outputs.total_runs >= 2 + - name: hello 1 + run: echo ${{ steps.count_failed_runs.outputs.total_runs }} + - name: hello 2 + if: steps.count_failed_runs.outputs.total_runs >= 1 run: echo "hello" build-and-push-cli: name: Build ${{ matrix.target_os }}_${{ matrix.target_arch }} binaries From 98852b27de36a52bf5017443faf3d68da028b75a Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 16:20:00 +0900 Subject: [PATCH 12/13] fix all Signed-off-by: Young Bu Park --- .github/workflows/build.yaml | 16 ---------------- .github/workflows/functional-test.yaml | 8 ++++++++ .github/workflows/long-running-azure.yaml | 10 +++++++++- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 032da0e0bb..9101d3a67c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -66,22 +66,6 @@ env: IMAGE_SRC: https://github.com/radius-project/radius jobs: - report-failure: - name: Report test failure - runs-on: ubuntu-latest - steps: - - name: Check out repo - uses: actions/checkout@v4 - - name: Count failed runs - id: count_failed_runs - uses: ./.github/actions/count-failed-runs - with: - workflow_id: 'functional-test.yaml' - - name: hello 1 - run: echo ${{ steps.count_failed_runs.outputs.total_runs }} - - name: hello 2 - if: steps.count_failed_runs.outputs.total_runs >= 1 - run: echo "hello" build-and-push-cli: name: Build ${{ matrix.target_os }}_${{ matrix.target_arch }} binaries runs-on: ubuntu-latest diff --git a/.github/workflows/functional-test.yaml b/.github/workflows/functional-test.yaml index 5d1c17b32f..2b7c916c06 100644 --- a/.github/workflows/functional-test.yaml +++ b/.github/workflows/functional-test.yaml @@ -716,7 +716,15 @@ jobs: needs: [build, tests] runs-on: ubuntu-latest if: failure() && github.event_name == 'schedule' && github.repository == 'radius-project/radius' + env: + CHECKOUT_REPO: ${{ needs.build.outputs.CHECKOUT_REPO }} + CHECKOUT_REF: ${{ needs.build.outputs.CHECKOUT_REF }} steps: + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ env.CHECKOUT_REPO }} + ref: ${{ env.CHECKOUT_REF }} - name: Count failed runs id: count_failed_runs uses: ./.github/actions/count-failed-runs diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index c4a3cff09a..87b2e878cc 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -510,13 +510,21 @@ jobs: name: Report test failure needs: [build, tests] runs-on: ubuntu-latest + env: + CHECKOUT_REPO: ${{ needs.build.outputs.CHECKOUT_REPO }} + CHECKOUT_REF: ${{ needs.build.outputs.CHECKOUT_REF }} if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule' steps: + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ env.CHECKOUT_REPO }} + ref: ${{ env.CHECKOUT_REF }} - name: Count failed runs id: count_failed_runs uses: ./.github/actions/count-failed-runs with: - workflow_id: 'long-running-azure-test.yaml' + workflow_id: 'long-running-azure.yaml' - name: Create failure issue for failing scheduled run uses: actions/github-script@v7 # Only create an issue if there are (env.ISSUE_CREATE_THRESHOLD) failures of the recent tests. From e9b3484e27d35d70fadd966297dc7a95a611f923 Mon Sep 17 00:00:00 2001 From: Young Bu Park Date: Sun, 28 Apr 2024 16:22:42 +0900 Subject: [PATCH 13/13] revert Signed-off-by: Young Bu Park --- .github/workflows/long-running-azure.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/long-running-azure.yaml b/.github/workflows/long-running-azure.yaml index 87b2e878cc..21761f230a 100644 --- a/.github/workflows/long-running-azure.yaml +++ b/.github/workflows/long-running-azure.yaml @@ -78,6 +78,9 @@ env: # The valid radius build time window in seconds to rebuild radius. 24 hours = 24 * 60 * 60 = 86400 VALID_RADIUS_BUILD_WINDOW: 86400 + # The functional test GitHub app id + FUNCTIONAL_TEST_APP_ID: 425843 + # The AKS cluster name AKS_CLUSTER_NAME: "radlrtest00-aks" # The resource group for AKS_CLUSTER_NAME resource. @@ -307,6 +310,12 @@ jobs: RAD_CLI_ARTIFACT_NAME: ${{ needs.build.outputs.RAD_CLI_ARTIFACT_NAME }} BICEP_RECIPE_TAG_VERSION: ${{ needs.build.outputs.REL_VERSION }} steps: + - name: Get GitHub app token + uses: tibdex/github-app-token@v2 + id: get_installation_token + with: + app_id: ${{ env.FUNCTIONAL_TEST_APP_ID }} + private_key: ${{ secrets.FUNCTIONAL_TEST_APP_PRIVATE_KEY }} - name: Checkout uses: actions/checkout@v4 with: @@ -465,6 +474,7 @@ jobs: DOCKER_REGISTRY: ${{ env.CONTAINER_REGISTRY }} BICEP_RECIPE_REGISTRY: ${{ env.BICEP_RECIPE_REGISTRY }} BICEP_RECIPE_TAG_VERSION: ${{ env.BICEP_RECIPE_TAG_VERSION }} + GH_TOKEN: ${{ steps.get_installation_token.outputs.token }} - name: Collect Pod details if: always() run: |