From 2fbc843e0d6925a63e9bafdcab38ba7dffac1402 Mon Sep 17 00:00:00 2001
From: Alex Szabo
Date: Thu, 17 Oct 2024 15:03:28 +0200
Subject: [PATCH] [ci] Extract OAS check + add retry (#196534)

## Summary
In the past months, the capture OAS snapshot step has been quite flaky, breaking the `Checks` step, and thus breaking the `on-merge` jobs.

This PR extracts the check to its own step and adds retries. The separate step is ideal because it is quite heavy compared to the other checks (we fire up ES + Kibana for the OAS snapshot).

Also, this PR removes the `Checks` step altogether from the kibana-chrome-forward-testing pipeline, as the Chrome versions used do not affect that aspect.

This test run includes a retry within the Capture OAS Snapshot step: https://buildkite.com/elastic/kibana-pull-request/builds/243187#01929612-dac7-4584-b440-120ea3fea7ea
---
 .buildkite/pipelines/chrome_forward_testing.yml | 11 -----------
 .buildkite/pipelines/on_merge.yml               | 14 ++++++++++++++
 .buildkite/pipelines/pull_request/base.yml      | 11 +++++++++++
 .buildkite/scripts/steps/checks.sh              |  1 -
 .../steps/{ => checks}/capture_oas_snapshot.sh | 11 +++++++++--
 5 files changed, 34 insertions(+), 14 deletions(-)
 rename .buildkite/scripts/steps/{ => checks}/capture_oas_snapshot.sh (84%)

diff --git a/.buildkite/pipelines/chrome_forward_testing.yml b/.buildkite/pipelines/chrome_forward_testing.yml
index 76069f6ad8070..6dafddd8b2c5c 100644
--- a/.buildkite/pipelines/chrome_forward_testing.yml
+++ b/.buildkite/pipelines/chrome_forward_testing.yml
@@ -345,17 +345,6 @@ steps:
         - exit_status: '-1'
           limit: 1
 
-  - command: .buildkite/scripts/steps/checks.sh
-    label: 'Checks'
-    agents:
-      machineType: n2-standard-2
-      preemptible: true
-    timeout_in_minutes: 60
-    retry:
-      automatic:
-        - exit_status: '-1'
-          limit: 3
-
   - wait: ~
     continue_on_failure: true
 
diff --git a/.buildkite/pipelines/on_merge.yml b/.buildkite/pipelines/on_merge.yml
index e42f34bfba1fa..0e87d401c196e 100644
--- a/.buildkite/pipelines/on_merge.yml
+++ b/.buildkite/pipelines/on_merge.yml
@@ -532,6 +532,20 @@ steps:
         - exit_status: '-1'
           limit: 3
 
+  - command: .buildkite/scripts/steps/checks/capture_oas_snapshot.sh
+    label: 'Check OAS Snapshot'
+    agents:
+      image: family/kibana-ubuntu-2004
+      imageProject: elastic-images-prod
+      provider: gcp
+      machineType: n2-standard-2
+      preemptible: true
+    timeout_in_minutes: 60
+    retry:
+      automatic:
+        - exit_status: '-1'
+          limit: 3
+
   - command: .buildkite/scripts/steps/storybooks/build_and_upload.sh
     label: 'Build Storybooks'
     agents:
diff --git a/.buildkite/pipelines/pull_request/base.yml b/.buildkite/pipelines/pull_request/base.yml
index 2f2e0a739a304..c60d68bd2e88b 100644
--- a/.buildkite/pipelines/pull_request/base.yml
+++ b/.buildkite/pipelines/pull_request/base.yml
@@ -109,6 +109,17 @@ steps:
         - exit_status: '-1'
           limit: 3
 
+  - command: .buildkite/scripts/steps/checks/capture_oas_snapshot.sh
+    label: 'Check OAS Snapshot'
+    agents:
+      machineType: n2-standard-2
+      preemptible: true
+    timeout_in_minutes: 60
+    retry:
+      automatic:
+        - exit_status: '-1'
+          limit: 3
+
   - command: .buildkite/scripts/steps/api_docs/build_api_docs.sh
     label: 'Build API Docs'
     agents:
diff --git a/.buildkite/scripts/steps/checks.sh b/.buildkite/scripts/steps/checks.sh
index 50ee0363ad93f..d6c4f1b80569f 100755
--- a/.buildkite/scripts/steps/checks.sh
+++ b/.buildkite/scripts/steps/checks.sh
@@ -10,7 +10,6 @@ if [[ "${FTR_ENABLE_FIPS_AGENT:-}" == "true" ]]; then
 fi
 .buildkite/scripts/steps/checks/saved_objects_compat_changes.sh
 .buildkite/scripts/steps/checks/saved_objects_definition_change.sh
-.buildkite/scripts/steps/capture_oas_snapshot.sh
 .buildkite/scripts/steps/code_generation/elastic_assistant_codegen.sh
 .buildkite/scripts/steps/code_generation/security_solution_codegen.sh
 .buildkite/scripts/steps/openapi_bundling/security_solution_openapi_bundling.sh
diff --git a/.buildkite/scripts/steps/capture_oas_snapshot.sh b/.buildkite/scripts/steps/checks/capture_oas_snapshot.sh
similarity index 84%
rename from .buildkite/scripts/steps/capture_oas_snapshot.sh
rename to .buildkite/scripts/steps/checks/capture_oas_snapshot.sh
index dc0ac88891f4f..98a825919f077 100755
--- a/.buildkite/scripts/steps/capture_oas_snapshot.sh
+++ b/.buildkite/scripts/steps/checks/capture_oas_snapshot.sh
@@ -2,6 +2,8 @@
 
 set -euo pipefail
 
+.buildkite/scripts/bootstrap.sh
+
 source .buildkite/scripts/common/util.sh
 
 echo --- Capture OAS snapshot
@@ -14,5 +16,10 @@ if [[ $BUILDKITE_PULL_REQUEST != "false" && "$BUILDKITE_PULL_REQUEST_BASE_BRANCH
   cmd="$cmd --no-serverless"
 fi
 
-eval "$cmd"
-check_for_changed_files "$cmd" true
\ No newline at end of file
+run_check() {
+  eval "$cmd"
+}
+
+retry 5 15 run_check
+
+check_for_changed_files "$cmd" true