continuous-delivery #4068
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow executes the E2E Test Suite for a series of combinations that
# represent different execution environments
name: continuous-delivery
on:
  # ChatOps entry point: comments on issues / pull requests.
  issue_comment:
    # The activity-type filter key is `types` (plural). The previous `type`
    # key is not part of the workflow syntax, so the filter was not applied.
    types: [created]
  # Manually or triggered by another workflow
  workflow_dispatch:
    inputs:
      depth:
        description: 'Depth (push, pull_request, main (default), schedule)'
        required: true
        default: 'main'
      limit:
        description: 'Limit to the specified engines list (local, eks, aks, gke, openshift)'
        required: false
      test_level:
        description: 'Test level: 0(highest) to 4(lowest). Default is 4.'
        required: false
        default: '4'
      feature_type:
        description: >
          Feature Type (disruptive, performance, upgrade, smoke, basic, service-connectivity, self-healing,
          backup-restore, snapshot, operator, observability, replication, plugin, postgres-configuration,
          pod-scheduling, cluster-metadata, recovery, importing-databases, storage, security, maintenance,
          tablespaces)
        required: false
      log_level:
        description: 'Log level for operator (error, warning, info, debug(default), trace)'
        required: false
        default: 'debug'
  # Nightly run at 01:00 (cron schedules are evaluated in UTC by GitHub)
  schedule:
    - cron: '0 1 * * *'
# Workflow-wide environment variables, visible to every job below
env:
  # Toolchain and component versions
  GOLANG_VERSION: '1.23.x'
  KUBEBUILDER_VERSION: '2.3.1'
  KIND_VERSION: 'v0.26.0'
  ROOK_VERSION: 'v1.16.0'
  EXTERNAL_SNAPSHOTTER_VERSION: 'v8.2.0'
  # Image built by the `buildx` job and consumed by the E2E jobs
  OPERATOR_IMAGE_NAME: 'ghcr.io/${{ github.repository }}-testing'
  # Passed through to docker/build-push-action (empty by default)
  BUILD_PUSH_PROVENANCE: ''
  BUILD_PUSH_CACHE_FROM: ''
  BUILD_PUSH_CACHE_TO: ''
  # Container registry coordinates and credentials
  REGISTRY: 'ghcr.io'
  REGISTRY_USER: '${{ github.actor }}'
  REGISTRY_PASSWORD: '${{ secrets.GITHUB_TOKEN }}'
  REPOSITORY_OWNER: 'cloudnative-pg'
  SLACK_USERNAME: 'cnpg-bot'
  # Arguments handed to GoReleaser when building the manager binary
  BUILD_MANAGER_RELEASE_ARGS: 'build --skip=validate --clean --id manager'
  # Keep in mind that adding more platforms (architectures) will increase the building
  # time even if we use the ghcache for the building process.
  PLATFORMS: 'linux/amd64,linux/arm64'
  # Suffix appended to names of cloud resources created for E2E runs
  E2E_SUFFIX: 'cnpge2e'
defaults:
  run:
    # Shell used by every 'run' step unless the step overrides it:
    # -E/-e abort on errors (ERR traps inherited), -u rejects unset variables,
    # -o pipefail propagates pipeline failures, -x traces each command.
    shell: "bash -Eeuo pipefail -x {0}"
jobs: | |
# Scheduled runs only: re-dispatch this same workflow once per release-* branch
# so that each of them gets a smoke test.
# Note: this is a workaround since a workflow can't be schedule-run directly
# from a non default branch.
smoke_test_release_branches:
  name: smoke test release-* branches when it's a scheduled run
  if: github.event_name == 'schedule'
  runs-on: ubuntu-24.04
  strategy:
    # One failing branch must not cancel the dispatches for the others
    fail-fast: false
    matrix:
      branch:
        - release-1.22
        - release-1.23
        - release-1.24
  steps:
    - name: Invoke workflow with inputs
      uses: benc-uk/workflow-dispatch@v1
      with:
        workflow: continuous-delivery
        ref: ${{ matrix.branch }}
        inputs: '{ "depth": "push", "limit": "local", "test_level": "4", "log_level": "debug" }'
# ChatOps path: runs for '/test ...' comments on pull requests, parses the
# key=value arguments of the command and exposes them as job outputs.
check_commenter:
  if: |
    github.event_name == 'issue_comment' &&
    github.event.issue.pull_request &&
    startsWith(github.event.comment.body, '/test')
  name: Retrieve command
  runs-on: ubuntu-24.04
  outputs:
    github_ref: ${{ steps.refs.outputs.head_sha }}
    depth: ${{ env.DEPTH }}
    limit: ${{ env.LIMIT }}
    test_level: ${{ env.TEST_LEVEL }}
    feature_type: ${{ env.FEATURE_TYPE }}
    log_level: ${{ env.LOG_LEVEL }}
  steps:
    - name: Check for Command
      id: command
      uses: xt0rted/slash-command-action@v2
      continue-on-error: false
      with:
        command: test
        reaction: "true"
        reaction-type: "eyes"
        allow-edits: "false"
        permission-level: write
    - name: Process arguments
      id: args
      env:
        # The command arguments come from the comment text. They are handed
        # to the script as an environment variable rather than expanded into
        # the script source, so they are never parsed as shell code.
        COMMAND_ARGUMENTS: ${{ steps.command.outputs.command-arguments }}
      run: |
        # Set the defaults
        DEPTH="main"
        LIMIT="local"
        TEST_LEVEL="4"
        FEATURE_TYPE=""
        LOG_LEVEL="debug"
        # Arguments are whitespace-separated 'name=value' pairs: word
        # splitting is intended here, pathname expansion is not.
        set -f
        for ARG in ${COMMAND_ARGUMENTS:-}; do
          IFS='=' read -r name value <<< "${ARG}"
          case "${name}" in
            "depth"|"d")
              DEPTH="${value}"
              ;;
            "limit"|"l")
              LIMIT="${value}"
              ;;
            "test_level"|"level"|"tl")
              TEST_LEVEL="${value}"
              ;;
            "feature_type"|"type"|"ft")
              FEATURE_TYPE="${value}"
              ;;
            "log_level"|"ll")
              LOG_LEVEL="${value}"
              ;;
            *)
              # Unknown arguments are ignored
              ;;
          esac
        done
        set +f
        # Publish the values; the job outputs read them back via 'env.*'
        {
          echo "DEPTH=${DEPTH}"
          echo "LIMIT=${LIMIT}"
          echo "TEST_LEVEL=${TEST_LEVEL}"
          echo "FEATURE_TYPE=${FEATURE_TYPE}"
          echo "LOG_LEVEL=${LOG_LEVEL}"
        } >> "$GITHUB_ENV"
    - name: Resolve Git reference
      uses: xt0rted/pull-request-comment-branch@v3
      id: refs
    - name: Create comment
      uses: peter-evans/create-or-update-comment@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        repository: ${{ github.repository }}
        issue-number: ${{ github.event.issue.number }}
        body: |
          @${{ github.actor }}, here's the link to the E2E on CNPG workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
# Non-ChatOps path: derives the test options from the workflow_dispatch
# inputs, or from fixed defaults on scheduled runs, and exposes them as
# job outputs.
test_arguments:
  name: Parse arguments
  if: |
    github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
  runs-on: ubuntu-24.04
  outputs:
    github_ref: ${{ github.ref }}
    depth: ${{ env.DEPTH }}
    limit: ${{ env.LIMIT }}
    test_level: ${{ env.TEST_LEVEL }}
    feature_type: ${{ env.FEATURE_TYPE }}
    log_level: ${{ env.LOG_LEVEL }}
  steps:
    - name: Parse input to env
      env:
        # Inputs are passed as environment variables instead of being
        # expanded into the script text: values with spaces or shell
        # metacharacters are then treated purely as data.
        DISPATCH_DEPTH: ${{ github.event.inputs.depth }}
        DISPATCH_LIMIT: ${{ github.event.inputs.limit }}
        DISPATCH_TEST_LEVEL: ${{ github.event.inputs.test_level }}
        DISPATCH_FEATURE_TYPE: ${{ github.event.inputs.feature_type }}
        DISPATCH_LOG_LEVEL: ${{ github.event.inputs.log_level }}
      run: |
        # Set the defaults for workflow dispatch
        if [[ ${{ github.event_name }} == 'workflow_dispatch' ]]; then
          DEPTH="${DISPATCH_DEPTH:-}"
          LIMIT="${DISPATCH_LIMIT:-}"
          TEST_LEVEL="${DISPATCH_TEST_LEVEL:-}"
          FEATURE_TYPE="${DISPATCH_FEATURE_TYPE:-}"
          LOG_LEVEL="${DISPATCH_LOG_LEVEL:-}"
        fi
        # Set the defaults for schedule dispatch
        if [[ ${{ github.event_name }} == 'schedule' ]]; then
          DEPTH="schedule"
          LIMIT=""
          TEST_LEVEL="4"
          FEATURE_TYPE=""
          LOG_LEVEL="debug"
        fi
        # Publish the values; the job outputs read them back via 'env.*'
        {
          echo "DEPTH=${DEPTH}"
          echo "LIMIT=${LIMIT}"
          echo "TEST_LEVEL=${TEST_LEVEL}"
          echo "FEATURE_TYPE=${FEATURE_TYPE}"
          echo "LOG_LEVEL=${LOG_LEVEL}"
        } >> "$GITHUB_ENV"
# Merges the options produced by whichever of 'check_commenter' (ChatOps) or
# 'test_arguments' (dispatch / schedule) ran, so that downstream jobs have a
# single place to read them from.
evaluate_options:
  name: Evaluate workflow options
  needs:
    - check_commenter
    - test_arguments
  runs-on: ubuntu-24.04
  if: |
    (
      needs.check_commenter.result == 'success' ||
      needs.test_arguments.result == 'success'
    ) &&
    !cancelled()
  outputs:
    # Stored under GIT_REF (not GITHUB_REF) so that the name of a default
    # GitHub variable is not reused; the output key itself is unchanged.
    git_ref: ${{ env.GIT_REF }}
    depth: ${{ env.DEPTH }}
    limit: ${{ env.LIMIT }}
    test_level: ${{ env.TEST_LEVEL }}
    feature_type: ${{ env.FEATURE_TYPE }}
    log_level: ${{ env.LOG_LEVEL }}
  steps:
    - name: From command
      env:
        # Upstream outputs may originate from a PR comment. They are passed
        # as environment variables so that quotes or other shell
        # metacharacters in them cannot alter the script.
        EVENT_NAME: ${{ github.event_name }}
        DISPATCH_GIT_REF: ${{ needs.test_arguments.outputs.github_ref }}
        DISPATCH_DEPTH: ${{ needs.test_arguments.outputs.depth }}
        DISPATCH_LIMIT: ${{ needs.test_arguments.outputs.limit }}
        DISPATCH_TEST_LEVEL: ${{ needs.test_arguments.outputs.test_level }}
        DISPATCH_FEATURE_TYPE: ${{ needs.test_arguments.outputs.feature_type }}
        DISPATCH_LOG_LEVEL: ${{ needs.test_arguments.outputs.log_level }}
        COMMENT_GIT_REF: ${{ needs.check_commenter.outputs.github_ref }}
        COMMENT_DEPTH: ${{ needs.check_commenter.outputs.depth }}
        COMMENT_LIMIT: ${{ needs.check_commenter.outputs.limit }}
        COMMENT_TEST_LEVEL: ${{ needs.check_commenter.outputs.test_level }}
        COMMENT_FEATURE_TYPE: ${{ needs.check_commenter.outputs.feature_type }}
        COMMENT_LOG_LEVEL: ${{ needs.check_commenter.outputs.log_level }}
      run: |
        if [[ "${EVENT_NAME}" == 'workflow_dispatch' ]] || [[ "${EVENT_NAME}" == 'schedule' ]]; then
          {
            echo "GIT_REF=${DISPATCH_GIT_REF:-}"
            echo "DEPTH=${DISPATCH_DEPTH:-}"
            echo "LIMIT=${DISPATCH_LIMIT:-}"
            echo "TEST_LEVEL=${DISPATCH_TEST_LEVEL:-}"
            echo "FEATURE_TYPE=${DISPATCH_FEATURE_TYPE:-}"
            echo "LOG_LEVEL=${DISPATCH_LOG_LEVEL:-}"
          } >> "$GITHUB_ENV"
        fi
        if [[ "${EVENT_NAME}" == 'issue_comment' ]]; then
          {
            echo "GIT_REF=${COMMENT_GIT_REF:-}"
            echo "DEPTH=${COMMENT_DEPTH:-}"
            echo "LIMIT=${COMMENT_LIMIT:-}"
            echo "TEST_LEVEL=${COMMENT_TEST_LEVEL:-}"
            echo "FEATURE_TYPE=${COMMENT_FEATURE_TYPE:-}"
            echo "LOG_LEVEL=${COMMENT_LOG_LEVEL:-}"
          } >> "$GITHUB_ENV"
        fi
# Builds the operator binary with GoReleaser, builds and pushes the operator
# images (default and UBI8 flavors, plus a "-prime" variant for the upgrade
# test), and uploads the generated operator manifest as a workflow artifact.
buildx:
  name: Build containers
  needs:
    - check_commenter
    - test_arguments
    - evaluate_options
  if: |
    always() && !cancelled() &&
    needs.evaluate_options.result == 'success'
  runs-on: ubuntu-24.04
  permissions:
    contents: read
    packages: write
    pull-requests: read
  outputs:
    image: ${{ steps.image-meta.outputs.image }}
    # 'branch_name' is used in 'GetMostRecentReleaseTag' in the Go code
    branch_name: ${{ steps.build-meta.outputs.branch_name }}
    upload_artifacts: ${{ steps.build-meta.outputs.upload_artifacts }}
    commit_msg: ${{ steps.build-meta.outputs.commit_msg }}
    commit_sha: ${{ steps.build-meta.outputs.commit_sha }}
    author_name: ${{ steps.build-meta.outputs.author_name }}
    author_email: ${{ steps.build-meta.outputs.author_email }}
    controller_img: ${{ env.CONTROLLER_IMG }}
    controller_img_ubi8: ${{ env.CONTROLLER_IMG_UBI8 }}
    bundle_img: ${{ env.BUNDLE_IMG }}
    catalog_img: ${{ env.CATALOG_IMG }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
        # To identify the commit we need the history and all the tags.
        fetch-depth: 0
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
        check-latest: true
    - name: Build meta
      id: build-meta
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # The ref to build is passed as data: ref names may legally contain
        # shell metacharacters, so it must not be pasted into the script.
        BUILD_GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
      run: |
        images='${{ env.OPERATOR_IMAGE_NAME }}'
        tags=''
        labels=''
        commit_sha="${BUILD_GIT_REF}"
        commit_date=$(git log -1 --pretty=format:'%ad' --date short "${commit_sha}" || : )
        # use git describe to get the nearest tag and use that to build the version (e.g. 1.4.0-dev24 or 1.4.0)
        commit_version=$(git describe --tags --match 'v*' "${commit_sha}"| sed -e 's/^v//; s/-g[0-9a-f]\+$//; s/-\([0-9]\+\)$/-dev\1/')
        # shortened commit sha
        commit_short=$(git rev-parse --short "${commit_sha}")
        # The commit message may span several lines: percent-encode '%', LF
        # and CR so that it fits in a single 'name=value' output line.
        # Consumers of 'commit_msg' receive the encoded form.
        commit_message=$(git show -s --format=%B "${commit_sha}")
        commit_message=${commit_message//$'%'/'%25'}
        commit_message=${commit_message//$'\n'/'%0A'}
        commit_message=${commit_message//$'\r'/'%0D'}
        # get git user and email
        author_name=$(git show -s --format='%an' "${commit_sha}")
        author_email=$(git show -s --format='%ae' "${commit_sha}")
        # extract branch name
        if [[ ${{ github.event_name }} == 'workflow_dispatch' ]] || [[ ${{ github.event_name }} == 'schedule' ]]
        then
          branch_name=${GITHUB_REF#refs/heads/}
        fi
        if [[ ${{ github.event_name }} == 'issue_comment' ]]
        then
          branch_name=$(gh pr view "${{ github.event.issue.number }}" --json headRefName -q '.headRefName' 2>/dev/null)
        fi
        # extract tag from branch name
        tag_name=$(echo "$branch_name" | sed 's/[^a-zA-Z0-9]/-/g')
        # artifacts are published only for main and release-* branches
        upload_artifacts=false
        if [[ ${branch_name} == main || ${branch_name} =~ ^release- ]]; then
          upload_artifacts=true
        fi
        # values consumed by later steps of this job
        {
          echo "IMAGES=${images}"
          echo "TAGS=${tags}"
          echo "LABELS=${labels}"
          echo "DATE=${commit_date}"
          echo "VERSION=${commit_version}"
          echo "COMMIT=${commit_short}"
        } >> "$GITHUB_ENV"
        # values exposed as step (and job) outputs
        {
          echo "commit_sha=${commit_sha}"
          echo "commit_msg=${commit_message}"
          echo "author_name=${author_name}"
          echo "author_email=${author_email}"
          echo "branch_name=${branch_name}"
          echo "tag_name=${tag_name,,}"
          echo "upload_artifacts=${upload_artifacts}"
        } >> "$GITHUB_OUTPUT"
    - name: Set GoReleaser environment
      run: |
        echo GOPATH=$(go env GOPATH) >> "$GITHUB_ENV"
        echo PWD=$(pwd) >> "$GITHUB_ENV"
    - name: Run GoReleaser
      uses: goreleaser/goreleaser-action@v6
      with:
        distribution: goreleaser
        version: v2
        args: ${{ env.BUILD_MANAGER_RELEASE_ARGS }}
      env:
        DATE: ${{ env.DATE }}
        COMMIT: ${{ env.COMMIT }}
        VERSION: ${{ env.VERSION }}
    - name: Docker meta
      id: docker-meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.IMAGES }}
        tags: |
          type=raw,value=${{ steps.build-meta.outputs.tag_name }}
    - name: Docker meta UBI8
      id: docker-meta-ubi8
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.IMAGES }}
        flavor: |
          suffix=-ubi8
        tags: |
          type=raw,value=${{ steps.build-meta.outputs.tag_name }}
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
      with:
        platforms: ${{ env.PLATFORMS }}
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login into docker registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ env.REGISTRY_USER }}
        password: ${{ env.REGISTRY_PASSWORD }}
    - name: Build and push
      uses: docker/build-push-action@v6
      with:
        platforms: ${{ env.PLATFORMS }}
        context: .
        file: Dockerfile
        push: true
        build-args: |
          VERSION=${{ env.VERSION }}
        tags: ${{ steps.docker-meta.outputs.tags }}
        labels: ${{ env.LABELS }}
        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}
    - name: Build and push UBI8
      uses: docker/build-push-action@v6
      with:
        platforms: ${{ env.PLATFORMS }}
        context: .
        file: Dockerfile-ubi8
        push: true
        build-args: |
          VERSION=${{ env.VERSION }}
        tags: ${{ steps.docker-meta-ubi8.outputs.tags }}
        labels: ${{ env.LABELS }}
        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}
    - name: Image Meta
      id: image-meta
      env:
        TAGS: ${{ steps.docker-meta.outputs.tags }}
      run: |
        # If there is more than one tag, take the first one
        # TAGS could be separated by newlines or commas
        image=$(sed -n '1{s/,.*//; p}' <<< "$TAGS")
        echo "image=${image}" >> "$GITHUB_OUTPUT"
    - name: Output images
      env:
        TAGS: ${{ steps.docker-meta.outputs.tags }}
        TAGS_UBI8: ${{ steps.docker-meta-ubi8.outputs.tags }}
      run: |
        LOWERCASE_OPERATOR_IMAGE_NAME=${OPERATOR_IMAGE_NAME,,}
        # keep only what follows the first ':' (the tag part)
        TAG=${TAGS#*:}
        TAG_UBI=${TAGS_UBI8#*:}
        {
          echo "CONTROLLER_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:${TAG}"
          echo "CONTROLLER_IMG_UBI8=${LOWERCASE_OPERATOR_IMAGE_NAME}:${TAG_UBI}"
          echo "BUNDLE_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:bundle-${TAG}"
          echo "CATALOG_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:catalog-${TAG}"
        } >> "$GITHUB_ENV"
    - name: Generate manifest for operator deployment
      id: generate-manifest
      env:
        CONTROLLER_IMG: ${{ steps.image-meta.outputs.image }}
      run: |
        make generate-manifest
    - name: Upload the operator manifest as artifact in workflow
      uses: actions/upload-artifact@v4
      with:
        name: operator-manifest.yaml
        path: dist/operator-manifest.yaml
        retention-days: 7
    # In order to test the case of upgrading from the current operator
    # to a future one, we build and push an image with a different VERSION
    # to force a different hash for the manager binary.
    # (Otherwise the ONLINE upgrade won't trigger)
    #
    # NOTE: we only fire this in TEST DEPTH = 4, as that is the level of the
    # upgrade test
    - name: Build binary for upgrade test
      uses: goreleaser/goreleaser-action@v6
      if: |
        always() && !cancelled() &&
        needs.evaluate_options.outputs.test_level == '4'
      with:
        distribution: goreleaser
        version: v2
        args: ${{ env.BUILD_MANAGER_RELEASE_ARGS }}
      env:
        DATE: ${{ env.DATE }}
        COMMIT: ${{ env.COMMIT }}
        VERSION: ${{ env.VERSION }}-prime
    # In order to test the case of upgrading from the current operator
    # to a future one, we build and push an image with a different VERSION
    # to force a different hash for the manager binary.
    # (Otherwise the ONLINE upgrade won't trigger)
    #
    # We push the "prime" binary using a tag with the suffix "-prime"
    # NOTE: we only fire this in TEST DEPTH = 4, as that is the level of the
    # upgrade test
    - name: Build and push image for upgrade test
      uses: docker/build-push-action@v6
      if: |
        always() && !cancelled() &&
        needs.evaluate_options.outputs.test_level == '4'
      with:
        platforms: ${{ env.PLATFORMS }}
        context: .
        file: Dockerfile
        push: true
        build-args: |
          VERSION=${{ env.VERSION }}-prime
        tags: ${{ steps.docker-meta.outputs.tags }}-prime
        labels: ${{ env.LABELS }}
        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}
# This will only execute in cloudnative-pg org
#
# Commits the operator manifest produced by 'buildx' into the
# cloudnative-pg/artifacts repository, on a branch named after the source
# branch, reusing the author and message of the source commit.
publish-artifacts:
  name: Publish artifacts
  needs:
    - buildx
  if: |
    (always() && !cancelled()) &&
    needs.buildx.result == 'success' &&
    needs.buildx.outputs.upload_artifacts == 'true' &&
    github.repository_owner == 'cloudnative-pg'
  runs-on: ubuntu-24.04
  steps:
    - name: Checkout artifact
      uses: actions/checkout@v4
      with:
        repository: cloudnative-pg/artifacts
        token: ${{ secrets.REPO_GHA_PAT }}
        ref: main
        fetch-depth: 0
    - name: Configure git user
      env:
        # Author name and e-mail are taken from the built commit and are
        # therefore free-form text: pass them as data, never as script source.
        AUTHOR_EMAIL: ${{ needs.buildx.outputs.author_email }}
        AUTHOR_NAME: ${{ needs.buildx.outputs.author_name }}
      run: |
        git config user.email "${AUTHOR_EMAIL}"
        git config user.name "${AUTHOR_NAME}"
    - name: Switch to or create the right branch
      env:
        BRANCH: ${{ needs.buildx.outputs.branch_name }}
      run: |
        git checkout "${BRANCH}" 2>/dev/null || git checkout -b "${BRANCH}"
        # Remove the previous operator manifest if present because the next
        # step doesn't overwrite existing files
        rm -fr manifests/operator-manifest.yaml
    - name: Prepare the operator manifest
      uses: actions/download-artifact@v4
      with:
        name: operator-manifest.yaml
        path: manifests
    - name: Prepare the commit
      env:
        COMMIT_MESSAGE: |
          ${{ needs.buildx.outputs.commit_msg }}
          https://github.com/cloudnative-pg/cloudnative-pg/commit/${{ needs.buildx.outputs.commit_sha }}
      run: |
        # Skip creating the commit if there are no changes
        [ -n "$(git status -s)" ] || exit 0
        # 'buildx' percent-encodes the commit message ('%' -> %25,
        # LF -> %0A, CR -> %0D) to keep it on one output line. Undo that
        # here, decoding %25 last so an encoded literal '%0A' stays intact.
        lf=$'\n'
        cr=$'\r'
        message=${COMMIT_MESSAGE//'%0A'/${lf}}
        message=${message//'%0D'/${cr}}
        message=${message//'%25'/%}
        git add .
        git commit -m "${message}"
    - name: Push changes
      # NOTE(review): the version tag was obfuscated in the reviewed copy of
      # this file ("[email protected]"); v0.8.0 is assumed -- confirm before merging.
      uses: ad-m/github-push-action@v0.8.0
      with:
        github_token: ${{ secrets.REPO_GHA_PAT }}
        repository: cloudnative-pg/artifacts
        branch: ${{ needs.buildx.outputs.branch_name }}
# Runs .github/e2e-matrix-generator.py and re-exports its step outputs (one
# matrix / enabled flag / timeout triple per engine) for the E2E jobs.
generate-jobs:
  name: Generate jobs for E2E tests
  needs:
    - buildx
    - evaluate_options
  # We try to avoid running the E2E Test Suite in general, to reduce load on
  # GitHub resources.
  # Currently, it's executed in the following cases:
  # - When dispatched via chatops commands
  # - On a push in main and release branches
  # - On scheduled executions
  if: |
    (always() && !cancelled()) &&
    needs.buildx.result == 'success'
  runs-on: ubuntu-24.04
  outputs:
    image: ${{ needs.buildx.outputs.image }}
    localMatrix: ${{ steps.generate-jobs.outputs.localMatrix }}
    localEnabled: ${{ steps.generate-jobs.outputs.localEnabled }}
    localTimeout: ${{ steps.generate-jobs.outputs.localE2ETimeout }}
    eksMatrix: ${{ steps.generate-jobs.outputs.eksMatrix }}
    eksEnabled: ${{ steps.generate-jobs.outputs.eksEnabled }}
    eksTimeout: ${{ steps.generate-jobs.outputs.eksE2ETimeout }}
    aksMatrix: ${{ steps.generate-jobs.outputs.aksMatrix }}
    aksEnabled: ${{ steps.generate-jobs.outputs.aksEnabled }}
    aksTimeout: ${{ steps.generate-jobs.outputs.aksE2ETimeout }}
    gkeMatrix: ${{ steps.generate-jobs.outputs.gkeMatrix }}
    gkeEnabled: ${{ steps.generate-jobs.outputs.gkeEnabled }}
    gkeTimeout: ${{ steps.generate-jobs.outputs.gkeE2ETimeout }}
    openshiftMatrix: ${{ steps.generate-jobs.outputs.openshiftMatrix }}
    openshiftEnabled: ${{ steps.generate-jobs.outputs.openshiftEnabled }}
    openshiftTimeout: ${{ steps.generate-jobs.outputs.openshiftE2ETimeout }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
    - id: generate-jobs
      # Generates the jobs that will become different matrix branches,
      # according to the event, or to the "depth" parameter if set manually
      name: Generate Jobs
      shell: bash
      env:
        # 'depth' and 'limit' can come from a PR comment; pass them as data
        # so a quote character cannot break out of the command line.
        OPT_DEPTH: ${{ needs.evaluate_options.outputs.depth }}
        OPT_LIMIT: ${{ needs.evaluate_options.outputs.limit }}
      run: |
        python .github/e2e-matrix-generator.py \
          -m "${OPT_DEPTH}" \
          -l "${OPT_LIMIT}"
# Runs the E2E suite on kind clusters created on the GitHub runner, once per
# entry of the 'localMatrix' produced by 'generate-jobs'.
e2e-local:
  name: Run E2E on local executors
  if: |
    (always() && !cancelled()) &&
    needs.generate-jobs.outputs.localEnabled == 'true' &&
    needs.generate-jobs.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - evaluate_options
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.generate-jobs.outputs.localMatrix) }}
  runs-on: ubuntu-24.04
  env:
    # TEST_DEPTH determines the maximum test level the suite should be running
    TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
    # FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
    FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}
    K8S_VERSION: "${{ matrix.k8s_version }}"
    POSTGRES_VERSION: ${{ matrix.postgres_version }}
    POSTGRES_KIND: ${{ matrix.postgres_kind }}
    MATRIX: ${{ matrix.id }}
    POSTGRES_IMG: "${{ matrix.postgres_img }}"
    # The version of operator to upgrade FROM, in the rolling upgrade E2E test
    E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
    TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.localTimeout }}
    BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}
    DEBUG: "true"
    BUILD_IMAGE: "false"
    CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
    E2E_DEFAULT_STORAGE_CLASS: standard
    E2E_CSI_STORAGE_CLASS: csi-hostpath-sc
    E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-hostpath-snapclass
    LOG_DIR: ${{ github.workspace }}/kind-logs/
    DOCKER_REGISTRY_MIRROR: https://mirror.gcr.io
    TEST_CLOUD_VENDOR: "local"
  steps:
    - name: Cleanup Disk
      # Pinned to a release instead of the mutable 'main' branch, so the code
      # executed on the runner cannot change without a change to this file.
      # NOTE(review): v1.3.1 is assumed to be the current release -- confirm,
      # and consider pinning to the full commit SHA.
      uses: jlumbroso/free-disk-space@v1.3.1
      with:
        android: true
        dotnet: true
        haskell: true
        tool-cache: true
        large-packages: false
        swap-storage: false
    - name: Cleanup docker cache
      run: |
        echo "-------------Disk info before cleanup----------------"
        df -h
        echo "-----------------------------------------------------"
        docker system prune -a -f
        echo "-------------Disk info after cleanup----------------"
        df -h
        echo "-----------------------------------------------------"
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
        check-latest: true
    ## In case hack/setup-cluster.sh need pull operand image from registry
    - name: Login into docker registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ env.REGISTRY_USER }}
        password: ${{ env.REGISTRY_PASSWORD }}
    # 'Retry' preparing the E2E test ENV
    - name: Prepare the environment
      uses: nick-fields/retry@v3
      with:
        timeout_seconds: 300
        max_attempts: 3
        on_retry_command: |
          # Clear-ups before retries
          sudo rm -rf /usr/local/bin/kind /usr/local/bin/kubectl
        command: |
          sudo apt-get update
          sudo apt-get install -y gettext-base
          sudo hack/setup-cluster.sh prepare /usr/local/bin
    - name: Prepare patch for customization
      env:
        ## the following variable all need be set if we use env_override_customized.yaml.template
        ## this is customization for local kind
        # (environment values are always strings; quoted to make that explicit)
        LEADER_ELECTION: "true"
        LEADER_LEASE_DURATION: "15"
        LEADER_RENEW_DEADLINE: "10"
        LIVENESS_PROBE_THRESHOLD: "3"
        LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
      run: |
        # fall back to 'info' when no log level was provided
        LOG_LEVEL=${LOG_LEVEL:-info}
        envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
        cat config/manager/env_override.yaml
    - name: Run Kind End-to-End tests
      env:
        ENABLE_APISERVER_AUDIT: "true"
      run: |
        make e2e-test-kind
    # Summarize the failed E2E test cases if there are any
    - name: Report failed E2E tests
      if: failure()
      run: |
        set +x
        chmod +x .github/report-failed-test.sh
        ./.github/report-failed-test.sh
    # Create an individual artifact for each E2E test, which will be used to
    # generate E2E test summary in the follow-up job 'summarize-e2e-tests'
    - name: Create individual artifact for each E2E test
      if: (always() && !cancelled())
      env:
        RUNNER: "local"
        RUN_ID: ${{ github.run_id }}
        REPOSITORY: ${{ github.repository }}
        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
      run: |
        set +x
        python .github/generate-test-artifacts.py \
          -o testartifacts-${{ env.MATRIX }} \
          -f tests/e2e/out/report.json \
          --environment=true
        # the upgrade report only exists when the upgrade tests were run
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          python .github/generate-test-artifacts.py \
            -o testartifacts-${{ env.MATRIX }} \
            -f tests/e2e/out/upgrade_report.json \
            --environment=true
        fi
    - name: Archive test artifacts
      if: (always() && !cancelled())
      uses: actions/upload-artifact@v4
      with:
        name: testartifacts-${{ env.MATRIX }}
        path: testartifacts-${{ env.MATRIX }}/
        retention-days: 7
    - name: Cleanup test artifacts
      if: always()
      run: |
        rm -rf testartifacts-${{ env.MATRIX }}/
    - name: Cleanup ginkgo JSON report
      # Delete report.json after the analysis. File should always exist.
      # Delete upgrade_report.json. It may not exist depending on test level.
      if: always()
      run: |
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          rm tests/e2e/out/upgrade_report.json
        fi
        if [ -f tests/e2e/out/report.json ]; then
          rm tests/e2e/out/report.json
        fi
    # Archive logs for failed test cases if there are any
    - name: Archive Kind logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: kind-logs-${{ matrix.id }}
        path: kind-logs/
        retention-days: 7
    - name: Archive e2e failure contexts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: test-failure-contexts-${{ matrix.id }}
        path: |
          tests/*/out/
        retention-days: 7
        if-no-files-found: ignore
    - name: Archive e2e logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-logs-${{ matrix.id }}
        path: |
          tests/e2e/cluster_logs/**
        retention-days: 7
        if-no-files-found: ignore
# AKS Secrets required
#   secrets.AZURE_CREDENTIALS
#   secrets.AZURE_SUBSCRIPTION
#   secrets.AZURE_RESOURCEGROUP
#   secrets.AZURE_RESOURCENAME
#   secrets.AZURE_WORKSPACE_RESOURCE_ID
#
# Creates the Azure storage account shared by all the AKS E2E matrix jobs of
# this run and exposes its name as a job output.
e2e-aks-setup:
  name: Setup shared resources for Microsoft AKS E2Es
  if: |
    (always() && !cancelled()) &&
    vars.AKS_ENABLED == 'true' &&
    needs.generate-jobs.outputs.aksEnabled == 'true' &&
    needs.generate-jobs.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - evaluate_options
  runs-on: ubuntu-24.04
  outputs:
    azure_storage_account: ${{ steps.setup.outputs.azure_storage_account }}
  steps:
    - name: Azure Login
      # NOTE(review): the version tag was obfuscated in the reviewed copy of
      # this file ("[email protected]"); v2.2.0 is assumed -- confirm before merging.
      uses: azure/login@v2.2.0
      with:
        creds: ${{ secrets.AZURE_CREDENTIALS }}
    - name: Create AKS shared resources
      uses: nick-fields/retry@v3
      id: setup
      with:
        timeout_minutes: 10
        max_attempts: 3
        command: |
          az extension add --allow-preview true --name aks-preview
          az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
          # the account name is the run number followed by the E2E suffix
          AZURE_STORAGE_ACCOUNT="${{ github.run_number }}${{ env.E2E_SUFFIX }}"
          az storage account create \
            --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
            --name "${AZURE_STORAGE_ACCOUNT}" \
            --sku Standard_LRS -o none
          # Output storage account name
          echo "azure_storage_account=${AZURE_STORAGE_ACCOUNT}" >> "$GITHUB_OUTPUT"
e2e-aks: | |
name: Run E2E on Microsoft AKS | |
if: | | |
(always() && !cancelled()) && | |
vars.AKS_ENABLED == 'true' && | |
needs.generate-jobs.outputs.aksEnabled == 'true' && | |
needs.generate-jobs.result == 'success' && | |
needs.e2e-aks-setup.result == 'success' | |
needs: | |
- buildx | |
- generate-jobs | |
- evaluate_options | |
- e2e-aks-setup | |
strategy: | |
fail-fast: false | |
max-parallel: 8 | |
matrix: ${{ fromJSON(needs.generate-jobs.outputs.aksMatrix) }} | |
runs-on: ubuntu-24.04 | |
env: | |
# TEST_DEPTH determines the maximum test level the suite should be running | |
TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }} | |
# FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type | |
FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }} | |
K8S_VERSION: "${{ matrix.k8s_version }}" | |
POSTGRES_VERSION: ${{ matrix.postgres_version }} | |
POSTGRES_KIND: ${{ matrix.postgres_kind }} | |
MATRIX: ${{ matrix.id }} | |
POSTGRES_IMG: "${{ matrix.postgres_img }}" | |
# The version of operator to upgrade FROM, in the rolling upgrade E2E test | |
E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}" | |
TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.aksTimeout }} | |
BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }} | |
AZURE_STORAGE_ACCOUNT: ${{ needs.e2e-aks-setup.outputs.azure_storage_account }} | |
# AZURE_STORAGE_KEY: this one is gathered during a subsequent step | |
DEBUG: "true" | |
BUILD_IMAGE: "false" | |
CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }} | |
E2E_DEFAULT_STORAGE_CLASS: rook-ceph-block | |
E2E_CSI_STORAGE_CLASS: rook-ceph-block | |
E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-rbdplugin-snapclass | |
TEST_CLOUD_VENDOR: "aks" | |
steps: | |
- | |
name: Checkout code | |
uses: actions/checkout@v4 | |
with: | |
ref: ${{ needs.evaluate_options.outputs.git_ref }} | |
- | |
name: Install Go | |
uses: actions/setup-go@v5 | |
with: | |
go-version: ${{ env.GOLANG_VERSION }} | |
check-latest: true | |
- | |
name: Prepare the environment | |
uses: nick-fields/retry@v3 | |
with: | |
timeout_seconds: 300 | |
max_attempts: 3 | |
command: | | |
sudo apt-get update | |
sudo apt-get install -y gettext-base | |
- | |
name: Install ginkgo | |
uses: nick-fields/retry@v3 | |
with: | |
timeout_minutes: 1 | |
max_attempts: 3 | |
command: | | |
go install github.com/onsi/ginkgo/v2/ginkgo | |
- | |
## In case hack/setup-cluster.sh need pull operand image from registry | |
name: Login into docker registry | |
uses: docker/login-action@v3 | |
with: | |
registry: ${{ env.REGISTRY }} | |
username: ${{ env.REGISTRY_USER }} | |
password: ${{ env.REGISTRY_PASSWORD }} | |
- | |
name: Azure Login | |
uses: azure/[email protected] | |
with: | |
creds: ${{ secrets.AZURE_CREDENTIALS }} | |
- | |
name: Install kubectl | |
uses: azure/setup-kubectl@v4 | |
with: | |
version: v${{ env.K8S_VERSION }} | |
# Create the AKS cluster for this matrix entry and export, through
# $GITHUB_ENV, the values needed by later steps: AZURE_AKS (cluster name),
# AZURE_STORAGE_KEY (masked in the logs) and AZURE_BLOB_CONTAINER.
# The whole command is attempted up to 3 times, 10 minutes per attempt.
- name: Create AKS cluster
  uses: nick-fields/retry@v3
  with:
    max_attempts: 3
    timeout_minutes: 10
    command: |
      az extension add --allow-preview true --name aks-preview
      az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
      # cluster name: <resource name>-<run number>-<matrix id without '_', '.', '-'>
      AZURE_AKS="${{ secrets.AZURE_RESOURCENAME }}-${{ github.run_number }}-$( echo ${{ matrix.id }} | tr -d '_.-' )"
      echo "AZURE_AKS=${AZURE_AKS}" >> $GITHUB_ENV
      # first key of the storage account, hidden from the logs before being exported
      AZURE_STORAGE_KEY=$(az storage account keys list -g "${{ secrets.AZURE_RESOURCEGROUP }}" -n "${{ env.AZURE_STORAGE_ACCOUNT }}" --query "[0].value" -o tsv)
      echo "::add-mask::$AZURE_STORAGE_KEY"
      echo "AZURE_STORAGE_KEY=${AZURE_STORAGE_KEY}" >> $GITHUB_ENV
      # blob container of this cluster: the lower-cased matrix id without '_', '.', '-'
      AZURE_BLOB_CONTAINER="$( echo ${{ matrix.id }} | tr -d '_.-' | tr '[:upper:]' '[:lower:]' )"
      echo "AZURE_BLOB_CONTAINER=${AZURE_BLOB_CONTAINER}" >> $GITHUB_ENV
      # create the cluster, then fetch its credentials for kubectl
      az aks create --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
        --name ${AZURE_AKS} \
        --tier standard \
        --node-count 3 -k v${K8S_VERSION} --generate-ssh-keys --enable-addons monitoring \
        --workspace-resource-id ${{ secrets.AZURE_WORKSPACE_RESOURCE_ID }} \
        --aks-custom-headers EnableAzureDiskFileCSIDriver=true
      az aks get-credentials --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
        --name ${AZURE_AKS}
      # send the kube-apiserver logs of the new cluster to the workspace
      AKS_CLUSTER_RESOURCE_ID=$(az aks show --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --name ${AZURE_AKS} --query id -o tsv --only-show-errors)
      az monitor diagnostic-settings create \
        --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
        --resource ${AKS_CLUSTER_RESOURCE_ID} \
        --name diagnostic-kube-apiserver-logs \
        --workspace ${{ secrets.AZURE_WORKSPACE_RESOURCE_ID }} \
        --logs '[ { "category": "kube-apiserver", "enabled": true } ]'
-
  # Azure is slow in provisioning disks, and we can't wait two minutes
  # every time we create a pod, otherwise all the tests will time out.
  # We set up a few large disks now, we run Rook on top of them and we
  # use Rook to get the small PVs we use in the tests.
  # It can still take a while to deploy Rook, hence the single attempt with
  # a 27 minute budget.
  name: Set up Rook
  uses: nick-fields/retry@v3
  with:
    timeout_minutes: 27
    max_attempts: 1
    command: |
      STORAGECLASSNAME=default
      go install github.com/mikefarah/yq/v4@v4
      ROOK_BASE_URL=https://raw.githubusercontent.com/rook/rook/${{ env.ROOK_VERSION }}/deploy/examples
      kubectl apply -f ${ROOK_BASE_URL}/crds.yaml
      kubectl apply -f ${ROOK_BASE_URL}/common.yaml
      kubectl apply -f ${ROOK_BASE_URL}/operator.yaml
      # Take the upstream cluster-on-pvc example, drop comment and blank
      # lines, then request 50Gi volumes from the chosen storage class
      curl ${ROOK_BASE_URL}/cluster-on-pvc.yaml | \
        sed '/^ *#/d;/^ *$/d' | \
        yq e ".spec.storage.storageClassDeviceSets[].volumeClaimTemplates[].spec.resources.requests.storage = \"50Gi\" |
          .spec.storage.storageClassDeviceSets[].volumeClaimTemplates[].spec.storageClassName = \"${STORAGECLASSNAME}\" |
          .spec.mon.volumeClaimTemplate.spec.storageClassName = \"${STORAGECLASSNAME}\" " - | \
        kubectl apply -f -
      # `kubectl wait` needs the deployments to exist, so poll until the
      # three OSD deployments have been created. Pause between polls so the
      # API server is not hit in a tight loop; the step timeout bounds the
      # overall wait.
      echo "Waiting for the Rook OSD deployments to be created"
      while true; do
        output=$( kubectl get deploy -n rook-ceph -l app=rook-ceph-osd --no-headers -o name )
        if [[ $(wc -w <<< "$output") == 3 ]]; then
          break
        fi
        sleep 5
      done
      echo "Waiting for Rook OSDs to be available"
      kubectl wait deploy -n rook-ceph --for condition=available --timeout 480s -l app=rook-ceph-osd
      kubectl apply -f ${ROOK_BASE_URL}/csi/rbd/storageclass.yaml
      kubectl apply -f ${ROOK_BASE_URL}/csi/rbd/snapshotclass.yaml
      kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
## Render config/manager/env_override.yaml from
## hack/e2e/env_override_customized.yaml.template and print the result.
## All the variables below need to be set when that template is used;
## these values are the customization for aks.
- name: Prepare patch for customization
  env:
    LOG_LEVEL: "${{ needs.evaluate_options.outputs.log_level }}"
    LEADER_ELECTION: "true"
    LEADER_LEASE_DURATION: "15"
    LEADER_RENEW_DEADLINE: "10"
    LIVENESS_PROBE_THRESHOLD: "3"
  run: |
    # fall back to "info" when no log level was provided
    LOG_LEVEL=${LOG_LEVEL:-info}
    envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
    cat config/manager/env_override.yaml
# Run the E2E suite through the repository script.
- name: Run E2E tests
  run: |
    hack/e2e/run-e2e.sh
# Only when an earlier step failed: print a summary of the failed E2E test
# cases, if there are any, through the repository's report script.
- name: Report failed E2E tests
  if: failure()
  run: |
    set +x
    # the script may not be executable after checkout
    chmod +x .github/report-failed-test.sh
    ./.github/report-failed-test.sh
# Turn the ginkgo JSON report(s) into one artifact per E2E test; the
# follow-up job 'summarize-e2e-tests' builds the E2E summary from them.
# Runs unless the workflow was cancelled.
- name: Create individual artifact for each E2E test
  if: (always() && !cancelled())
  env:
    GIT_REF: "${{ needs.evaluate_options.outputs.git_ref }}"
    REPOSITORY: "${{ github.repository }}"
    RUN_ID: "${{ github.run_id }}"
    RUNNER: "aks"
  run: |
    set +x
    python .github/generate-test-artifacts.py -o testartifacts-${{ env.MATRIX }} -f tests/e2e/out/report.json --environment=true
    # the upgrade report is only processed when it has been generated
    if [ -f tests/e2e/out/upgrade_report.json ]; then
      python .github/generate-test-artifacts.py -o testartifacts-${{ env.MATRIX }} -f tests/e2e/out/upgrade_report.json --environment=true
    fi
# Upload the per-test artifacts of this matrix entry, kept for 7 days.
# Runs unless the workflow was cancelled.
- name: Archive test artifacts
  if: (always() && !cancelled())
  uses: actions/upload-artifact@v4
  with:
    retention-days: 7
    name: "testartifacts-${{ env.MATRIX }}"
    path: "testartifacts-${{ env.MATRIX }}/"
# Always remove the local per-test artifact directory.
- name: Cleanup test artifacts
  if: always()
  run: rm -rf testartifacts-${{ env.MATRIX }}/
# Delete the ginkgo JSON reports after the analysis.
# report.json should always exist; upgrade_report.json may not exist
# depending on the test level. Either one is removed only when present.
- name: Cleanup ginkgo JSON report
  if: always()
  run: |
    for report in tests/e2e/out/upgrade_report.json tests/e2e/out/report.json; do
      if [ -f "${report}" ]; then
        rm "${report}"
      fi
    done
# On failure, upload the tests' output directories for 7 days; finding no
# files is not treated as an error.
- name: Archive e2e failure contexts
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: "test-failure-contexts-${{ matrix.id }}"
    if-no-files-found: ignore
    retention-days: 7
    path: |
      tests/*/out/
# On failure, upload the collected cluster logs for 7 days; finding no
# files is not treated as an error.
- name: Archive e2e logs
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: "cluster-logs-${{ matrix.id }}"
    if-no-files-found: ignore
    retention-days: 7
    path: |
      tests/e2e/cluster_logs/**
-
  # Best-effort teardown, executed whatever the outcome of the job: delete
  # the AKS cluster of this matrix entry, then the data-collection rule
  # named MSCI-<resource group location>-<cluster name> (presumably created
  # by the monitoring add-on enabled at cluster creation). Each deletion is
  # attempted up to three times; errors never fail the step (`set +e`).
  name: Clean up
  if: always()
  run: |
    set +e
    # AZURE_AKS is exported by the "Create AKS cluster" step: when the job
    # stopped before that point no cluster was requested, and every command
    # below would just fail on an empty name
    if [ -z "${{ env.AZURE_AKS }}" ]; then
      echo "AKS cluster name is not set, nothing to clean up"
      exit 0
    fi
    az extension add --allow-preview true --name monitor-control-service
    az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
    attempt=1
    max_attempts=3
    while [ "${attempt}" -le "${max_attempts}" ]; do
      echo "Deleting cluster. Attempt ${attempt} of ${max_attempts}"
      az aks delete --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} -y --name ${{ env.AZURE_AKS }}
      status=$?
      if [[ $status == 0 ]]; then
        echo "AKS cluster deleted"
        break
      fi
      echo "Failed deleting cluster ${{ env.AZURE_AKS }}, retrying"
      sleep 5
      attempt=$((attempt+1))
    done
    attempt=1
    AZURE_RESOURCEGROUP_LOCATION="$( az group show --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --query location -o tsv --only-show-errors )"
    DATA_COLL_RULE_NAME="MSCI-${AZURE_RESOURCEGROUP_LOCATION}-${{ env.AZURE_AKS }}"
    while [ "${attempt}" -le "${max_attempts}" ]; do
      echo "Deleting data-collection rule ${DATA_COLL_RULE_NAME}. Attempt ${attempt} of ${max_attempts}"
      az monitor data-collection rule show --name ${DATA_COLL_RULE_NAME} --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --query name
      # if not found, let it go
      status=$?
      if [[ $status != 0 ]]; then
        echo "AKS data-collection rule not found"
        break
      fi
      az monitor data-collection rule delete -y --name ${DATA_COLL_RULE_NAME} --resource-group ${{ secrets.AZURE_RESOURCEGROUP }}
      status=$?
      if [[ $status == 0 ]]; then
        echo "AKS data-collection rule deleted"
        break
      fi
      echo "Failed deleting data-collection rule ${DATA_COLL_RULE_NAME}, retrying"
      sleep 5
      attempt=$((attempt+1))
    done
# Remove the Azure resources shared by all the AKS matrix entries (the
# storage account reported by the e2e-aks-setup job). The job runs after
# e2e-aks whatever its outcome, provided AKS testing is enabled and both
# generate-jobs and e2e-aks-setup succeeded.
e2e-aks-teardown:
  name: Teardown Microsoft AKS shared resources
  if: |
    always() &&
    vars.AKS_ENABLED == 'true' &&
    needs.generate-jobs.outputs.aksEnabled == 'true' &&
    needs.generate-jobs.result == 'success' &&
    needs.e2e-aks-setup.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - e2e-aks-setup
    - e2e-aks
  runs-on: ubuntu-24.04
  env:
    AZURE_STORAGE_ACCOUNT: ${{ needs.e2e-aks-setup.outputs.azure_storage_account }}
  steps:
    -
      name: Azure Login
      if: always()
      # NOTE(review): referenced through the v2 major tag; restore the exact
      # patch-level pin here if the project requires one.
      uses: azure/login@v2
      with:
        creds: ${{ secrets.AZURE_CREDENTIALS }}
    -
      # Delete the shared storage account; up to 3 attempts, 5 minutes each
      name: Teardown AKS shared resources
      if: always()
      uses: nick-fields/retry@v3
      with:
        timeout_minutes: 5
        max_attempts: 3
        command: |
          az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
          az storage account delete -y --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --name ${{ env.AZURE_STORAGE_ACCOUNT }}
# EKS Secrets required | |
# secrets.AWS_EKS_ADMIN_IAM_ROLES | |
# secrets.AWS_ACCESS_KEY_ID | |
# secrets.AWS_SECRET_ACCESS_KEY | |
# Run the E2E suite on Amazon EKS: one job per entry of the EKS matrix
# produced by generate-jobs. Each job creates its own cluster with eksctl,
# installs the snapshot controller, the gp3 storage class and Velero, runs
# the tests, publishes the reports and finally tears everything down.
e2e-eks:
  name: Run E2E on Amazon EKS
  if: |
    (always() && !cancelled()) &&
    vars.EKS_ENABLED == 'true' &&
    needs.generate-jobs.outputs.eksEnabled == 'true' &&
    needs.generate-jobs.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - evaluate_options
  strategy:
    fail-fast: false
    max-parallel: 6
    matrix: ${{ fromJSON(needs.generate-jobs.outputs.eksMatrix) }}
  runs-on: ubuntu-24.04
  env:
    # TEST_DEPTH determines the maximum test level the suite should be running
    TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
    # FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
    FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}
    K8S_VERSION: "${{ matrix.k8s_version }}"
    POSTGRES_VERSION: ${{ matrix.postgres_version }}
    POSTGRES_KIND: ${{ matrix.postgres_kind }}
    MATRIX: ${{ matrix.id }}
    POSTGRES_IMG: "${{ matrix.postgres_img }}"
    # The version of operator to upgrade FROM, in the rolling upgrade E2E test
    E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
    TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.eksTimeout }}
    BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}
    DEBUG: "true"
    BUILD_IMAGE: "false"
    CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
    E2E_DEFAULT_STORAGE_CLASS: gp3
    E2E_CSI_STORAGE_CLASS: gp3
    E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: ebs-csi-snapclass
    AWS_REGION: eu-central-1
    AWS_EKS_ADMIN_IAM_ROLES: ${{ secrets.AWS_EKS_ADMIN_IAM_ROLES }}
    TEST_CLOUD_VENDOR: "eks"
  steps:
    -
      # CLUSTER_NAME is <suffix>-test-<run number>-<matrix id without '_', '.', '-'>
      name: Set cluster name
      run: |
        echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-test-${{ github.run_number }}-$( echo ${{ matrix.id }} | tr -d '_.-' )" >> $GITHUB_ENV
    -
      name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
    -
      name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
        check-latest: true
    -
      ## In case hack/setup-cluster.sh need pull operand image from registry
      name: Login into docker registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ env.REGISTRY_USER }}
        password: ${{ env.REGISTRY_PASSWORD }}
    -
      name: Prepare the environment
      uses: nick-fields/retry@v3
      with:
        timeout_seconds: 300
        max_attempts: 3
        command: |
          sudo apt-get update
          sudo apt-get install -y gettext-base
    -
      name: Install ginkgo
      uses: nick-fields/retry@v3
      with:
        timeout_minutes: 1
        max_attempts: 3
        command: |
          go install github.com/onsi/ginkgo/v2/ginkgo
    -
      name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ env.AWS_REGION }}
    -
      # Download the latest eksctl release into $HOME/.local/bin and add
      # that directory to the PATH of the following steps
      name: Install eksctl
      uses: nick-fields/retry@v3
      with:
        timeout_minutes: 1
        max_attempts: 3
        command: |
          mkdir -p "$HOME/.local/bin"
          # eksctl is published under the eksctl-io organization; --fail
          # keeps an HTTP error page from being piped into tar
          curl -fsSL "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" \
            | tar xz -C "$HOME/.local/bin"
          echo "$HOME/.local/bin" >> $GITHUB_PATH
    -
      # Render the eksctl cluster definition from its template
      name: Configure EKS setup
      run: |
        envsubst < hack/e2e/eks-cluster.yaml.template > hack/e2e/eks-cluster.yaml
    -
      name: Setup EKS
      run: |
        # Setting up EKS cluster
        echo "create cluster"
        eksctl create cluster -f hack/e2e/eks-cluster.yaml
        # Create iamidentitymapping, one per line of AWS_EKS_ADMIN_IAM_ROLES
        echo "$AWS_EKS_ADMIN_IAM_ROLES" | while read -r role
        do
          # Skip blank lines so that an empty ARN never reaches eksctl
          [ -n "${role}" ] || continue
          # Masking variables to hide values
          echo "::add-mask::$role"
          eksctl create iamidentitymapping --cluster "${CLUSTER_NAME}" --region="${AWS_REGION}" --arn "${role}" --group system:masters --username admin
        done
        # Updating .kubeconfig to use the correct version of client.authentication.k8s.io API
        aws eks update-kubeconfig --name ${CLUSTER_NAME} --region ${AWS_REGION}
        # Installing CRD for support volumeSnapshot
        SNAPSHOTTER_BASE_URL=https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/${{env.EXTERNAL_SNAPSHOTTER_VERSION}}
        kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml
        kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml
        kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml
        ## Controller
        kubectl apply -f ${SNAPSHOTTER_BASE_URL}/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml
        kubectl apply -f ${SNAPSHOTTER_BASE_URL}/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml
        # Install volume snapshot class
        kubectl apply -f hack/e2e/volumesnapshotclass-ebs-csi.yaml
        kubectl get volumesnapshotclass
        # Change to use gp3 as default storage class
        kubectl annotate storageclass gp2 storageclass.kubernetes.io/is-default-class=false --overwrite
        kubectl apply -f hack/e2e/storage-class-gp3.yaml
        kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
        kubectl get storageclass
    -
      # Create the S3 bucket used by Velero and install Velero in the
      # cluster; up to 3 attempts, 10 minutes each
      name: Setup Velero
      uses: nick-fields/retry@v3
      env:
        VELERO_VERSION: "v1.15.0"
        VELERO_AWS_PLUGIN_VERSION: "v1.11.0"
      with:
        timeout_minutes: 10
        max_attempts: 3
        on_retry_command: |
          # Undo a partially completed attempt before the next one.
          # NOTE(review): written in plain POSIX sh because the retry action
          # may not run this script with bash - confirm against its docs.
          # VELERO_BUCKET_NAME is exported via $GITHUB_ENV by the main
          # command, which only takes effect in later steps: derive it again
          VELERO_BUCKET_NAME="$( printf '%s' "${CLUSTER_NAME}" | tr '[:upper:]' '[:lower:]' )-velero"
          # Clean up buckets
          if output=$( aws s3api delete-bucket --bucket "${VELERO_BUCKET_NAME}" --region "${AWS_REGION}" 2>&1 ); then
            echo "S3 Bucket deleted"
          elif printf '%s\n' "${output}" | grep -q "NoSuchBucket"; then
            echo "S3 Bucket doesn't exist, nothing to remove"
          else
            echo "Failed deleting S3 Bucket ${VELERO_BUCKET_NAME}: ${output}"
          fi
          # Uninstall Velero
          kubectl delete namespace/velero clusterrolebinding/velero --ignore-not-found
          kubectl delete crds -l component=velero
        command: |
          VELERO_BUCKET_NAME="${CLUSTER_NAME,,}-velero"
          echo "VELERO_BUCKET_NAME=${VELERO_BUCKET_NAME}" >> $GITHUB_ENV
          # Create S3 bucket
          aws s3api create-bucket \
            --bucket "${VELERO_BUCKET_NAME}" \
            --region "${AWS_REGION}" \
            --create-bucket-configuration LocationConstraint="${AWS_REGION}"
          # Download Velero, extract and place it in $PATH
          curl -sL "https://github.com/vmware-tanzu/velero/releases/download/${VELERO_VERSION}/velero-${VELERO_VERSION}-linux-amd64.tar.gz" | tar xz
          mv velero-${VELERO_VERSION}-linux-amd64/velero $HOME/.local/bin
          # Set Velero-specific credentials; the file is overwritten so that
          # a retry does not append a second copy of the section
          echo -e "[default]\naws_access_key_id=${{ secrets.AWS_ACCESS_KEY_ID }}\naws_secret_access_key=${{ secrets.AWS_SECRET_ACCESS_KEY }}" > credentials-velero
          # Install Velero
          velero install \
            --provider aws \
            --plugins velero/velero-plugin-for-aws:${VELERO_AWS_PLUGIN_VERSION} \
            --bucket "${VELERO_BUCKET_NAME}" \
            --backup-location-config region="${AWS_REGION}" \
            --snapshot-location-config region="${AWS_REGION}" \
            --secret-file ./credentials-velero \
            --wait
    -
      name: Prepare patch for customization
      env:
        ## the following variable all need be set if we use env_override_customized.yaml.template
        ## this is customization for eks
        LEADER_ELECTION: "true"
        LEADER_LEASE_DURATION: 15
        LEADER_RENEW_DEADLINE: 10
        LIVENESS_PROBE_THRESHOLD: 3
        LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
      run: |
        LOG_LEVEL=${LOG_LEVEL:-info}
        envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
        cat config/manager/env_override.yaml
    -
      name: Run E2E tests
      run: hack/e2e/run-e2e.sh
    -
      # Summarize the failed E2E test cases if there are any
      name: Report failed E2E tests
      if: failure()
      run: |
        set +x
        chmod +x .github/report-failed-test.sh
        ./.github/report-failed-test.sh
    -
      # Create an individual artifact for each E2E test, which will be used to
      # generate E2E test summary in the follow-up job 'summarize-e2e-tests'
      name: Create individual artifact for each E2E test
      if: (always() && !cancelled())
      env:
        RUNNER: "eks"
        RUN_ID: ${{ github.run_id }}
        REPOSITORY: ${{ github.repository }}
        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
      run: |
        set +x
        python .github/generate-test-artifacts.py \
          -o testartifacts-${{ env.MATRIX }} \
          -f tests/e2e/out/report.json \
          --environment=true
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          python .github/generate-test-artifacts.py \
            -o testartifacts-${{ env.MATRIX }} \
            -f tests/e2e/out/upgrade_report.json \
            --environment=true
        fi
    -
      name: Archive test artifacts
      if: (always() && !cancelled())
      uses: actions/upload-artifact@v4
      with:
        name: testartifacts-${{ env.MATRIX }}
        path: testartifacts-${{ env.MATRIX }}/
        retention-days: 7
    -
      name: Cleanup test artifacts
      if: always()
      run:
        rm -rf testartifacts-${{ env.MATRIX }}/
    -
      name: Cleanup ginkgo JSON report
      # Delete report.json after the analysis. File should always exist.
      # Delete upgrade_report.json. It may not exist depending on test level.
      if: always()
      run: |
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          rm tests/e2e/out/upgrade_report.json
        fi
        if [ -f tests/e2e/out/report.json ]; then
          rm tests/e2e/out/report.json
        fi
    -
      name: Archive e2e failure contexts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: test-failure-contexts-${{ matrix.id }}
        path: |
          tests/*/out/
        retention-days: 7
        if-no-files-found: ignore
    -
      name: Archive e2e logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-logs-${{ matrix.id }}
        path: |
          tests/e2e/cluster_logs/**
        retention-days: 7
        if-no-files-found: ignore
    -
      # Best-effort teardown (`set +e`), executed whatever the job outcome:
      #  1. stop early when the eksctl CloudFormation stack does not exist
      #  2. delete the Kubernetes resources that could block the deletion
      #  3. delete the Velero bucket and the cluster, three attempts each
      #  4. when the stack is still there, fall back to direct AWS calls
      #  5. delete the EBS volumes still tagged as owned by the cluster
      name: Clean up
      if: always()
      run: |
        set +e
        CLUSTER_NAME="${{ env.CLUSTER_NAME }}"
        REGION_NAME="${{ env.AWS_REGION }}"
        STACK_NAME="eksctl-${CLUSTER_NAME}-cluster"
        CLOUDFORMATION_STATUS_CURRENT=$(aws cloudformation describe-stacks --stack-name "${STACK_NAME}" --region "${REGION_NAME}" | jq -r '.Stacks[].StackStatus')
        if [[ -z "${CLOUDFORMATION_STATUS_CURRENT}" ]]; then
          echo "CloudFormation stack not found. Nothing to cleanup."
          exit 0
        fi
        # Attempt to remove any leftover PDB (and Cluster that would recreate it)
        # that could prevent the EKS cluster deletion
        kubectl delete cluster --all --all-namespaces --now --timeout=30s || true
        kubectl delete pdb --all --all-namespaces --now --timeout=30s || true
        kubectl delete pvc --all --all-namespaces --now --timeout=30s || true
        # Remove any LoadBalancer service
        kubectl get service --all-namespaces -o json | jq -r '.items[] | select(.spec.type=="LoadBalancer") | .metadata | "kubectl delete service --now --timeout=30s -n " + .namespace + " " + .name' | xargs -rI X bash -c X || true
        # Remember the node group stacks now: they cannot be listed through
        # eksctl once the cluster is gone
        NODEGROUP_STACK_NAMES=$(eksctl get nodegroup --cluster "${CLUSTER_NAME}" -o json | jq -r '.[].StackName' || true)
        attempt=1
        bucket_attempt=1
        max_attempts=3
        # Attempting three times to remove the Velero S3 bucket
        VELERO_BUCKET_NAME=${VELERO_BUCKET_NAME:-"${CLUSTER_NAME,,}-velero"}
        while [ "${bucket_attempt}" -le "${max_attempts}" ]; do
          echo "Deleting S3 Bucket. Attempt ${bucket_attempt} of ${max_attempts}"
          output=$( aws s3api delete-bucket --bucket "${VELERO_BUCKET_NAME}" --region "${AWS_REGION}" 2>&1 )
          status=$?
          if [[ $status == 0 ]]; then
            echo "S3 Bucket deleted"
            break
          fi
          if ( grep "NoSuchBucket" <<< "$output" ); then
            echo "S3 Bucket doesn't exist, nothing to remove"
            break
          fi
          echo "Failed deleting S3 Bucket ${VELERO_BUCKET_NAME}, retrying"
          sleep 5
          bucket_attempt=$((bucket_attempt+1))
        done
        # Attempting three times to cleanly remove the cluster via eksctl
        while [ "${attempt}" -le "${max_attempts}" ]; do
          echo "Deleting cluster. Attempt ${attempt} of ${max_attempts}"
          output=$( eksctl delete cluster -n "${CLUSTER_NAME}" -r "${REGION_NAME}" --wait --force 2>&1 )
          status=$?
          if [[ $status == 0 ]]; then
            echo "EKS cluster deleted"
            break
          fi
          if ( grep "ResourceNotFoundException: No cluster found for name: ${CLUSTER_NAME}" <<< "$output" ); then
            echo "EKS cluster doesn't exist, nothing to remove"
            break
          fi
          echo "Failed deleting cluster ${CLUSTER_NAME}, retrying"
          sleep 5
          attempt=$((attempt+1))
        done
        # Recheck if something got stuck, and use harder methods to clean up
        CLOUDFORMATION_STATUS_CURRENT=$(aws cloudformation describe-stacks --stack-name "${STACK_NAME}" --region "${REGION_NAME}" | jq -r '.Stacks[].StackStatus')
        if [ -n "${CLOUDFORMATION_STATUS_CURRENT}" ] ; then
          echo "::warning file=continuous-delivery.yml::eksctl failed deleting a cluster cleanly"
          # When the status of CloudFormation stack managed by eksctl reports an error, try to delete resources directly with AWS CLI
          pip install boto3
          for vpc_id in $(aws ec2 describe-vpcs --region "${REGION_NAME}" | jq -r '.Vpcs[] | select(.Tags?[]? | .Key == "Name" and (.Value | contains("'"${STACK_NAME}"'"))).VpcId'); do
            python .github/vpc_destroy.py --vpc_id "${vpc_id}" --region "${REGION_NAME}" --services ec2
          done
          # Then we try to delete the cluster cleanly and the cloudformation
          if aws eks describe-cluster --name "${CLUSTER_NAME}" --region "${REGION_NAME}" ; then
            eksctl delete cluster -n "${CLUSTER_NAME}" -r "${REGION_NAME}" --wait --force
          fi
          if [ -n "${NODEGROUP_STACK_NAMES}" ] ; then
            for NODEGROUP_STACK_NAME in ${NODEGROUP_STACK_NAMES}; do
              if aws cloudformation describe-stacks --stack-name "${NODEGROUP_STACK_NAME}" --region "${REGION_NAME}" ; then
                aws cloudformation delete-stack --stack-name "${NODEGROUP_STACK_NAME}" --region "${REGION_NAME}"
              fi
            done
          fi
          if aws cloudformation describe-stacks --stack-name "${STACK_NAME}" --region "${REGION_NAME}" ; then
            aws cloudformation delete-stack --stack-name "${STACK_NAME}" --region "${REGION_NAME}"
          fi
        fi
        # Clear up leftover volumes
        while read -r volume; do
          echo "Deleting $volume of cluster $CLUSTER_NAME ..."
          if ! aws ec2 delete-volume --region "${REGION_NAME}" --volume-id "$volume" ; then
            echo "::warning file=continuous-delivery.yml::Failed deleting $volume of cluster $CLUSTER_NAME"
          fi
        done < <(aws ec2 describe-volumes --region "${REGION_NAME}" --query 'Volumes[?not_null(Tags[?Key == `kubernetes.io/cluster/'"$CLUSTER_NAME"'` && Value == `owned`].Value)].VolumeId' | jq -r '.[]' || true)
# GKE Secrets required | |
# secrets.GCP_SERVICE_ACCOUNT | |
# secrets.GCP_PROJECT_ID | |
# Run the E2E suite on Google GKE: one job per entry of the GKE matrix
# produced by generate-jobs. Each job creates its own regional cluster,
# configures the snapshot class, runs the tests, publishes the reports and
# finally deletes the cluster and its leftover disks.
e2e-gke:
  name: Run E2E on Google GKE
  if: |
    (always() && !cancelled()) &&
    vars.GKE_ENABLED == 'true' &&
    needs.generate-jobs.outputs.gkeEnabled == 'true' &&
    needs.generate-jobs.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - evaluate_options
  strategy:
    fail-fast: false
    max-parallel: 6
    matrix: ${{ fromJSON(needs.generate-jobs.outputs.gkeMatrix) }}
  runs-on: ubuntu-24.04
  env:
    # TEST_DEPTH determines the maximum test level the suite should be running
    TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
    # FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
    FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}
    K8S_VERSION: "${{ matrix.k8s_version }}"
    POSTGRES_VERSION: ${{ matrix.postgres_version }}
    POSTGRES_KIND: ${{ matrix.postgres_kind }}
    MATRIX: ${{ matrix.id }}
    POSTGRES_IMG: "${{ matrix.postgres_img }}"
    # The version of operator to upgrade FROM, in the rolling upgrade E2E test
    E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
    TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.gkeTimeout }}
    BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}
    DEBUG: "true"
    BUILD_IMAGE: "false"
    CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
    E2E_DEFAULT_STORAGE_CLASS: standard-rwo
    E2E_CSI_STORAGE_CLASS: standard-rwo
    E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: pd-csi-snapclass
    REGION: europe-west3
    TEST_CLOUD_VENDOR: "gke"
  steps:
    -
      name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
    -
      name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
        check-latest: true
    -
      ## In case hack/setup-cluster.sh need pull operand image from registry
      name: Login into docker registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ env.REGISTRY_USER }}
        password: ${{ env.REGISTRY_PASSWORD }}
    -
      name: Prepare the environment
      uses: nick-fields/retry@v3
      with:
        timeout_seconds: 300
        max_attempts: 3
        command: |
          sudo apt-get update
          sudo apt-get install -y gettext-base
    -
      name: Install ginkgo
      uses: nick-fields/retry@v3
      with:
        timeout_seconds: 120
        max_attempts: 3
        command: |
          go install github.com/onsi/ginkgo/v2/ginkgo
    -
      name: Set cluster name
      run: |
        # GKE cluster names rules:
        # only lowercase alphanumerics and '-' allowed, must start with a letter and end with an alphanumeric,
        # and must be no longer than 40 characters
        # We need to shorten the name and lower the case
        SHORT_ID=$( echo ${{ matrix.id }} | tr -d '_.-' | tr '[:upper:]' '[:lower:]')
        echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-test-${{ github.run_number }}-${SHORT_ID}" >> $GITHUB_ENV
    -
      name: Authenticate to Google Cloud
      id: 'auth'
      uses: google-github-actions/auth@v2
      with:
        credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT }}'
    -
      name: Set up Cloud SDK and kubectl
      uses: google-github-actions/setup-gcloud@v2
      with:
        project_id: ${{ secrets.GCP_PROJECT_ID }}
        install_components: 'kubectl,gke-gcloud-auth-plugin'
    -
      name: Create GKE cluster
      run: |
        set +e
        # We may go over the amount of API requests allowed
        # by Google when creating all the clusters at the same time.
        # We give a few attempts at creating the cluster before giving up.
        # The following command will create a 3 nodes cluster, with each
        # node deployed in its own availability zone.
        for i in $(seq 1 5); do
          if gcloud container clusters create ${{ env.CLUSTER_NAME }} \
            --num-nodes=1 \
            --cluster-version=${{ env.K8S_VERSION }} \
            --region=${{ env.REGION }} \
            --disk-size=20 \
            --machine-type=e2-standard-2 \
            --labels=cluster=${{ env.CLUSTER_NAME }}
          then
            exit 0
          fi
          echo "Couldn't create the cluster. Retrying in 100s."
          sleep 100
        done
        echo "Couldn't create the cluster. Failing."
        exit 1
    -
      name: Get GKE kubeconfig credentials
      env:
        USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
      run: |
        gcloud container clusters get-credentials ${{ env.CLUSTER_NAME }} --region ${{ env.REGION }} --project ${{ secrets.GCP_PROJECT_ID }}
    -
      name: Configure Storage
      run: |
        # Install volume snapshot class
        kubectl apply -f hack/e2e/volumesnapshotclass-pd-csi.yaml
        # Declare it as the default snapshot class of the storage class used by the tests
        kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
        kubectl get storageclass
    -
      name: Prepare patch for customization
      env:
        ## the following variable all need be set if we use env_override_customized.yaml.template
        ## this is customization for gke
        LEADER_ELECTION: "false"
        LEADER_LEASE_DURATION: 240
        LEADER_RENEW_DEADLINE: 230
        LIVENESS_PROBE_THRESHOLD: 9
        LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
      run: |
        LOG_LEVEL=${LOG_LEVEL:-info}
        envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
        cat config/manager/env_override.yaml
    -
      name: Run E2E tests
      run: hack/e2e/run-e2e.sh
    -
      # Summarize the failed E2E test cases if there are any
      name: Report failed E2E tests
      if: failure()
      run: |
        set +x
        chmod +x .github/report-failed-test.sh
        ./.github/report-failed-test.sh
    -
      # Create an individual artifact for each E2E test, which will be used to
      # generate E2E test summary in the follow-up job 'summarize-e2e-tests'
      name: Create individual artifact for each E2E test
      if: (always() && !cancelled())
      env:
        RUNNER: "gke"
        RUN_ID: ${{ github.run_id }}
        REPOSITORY: ${{ github.repository }}
        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
      run: |
        set +x
        python .github/generate-test-artifacts.py \
          -o testartifacts-${{ env.MATRIX }} \
          -f tests/e2e/out/report.json \
          --environment=true
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          python .github/generate-test-artifacts.py \
            -o testartifacts-${{ env.MATRIX }} \
            -f tests/e2e/out/upgrade_report.json \
            --environment=true
        fi
    -
      name: Archive test artifacts
      if: (always() && !cancelled())
      uses: actions/upload-artifact@v4
      with:
        name: testartifacts-${{ env.MATRIX }}
        path: testartifacts-${{ env.MATRIX }}/
        retention-days: 7
    -
      name: Cleanup test artifacts
      if: always()
      run:
        rm -rf testartifacts-${{ env.MATRIX }}/
    -
      name: Cleanup ginkgo JSON report
      # Delete report.json after the analysis. File should always exist.
      # Delete upgrade_report.json. It may not exist depending on test level.
      if: always()
      run: |
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          rm tests/e2e/out/upgrade_report.json
        fi
        if [ -f tests/e2e/out/report.json ]; then
          rm tests/e2e/out/report.json
        fi
    -
      name: Archive e2e failure contexts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: test-failure-contexts-${{ matrix.id }}
        path: |
          tests/*/out/
        retention-days: 7
        if-no-files-found: ignore
    -
      name: Archive e2e logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-logs-${{ matrix.id }}
        path: |
          tests/e2e/cluster_logs/**
        retention-days: 7
        if-no-files-found: ignore
    -
      # Best-effort teardown (`set +e`), executed whatever the job outcome:
      # delete the leftover Kubernetes resources, then the cluster, then the
      # unattached disks labelled with the cluster name. Each deletion is
      # attempted up to three times.
      name: Clean up
      if: always()
      run: |
        set +e
        # Attempt to remove any leftover resource
        kubectl delete cluster --all --all-namespaces --now --timeout=30s || true
        kubectl delete pdb --all --all-namespaces --now --timeout=30s || true
        kubectl delete pvc --all --all-namespaces --now --timeout=30s || true
        # Wait until all the PVs provisioned are actually reclaimed
        kubectl wait --for delete --all pv --timeout=60s || true
        attempt=1
        max_attempts=3
        while [ "${attempt}" -le "${max_attempts}" ]; do
          gcloud container clusters delete ${{ env.CLUSTER_NAME }} --region=${{ env.REGION }} --quiet
          status=$?
          if [[ $status == 0 ]]; then
            echo "GKE cluster ${{ env.CLUSTER_NAME }} deleted from region ${{ env.REGION }}"
            break
          fi
          echo "Failed deleting cluster ${{ env.CLUSTER_NAME }} from region ${{ env.REGION }}, retrying"
          sleep 5
          attempt=$((attempt+1))
        done
        # The node's disks are not automatically deleted when the cluster is removed.
        # We delete all the disks tagged with the name of the cluster that are not
        # owned by anyone.
        attempt=1
        max_attempts=3
        while [ "${attempt}" -le "${max_attempts}" ]; do
          IDS=$(gcloud compute disks list --filter="labels.cluster=${{ env.CLUSTER_NAME }} AND region:${{ env.REGION }} AND -users:*" --format="value(id)")
          amount="$(echo $IDS | awk '{print NF}')"
          if [[ "$amount" == 3 ]]; then
            echo -e "Found the 3 disks to be removed:\n$IDS"
            break
          fi
          echo "Expected 3 disks to delete but found $amount, waiting and retrying"
          sleep 20
          attempt=$((attempt+1))
        done
        # Whatever was found by the last listing gets deleted, even when the
        # expected count was never reached
        for ID in ${IDS}
        do
          attempt=1
          max_attempts=3
          while [ "${attempt}" -le "${max_attempts}" ]; do
            gcloud compute disks delete --region "${{ env.REGION }}" --quiet "${ID}"
            status=$?
            if [[ $status == 0 ]]; then
              echo "compute disk ${ID} deleted"
              break
            fi
            echo "Failed deleting disk ${ID} from region ${{ env.REGION }}, retrying"
            sleep 5
            attempt=$((attempt+1))
          done
        done
# Secrets read by the OpenShift job (see its `env` section):
#   secrets.AWS_BASE_DOMAIN
#   secrets.AWS_ACCESS_KEY_ID
#   secrets.AWS_SECRET_ACCESS_KEY
#   secrets.AWS_EKS_ADMIN_IAM_ROLES
#   secrets.REDHAT_PULL
#   secrets.SSH_PUBLIC_KEY
# The job only runs when the repository variable OPENSHIFT_ENABLED is 'true',
# generate-jobs succeeded and reported openshiftEnabled == 'true'.
e2e-openshift:
  name: Run E2E on OpenShift
  if: |
    always() && !cancelled() &&
    vars.OPENSHIFT_ENABLED == 'true' &&
    needs.generate-jobs.outputs.openshiftEnabled == 'true' &&
    needs.generate-jobs.result == 'success'
  needs:
    - buildx
    - generate-jobs
    - evaluate_options
  strategy:
    fail-fast: false
    max-parallel: 6
    matrix: ${{ fromJSON(needs.generate-jobs.outputs.openshiftMatrix) }}
  runs-on: ubuntu-24.04
  env:
    # TEST_DEPTH determines the maximum test level the suite should be running
    TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
    # FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
    FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

    K8S_VERSION: "${{ matrix.k8s_version }}"
    POSTGRES_VERSION: ${{ matrix.postgres_version }}
    POSTGRES_KIND: ${{ matrix.postgres_kind }}
    MATRIX: ${{ matrix.id }}
    POSTGRES_IMG: "${{ matrix.postgres_img }}"
    # The version of operator to upgrade FROM, in the rolling upgrade E2E test
    E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
    TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.openshiftTimeout }}
    BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

    DEBUG: "true"
    BUILD_IMAGE: "false"
    CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
    E2E_DEFAULT_STORAGE_CLASS: gp3-csi
    E2E_CSI_STORAGE_CLASS: gp3-csi
    E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-aws-vsc
    TEST_CLOUD_VENDOR: "ocp"

    # AWS configuration
    AWS_BASE_DOMAIN: ${{ secrets.AWS_BASE_DOMAIN }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    AWS_REGION: eu-central-1
    AWS_EKS_ADMIN_IAM_ROLES: ${{ secrets.AWS_EKS_ADMIN_IAM_ROLES }}
    REDHAT_PULL: ${{ secrets.REDHAT_PULL }}
    SSH_PUBLIC_KEY: ${{ secrets.SSH_PUBLIC_KEY }}
  steps:
    # Export CLUSTER_NAME for the following steps:
    # <E2E_SUFFIX>-ocp-<run number>-<k8s version with the dots removed>
    - name: Set cluster name
      run: |
        echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-ocp-${{ github.run_number}}-$( echo ${{ matrix.k8s_version }} | tr -d '.' )" >> "$GITHUB_ENV"

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ needs.evaluate_options.outputs.git_ref }}
        fetch-depth: 0

    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
        check-latest: true

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
      with:
        platforms: ${{ env.PLATFORMS }}

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    ## In case hack/setup-cluster.sh need pull operand image from registry
    - name: Login into docker registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ env.REGISTRY_USER }}
        password: ${{ env.REGISTRY_PASSWORD }}

    # The step-level CONTROLLER_IMG overrides the job-level value for this
    # step only.
    - name: Build and push the operator and catalog
      env:
        CONTROLLER_IMG: ${{ needs.buildx.outputs.controller_img_ubi8 }}
        BUNDLE_IMG: ${{ needs.buildx.outputs.bundle_img }}
        CATALOG_IMG: ${{ needs.buildx.outputs.catalog_img }}
      run: |
        make olm-catalog

    # Installer and client versions follow the matrix entry.
    - name: Install OC Installer and client
      uses: redhat-actions/openshift-tools-installer@v1
      with:
        source: "mirror"
        openshift-install: ${{ matrix.k8s_version }}
        oc: ${{ matrix.k8s_version }}

    # Render the install configuration from its template with the current
    # environment, then create the cluster with hack/ as the installer's
    # working directory.
    - name: Install OpenShift Cluster ${{ matrix.k8s_version }}
      run: |
        envsubst < hack/install-config.yaml.template > hack/install-config.yaml
        openshift-install create cluster --dir hack/ --log-level warn

    - name: Run E2E tests
      if: always() && !cancelled()
      run: |
        # Before running on OpenShift we make sure that the catalog is created
        # in the openshift-marketplace namespace
        sed -i -e 's/namespace: operators/namespace: openshift-marketplace/' cloudnative-pg-catalog.yaml
        # Print the catalog location and content to ease troubleshooting
        find . -type f -name "cloudnative-pg-catalog.yaml"
        cat cloudnative-pg-catalog.yaml
        KUBECONFIG=$(pwd)/hack/auth/kubeconfig bash -x hack/e2e/run-e2e-ocp.sh

    # Summarize the failed E2E tests cases if there are any
    - name: Report failed E2E tests
      if: failure()
      run: |
        set +x
        chmod +x .github/report-failed-test.sh
        ./.github/report-failed-test.sh

    # Create an individual artifact for each E2E test, which will be used to
    # generate E2E test summary in the follow-up job 'summarize-e2e-tests'
    - name: Create individual artifact for each E2E test
      if: always() && !cancelled()
      env:
        RUNNER: "openshift"
        RUN_ID: ${{ github.run_id }}
        REPOSITORY: ${{ github.repository }}
        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
      run: |
        set +x
        python .github/generate-test-artifacts.py \
          -o testartifacts-${{ env.MATRIX }} \
          -f tests/e2e/out/report.json \
          --environment=true
        # The upgrade report is only processed when it has been produced
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          python .github/generate-test-artifacts.py \
            -o testartifacts-${{ env.MATRIX }} \
            -f tests/e2e/out/upgrade_report.json \
            --environment=true
        fi

    - name: Archive test artifacts
      if: always() && !cancelled()
      uses: actions/upload-artifact@v4
      with:
        name: testartifacts-${{ env.MATRIX }}
        path: testartifacts-${{ env.MATRIX }}/
        retention-days: 7

    - name: Cleanup test artifacts
      if: always()
      run: rm -rf testartifacts-${{ env.MATRIX }}/

    - name: Cleanup ginkgo JSON report
      # Delete report.json after the analysis. File should always exist.
      # Delete upgrade_report.json. It may not exist depending on test level.
      if: always()
      run: |
        if [ -f tests/e2e/out/upgrade_report.json ]; then
          rm tests/e2e/out/upgrade_report.json
        fi
        if [ -f tests/e2e/out/report.json ]; then
          rm tests/e2e/out/report.json
        fi

    - name: Archive e2e failure contexts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: test-failure-contexts-${{ matrix.id }}
        path: |
          tests/*/out/
        retention-days: 7
        if-no-files-found: ignore

    - name: Archive e2e logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-logs-${{ matrix.id }}
        path: |
          tests/e2e/cluster_logs/**
        retention-days: 7
        if-no-files-found: ignore

    # Always attempt to tear the cluster down, using the same hack/ directory
    # that was passed to `openshift-install create cluster`.
    - name: Destroy OpenShift Cluster ${{ matrix.k8s_version }}
      if: always()
      run: |
        openshift-install destroy cluster --dir hack/
# Summarize E2E test results, display in the GitHub 'summary' view.
# The job runs (unless the workflow was cancelled) as soon as at least one of
# the E2E jobs actually executed, i.e. ended with 'success' or 'failure'
# rather than being skipped.
summarize-e2e-tests:
  name: E2E test suite
  needs:
    - evaluate_options
    - e2e-local
    - e2e-eks
    - e2e-aks
    - e2e-gke
    - e2e-openshift
  if: |
    (always() && !cancelled()) &&
    (
      contains(fromJSON('["success", "failure"]'), needs.e2e-local.result) ||
      contains(fromJSON('["success", "failure"]'), needs.e2e-eks.result) ||
      contains(fromJSON('["success", "failure"]'), needs.e2e-aks.result) ||
      contains(fromJSON('["success", "failure"]'), needs.e2e-gke.result) ||
      contains(fromJSON('["success", "failure"]'), needs.e2e-openshift.result)
    )
  runs-on: ubuntu-24.04
  steps:
    - name: Create a directory for the artifacts
      run: mkdir -p test-artifacts

    - name: Download all artifacts to the directory
      uses: actions/download-artifact@v4
      with:
        path: test-artifacts
        pattern: testartifacts-*

    - name: Flatten all artifacts onto directory
      # The download-artifact action, since we gave it a pattern rather than
      # a name, downloads every matching artifact into its own folder.
      # In this step we bring all the JSONs to a single folder.
      # `|| true`: a run that produced no JSON file must not fail here.
      run: |
        mkdir -p test-artifacts/data
        mv test-artifacts/*/*.json test-artifacts/data || true

    - name: Display the structure of the artifact folder
      run: ls -R test-artifacts/data

    - name: Compute the E2E test summary
      id: generate-summary
      # NOTE(review): the action reference below looks garbled (e-mail
      # obfuscation residue); restore the real `<action>@<version>` - TODO confirm
      uses: cloudnative-pg/[email protected]
      with:
        artifact_directory: test-artifacts/data

    - name: If there is an overflow summary, archive it
      if: steps.generate-summary.outputs.Overflow
      uses: actions/upload-artifact@v4
      with:
        name: ${{ steps.generate-summary.outputs.Overflow }}
        path: ${{ steps.generate-summary.outputs.Overflow }}
        retention-days: 7

    - name: Send the Ciclops view over Slack
      # Send the Ciclops thermometer on every scheduled run,
      # or when there are systematic failures in release branches.
      # Never sent from repositories not owned by REPOSITORY_OWNER.
      uses: rtCamp/action-slack-notify@v2
      if: |
        github.repository_owner == env.REPOSITORY_OWNER &&
        (
          github.event_name == 'schedule' ||
          (
            steps.generate-summary.outputs.alerts &&
            startsWith(needs.evaluate_options.outputs.git_ref, 'refs/heads/release-')
          )
        )
      env:
        # SLACK_COLOR is where we distinguish a run with/without alerts. It's where the
        # action has hooks for conditionality in the message body (yeah, weird)
        SLACK_COLOR: ${{ steps.generate-summary.outputs.alerts && 'failure' || 'success' }}
        SLACK_ICON: https://avatars.githubusercontent.com/u/85171364?size=48
        SLACK_USERNAME: ${{ env.SLACK_USERNAME }}
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        SLACK_TITLE: CICLOPS view for ${{ github.repository }}
        SLACK_MESSAGE_ON_SUCCESS: |
          ${{ steps.generate-summary.outputs.thermometer }}
        SLACK_MESSAGE_ON_FAILURE: |
          ${{ steps.generate-summary.outputs.thermometer }}
          :warning: *Systematic failures!*
          ${{ steps.generate-summary.outputs.alerts }}
        SLACK_FOOTER: |
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|*See full CI run*>

    # Like the other cleanup steps of this workflow, run even when a previous
    # step failed.
    - name: Delete the downloaded files
      if: always()
      run: rm -rf test-artifacts
# Adds the 'ok-to-merge' label to workflows that have run successfully and
# have adequate test and matrix coverage: the local E2E job succeeded, the
# run was triggered by a PR comment, and the test level is 4.
# This label is a prerequisite to be able to merge a PR.
# See also 'require-labels.yml'
ok-to-merge:
  name: Label the PR as "ok to merge :ok_hand:"
  needs:
    - evaluate_options
    - e2e-local
  if: |
    always() &&
    needs.e2e-local.result == 'success' &&
    github.event_name == 'issue_comment' &&
    needs.evaluate_options.outputs.test_level == '4'
  runs-on: ubuntu-24.04
  steps:
    # Export OK_LABEL: the label name when the PR already carries it,
    # empty otherwise.
    - name: Check preconditions
      id: get_pr_number_and_labels
      env:
        GITHUB_TOKEN: ${{ secrets.REPO_GHA_PAT }}
        # This job does not check out the repository, so `gh` has no local
        # git context: it must be told which repository to query. Without
        # it the lookup fails and the error is swallowed below.
        GH_REPO: ${{ github.repository }}
      run: |
        ok_label=$(gh pr view "${{ github.event.issue.number }}" --json labels -q ".labels.[].name" 2>/dev/null | grep "ok to merge :ok_hand:" || :)
        echo "OK_LABEL=${ok_label}" >> "$GITHUB_ENV"

    # Only label the PR when the label is not there yet
    - name: Label the PR as "ok to merge :ok_hand:"
      if: |
        env.OK_LABEL == ''
      # NOTE(review): the action reference below looks garbled (e-mail
      # obfuscation residue); restore the real `<action>@<version>` - TODO confirm
      uses: actions-ecosystem/[email protected]
      with:
        github_token: ${{ secrets.REPO_GHA_PAT }}
        number: ${{ github.event.issue.number }}
        labels: "ok to merge :ok_hand:"
# Remove the "ok to merge :ok_hand:" label if the E2E tests or previous steps failed.
# Only applies to runs triggered by a PR comment whose local E2E job failed.
unlabel-ok-to-merge:
  name: Remove the "ok to merge :ok_hand:" label from the PR
  needs:
    - evaluate_options
    - e2e-local
  if: |
    always() &&
    needs.e2e-local.result == 'failure' &&
    github.event_name == 'issue_comment'
  runs-on: ubuntu-24.04
  steps:
    # Export OK_LABEL: the label name when the PR currently carries it,
    # empty otherwise.
    - name: Check preconditions
      id: get_pr_number_and_labels
      env:
        GITHUB_TOKEN: ${{ secrets.REPO_GHA_PAT }}
        # This job does not check out the repository, so `gh` has no local
        # git context: it must be told which repository to query. Without
        # it the lookup fails silently, OK_LABEL stays empty and the label
        # would never be removed.
        GH_REPO: ${{ github.repository }}
      run: |
        ok_label=$(gh pr view "${{ github.event.issue.number }}" --json labels -q ".labels.[].name" 2>/dev/null | grep "ok to merge :ok_hand:" || :)
        echo "OK_LABEL=${ok_label}" >> "$GITHUB_ENV"

    # Only remove the label when the PR actually has it
    - name: Remove "ok to merge :ok_hand:" label from PR
      if: |
        env.OK_LABEL != ''
      # NOTE(review): the action reference below looks garbled (e-mail
      # obfuscation residue); restore the real `<action>@<version>` - TODO confirm
      uses: actions-ecosystem/[email protected]
      with:
        github_token: ${{ secrets.REPO_GHA_PAT }}
        number: ${{ github.event.issue.number }}
        labels: "ok to merge :ok_hand:"