Skip to content

Conformance EKS (ci-eks) #1318

Conformance EKS (ci-eks)

Conformance EKS (ci-eks) #1318

name: Conformance EKS (ci-eks)
# Any change in triggers needs to be reflected in the concurrency group.
on:
workflow_dispatch:
inputs:
PR-number:
description: "Pull request number."
required: true
context-ref:
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
required: true
SHA:
description: "SHA under test (head of the PR branch)."
required: true
extra-args:
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
required: false
default: '{}'
# Run every 6 hours
schedule:
- cron: '0 1/6 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To read actions state with catchpoint/workflow-telemetry-action
actions: read
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# To be able to set commit status
statuses: write
# To be able to request the JWT from GitHub's OIDC provider
id-token: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
env:
clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}
cilium_cli_ci_version:
# renovate: datasource=github-releases depName=eksctl-io/eksctl
eksctl_version: v0.183.0
# renovate: datasource=github-releases depName=kubernetes/kubernetes
kubectl_version: v1.30.2
jobs:
echo-inputs:
if: ${{ github.event_name == 'workflow_dispatch' }}
name: Echo Workflow Dispatch Inputs
runs-on: ubuntu-22.04
steps:
- name: Echo Workflow Dispatch Inputs
run: |
echo '${{ tojson(inputs) }}'
commit-status-start:
name: Commit Status Start
runs-on: ubuntu-latest
steps:
- name: Set initial commit status
uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
with:
sha: ${{ inputs.SHA || github.sha }}
generate-matrix:
name: Generate Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Convert YAML to JSON
run: |
work_dir=".github/actions/aws"
destination_directory="/tmp/generated/aws"
mkdir -p "${destination_directory}"
yq -o=json "${work_dir}/k8s-versions.yaml" | jq . > "${destination_directory}/aws.json"
- name: Generate Matrix
run: |
cd /tmp/generated/aws
# Use complete matrix in case of scheduled run
# main -> event_name = schedule
# other stable branches -> PR-number starting with v (e.g. v1.14)
if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.PR-number }}" == v* ]];then
cp aws.json /tmp/matrix.json
else
jq '{ "include": [ .include[] | select(.default) ] }' aws.json > /tmp/matrix.json
fi
echo "Generated matrix:"
cat /tmp/matrix.json
# We use latest eksctl just to fetch recent supported versions.
# We don't use that eksctl to create cluster.
# Eksctl has hardcoded list of supported versions in the binary.
# This is hack until https://github.com/aws/containers-roadmap/issues/982 is resolved.
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Filter Matrix
id: set-matrix
run: |
cp /tmp/matrix.json /tmp/result.json
jq -c '.include[]' /tmp/matrix.json | while read i; do
VERSION=$(echo $i | jq -r '.version')
eksctl version -o json | jq -r '.EKSServerSupportedVersions[]' > /tmp/output
if grep -q -F $VERSION /tmp/output; then
echo "Version $VERSION is supported"
else
echo "::notice::Removing version $VERSION as it's not supported"
jq 'del(.include[] | select(.version == "'$VERSION'"))' /tmp/result.json > /tmp/result.json.tmp
mv /tmp/result.json.tmp /tmp/result.json
fi
done
echo "Filtered matrix:"
cat /tmp/result.json
echo "matrix=$(jq -c . < /tmp/result.json)" >> $GITHUB_OUTPUT
installation-and-connectivity:
name: Installation and Connectivity Test
needs: generate-matrix
runs-on: ubuntu-latest
timeout-minutes: 90
env:
job_name: "Installation and Connectivity Test"
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Collect Workflow Telemetry
uses: catchpoint/workflow-telemetry-action@94c3c3d9567a0205de6da68a76c428ce4e769af1 # v2.0.0
with:
comment_on_pr: false
- name: Checkout context ref (trusted)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Get Cilium's default values
id: default_vars
uses: ./.github/actions/helm-default
with:
image-tag: ${{ inputs.SHA }}
chart-dir: ./untrusted/install/kubernetes/cilium
- name: Set up job variables
id: vars
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
OWNER="${{ inputs.PR-number }}"
else
OWNER="${{ github.ref_name }}"
OWNER="${OWNER/./-}"
fi
CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
--helm-set=cluster.name=${{ env.clusterName }} \
--helm-set=hubble.relay.enabled=true \
--helm-set loadBalancer.l7.backend=envoy \
--helm-set tls.secretsBackend=k8s \
--helm-set=bpf.monitorAggregation=none \
--wait=false"
if [[ "${{ matrix.ipsec }}" == "true" ]]; then
CILIUM_INSTALL_DEFAULTS+=" --helm-set encryption.enabled=true --helm-set encryption.type=ipsec"
fi
if [[ "${{ matrix.kpr }}" == "true" ]]; then
CILIUM_INSTALL_DEFAULTS+=" --helm-set kubeProxyReplacement=true"
fi
CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure \
--external-target amazon.com."
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${{ steps.default_vars.outputs.sha }} >> $GITHUB_OUTPUT
echo owner=${OWNER} >> $GITHUB_OUTPUT
- name: Install Cilium CLI
uses: cilium/cilium-cli@511f0173c21db1c3c959b96fd68eef18f83a0a9f # v0.16.10
with:
repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
release-version: ${{ env.CILIUM_CLI_VERSION }}
ci-version: ${{ env.cilium_cli_ci_version }}
binary-name: cilium-cli
binary-dir: ./
- name: Install kubectl
run: |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
role-to-assume: ${{ secrets.AWS_PR_ASSUME_ROLE }}
aws-region: ${{ matrix.region }}
- name: Create EKS cluster
uses: ./.github/actions/setup-eks-cluster
with:
cluster_name: ${{ env.clusterName }}
region: ${{ matrix.region }}
owner: "${{ steps.vars.outputs.owner }}"
version: ${{ matrix.version }}
spot: false
- name: Create IPsec key
if: ${{ matrix.ipsec == true }}
shell: bash
run: |
KEYID=15
kubectl create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="${KEYID} rfc4106(gcm(aes)) $(dd if=/dev/urandom count=20 bs=1 2> /dev/null | xxd -p -c 64) 128"
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-aws-ci hubble-relay-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
- name: Make sure images available from cluster
run: |
kubectl create -f - <<EOF
apiVersion: batch/v1
kind: Job
metadata:
name: wait-for-images
spec:
completions: 1
backoffLimit: 3
template:
spec:
containers:
- name: wait-for-images
image: quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/cilium-ci:${{ steps.vars.outputs.sha }}
command: ["true"]
tolerations:
- key: "node.cilium.io/agent-not-ready"
operator: "Equal"
value: "true"
effect: "NoExecute"
restartPolicy: Never
EOF
kubectl wait --for=condition=complete --timeout=10m job/wait-for-images
# This is a workaround for flake #16938.
- name: Remove AWS-CNI
run: |
kubectl -n kube-system delete daemonset aws-node
# Warning: since this is a privileged workflow, subsequent workflow job
# steps must take care not to execute untrusted code.
- name: Checkout pull request branch (NOT TRUSTED)
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
path: untrusted
sparse-checkout: |
install/kubernetes/cilium
- name: Install Cilium
id: install-cilium
run: |
./cilium-cli install ${{ steps.vars.outputs.cilium_install_defaults }}
- name: Wait for Cilium to be ready
run: |
./cilium-cli status --wait --wait-duration=10m
kubectl get pods -n kube-system
- name: Check that AWS leftover iptables chains have been removed
run: |
for pod in $(kubectl get po -n kube-system -l app.kubernetes.io/name=cilium-agent -o name); do
echo "Checking ${pod}"
if kubectl exec -n kube-system ${pod} -c cilium-agent -- iptables-save | grep --silent ':AWS'; then
echo "Unexpected AWS leftover iptables chains"
kubectl exec -n kube-system ds/cilium -- iptables-save | grep ':AWS'
exit 1
fi
done
- name: Port forward Relay
run: |
./cilium-cli hubble port-forward&
sleep 10s
[[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test (${{ join(matrix.*, ', ') }})
run: |
./cilium-cli connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}) - 1.xml" \
--junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"
- name: Setup conn-disrupt-test before rotating (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/conn-disrupt-test-setup
- name: Run IPsec key rotation tests (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/ipsec-key-rotate
with:
key-algo: "gcm(aes)"
key-type-one: ""
key-type-two: ""
- name: Check conn-disrupt-test after rotating (${{ join(matrix.*, ', ') }})
if: ${{ matrix.ipsec == true }}
uses: ./.github/actions/conn-disrupt-test-check
with:
full-test: 'true'
extra-connectivity-test-flags: ${{ steps.vars.outputs.connectivity_test_defaults }}
- name: Post-test information gathering
if: ${{ !success() && steps.install-cilium.outcome != 'skipped' }}
run: |
kubectl get pods --all-namespaces -o wide
./cilium-cli status
./cilium-cli sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: cilium-sysdumps-${{ matrix.version }}
path: cilium-sysdump-*.zip
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: cilium-junits-${{ matrix.version }}
path: cilium-junits/*.xml
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
merge-upload:
if: ${{ always() }}
name: Merge and Upload Artifacts
runs-on: ubuntu-latest
needs: installation-and-connectivity
steps:
- name: Merge Sysdumps
if: ${{ needs.installation-and-connectivity.result == 'failure' }}
uses: actions/upload-artifact/merge@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: cilium-sysdumps
pattern: cilium-sysdumps-*
retention-days: 5
delete-merged: true
continue-on-error: true
- name: Merge JUnits
uses: actions/upload-artifact/merge@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: cilium-junits
pattern: cilium-junits-*
retention-days: 5
delete-merged: true
commit-status-final:
if: ${{ always() }}
name: Commit Status Final
needs: installation-and-connectivity
runs-on: ubuntu-latest
steps:
- name: Set final commit status
uses: myrotvorets/set-commit-status-action@3730c0a348a2ace3c110851bed53331bc6406e9f # v2.0.1
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ needs.installation-and-connectivity.result }}
cleanup:
name: Cleanup EKS Clusters
if: ${{ always() }}
continue-on-error: true
needs: [generate-matrix, installation-and-connectivity]
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
role-to-assume: ${{ secrets.AWS_PR_ASSUME_ROLE }}
aws-region: ${{ matrix.region }}
- name: Clean up EKS
run: |
eksctl delete cluster --name ${{ env.clusterName }} --region ${{ matrix.region }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently