Skip to content

Commit

Permalink
CI: enable no-retry/retry testing separation and workflows cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: apostasie <[email protected]>
  • Loading branch information
apostasie committed Oct 18, 2024
1 parent 44c7315 commit 8f40924
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 73 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ghcr-image-build-and-publish.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Container Image Build
name: image

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
Expand All @@ -21,7 +21,6 @@ env:
# github.repository as <account>/<repo>
IMAGE_NAME: ${{ github.repository }}


jobs:
build:

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:

jobs:
project:
name: Project Checks
name: checks
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/test-canary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@ jobs:
- name: "Run unit tests"
run: go test -v ./pkg/...
- name: "Run integration tests"
run: docker run -t --rm --privileged test-integration
run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=false
- name: "Run integration tests (flaky)"
run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=true

windows:
runs-on: windows-latest
timeout-minutes: 30
runs-on: windows-latest
defaults:
run:
shell: bash
Expand All @@ -74,6 +76,7 @@ jobs:
cache: true
check-latest: true
- run: go install ./cmd/nerdctl
- run: go install -v gotest.tools/gotestsum@v1
# This here is solely to get the cni install script, which has not been modified in 3+ years.
# There is little to no reason to update this to latest containerd
- uses: actions/[email protected]
Expand All @@ -91,5 +94,6 @@ jobs:
ctrdVersion: ${{ env.CONTAINERD_VERSION }}
run: powershell hack/configure-windows-ci.ps1
- name: "Run integration tests"
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
run: go test -p 1 -v ./cmd/nerdctl/...
run: ./hack/test-integration.sh -test.only-flaky=false
- name: "Run integration tests (flaky)"
run: ./hack/test-integration.sh -test.only-flaky=true
5 changes: 2 additions & 3 deletions .github/workflows/test-kube.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ on:
paths-ignore:
- '**.md'

env:
ROOTFUL: true

jobs:
linux:
runs-on: "ubuntu-24.04"
timeout-minutes: 40
env:
ROOTFUL: true
steps:
- uses: actions/[email protected]
with:
Expand Down
98 changes: 45 additions & 53 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@ on:

env:
GO_VERSION: 1.23.x
SHORT_TIMEOUT: 5
LONG_TIMEOUT: 60

jobs:
test-unit:
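# Ideally this would reuse SHORT_TIMEOUT through an expression, as described in:
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#example-returning-a-json-data-type
# In practice it does not work here (the `env` context is not available to `timeout-minutes`), so the value is hard-coded below.
# timeout-minutes: ${{ fromJSON(env.SHORT_TIMEOUT) }}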
timeout-minutes: 5
name: unit | ${{ matrix.goos }}
runs-on: "${{ matrix.os }}"
Expand Down Expand Up @@ -48,11 +53,12 @@ jobs:
working-directory: containerd
run: GOPATH=$(go env GOPATH) script/setup/install-cni-windows
- name: "Run unit tests"
run: go test -v ./pkg/...
run: make test-unit

test-integration:
timeout-minutes: 60
name: rootful | ${{ matrix.containerd }} | ${{ matrix.runner }}
runs-on: "${{ matrix.runner }}"
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -95,23 +101,21 @@ jobs:
docker run --privileged --rm tonistiigi/binfmt --install linux/arm64
docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7
- name: "Run integration tests"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run -t --rm --privileged test-integration
run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=false
- name: "Run integration tests (flaky)"
run: docker run -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-flaky=true

test-integration-ipv6:
timeout-minutes: 60
name: ipv6 | ${{ matrix.containerd }} | ${{ matrix.ubuntu }}
runs-on: "ubuntu-${{ matrix.ubuntu }}"
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
# ubuntu-20.04: cgroup v1, ubuntu-22.04 and later: cgroup v2
include:
- ubuntu: 24.04
containerd: v1.7.23
containerd: v2.0.0-rc.5
env:
UBUNTU_VERSION: "${{ matrix.ubuntu }}"
CONTAINERD_VERSION: "${{ matrix.containerd }}"
Expand All @@ -129,7 +133,7 @@ jobs:
echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json
sudo systemctl restart docker
- name: "Prepare integration test environment"
run: docker build -t test-integration-ipv6 --target test-integration-ipv6 --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} .
run: docker build -t test-integration --target test-integration --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} .
- name: "Remove snap loopback devices (conflicts with our loopback devices in TestRunDevice)"
run: |
sudo systemctl disable --now snapd.service snapd.socket
Expand All @@ -147,20 +151,16 @@ jobs:
docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7
- name: "Run integration tests"
# The nested IPv6 network inside docker and qemu is complex and needs a bunch of sysctl config.
# Therefore it's hard to debug why the IPv6 tests fail in such an isolation layer.
# Therefore, it's hard to debug why the IPv6 tests fail in such an isolation layer.
# On the other hand, the host network is much easier to configure.
# Besides, each job runs on a different instance, so using the host network here
# is safe and has no side effects on other jobs.
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run --network host -t --rm --privileged test-integration-ipv6
run: docker run --network host -t --rm --privileged test-integration ./hack/test-integration.sh -test.only-ipv6

test-integration-rootless:
runs-on: "ubuntu-${{ matrix.ubuntu }}"
timeout-minutes: 60
name: "${{ matrix.target }} | ${{ matrix.containerd }} | ${{ matrix.rootlesskit }} | ${{ matrix.ubuntu }}"
runs-on: "ubuntu-${{ matrix.ubuntu }}"
strategy:
fail-fast: false
matrix:
Expand All @@ -169,24 +169,24 @@ jobs:
- ubuntu: 20.04
containerd: v1.6.36
rootlesskit: v1.1.1 # Deprecated
target: test-integration-rootless
target: rootless
- ubuntu: 22.04
containerd: v1.7.23
rootlesskit: v2.3.1
target: test-integration-rootless
target: rootless
- ubuntu: 24.04
containerd: v2.0.0-rc.5
rootlesskit: v2.3.1
target: test-integration-rootless
target: rootless
- ubuntu: 24.04
containerd: v1.7.23
rootlesskit: v2.3.1
target: test-integration-rootless-port-slirp4netns
target: rootless-port-slirp4netns
env:
UBUNTU_VERSION: "${{ matrix.ubuntu }}"
CONTAINERD_VERSION: "${{ matrix.containerd }}"
ROOTLESSKIT_VERSION: "${{ matrix.rootlesskit }}"
TEST_TARGET: "${{ matrix.target }}"
TEST_TARGET: "test-integration-${{ matrix.target }}"
steps:
- name: "Set up AppArmor"
if: matrix.ubuntu == '24.04'
Expand Down Expand Up @@ -226,16 +226,14 @@ jobs:
fi
echo "WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622}" >> "$GITHUB_ENV"
- name: "Test (network driver=slirp4netns, port driver=builtin)"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET}
run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./hack/test-integration.sh -test.only-flaky=false
- name: "Test (network driver=slirp4netns, port driver=builtin) (flaky)"
run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./hack/test-integration.sh -test.only-flaky=true

cross:
build:
timeout-minutes: 5
name: "build | ${{ matrix.go-version }}"
runs-on: ubuntu-24.04
timeout-minutes: 40
strategy:
matrix:
go-version: ["1.22.x", "1.23.x"]
Expand All @@ -248,12 +246,13 @@ jobs:
go-version: ${{ matrix.go-version }}
cache: true
check-latest: true
- name: "Cross"
- name: "build"
run: GO_VERSION="$(echo ${{ matrix.go-version }} | sed -e s/.x//)" make binaries

test-integration-docker-compatibility:
timeout-minutes: 60
name: docker
runs-on: ubuntu-24.04
timeout-minutes: 45
steps:
- uses: actions/[email protected]
with:
Expand All @@ -280,26 +279,18 @@ jobs:
- name: "Prepare integration test environment"
run: |
sudo apt-get install -y expect
go install -v gotest.tools/gotestsum@v1
- name: "Ensure that the integration test suite is compatible with Docker"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon
run: ./hack/test-integration.sh -test.target=docker
- name: "Ensure that the IPv6 integration test suite is compatible with Docker"
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 2
retry_on: error
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon -test.only-ipv6
run: ./hack/test-integration.sh -test.target=docker -test.only-ipv6
- name: "Ensure that the integration test suite is compatible with Docker (flaky only)"
run: ./hack/test-integration.sh -test.target=docker -test.only-flaky

test-integration-windows:
runs-on: windows-2022
timeout-minutes: 30
name: windows
runs-on: windows-2022
defaults:
run:
shell: bash
Expand All @@ -313,6 +304,7 @@ jobs:
cache: true
check-latest: true
- run: go install ./cmd/nerdctl
- run: go install -v gotest.tools/gotestsum@v1
- uses: actions/[email protected]
with:
repository: containerd/containerd
Expand All @@ -326,16 +318,16 @@ jobs:
env:
ctrdVersion: 1.7.23
run: powershell hack/configure-windows-ci.ps1
# TODO: Run unit tests
- name: "Run integration tests"
# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
run: go test -p 1 -v ./cmd/nerdctl/...
run: ./hack/test-integration.sh -test.only-flaky=false
- name: "Run integration tests (flaky)"
run: ./hack/test-integration.sh -test.only-flaky=true

test-integration-freebsd:
timeout-minutes: 60
name: FreeBSD
# ubuntu-24.04 lacks the vagrant package
runs-on: ubuntu-22.04
timeout-minutes: 20

steps:
- uses: actions/[email protected]
Expand Down
14 changes: 4 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ ARG DEBIAN_FRONTEND=noninteractive
# `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing
RUN apt-get update -qq && apt-get install -qq --no-install-recommends \
expect \
git
git \
make
COPY --from=goversion /GOVERSION /GOVERSION
ARG TARGETARCH
RUN curl -fsSL --proto '=https' --tlsv1.2 https://golang.org/dl/$(cat /GOVERSION).linux-${TARGETARCH:-amd64}.tar.gz | tar xzvC /usr/local
Expand Down Expand Up @@ -314,8 +315,7 @@ RUN curl -o nydus-static.tgz -fsSL --proto '=https' --tlsv1.2 "https://github.co
tar xzf nydus-static.tgz && \
mv nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/bin/ && \
rm nydus-static.tgz
CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"]
CMD ["./hack/test-integration.sh"]

FROM test-integration AS test-integration-rootless
# Install SSH for creating systemd user session.
Expand All @@ -338,17 +338,11 @@ RUN systemctl disable test-integration-ipfs-offline
VOLUME /home/rootless/.local/share
COPY ./Dockerfile.d/test-integration-rootless.sh /
RUN chmod a+rx /test-integration-rootless.sh
CMD ["/test-integration-rootless.sh", \
"gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"]
CMD ["/test-integration-rootless.sh", "./hack/test-integration.sh"]

# test for CONTAINERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns
FROM test-integration-rootless AS test-integration-rootless-port-slirp4netns
COPY ./Dockerfile.d/home_rootless_.config_systemd_user_containerd.service.d_port-slirp4netns.conf /home/rootless/.config/systemd/user/containerd.service.d/port-slirp4netns.conf
RUN chown -R rootless:rootless /home/rootless/.config

FROM test-integration AS test-integration-ipv6
CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \
"--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon", "-test.only-ipv6"]

FROM base AS demo
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ lint-yaml:
lint-shell: $(call recursive_wildcard,$(MAKEFILE_DIR)/,*.sh)
shellcheck -a -x $^

test-unit:
go test -v $(MAKEFILE_DIR)/pkg/...

binaries: nerdctl

install:
Expand Down
44 changes: 44 additions & 0 deletions hack/test-integration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Copyright The containerd Authors.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integration test driver: runs the ./cmd/nerdctl/... test packages through gotestsum.
#
# Usage:
#   test-integration.sh                run the suite twice: first with -test.only-flaky=false,
#                                      then with -test.only-flaky=true
#   test-integration.sh <test flags>   run the suite once, forwarding the flags to the test binaries
#
# Failed tests are rerun (gotestsum --rerun-fails) only when the flaky tests are selected.
# The exit status is the one returned by the test run.

# shellcheck disable=SC2034,SC2015
set -o errexit -o errtrace -o functrace -o nounset -o pipefail
# Directory holding this script; every other path is resolved relative to it.
root="$(cd "$(dirname "${BASH_SOURCE[0]:-$PWD}")" 2>/dev/null 1>&2 && pwd)"
readonly root

readonly timeout="60m"
readonly retries="2"
# JSON event log written by the test run and read back by the slow-test report.
readonly logfile="/tmp/test-integration.log"
# Tests slower than this are listed in the final report.
readonly slow_threshold="10s"

args=(--format=testname --jsonfile "$logfile" --packages="$root"/../cmd/nerdctl/...)

# Without arguments, run both passes in sequence. errexit stops at the first failing pass.
if [ "$#" == 0 ]; then
  "$root"/test-integration.sh -test.only-flaky=false
  "$root"/test-integration.sh -test.only-flaky=true
  exit
fi

# Only the flaky pass is allowed to retry failed tests.
for arg in "$@"; do
  if [ "$arg" == "-test.only-flaky=true" ] || [ "$arg" == "-test.only-flaky" ]; then
    args+=("--rerun-fails=$retries")
    break
  fi
done

# Capture the status instead of letting errexit abort here, so that the slow-test
# report below is still printed when tests fail.
# About `-p 1`, see https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization
status=0
gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" || status=$?

echo "These are the tests that took more than 10 seconds:"
# The report is informational only: if it cannot be produced, warn but keep the test status.
gotestsum tool slowest --threshold "$slow_threshold" --jsonfile "$logfile" ||
  echo "warning: unable to produce the slow tests report from $logfile" >&2

exit "$status"

0 comments on commit 8f40924

Please sign in to comment.