Skip to content

Commit

Permalink
Merge pull request #530 from amazonlinux/sonobuoy-conformance-script
Browse files Browse the repository at this point in the history
tools: Adds a Sonobuoy conformance testing script
  • Loading branch information
etungsten authored Dec 9, 2019
2 parents 3aee900 + a4496e5 commit b2f1355
Show file tree
Hide file tree
Showing 4 changed files with 550 additions and 0 deletions.
29 changes: 29 additions & 0 deletions tools/conformance-test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Thar Kubernetes Conformance Testing
`setup-test-cluster.sh` sets up an EKS cluster and generates an env file containing cluster information and the user data used to launch Thar worker nodes.

`run-conformance-test.sh` uses the generated env file to launch the specified number of worker nodes in the described EKS cluster and runs the Kubernetes conformance tests.
Once the tests are done, the script outputs the test results in the current directory and cleans up the worker nodes.

`clean-up-test-cluster.sh` deletes the EKS test cluster specified by the env file and any files generated by `setup-test-cluster.sh`.

## Running conformance tests against Thar nodes using these scripts:
1. Run `setup-test-cluster.sh` to set up a test cluster:
```
setup-test-cluster.sh --region us-west-2 --cluster-name my-test-cluster
```
Once the setup completes successfully, there should be a user data file and an env file in the current directory.
2. Run `run-conformance-test.sh` to launch Thar worker nodes and run the Kubernetes conformance test:
```
run-conformance-test.sh --node-ami ami-07245e9300b9290c1 \
--cluster-env-file my-test-cluster.env \
--instance-type m5.large \
--num-nodes 3
```
3. The Sonobuoy Kubernetes conformance test results can be examined with:
```
sonobuoy results my-test-cluster-conformance-test-results/TIMESTAMP.tar.gz
```
4. To clean up the test cluster and other associated resources, run `clean-up-test-cluster.sh`:
```
clean-up-test-cluster.sh --cluster-env-file my-test-cluster.env
```
98 changes: 98 additions & 0 deletions tools/conformance-test/clean-up-test-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash

# Check for required tools.
# 'aws' is used to revoke the security group rules and 'eksctl' deletes the
# cluster, so verify both up front instead of failing halfway through.
for tool in aws eksctl; do
  if ! command -v "${tool}" >/dev/null; then
    echo "* Can't find executable '${tool}'" >&2
    exit 2
  fi
done

# Helper functions

# Prints the help text for this script on stderr.
# Outputs: script name, example invocation and option descriptions (stderr).
usage() {
  local script_name="${0##*/}"
  cat <<EOF >&2
${script_name}
--cluster-env-file my-test-cluster.env
Cleans up the EKS cluster described by the specified env file and any files generated by 'setup-test-cluster.sh'
Required:
--cluster-env-file Path to the env file containing EKS cluster information targetted for clean up
EOF
}

# Aborts the script when a mandatory option was given no value.
# Arguments: $1 - option name used in the error message (must be non-empty)
#            $2 - value supplied for that option (may be empty)
# Exits with status 2 and an error on stderr when $2 is empty.
required_arg() {
  local option_name="${1:?}"
  local option_value="${2}"

  # Nothing to complain about when a value is present.
  [ -z "${option_value}" ] || return 0

  echo "ERROR: ${option_name} is required" >&2
  exit 2
}

# Parses the command line into global variables.
# Globals:   ENV_FILE (written)
# Arguments: the script's positional parameters
# Exits 0 after printing usage for --help; exits 2 on an unknown argument or
# when --cluster-env-file ends up empty.
parse_args() {
  until [ ${#} -eq 0 ]; do
    case "${1}" in
      --cluster-env-file )
        # The value is the word following the flag.
        shift
        ENV_FILE="${1}"
        ;;
      --help )
        usage
        exit 0
        ;;
      * )
        echo "ERROR: Unknown argument: ${1}" >&2
        usage
        exit 2
        ;;
    esac
    shift
  done

  # Required arguments
  required_arg "--cluster-env-file" "${ENV_FILE}"
}

# Terminates the script when a previous command reported failure.
# Arguments: $1 - exit status to inspect (must be non-empty)
#            $2 - message printed on stderr when the status is non-zero
# Exits with status 1 on a non-zero status; otherwise returns normally.
exit_on_error() {
  local status="${1:?}"
  local message="${2:?}"

  # Only a non-zero status is treated as failure.
  [ "${status}" -ne 0 ] || return 0

  echo "${message}" >&2
  exit 1
}

# Initial setup and checks
parse_args "${@}"

# The env file created by the 'setup-test-cluster' script describes what to
# delete; without it there is nothing to act on, so stop here.
if [ ! -f "${ENV_FILE}" ]; then
  echo "* Failed to open env file at ${ENV_FILE}." >&2
  exit 1
fi
. "${ENV_FILE}"

# Revoke the rules in which the node group and control plane security groups
# reference each other (presumably so cluster deletion is not blocked by them).
echo "Removing security group dependencies."
aws ec2 revoke-security-group-ingress \
  --region "${REGION}" \
  --group-id "${NODEGROUP_SG}" \
  --protocol tcp \
  --port 1-1024 \
  --source-group "${CONTROLPLANE_SG}" \
  || exit_on_error ${?} "* Failed to remove nodegroup sg ingress rules"

aws ec2 revoke-security-group-egress \
  --region "${REGION}" \
  --group-id "${CONTROLPLANE_SG}" \
  --protocol tcp \
  --port 1-1024 \
  --source-group "${NODEGROUP_SG}" \
  || exit_on_error ${?} "* Failed to remove control plane sg egress rules"

echo "Deleting the test cluster."
eksctl delete cluster -r "${REGION}" -n "${CLUSTER_NAME}" \
  || exit_on_error ${?} "* Failed to delete ${CLUSTER_NAME} with eksctl, there might be leftover CloudFormation stacks that needs to be deleted. Look for eksctl-${CLUSTER_NAME}-*" >&2

# Local artifacts are only removed once the cluster itself is gone.
echo "Deleting env file, userdata file, and kubeconfig file"
rm -f "${ENV_FILE}" "${USERDATA_FILE}" "${KUBECONFIG_FILE}"

echo "Clean up done."
190 changes: 190 additions & 0 deletions tools/conformance-test/run-conformance-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#!/usr/bin/env bash

# Check for required tools: stop at the first one that is not on PATH.
for tool in jq sonobuoy aws kubectl; do
  command -v ${tool} > /dev/null && continue
  echo "* Can't find executable '${tool}'" >&2
  exit 2
done

# Fallback values used when the corresponding command line option is omitted.
DEFAULT_KUBE_CONFORMANCE_VERSION="v1.14.7"
DEFAULT_NUM_NODES="3"

# Helper functions

# Prints the help text for this script on stderr.
# Globals: DEFAULT_KUBE_CONFORMANCE_VERSION, DEFAULT_NUM_NODES (read, shown as
#          the defaults of the optional flags)
# The synopsis lists every accepted option, including the optional
# --num-nodes flag, so it agrees with the "Optional:" section below it.
usage() {
  cat >&2 <<EOF
${0##*/}
--node-ami <AMI ID>
--instance-type m5.large
--cluster-env-file my-test-cluster.env
[ --kube-conformance-version v1.14.6 ]
[ --num-nodes 3 ]
Spins up worker nodes to join the EKS cluster described in the env file and then runs Sonobuoy Kubernetes conformance tests against said cluster.
Once the tests are done, retrieves the results and cleans up the created worker node instances.
Required:
--node-ami The AMI ID of the AMI to use for the worker nodes
--instance-type Instance type launched for worker nodes
--cluster-env-file Path to the env file containing cluster information for setting up worker nodes. Typically generated by 'setup-test-cluster.sh'.
Optional:
--kube-conformance-version The version of the conformance image to use for conformance testing (default "${DEFAULT_KUBE_CONFORMANCE_VERSION}")
--num-nodes The number of Thar worker nodes to launch (default ${DEFAULT_NUM_NODES})
EOF
}

# Verifies that a mandatory option received a value.
# Arguments: $1 - option name for the error message (must be non-empty)
#            $2 - the value that was parsed for it (may be empty)
# Exits with status 2 after reporting on stderr when the value is empty.
required_arg() {
  local flag="${1:?}"
  local given="${2}"

  if [ -n "${given}" ]; then
    return 0
  fi

  echo "ERROR: ${flag} is required" >&2
  exit 2
}

# Parses the command line into global variables and validates it.
# Globals:   NODE_AMI, INSTANCE_TYPE, ENV_FILE, KUBE_CONFORMANCE_VERSION,
#            NUM_NODES (written); DEFAULT_KUBE_CONFORMANCE_VERSION,
#            DEFAULT_NUM_NODES (read)
# Arguments: the script's positional parameters
# Exits 0 after printing usage for --help. Exits 2 when an argument is
# unknown, when a flag is missing its value, when --num-nodes is not a
# non-negative integer, or when a required option is absent.
parse_args() {
  while [ ${#} -gt 0 ]; do
    # First pass: classify the argument and make sure a value follows it.
    case "${1}" in
      --help )
        usage
        exit 0
        ;;
      --node-ami | --instance-type | --cluster-env-file | --kube-conformance-version | --num-nodes )
        # Without this check a trailing flag would silently take an empty value.
        if [ ${#} -lt 2 ]; then
          echo "ERROR: ${1} requires a value" >&2
          usage
          exit 2
        fi
        ;;
      * )
        echo "ERROR: Unknown argument: ${1}" >&2
        usage
        exit 2
        ;;
    esac

    # Second pass: store the value that follows the flag.
    case "${1}" in
      --node-ami ) NODE_AMI="${2}" ;;
      --instance-type ) INSTANCE_TYPE="${2}" ;;
      --cluster-env-file ) ENV_FILE="${2}" ;;
      --kube-conformance-version ) KUBE_CONFORMANCE_VERSION="${2}" ;;
      --num-nodes ) NUM_NODES="${2}" ;;
    esac
    shift 2
  done

  KUBE_CONFORMANCE_VERSION="${KUBE_CONFORMANCE_VERSION:-${DEFAULT_KUBE_CONFORMANCE_VERSION}}"
  NUM_NODES="${NUM_NODES:-${DEFAULT_NUM_NODES}}"

  # NUM_NODES is later used in integer comparisons; a non-numeric value would
  # make those tests error out and let the script continue with no nodes.
  case "${NUM_NODES}" in
    '' | *[!0-9]* )
      echo "ERROR: --num-nodes must be a non-negative integer, got '${NUM_NODES}'" >&2
      exit 2
      ;;
  esac

  # Required arguments
  required_arg "--node-ami" "${NODE_AMI}"
  required_arg "--instance-type" "${INSTANCE_TYPE}"
  required_arg "--cluster-env-file" "${ENV_FILE}"
}

# Releases what this run created: the launched worker instances and, if a
# Sonobuoy run was started, the Sonobuoy namespace.
# Globals: instance_ids, sonobuoy_run_attempted (read, then unset);
#          REGION, SONOBUOY (read)
# Each resource marker is unset after handling, so a repeated call skips it.
cleanup() {
  if [ "${#instance_ids[@]}" -gt 0 ]; then
    echo "Cleaning up Thar worker node instances"
    for instance_id in "${instance_ids[@]}"; do
      aws ec2 terminate-instances \
        --output text \
        --region "${REGION}" \
        --instance-ids "${instance_id}"
    done
    unset instance_ids
  fi

  # Wait at most 20 minutes for Sonobuoy to delete its namespace
  if [ -n "${sonobuoy_run_attempted}" ]; then
    echo "Cleaning up Sonobuoy namespace, may take up to 20 minutes"
    ${SONOBUOY} delete --wait=20 \
      || exit_on_error ${?} "* Failed to delete Sonobuoy namespace."
    unset sonobuoy_run_attempted
  fi
}

# Run cleanup on every exit path.
trap 'cleanup' EXIT
# A signal handler that only ran cleanup would return control to the script,
# which would then carry on with its instances already terminated. Exit
# instead (128 + signal number); the EXIT trap above performs the cleanup.
trap 'exit 130' SIGINT
trap 'exit 143' SIGTERM

# Stops the script if the given exit status signals a failure.
# Arguments: $1 - exit status of the command being checked (must be non-empty)
#            $2 - message written to stderr on failure (must be non-empty)
# Exits with status 1 when $1 is non-zero; otherwise does nothing.
exit_on_error() {
  local code="${1:?}"
  local text="${2:?}"

  # A zero status means success: return without output.
  [ "${code}" -ne 0 ] || return 0

  echo "${text}" >&2
  exit 1
}

# Initial setup and checks
parse_args "${@}"

# The env file created by the `setup-test-cluster` script is sourced into
# this shell; if it does not exist there is nothing to run against, so exit.
if [ ! -f "${ENV_FILE}" ]; then
  echo "* Failed to open env file at ${ENV_FILE}." >&2
  exit 1
fi
. "${ENV_FILE}"

# Launch the worker nodes one `run-instances` call at a time and remember
# each instance ID that comes back, so they can be terminated later.
echo "Launching ${NUM_NODES} Thar worker nodes"
unset instance_ids
counter=0
while [ "${counter}" -lt "${NUM_NODES}" ]; do
  instance_id=$(aws ec2 run-instances \
    --subnet-id "${SUBNET_ID}" \
    --security-group-ids "${NODEGROUP_SG}" "${CLUSTERSHARED_SG}" \
    --image-id "${NODE_AMI}" \
    --instance-type "${INSTANCE_TYPE}" \
    --region "${REGION}" \
    --tag-specifications "ResourceType=instance,Tags=[{Key=kubernetes.io/cluster/${CLUSTER_NAME},Value=owned}]" \
    --user-data file://"${USERDATA_FILE}" \
    --iam-instance-profile Name="${INSTANCE_PROFILE}" \
    --query "Instances[*].InstanceId" \
    --output text)
  # Only record launches that actually produced an instance ID.
  [ -z "${instance_id}" ] || instance_ids+=("${instance_id}")
  sleep 1
  counter=$((counter + 1))
done

# Every requested node must have launched; a partial fleet is a failure.
actual_num_nodes=${#instance_ids[@]}
if [ "${actual_num_nodes}" -ne "${NUM_NODES}" ]; then
  echo "* Failed to launch requested number of Thar instances: launched ${actual_num_nodes} out of ${NUM_NODES}." >&2
  exit 1
fi

# Poll the cluster until the number of registered nodes and the number of
# nodes reporting 'Ready' both equal the number of launched instances.
echo "Waiting for all Thar worker nodes to become 'Ready' in ${CLUSTER_NAME} cluster"
KUBECTL="kubectl --kubeconfig ${KUBECONFIG_FILE}"
MAX_ATTEMPTS=30
attempts=0
# Wait before the first poll.
sleep 30
while :; do
  attempts=$((attempts + 1))
  if [ "${attempts}" -gt ${MAX_ATTEMPTS} ]; then
    echo "* Retry limit (${MAX_ATTEMPTS}) reached! Worker nodes are not becoming ready in cluster ${CLUSTER_NAME}" >&2
    exit 1
  fi
  sleep 5

  nodes=$(${KUBECTL} get nodes --no-headers) \
    || exit_on_error ${?} "* Failed to get node information for ${CLUSTER_NAME} cluster"

  # found: nodes registered in the cluster; ready: lines containing the
  # whole word "Ready".
  found=$(${KUBECTL} get nodes --no-headers -o name | wc -l)
  ready=$(echo -n "${nodes}" | grep -c -w "Ready")
  echo "ready: ${ready}"

  # Keep polling until both counts match the launched fleet size.
  [ "${found}" -eq "${actual_num_nodes}" ] || continue
  [ "${ready}" -eq "${actual_num_nodes}" ] || continue
  break
done

# Run the conformance suite, fetch the results tarball and report on it.
# Each step aborts the script with its own message when it fails.
echo "Starting Sonobuoy Kubernetes conformance test! Test may take up to 60 minutes to finish"
# Set before the run so that cleanup removes the Sonobuoy namespace afterwards.
sonobuoy_run_attempted=true
SONOBUOY="sonobuoy --kubeconfig ${KUBECONFIG_FILE}"
${SONOBUOY} run \
  --mode certified-conformance \
  --kube-conformance-image-version "${KUBE_CONFORMANCE_VERSION}" \
  --wait \
  || exit_on_error ${?} "* Failed to run Sonobuoy Kubernetes conformance tests"

${SONOBUOY} status \
  || exit_on_error ${?} "* Failed to retrieve conformance test status"

results_file=$(${SONOBUOY} retrieve "${CLUSTER_NAME}"-conformance-test-results) \
  || exit_on_error ${?} "* Failed to retrieve Sonobuoy Kubernetes conformance test results"

sonobuoy results "${results_file}" \
  || exit_on_error ${?} "* Failed to examine sonobuoy results in ${results_file}"
echo "Sonobuoy test results available at ${results_file}"

# Exit non-zero if any of the Kubernetes conformance tests fail
(
  set -o pipefail
  sonobuoy results "${results_file}" \
    | awk '$2 == "failed" || $2 == "unknown" {exit 1}'
) || exit_on_error ${?} "* Found conformance test failures or conformance tests failed to run (unknown status)."
Loading

0 comments on commit b2f1355

Please sign in to comment.