Skip to content

Commit

Permalink
ci: add robustness for e2e ci
Browse files Browse the repository at this point in the history
Signed-off-by: iGxnon <[email protected]>
  • Loading branch information
iGxnon committed Oct 23, 2023
1 parent 68810f8 commit 725d896
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 27 deletions.
54 changes: 27 additions & 27 deletions tests/e2e/cases/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ function test::ci::_etcdctl_expect() {
fi
}

#######################################
# Block until the "${_TEST_CI_CLUSTER_NAME}-nodes" StatefulSet reports the
# expected number of replicas, using `kubectl wait` with a 300s timeout.
# All kubectl output (stdout and stderr) is discarded.
# Globals:
#   _TEST_CI_CLUSTER_NAME (read)
# Arguments:
#   $1 - expected replica count
#   $2 - optional; pass "true" to first wait for .status.updatedReplicas
#        to reach $1 before waiting on .status.readyReplicas
#        (default: false)
# Returns:
#   Exit status of the final readyReplicas wait.
#######################################
function test::ci::_wait_for_cluster_ready() {
  # Keep these local so the caller's own `size`/`scaled` are not clobbered.
  local size=$1
  local scaled=${2-false}

  # Compare the flag explicitly instead of executing its value as a command.
  if [[ "$scaled" == "true" ]]; then
    k8s::kubectl wait --for=jsonpath='{.status.updatedReplicas}'="$size" sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
  fi
  k8s::kubectl wait --for=jsonpath='{.status.readyReplicas}'="$size" sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
}

function test::ci::_start() {
log::info "starting cluster"
k8s::kubectl create clusterrolebinding serviceaccount-cluster-admin --clusterrole=cluster-admin --serviceaccount=default:default 2>/dev/null || true
Expand All @@ -41,8 +48,7 @@ function test::ci::_start() {
k8s::kubectl::wait_resource_creation crd xlineclusters.xlineoperator.xline.cloud
k8s::kubectl apply -f "$(dirname "${BASH_SOURCE[0]}")/manifests/cluster.yml" >/dev/null 2>&1
k8s::kubectl::wait_resource_creation sts "${_TEST_CI_CLUSTER_NAME}-nodes"
k8s::kubectl wait --for=jsonpath='{.status.updatedReplicas}'=$_TEST_CI_START_SIZE sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
k8s::kubectl wait --for=jsonpath='{.status.readyReplicas}'=$_TEST_CI_START_SIZE sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
test::ci::_wait_for_cluster_ready $_TEST_CI_START_SIZE true
log::info "cluster started"
}

Expand All @@ -57,8 +63,7 @@ function test::ci::_teardown() {
function test::ci::_scale_cluster() {
log::info "scaling cluster to $1"
k8s::kubectl scale xc $_TEST_CI_CLUSTER_NAME --replicas="$1" >/dev/null 2>&1
k8s::kubectl wait --for=jsonpath='{.status.updatedReplicas}'="$1" sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
k8s::kubectl wait --for=jsonpath='{.status.readyReplicas}'="$1" sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
test::ci::_wait_for_cluster_ready "$1" true
got=$(k8s::kubectl get xc $_TEST_CI_CLUSTER_NAME -o=jsonpath='{.spec.size}')
if [ "$got" -ne "$1" ]; then
echo "failed scale cluster"
Expand All @@ -69,28 +74,22 @@ function test::ci::_scale_cluster() {
log::info "cluster scaled to $1"
}

function test::ci::_chaos() {
function test::ci::_random_crash() {
size=$1
iters=$2
max_kill=$((size / 2))
log::info "chaos: size=$size, iters=$iters, max_kill=$max_kill"
for ((i = 0; i < iters; i++)); do
log::info "chaos: iter=$i"
endpoints=$(test::ci::_mk_endpoints size)
test::ci::_etcdctl_expect "$endpoints" "put A $i" "OK" || return $?
test::ci::_etcdctl_expect "$endpoints" "get A" "A\n$i" || return $?
kill=$((RANDOM % max_kill + 1))
log::info "chaos: kill=$kill"
for ((j = 0; j < kill; j++)); do
pod="${_TEST_CI_CLUSTER_NAME}-nodes-$((RANDOM % size))"
log::info "chaos: kill pod=$pod"
k8s::kubectl delete pod "$pod" --force --grace-period=0 2>/dev/null
done
test::ci::_etcdctl_expect "$endpoints" "put B $i" "OK" || return $?
test::ci::_etcdctl_expect "$endpoints" "get B" "B\n$i" || return $?
k8s::kubectl wait --for=jsonpath='{.status.readyReplicas}'="$size" sts "${_TEST_CI_CLUSTER_NAME}-nodes" --timeout=300s >/dev/null 2>&1
log::info "wait for log synchronization" && sleep $_TEST_CI_LOG_SYNC_TIMEOUT
kill=$((RANDOM % max_kill + 1))
log::info "random_crash_pods: size=$size, kill=$kill"
for ((j = 0; j < kill; j++)); do
pod="${_TEST_CI_CLUSTER_NAME}-nodes-$((RANDOM % size))"
log::info "random_crash_pods: kill pod=$pod"
k8s::kubectl delete pod "$pod" --force --grace-period=0 2>/dev/null
done
# check if it is ok
endpoints=$(test::ci::_mk_endpoints "$size")
test::ci::_etcdctl_expect "$endpoints" "put B 1" "OK" || return $?
test::ci::_etcdctl_expect "$endpoints" "get B" "B\n1" || return $?
test::ci::_wait_for_cluster_ready "$size"
log::info "wait for log synchronization" && sleep $_TEST_CI_LOG_SYNC_TIMEOUT
}

function test::run::ci::basic_validation() {
Expand Down Expand Up @@ -121,11 +120,12 @@ function test::run::ci::scale_validation() {
test::ci::_etcdctl_expect "$endpoints" "get A" "A\n2" || return $?
}

function test::run::ci::basic_chaos() {
function test::run::ci::pod_crash_recovery() {
test::ci::_teardown
test::ci::_start

test::ci::_chaos 3 5 || return $?
test::ci::_scale_cluster 5 || return $?
test::ci::_chaos 5 3 || return $?
test::ci::_random_crash 3
endpoints=$(test::ci::_mk_endpoints 5)
test::ci::_etcdctl_expect "$endpoints" "put A 1" "OK" || return $?
test::ci::_etcdctl_expect "$endpoints" "get A" "A\n1" || return $?
}
3 changes: 3 additions & 0 deletions tests/e2e/cases/manifests/cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ spec:
size: 3
container:
image: "ghcr.io/xline-kv/xline:latest"
env:
- name: RUST_LOG
value: debug
imagePullPolicy: IfNotPresent # we will try to load image into cluster first.
name: "my-xline"
ports:
Expand Down

0 comments on commit 725d896

Please sign in to comment.