Skip to content

Commit

Permalink
ci: add terraform setup retry mechanism
Browse files Browse the repository at this point in the history
Signed-off-by: Yang Chiu <[email protected]>
  • Loading branch information
yangchiu authored and David Ko committed Sep 11, 2023
1 parent 914bba8 commit f39c8e2
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 92 deletions.
32 changes: 19 additions & 13 deletions test_framework/scripts/cleanup.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
#!/usr/bin/env bash

# terminate any terraform processes
# NOTE(review): this takes column 1 of `ps aux`, which is the PID with BusyBox
# ps but the USER with procps ps -- confirm which ps the runtime image ships.
TERRAFORM_PIDS=( $(ps aux | grep -i terraform | grep -v grep | awk '{printf("%s ",$1)}') )
if [[ ${#TERRAFORM_PIDS[@]} -gt 0 ]]; then
  for PID in "${TERRAFORM_PIDS[@]}"; do
    # Signal the PID of this iteration; "${TERRAFORM_PIDS}" without an index
    # is only the first array element.
    kill "${PID}"
  done
fi
#######################################
# Stop running terraform processes, then destroy the terraform-managed
# infrastructure of the current test run.
# Globals:
#   TF_VAR_k8s_distro_name       (read)
#   TF_VAR_tf_workspace          (read)
#   LONGHORN_TEST_CLOUDPROVIDER  (read)
#   DISTRO                       (read; set to the distro name for aks/eks)
# Returns:
#   exit status of `terraform destroy`
#######################################
cleanup(){
  local pid
  local -a terraform_pids=()

  # terminate any terraform processes, but not the terraform-setup script
  # that may be the caller of this function
  # NOTE(review): this takes column 1 of `ps aux`, which is the PID with
  # BusyBox ps but the USER with procps ps -- confirm the runtime image.
  terraform_pids=( $(ps aux | grep -i terraform | grep -v grep | grep -v terraform-setup | awk '{print $1}') )
  for pid in "${terraform_pids[@]}"; do
    # kill the PID of this iteration, not just the first array element
    kill "${pid}"
  done

  # wait 30 seconds for graceful terraform termination
  sleep 30

  # for aks/eks the terraform directory is named after the k8s distro
  if [[ ${TF_VAR_k8s_distro_name} == "aks" ]] || [[ ${TF_VAR_k8s_distro_name} == "eks" ]]; then
    DISTRO=${TF_VAR_k8s_distro_name}
  fi

  terraform -chdir="${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}" destroy -auto-approve -no-color
}

# Run cleanup only when this file is executed directly. When the file is
# sourced, BASH_SOURCE[0] and $0 name different files, the -ef (same file)
# test fails, and only the cleanup() definition is made available.
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
cleanup
fi
31 changes: 3 additions & 28 deletions test_framework/scripts/download-support-bundle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,13 @@

set -ex

source test_framework/scripts/kubeconfig.sh

SUPPORT_BUNDLE_FILE_NAME=${1:-"lh-support-bundle.zip"}
SUPPORT_BUNDLE_ISSUE_URL=${2:-""}
SUPPORT_BUNDLE_ISSUE_DESC=${3:-"Auto-generated support buundle"}

#######################################
# Export KUBECONFIG for the cluster under test.
# Globals:
#   TF_VAR_k8s_distro_name, LONGHORN_TEST_CLOUDPROVIDER, DISTRO (read)
#   KUBECONFIG (exported)
# Arguments:
#   $1 - architecture; only "amd64" and "arm64" are handled, anything else
#        leaves KUBECONFIG untouched
#   $2 - base directory the kubeconfig paths are built from
#######################################
set_kubeconfig_envvar(){
  local arch=${1}
  local base_dir=${2}
  local tf_dir="${base_dir}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}"

  case "${arch}" in
    amd64)
      case "${TF_VAR_k8s_distro_name}" in
        # rke / rke2 are matched case-insensitively
        [rR][kK][eE])  export KUBECONFIG="${base_dir}/kube_config_rke.yml" ;;
        [rR][kK][eE]2) export KUBECONFIG="${tf_dir}/rke2.yaml" ;;
        aks)           export KUBECONFIG="${base_dir}/aks.yml" ;;
        eks)           export KUBECONFIG="${base_dir}/eks.yml" ;;
        *)             export KUBECONFIG="${tf_dir}/k3s.yaml" ;;
      esac
      ;;
    arm64)
      # no rke/rke2 branches on arm64: those names fall through to k3s
      case "${TF_VAR_k8s_distro_name}" in
        aks) export KUBECONFIG="${base_dir}/aks.yml" ;;
        eks) export KUBECONFIG="${base_dir}/eks.yml" ;;
        *)   export KUBECONFIG="${tf_dir}/k3s.yaml" ;;
      esac
      ;;
  esac
}

set_kubeconfig_envvar ${TF_VAR_arch} ${TF_VAR_tf_workspace}
set_kubeconfig

LH_FRONTEND_ADDR=`kubectl get svc -n longhorn-system longhorn-frontend -o json | jq -r '.spec.clusterIP + ":" + (.spec.ports[0].port|tostring)'`

Expand Down
13 changes: 13 additions & 0 deletions test_framework/scripts/kubeconfig.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#######################################
# Export KUBECONFIG for the cluster selected by TF_VAR_k8s_distro_name.
# All paths are relative to the repository root.
# Globals:
#   TF_VAR_k8s_distro_name, LONGHORN_TEST_CLOUDPROVIDER, DISTRO (read)
#   KUBECONFIG (exported)
# A distro name other than rke, rke2, k3s, aks or eks (exact, case-sensitive)
# leaves KUBECONFIG untouched.
#######################################
set_kubeconfig(){
  case "${TF_VAR_k8s_distro_name}" in
    rke)
      export KUBECONFIG="test_framework/kube_config_rke.yml"
      ;;
    rke2)
      export KUBECONFIG="test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/rke2.yaml"
      ;;
    k3s)
      export KUBECONFIG="test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/k3s.yaml"
      ;;
    aks)
      export KUBECONFIG="test_framework/aks.yml"
      ;;
    eks)
      export KUBECONFIG="test_framework/eks.yml"
      ;;
  esac
}
31 changes: 3 additions & 28 deletions test_framework/scripts/longhorn-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

set -x

source test_framework/scripts/kubeconfig.sh

# create and clean tmpdir
TMPDIR="/tmp/longhorn"
mkdir -p ${TMPDIR}
Expand All @@ -21,33 +23,6 @@ LONGHORN_MANIFEST_URL="https://raw.githubusercontent.com/longhorn/longhorn/${LON
LONGHORN_REPO_URL="https://github.com/longhorn/longhorn"
LONGHORN_REPO_DIR="${TMPDIR}/longhorn"

#######################################
# Export KUBECONFIG for the cluster under test.
# Globals:
#   ARCH, BASEDIR (written: copies of $1 and $2, not local)
#   TF_VAR_k8s_distro_name, LONGHORN_TEST_CLOUDPROVIDER, DISTRO (read)
#   KUBECONFIG (exported)
# Arguments:
#   $1 - architecture; only "amd64" and "arm64" select a kubeconfig
#   $2 - base directory the kubeconfig paths are built from
#######################################
set_kubeconfig_envvar(){
  ARCH=${1}
  BASEDIR=${2}

  # any other architecture leaves KUBECONFIG as it was
  [[ ${ARCH} == "amd64" || ${ARCH} == "arm64" ]] || return 0

  # dispatch on "<arch>:<distro>"; rke/rke2 (any letter case) exist for amd64 only
  case "${ARCH}:${TF_VAR_k8s_distro_name}" in
    amd64:[rR][kK][eE])
      export KUBECONFIG="${BASEDIR}/kube_config_rke.yml"
      ;;
    amd64:[rR][kK][eE]2)
      export KUBECONFIG="${BASEDIR}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/rke2.yaml"
      ;;
    amd64:aks|arm64:aks)
      export KUBECONFIG="${BASEDIR}/aks.yml"
      ;;
    amd64:eks|arm64:eks)
      export KUBECONFIG="${BASEDIR}/eks.yml"
      ;;
    *)
      # everything else is treated as k3s
      export KUBECONFIG="${BASEDIR}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/k3s.yaml"
      ;;
  esac
}


create_admin_service_account(){
kubectl apply -f "${TF_VAR_tf_workspace}/templates/kubeconfig_service_account.yaml"
Expand Down Expand Up @@ -429,7 +404,7 @@ run_longhorn_tests(){


main(){
set_kubeconfig_envvar ${TF_VAR_arch} ${TF_VAR_tf_workspace}
set_kubeconfig

if [[ ${DISTRO} == "rhel" ]] || [[ ${DISTRO} == "rockylinux" ]] || [[ ${DISTRO} == "oracle" ]]; then
apply_selinux_workaround
Expand Down
59 changes: 39 additions & 20 deletions test_framework/scripts/terraform-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,54 @@

set -x

if [[ ${TF_VAR_k8s_distro_name} == "aks" ]] || [[ ${TF_VAR_k8s_distro_name} == "eks" ]]; then
DISTRO=${TF_VAR_k8s_distro_name}
fi
source test_framework/scripts/kubeconfig.sh
source test_framework/scripts/cleanup.sh

terraform_setup(){
if [[ ${TF_VAR_k8s_distro_name} == "aks" ]] || [[ ${TF_VAR_k8s_distro_name} == "eks" ]]; then
DISTRO=${TF_VAR_k8s_distro_name}
fi

if [[ ${TF_VAR_arch} == "amd64" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} init
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} apply -auto-approve -no-color
if [[ ${TF_VAR_k8s_distro_name} =~ [rR][kK][eE] ]]; then

if [[ ${TF_VAR_k8s_distro_name} == "rke" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} apply -auto-approve -no-color -refresh-only
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw rke_config > ${TF_VAR_tf_workspace}/rke.yml
sleep 30
rke up --config ${TF_VAR_tf_workspace}/rke.yml
fi
else
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} init
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} apply -auto-approve -no-color
fi

if [[ ${TF_VAR_k8s_distro_name} == "aks" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw kubeconfig > ${TF_VAR_tf_workspace}/aks.yml
sleep 120
fi
if [[ ${TF_VAR_k8s_distro_name} == "aks" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw kubeconfig > ${TF_VAR_tf_workspace}/aks.yml
sleep 120
fi

if [[ ${TF_VAR_k8s_distro_name} == "eks" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw kubeconfig > ${TF_VAR_tf_workspace}/eks.yml
fi
if [[ ${TF_VAR_k8s_distro_name} == "eks" ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw kubeconfig > ${TF_VAR_tf_workspace}/eks.yml
fi

if [[ "${TF_VAR_create_load_balancer}" == true ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw load_balancer_url > ${TF_VAR_tf_workspace}/load_balancer_url
fi
}

if [[ "${TF_VAR_create_load_balancer}" == true ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw load_balancer_url > ${TF_VAR_tf_workspace}/load_balancer_url
fi

exit $?
# Entry point, active only when this file is executed directly (not sourced).
# Provision the cluster and check that `kubectl get pods -A` lists at least
# one Running pod. If it does not, tear everything down with cleanup and try
# again, up to MAX_RETRY attempts in total. Exit 1 if no attempt succeeds.
if [[ "${BASH_SOURCE[0]}" -ef "$0" ]]; then
  CLUSTER_READY=false
  MAX_RETRY=3
  RETRY=0

  until [[ "${CLUSTER_READY}" != false ]] || [[ ${RETRY} -ge ${MAX_RETRY} ]]; do
    terraform_setup
    set_kubeconfig
    if kubectl get pods -A | grep -q 'Running'; then
      CLUSTER_READY=true
    else
      # failed attempt: destroy the partial infrastructure before retrying
      cleanup
      RETRY=$((RETRY+1))
    fi
  done

  [[ "${CLUSTER_READY}" != false ]] || exit 1
fi
2 changes: 1 addition & 1 deletion test_framework/terraform/aws/sles/k3s_instances.tf
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ resource "null_resource" "rsync_kubeconfig_file" {
inline = [
"cloud-init status --wait",
"if [ \"`cloud-init status | grep error`\" ]; then sudo cat /var/log/cloud-init-output.log; fi",
"until([ -f /etc/rancher/k3s/k3s.yaml ] && [ `sudo /usr/local/bin/kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == \"Ready\").status' | grep -ci true` -eq $((${var.lh_aws_instance_count_controlplane} + ${var.lh_aws_instance_count_worker})) ]); do echo \"waiting for k3s cluster nodes to be running\"; sleep 2; done"
"RETRY=0; MAX_RETRY=450; until([ -f /etc/rancher/k3s/k3s.yaml ] && [ `sudo /usr/local/bin/kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == \"Ready\").status' | grep -ci true` -eq $((${var.lh_aws_instance_count_controlplane} + ${var.lh_aws_instance_count_worker})) ]); do echo \"waiting for k3s cluster nodes to be running\"; sleep 2; if [ $RETRY -eq $MAX_RETRY ]; then break; fi; RETRY=$((RETRY+1)); done"
]

connection {
Expand Down
2 changes: 1 addition & 1 deletion test_framework/terraform/aws/sles/rke2_instances.tf
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ resource "null_resource" "rsync_kubeconfig_file_rke2" {
]

provisioner "remote-exec" {
inline = ["until([ -f /etc/rancher/rke2/rke2.yaml ] && [ `sudo KUBECONFIG=/etc/rancher/rke2/rke2.yaml /var/lib/rancher/rke2/bin/kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == \"Ready\").status' | grep -ci true` -eq $((${var.lh_aws_instance_count_controlplane} + ${var.lh_aws_instance_count_worker})) ]); do echo \"waiting for rke2 cluster nodes to be running\"; sleep 2; done"]
inline = ["RETRY=0; MAX_RETRY=450; until([ -f /etc/rancher/rke2/rke2.yaml ] && [ `sudo KUBECONFIG=/etc/rancher/rke2/rke2.yaml /var/lib/rancher/rke2/bin/kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == \"Ready\").status' | grep -ci true` -eq $((${var.lh_aws_instance_count_controlplane} + ${var.lh_aws_instance_count_worker})) ]); do echo \"waiting for rke2 cluster nodes to be running\"; sleep 2; if [ $RETRY -eq $MAX_RETRY ]; then break; fi; RETRY=$((RETRY+1)); done"]


connection {
Expand Down
2 changes: 1 addition & 1 deletion test_framework/terraform/aws/sles/rke_instances.tf
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ resource "null_resource" "wait_for_docker_start_controlplane" {

provisioner "remote-exec" {

inline = ["until( systemctl is-active docker.service ); do echo \"waiting for docker to start \"; sleep 2; done"]
inline = ["until( systemctl is-active docker.service ); do echo \"waiting for docker to start \"; sleep 2; done"]

connection {
type = "ssh"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@ until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --node-taint "nod
sleep 2
done

# Poll every 5 seconds until `kubectl get pods -A` shows a Running pod.
# The loop gives up silently once RETRY has reached MAX_RETRY (about 15
# minutes), so the rest of the script continues even if the cluster is not up.
MAX_RETRY=180
RETRY=0
while ! (kubectl get pods -A | grep 'Running'); do
  echo 'Waiting for k3s startup'
  sleep 5
  if [ "$RETRY" -eq "$MAX_RETRY" ]; then
    break
  fi
  RETRY=$((RETRY+1))
done

Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ systemctl start rke2-server.service
# TODO: It looks like "set -e" will break the intended functionality of the remaining code. Consider a refactor.
set +e

# Poll every 5 seconds, using the rke2-bundled kubectl and kubeconfig, until
# a Running pod is listed. The loop gives up silently once RETRY has reached
# MAX_RETRY (about 15 minutes), so the script continues either way.
MAX_RETRY=180
RETRY=0
while ! (KUBECONFIG=/etc/rancher/rke2/rke2.yaml /var/lib/rancher/rke2/bin/kubectl get pods -A | grep 'Running'); do
  echo 'Waiting for rke2 startup'
  sleep 5
  if [ "$RETRY" -eq "$MAX_RETRY" ]; then
    break
  fi
  RETRY=$((RETRY+1))
done

0 comments on commit f39c8e2

Please sign in to comment.