From e6e0e45e843759406b0c1e5deb079df728cca024 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Fri, 24 May 2024 10:19:56 -0600 Subject: [PATCH] Apply review suggestions Signed-off-by: Florent Poinsard --- .gitignore | 2 +- docs/api/index.html | 23 ++++---- .../v2/vitessbackupschedule_methods.go | 4 +- .../v2/vitessbackupschedule_types.go | 27 +++++---- .../planetscale/v2/zz_generated.deepcopy.go | 4 +- .../vitessbackupschedule_controller.go | 56 +++++++++---------- .../reconcile_backup_schedule.go | 4 +- test/endtoend/backup_restore_test.sh | 10 ++-- test/endtoend/backup_schedule_test.sh | 13 +++-- test/endtoend/upgrade_test.sh | 4 +- test/endtoend/utils.sh | 38 ++++++------- test/endtoend/vtorc_vtadmin_test.sh | 28 +++++----- tools/test.env | 2 + 13 files changed, 113 insertions(+), 102 deletions(-) diff --git a/.gitignore b/.gitignore index 86834eaa..53742bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,4 @@ tags .idea/* # End of https://www.gitignore.io/api/go,vim,emacs,visualstudiocode -vtdataroot \ No newline at end of file +**/vtdataroot \ No newline at end of file diff --git a/docs/api/index.html b/docs/api/index.html index 3afaad6f..6bef4cf0 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -2693,7 +2693,7 @@

VitessBackupScheduleTem (Optional) -

Suspend enables the suspension of the VitessBackupSchedule, pausing any further scheduled +

Suspend pauses the associated backup schedule, pausing any further scheduled runs until Suspend is set back to false. This is useful if you want to pause backups without having to remove the entire VitessBackupSchedule object from the cluster.
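For illustration only (not part of this patch), a minimal, hypothetical Go sketch of how a reconcile loop can honor an optional Suspend flag before scheduling anything further; the struct and values are invented for the example:

```go
package main

import "fmt"

type scheduleSpec struct {
	Suspend *bool // mirrors the optional Suspend field described above
}

func main() {
	suspended := true
	spec := scheduleSpec{Suspend: &suspended}

	// A suspended schedule skips further runs but deletes nothing.
	if spec.Suspend != nil && *spec.Suspend {
		fmt.Println("schedule is suspended: skipping this run")
		return
	}
	fmt.Println("schedule is active: the next backup job would be created")
}
```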

@@ -2707,8 +2707,11 @@

VitessBackupScheduleTem (Optional) -

StartingDeadlineSeconds allows for the VitessBackupSchedule controller to start jobs that late -by the given amount of seconds.

+

StartingDeadlineSeconds allows the VitessBackupSchedule controller to start a job even though it is late, by up to the given number of seconds. For example, if the controller processes a schedule run one second after its scheduled time and StartingDeadlineSeconds is set to 0, the job is skipped as too late; if StartingDeadlineSeconds is greater than one second, the job is processed as usual.
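As a rough sketch (not part of this patch), the deadline effectively narrows the window of missed runs the controller is still willing to start. The example below uses the same robfig/cron parser the controller relies on; the schedule, last-run offset, and deadline value are made up:

```go
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	sched, err := cron.ParseStandard("*/5 * * * *") // hypothetical: every five minutes
	if err != nil {
		panic(err)
	}

	now := time.Now()
	lastRun := now.Add(-12 * time.Minute) // pretend the last recorded run was 12 minutes ago

	startingDeadlineSeconds := int64(120) // the field under discussion
	earliest := lastRun
	// Anything scheduled before this point is considered too late to start.
	if deadline := now.Add(-time.Duration(startingDeadlineSeconds) * time.Second); deadline.After(earliest) {
		earliest = deadline
	}

	if next := sched.Next(earliest); next.After(now) {
		fmt.Println("every missed run is older than the deadline; wait until", next)
	} else {
		fmt.Println("a run missed at", next, "is still within the deadline and would be started")
	}
}
```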

@@ -2738,12 +2741,12 @@

VitessBackupScheduleTem (Optional) -

AllowedMissedRun defines how many missed run of the schedule will be allowed before giving up on finding the last job. -If the operator’s clocked is skewed and we end-up missing a certain number of jobs, finding the last -job might be very consuming, depending on the frequency of the schedule and the duration during which -the operator’s clock was misbehaving, also depending on how laggy the clock is, we can end-up with thousands -or missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will allow us to give up finding -the last job and simply wait for the next job on the schedule. +

AllowedMissedRuns defines how many missed runs of the schedule are allowed before giving up on finding the last job.
If the operator’s clock is skewed and we end up missing a certain number of jobs, finding the last
job might be very time-consuming, depending on the frequency of the schedule and the duration during which
the operator’s clock was misbehaving. Also, depending on how laggy the clock is, we can end up with thousands
of missed runs. For this reason, AllowedMissedRuns, which is set to 100 by default, short-circuits the search
and simply waits for the next job on the schedule.
Unless you are experiencing issues with missed runs due to a misconfigured clock, we recommend leaving
this field at its default value.
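A hedged sketch (not part of this patch) of why the search is capped: counting missed runs with the same cron library, the loop below would otherwise iterate thousands of times on a badly skewed clock. The schedule and time offsets are invented for the example:

```go
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	sched, err := cron.ParseStandard("* * * * *") // every minute (illustrative)
	if err != nil {
		panic(err)
	}

	now := time.Now()
	lastRun := now.Add(-48 * time.Hour) // pretend the clock was off for two days

	const allowedMissedRuns = 100 // mirrors the default described above
	missed := 0
	var lastMissed time.Time
	for t := sched.Next(lastRun); !t.After(now); t = sched.Next(t) {
		lastMissed = t
		missed++
		if missed > allowedMissedRuns {
			// Short-circuit: stop searching and just wait for the next scheduled run.
			fmt.Println("too many missed runs; giving up and waiting for", sched.Next(now))
			return
		}
	}
	fmt.Println("last missed run:", lastMissed, "after", missed, "missed runs")
}
```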

@@ -2757,7 +2760,7 @@

VitessBackupScheduleTem (Optional) -

JobTimeoutMinute defines after how many minutes a job that has not yet finished should be stopped and removed. +

JobTimeoutMinutes defines after how many minutes a job that has not yet finished should be stopped and removed. Default value is 10 minutes.
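As a rough illustration (not part of this patch, and simplified from the controller's actual check), a job counts as timed out once its start time plus JobTimeoutMinutes falls in the past; the start time and timeout below are hypothetical:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	jobStartTime := time.Now().Add(-25 * time.Minute) // hypothetical: job started 25 minutes ago
	jobTimeoutMinutes := int32(10)                    // default described above

	deadline := jobStartTime.Add(time.Duration(jobTimeoutMinutes) * time.Minute)
	if time.Now().After(deadline) {
		fmt.Println("job ran past its timeout and would be stopped and removed")
	} else {
		fmt.Println("job is still within its timeout; leave it running until", deadline)
	}
}
```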

diff --git a/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go b/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go index 8cc954c6..86026b49 100644 --- a/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go +++ b/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go @@ -46,8 +46,8 @@ const DefaultAllowedMissedRuns = 100 // GetMissedRunsLimit returns the maximum number of missed run we can allow. // Returns DefaultAllowedMissedRuns if the value was not specified by the user. func (vbsc *VitessBackupSchedule) GetMissedRunsLimit() int { - if vbsc.Spec.AllowedMissedRun == nil { + if vbsc.Spec.AllowedMissedRuns == nil { return DefaultAllowedMissedRuns } - return *vbsc.Spec.AllowedMissedRun + return *vbsc.Spec.AllowedMissedRuns } diff --git a/pkg/apis/planetscale/v2/vitessbackupschedule_types.go b/pkg/apis/planetscale/v2/vitessbackupschedule_types.go index b21c1f69..8e1f3cd0 100644 --- a/pkg/apis/planetscale/v2/vitessbackupschedule_types.go +++ b/pkg/apis/planetscale/v2/vitessbackupschedule_types.go @@ -122,14 +122,17 @@ type VitessBackupScheduleTemplate struct { // +kubebuilder:validation:Minimum=0 FailedJobsHistoryLimit *int32 `json:"failedJobsHistoryLimit,omitempty"` - // Suspend enables the suspension of the VitessBackupSchedule, pausing any further scheduled + // Suspend pause the associated backup schedule. Pausing any further scheduled // runs until Suspend is set to false again. This is useful if you want to pause backup without // having to remove the entire VitessBackupSchedule object from the cluster. // +optional Suspend *bool `json:"suspend,omitempty"` - // StartingDeadlineSeconds allows for the VitessBackupSchedule controller to start jobs that late - // by the given amount of seconds. + // StartingDeadlineSeconds enables the VitessBackupSchedule to start a job even though it is late by + // the given amount of seconds. Let's say for some reason the controller process a schedule run on + // second after its scheduled time, if StartingDeadlineSeconds is set to 0, the job will be skipped + // as it's too late, but on the other hand, if StartingDeadlineSeconds is greater than one second, + // the job will be processed as usual. // +optional // +kubebuilder:validation:Minimum=0 StartingDeadlineSeconds *int64 `json:"startingDeadlineSeconds,omitempty"` @@ -143,24 +146,24 @@ type VitessBackupScheduleTemplate struct { // +kubebuilder:example="Forbid" ConcurrencyPolicy ConcurrencyPolicy `json:"concurrencyPolicy,omitempty"` - // AllowedMissedRun defines how many missed run of the schedule will be allowed before giving up on finding the last job. - // If the operator's clocked is skewed and we end-up missing a certain number of jobs, finding the last - // job might be very consuming, depending on the frequency of the schedule and the duration during which - // the operator's clock was misbehaving, also depending on how laggy the clock is, we can end-up with thousands - // or missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will allow us to give up finding - // the last job and simply wait for the next job on the schedule. + // AllowedMissedRuns defines how many missed run of the schedule will be allowed before giving up on finding the last job. + // If the operator's clock is skewed and we end-up missing a certain number of jobs, finding the last + // job might be very time-consuming, depending on the frequency of the schedule and the duration during which + // the operator's clock was misbehaving. 
Also depending on how laggy the clock is, we can end-up with thousands + // of missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will short circuit the search + // and simply wait for the next job on the schedule. // Unless you are experiencing issue with missed runs due to a misconfiguration of the clock, we recommend leaving // this field to its default value. // +optional // +kubebuilder:validation:Minimum=0 - AllowedMissedRun *int `json:"allowedMissedRun,omitempty"` + AllowedMissedRuns *int `json:"allowedMissedRun,omitempty"` - // JobTimeoutMinute defines after how many minutes a job that has not yet finished should be stopped and removed. + // JobTimeoutMinutes defines after how many minutes a job that has not yet finished should be stopped and removed. // Default value is 10 minutes. // +optional // +kubebuilder:validation:Minimum=0 // +kubebuilder:default=10 - JobTimeoutMinute int32 `json:"jobTimeoutMinute,omitempty"` + JobTimeoutMinutes int32 `json:"jobTimeoutMinute,omitempty"` } // VitessBackupScheduleStrategy defines how we are going to take a backup. diff --git a/pkg/apis/planetscale/v2/zz_generated.deepcopy.go b/pkg/apis/planetscale/v2/zz_generated.deepcopy.go index dbeff8d7..807c99ac 100644 --- a/pkg/apis/planetscale/v2/zz_generated.deepcopy.go +++ b/pkg/apis/planetscale/v2/zz_generated.deepcopy.go @@ -888,8 +888,8 @@ func (in *VitessBackupScheduleTemplate) DeepCopyInto(out *VitessBackupScheduleTe *out = new(int64) **out = **in } - if in.AllowedMissedRun != nil { - in, out := &in.AllowedMissedRun, &out.AllowedMissedRun + if in.AllowedMissedRuns != nil { + in, out := &in.AllowedMissedRuns, &out.AllowedMissedRuns *out = new(int) **out = **in } diff --git a/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go b/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go index 2463f739..135c52f4 100644 --- a/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go +++ b/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go @@ -20,6 +20,7 @@ import ( "context" "flag" "fmt" + "strings" "sort" "time" @@ -186,7 +187,7 @@ func (r *ReconcileVitessBackupsSchedule) Reconcile(ctx context.Context, req ctrl r.cleanupJobsWithLimit(ctx, jobs.failed, vbsc.GetFailedJobsLimit()) r.cleanupJobsWithLimit(ctx, jobs.successful, vbsc.GetSuccessfulJobsLimit()) - err = r.removeTimeoutJobs(ctx, jobs.successful, vbsc.Spec.JobTimeoutMinute) + err = r.removeTimeoutJobs(ctx, jobs.successful, vbsc.Spec.JobTimeoutMinutes) if err != nil { // We had an error while removing timed out jobs, we can requeue return resultBuilder.Error(err) @@ -262,36 +263,35 @@ func (r *ReconcileVitessBackupsSchedule) Reconcile(ctx context.Context, req ctrl func getNextSchedule(vbsc planetscalev2.VitessBackupSchedule, now time.Time) (time.Time, time.Time, error) { sched, err := cron.ParseStandard(vbsc.Spec.Schedule) if err != nil { - return time.Time{}, time.Time{}, fmt.Errorf("unparaseable schedule %q: %v", vbsc.Spec.Schedule, err) + return time.Time{}, time.Time{}, fmt.Errorf("unable to parse schedule %q: %v", vbsc.Spec.Schedule, err) } - // for optimization purposes, cheat a bit and start from our last observed run time - // we could reconstitute this here, but there's not much point, since we've - // just updated it. - var earliestTime time.Time + // Set the last scheduled time by either looking at the VitessBackupSchedule's Status or + // by looking at its creation time. 
+ var latestRun time.Time if vbsc.Status.LastScheduledTime != nil { - earliestTime = vbsc.Status.LastScheduledTime.Time + latestRun = vbsc.Status.LastScheduledTime.Time } else { - earliestTime = vbsc.ObjectMeta.CreationTimestamp.Time + latestRun = vbsc.ObjectMeta.CreationTimestamp.Time } if vbsc.Spec.StartingDeadlineSeconds != nil { // controller is not going to schedule anything below this point schedulingDeadline := now.Add(-time.Second * time.Duration(*vbsc.Spec.StartingDeadlineSeconds)) - if schedulingDeadline.After(earliestTime) { - earliestTime = schedulingDeadline + if schedulingDeadline.After(latestRun) { + latestRun = schedulingDeadline } } // Next schedule is later, simply return the next scheduled time. - if earliestTime.After(now) { + if latestRun.After(now) { return time.Time{}, sched.Next(now), nil } var lastMissed time.Time missedRuns := 0 - for t := sched.Next(earliestTime); !t.After(now); t = sched.Next(t) { + for t := sched.Next(latestRun); !t.After(now); t = sched.Next(t) { lastMissed = t missedRuns++ @@ -311,7 +311,7 @@ func (r *ReconcileVitessBackupsSchedule) updateVitessBackupScheduleStatus(ctx co vbsc.Status.LastScheduledTime = nil } - vbsc.Status.Active = nil + vbsc.Status.Active = make([]corev1.ObjectReference, 0, len(activeJobs)) for _, activeJob := range activeJobs { jobRef, err := ref.GetReference(r.scheme, activeJob) if err != nil { @@ -361,12 +361,8 @@ func (r *ReconcileVitessBackupsSchedule) getJobsList(ctx context.Context, req ct log.WithError(err).Errorf("unable to parse schedule time for existing job, found: %s", job.Annotations[scheduledTimeAnnotation]) continue } - if scheduledTimeForJob != nil { - if mostRecentTime == nil { - mostRecentTime = scheduledTimeForJob - } else if mostRecentTime.Before(*scheduledTimeForJob) { - mostRecentTime = scheduledTimeForJob - } + if scheduledTimeForJob != nil && (mostRecentTime == nil || mostRecentTime.Before(*scheduledTimeForJob)) { + mostRecentTime = scheduledTimeForJob } } return jobs, mostRecentTime, nil @@ -407,7 +403,7 @@ func (r *ReconcileVitessBackupsSchedule) removeTimeoutJobs(ctx context.Context, if err != nil { return err } - if jobStartTime.After(time.Now().Add(time.Minute * time.Duration(timeout))) { + if jobStartTime.Add(time.Minute * time.Duration(timeout)).After(time.Now()) { if err := r.client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); (err) != nil { log.WithError(err).Errorf("unable to delete timed out job: %s", job.Name) } else { @@ -495,28 +491,28 @@ func (r *ReconcileVitessBackupsSchedule) createJobPod(ctx context.Context, vbsc // ensures that there will be at least one item in this list. The YAML cannot be applied with // empty list of strategies. args := []string{"/bin/sh", "-c"} - var cmd string + var cmd strings.Builder for i, strategy := range vbsc.Spec.Strategy { if i > 0 { - cmd = fmt.Sprintf("%s && ", cmd) + cmd.WriteString(fmt.Sprintf("%s && ", cmd)) } // At this point, strategy.Name is either BackupShard or BackupTablet, the validation // is made at the CRD level on the YAML directly. 
- cmd = fmt.Sprintf("%s%s %s", cmd, vtctldclientPath, vtctldclientServerArg) + cmd.WriteString(fmt.Sprintf("%s%s %s", cmd, vtctldclientPath, vtctldclientServerArg)) // Add the vtctldclient command switch strategy.Name { case planetscalev2.BackupShard: - cmd = fmt.Sprintf("%s BackupShard", cmd) + cmd.WriteString(fmt.Sprintf("%s BackupShard", cmd)) case planetscalev2.BackupTablet: - cmd = fmt.Sprintf("%s Backup", cmd) + cmd.WriteString(fmt.Sprintf("%s Backup", cmd)) } // Add any flags for key, value := range strategy.ExtraFlags { - cmd = fmt.Sprintf("%s --%s=%s", cmd, key, value) + cmd.WriteString(fmt.Sprintf("%s --%s=%s", cmd, key, value)) } // Add keyspace/shard or tablet alias @@ -525,15 +521,15 @@ func (r *ReconcileVitessBackupsSchedule) createJobPod(ctx context.Context, vbsc if strategy.KeyspaceShard == "" { return pod, fmt.Errorf("the KeyspaceShard field is missing from VitessBackupScheduleStrategy %s", planetscalev2.BackupShard) } - cmd = fmt.Sprintf("%s %s", cmd, strategy.KeyspaceShard) + cmd.WriteString(fmt.Sprintf("%s %s", cmd, strategy.KeyspaceShard)) case planetscalev2.BackupTablet: if strategy.TabletAlias == "" { return pod, fmt.Errorf("the TabletAlias field is missing from VitessBackupScheduleStrategy %s", planetscalev2.BackupTablet) } - cmd = fmt.Sprintf("%s %s", cmd, strategy.TabletAlias) + cmd.WriteString(fmt.Sprintf("%s %s", cmd, strategy.TabletAlias)) } } - args = append(args, cmd) + args = append(args, cmd.String()) pod = corev1.PodSpec{ Containers: []corev1.Container{{ @@ -572,7 +568,7 @@ func (r *ReconcileVitessBackupsSchedule) getVtctldServiceName(ctx context.Contex } if svcName == "" || svcPort == 0 { - return "", 0, fmt.Errorf("no vtctld service in %q found", vbsc.Namespace) + return "", 0, fmt.Errorf("no vtctld service found in %q namespace", vbsc.Namespace) } return svcName, svcPort, nil } diff --git a/pkg/controller/vitesscluster/reconcile_backup_schedule.go b/pkg/controller/vitesscluster/reconcile_backup_schedule.go index f8d777b3..77216534 100644 --- a/pkg/controller/vitesscluster/reconcile_backup_schedule.go +++ b/pkg/controller/vitesscluster/reconcile_backup_schedule.go @@ -34,8 +34,8 @@ func (r *ReconcileVitessCluster) reconcileBackupSchedule(ctx context.Context, vt // Generate keys (object names) for all desired cells. // Keep a map back from generated names to the specs. - var keys []client.ObjectKey - scheduleMap := make(map[client.ObjectKey]*planetscalev2.VitessBackupScheduleTemplate) + keys := make([]client.ObjectKey, 0, len(vt.Spec.Backup.Schedules)) + scheduleMap := make(map[client.ObjectKey]*planetscalev2.VitessBackupScheduleTemplate, len(vt.Spec.Backup.Schedules)) if vt.Spec.Backup != nil { for i := range vt.Spec.Backup.Schedules { schedule := &vt.Spec.Backup.Schedules[i] diff --git a/test/endtoend/backup_restore_test.sh b/test/endtoend/backup_restore_test.sh index e79dde58..69d4bf4a 100755 --- a/test/endtoend/backup_restore_test.sh +++ b/test/endtoend/backup_restore_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -30,14 +32,14 @@ function resurrectShard() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 @@ -76,7 +78,7 @@ EOF } function setupKindConfig() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we can't mount the current # working directory to kind. The current directory in the docker is workdir # So if we try and mount that, we get an error. Instead we need to mount the @@ -99,7 +101,7 @@ docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Setting up the kind config" setupKindConfig echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/backup_schedule_test.sh b/test/endtoend/backup_schedule_test.sh index 6710cddb..050b699d 100755 --- a/test/endtoend/backup_schedule_test.sh +++ b/test/endtoend/backup_schedule_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -15,9 +17,8 @@ function checkVitessBackupScheduleStatusWithTimeout() { regex=$1 for i in {1..1200} ; do - out=$(kubectl get VitessBackupSchedule) - echo "$out" | grep -E "$regex" | wc -l | grep "1" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + kubectl get VitessBackupSchedule | grep -E "$regex" | wc -l | grep "1" > /dev/null 2>&1 + if [[ $? -eq 0 ]]; then echo "$regex found" return fi @@ -38,7 +39,7 @@ function verifyListBackupsOutputWithSchedule() { sleep 360 backupCount=$(kubectl get vtb --no-headers | wc -l) - if [ "${backupCount}" -lt 7 ]; then + if [[ "${backupCount}" -lt 7 ]]; then echo "Did not find at least 7 backups" return 0 fi @@ -49,7 +50,7 @@ function verifyListBackupsOutputWithSchedule() { } function setupKindConfig() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we can't mount the current # working directory to kind. The current directory in the docker is workdir # So if we try and mount that, we get an error. Instead we need to mount the @@ -72,7 +73,7 @@ docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Setting up the kind config" setupKindConfig echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/upgrade_test.sh b/test/endtoend/upgrade_test.sh index 22d6e1cc..d3033b58 100755 --- a/test/endtoend/upgrade_test.sh +++ b/test/endtoend/upgrade_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -236,7 +238,7 @@ EOF echo "Building the docker image" docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . 
echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/utils.sh b/test/endtoend/utils.sh index d58a5eab..21624ea4 100644 --- a/test/endtoend/utils.sh +++ b/test/endtoend/utils.sh @@ -20,7 +20,7 @@ function checkSemiSyncWithRetry() { vttablet=$1 for i in {1..600} ; do kubectl exec "$vttablet" -c mysqld -- mysql -S "/vt/socket/mysql.sock" -u root -e "show variables like 'rpl_semi_sync_%_enabled'" | grep "ON" - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi sleep 1 @@ -44,7 +44,7 @@ function runSQLWithRetry() { query=$1 for i in {1..600} ; do mysql -e "$query" - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi echo "failed to run query $query, retrying (attempt #$i) ..." @@ -94,7 +94,7 @@ function takeBackup() { for i in {1..600} ; do out=$(kubectl get vtb --no-headers | wc -l) echo "$out" | grep "$finalBackupCount" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "Backup created" return 0 fi @@ -109,7 +109,7 @@ function verifyListBackupsOutput() { for i in {1..600} ; do out=$(vtctldclient LegacyVtctlCommand -- ListBackups "$keyspaceShard" | wc -l) echo "$out" | grep "$backupCount" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "ListBackupsOutputCorrect" return 0 fi @@ -134,7 +134,7 @@ function checkPodStatusWithTimeout() { nb=$2 # Number of pods to match defaults to one - if [ -z "$nb" ]; then + if [[ -z "$nb" ]]; then nb=1 fi @@ -143,7 +143,7 @@ function checkPodStatusWithTimeout() { for i in {1..1200} ; do out=$(kubectl get pods) echo "$out" | grep -E "$regex" | wc -l | grep "$nb" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$regex found" return fi @@ -151,7 +151,7 @@ function checkPodStatusWithTimeout() { done echo -e "ERROR: checkPodStatusWithTimeout timeout to find pod matching:\ngot:\n$out\nfor regex: $regex" echo "$regex" | grep "vttablet" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then printMysqlErrorFiles fi exit 1 @@ -171,7 +171,7 @@ function ensurePodResourcesSet() { numContainers=$(echo "$out" | grep -E "$regex" | awk '{print $2}' | awk -F ',' '{print NF}') numContainersWithResources=$(echo "$out" | grep -E "$regex" | awk '{print $3}' | awk -F ',' '{print NF}') - if [ $numContainers != $numContainersWithResources ]; then + if [[ $numContainers != $numContainersWithResources ]]; then echo "one or more containers in pods with $regex do not have $resource set" exit 1 fi @@ -181,7 +181,7 @@ function ensurePodResourcesSet() { function insertWithRetry() { for i in {1..600} ; do mysql --table < ../common/delete_commerce_data.sql && mysql --table < ../common/insert_commerce_data.sql - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi echo "failed to insert commerce data, retrying (attempt #$i) ..." @@ -194,7 +194,7 @@ function verifyVtGateVersion() { podName=$(kubectl get pods --no-headers -o custom-columns=":metadata.name" | grep "vtgate") data=$(kubectl logs "$podName" | head) echo "$data" | grep "$version" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo -e "The vtgate version is incorrect, expected: $version, got:\n$data" exit 1 fi @@ -208,7 +208,7 @@ function verifyDurabilityPolicy() { durabilityPolicy=$2 data=$(vtctldclient LegacyVtctlCommand -- GetKeyspace "$keyspace") echo "$data" | grep "\"durability_policy\": \"$durabilityPolicy\"" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The durability policy in $keyspace is incorrect, got:\n$data" exit 1 fi @@ -221,7 +221,7 @@ function waitForKeyspaceToBeServing() { for i in {1..600} ; do out=$(mysql --table --execute="show vitess_tablets") numtablets=$(echo "$out" | grep -E "$ks(.*)$shard(.*)PRIMARY(.*)SERVING|$ks(.*)$shard(.*)REPLICA(.*)SERVING" | wc -l) - if [[ $numtablets -ge $((nb_of_replica+1)) ]]; then + if [[[ $numtablets -ge $((nb_of_replica+1)) ]]]; then echo "Shard $ks/$shard is serving" return fi @@ -237,10 +237,10 @@ function applySchemaWithRetry() { drop_sql=$3 for i in {1..600} ; do vtctldclient ApplySchema --sql-file="$schema" $ks - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi - if [ -n "$drop_sql" ]; then + if [[ -n "$drop_sql" ]]; then mysql --table < $drop_sql fi echo "failed to apply schema $schema, retrying (attempt #$i) ..." @@ -254,14 +254,14 @@ function assertSelect() { expected=$3 data=$(mysql --table < $sql) echo "$data" | grep "$expected" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $shard's tables is incorrect, got:\n$data" exit 1 fi } function setupKubectlAccessForCI() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we need to do stuff # https://github.com/kubernetes-sigs/kind/issues/1846#issuecomment-691565834 # Since kind is running in a sibling container, communicating with it through kubectl is not trivial. @@ -301,7 +301,7 @@ function get_started() { applySchemaWithRetry create_commerce_schema.sql commerce drop_all_commerce_tables.sql vtctldclient ApplyVSchema --vschema-file="vschema_commerce_initial.json" commerce - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "ApplySchema failed for initial commerce" printMysqlErrorFiles exit 1 @@ -309,14 +309,14 @@ function get_started() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 diff --git a/test/endtoend/vtorc_vtadmin_test.sh b/test/endtoend/vtorc_vtadmin_test.sh index de515671..4104a452 100755 --- a/test/endtoend/vtorc_vtadmin_test.sh +++ b/test/endtoend/vtorc_vtadmin_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -31,7 +33,7 @@ function get_started_vtorc_vtadmin() { applySchemaWithRetry create_commerce_schema.sql commerce drop_all_commerce_tables.sql vtctldclient ApplyVSchema --vschema-file="vschema_commerce_initial.json" commerce - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "ApplySchema failed for initial commerce" printMysqlErrorFiles exit 1 @@ -39,14 +41,14 @@ function get_started_vtorc_vtadmin() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 @@ -159,9 +161,9 @@ function chromiumHeadlessRequest() { for i in {1..600} ; do chromiumBinary=$(getChromiumBinaryName) res=$($chromiumBinary --headless --no-sandbox --disable-gpu --enable-logging --dump-dom --virtual-time-budget=900000000 "$url") - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $url is incorrect, got:\n$res, retrying" sleep 1 continue @@ -175,12 +177,12 @@ function chromiumHeadlessRequest() { function getChromiumBinaryName() { which chromium-browser > /dev/null - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "chromium-browser" return fi which chromium > /dev/null - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "chromium" return fi @@ -191,9 +193,9 @@ function curlGetRequestWithRetry() { dataToAssert=$2 for i in {1..600} ; do res=$(curl "$url") - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $url is incorrect, got:\n$res" exit 1 fi @@ -208,12 +210,12 @@ function curlDeleteRequest() { url=$1 dataToAssert=$2 res=$(curl -X DELETE "$url") - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The DELETE request to $url failed\n" exit 1 fi echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in delete request to $url is incorrect, got:\n$res" exit 1 fi @@ -223,7 +225,7 @@ function curlPostRequest() { url=$1 data=$2 curl -X POST -d "$data" "$url" - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The POST request to $url with data $data failed\n" exit 1 fi @@ -233,7 +235,7 @@ function curlPostRequest() { echo "Building the docker image" docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/tools/test.env b/tools/test.env index c427317c..6532efd6 100755 --- a/tools/test.env +++ b/tools/test.env @@ -3,3 +3,5 @@ # We add the tools/_bin directory to PATH variable # since this is where we install the binaries that are needed export PATH="$PATH:$PWD/tools/_bin" + +export KIND_VERSION=kindest/node:v1.28.0 \ No newline at end of file