From e6e0e45e843759406b0c1e5deb079df728cca024 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Fri, 24 May 2024 10:19:56 -0600 Subject: [PATCH] Apply review suggestions Signed-off-by: Florent Poinsard --- .gitignore | 2 +- docs/api/index.html | 23 ++++---- .../v2/vitessbackupschedule_methods.go | 4 +- .../v2/vitessbackupschedule_types.go | 27 +++++---- .../planetscale/v2/zz_generated.deepcopy.go | 4 +- .../vitessbackupschedule_controller.go | 56 +++++++++---------- .../reconcile_backup_schedule.go | 4 +- test/endtoend/backup_restore_test.sh | 10 ++-- test/endtoend/backup_schedule_test.sh | 13 +++-- test/endtoend/upgrade_test.sh | 4 +- test/endtoend/utils.sh | 38 ++++++------- test/endtoend/vtorc_vtadmin_test.sh | 28 +++++----- tools/test.env | 2 + 13 files changed, 113 insertions(+), 102 deletions(-) diff --git a/.gitignore b/.gitignore index 86834eaa..53742bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,4 @@ tags .idea/* # End of https://www.gitignore.io/api/go,vim,emacs,visualstudiocode -vtdataroot \ No newline at end of file +**/vtdataroot \ No newline at end of file diff --git a/docs/api/index.html b/docs/api/index.html index 3afaad6f..6bef4cf0 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -2693,7 +2693,7 @@

VitessBackupScheduleTem (Optional) -

Suspend enables the suspension of the VitessBackupSchedule, pausing any further scheduled +

Suspend pauses the associated backup schedule, pausing any further scheduled runs until Suspend is set back to false. This is useful if you want to pause backups without having to remove the entire VitessBackupSchedule object from the cluster.
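For illustration only (not part of this patch), a minimal, hypothetical Go sketch of how a reconcile loop can honor an optional Suspend flag before scheduling anything further; the struct and values are invented for the example:

```go
package main

import "fmt"

type scheduleSpec struct {
	Suspend *bool // mirrors the optional Suspend field described above
}

func main() {
	suspended := true
	spec := scheduleSpec{Suspend: &suspended}

	// A suspended schedule skips further runs but deletes nothing.
	if spec.Suspend != nil && *spec.Suspend {
		fmt.Println("schedule is suspended: skipping this run")
		return
	}
	fmt.Println("schedule is active: the next backup job would be created")
}
```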

@@ -2707,8 +2707,11 @@

VitessBackupScheduleTem (Optional) -

StartingDeadlineSeconds allows for the VitessBackupSchedule controller to start jobs that late -by the given amount of seconds.

+

StartingDeadlineSeconds allows the VitessBackupSchedule controller to start a job even though it is late, by up to the given number of seconds. For example, if the controller processes a schedule run one second after its scheduled time and StartingDeadlineSeconds is set to 0, the job is skipped as too late; if StartingDeadlineSeconds is greater than one second, the job is processed as usual.
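As a rough sketch (not part of this patch), the deadline effectively narrows the window of missed runs the controller is still willing to start. The example below uses the same robfig/cron parser the controller relies on; the schedule, last-run offset, and deadline value are made up:

```go
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	sched, err := cron.ParseStandard("*/5 * * * *") // hypothetical: every five minutes
	if err != nil {
		panic(err)
	}

	now := time.Now()
	lastRun := now.Add(-12 * time.Minute) // pretend the last recorded run was 12 minutes ago

	startingDeadlineSeconds := int64(120) // the field under discussion
	earliest := lastRun
	// Anything scheduled before this point is considered too late to start.
	if deadline := now.Add(-time.Duration(startingDeadlineSeconds) * time.Second); deadline.After(earliest) {
		earliest = deadline
	}

	if next := sched.Next(earliest); next.After(now) {
		fmt.Println("every missed run is older than the deadline; wait until", next)
	} else {
		fmt.Println("a run missed at", next, "is still within the deadline and would be started")
	}
}
```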

@@ -2738,12 +2741,12 @@

VitessBackupScheduleTem (Optional) -

AllowedMissedRun defines how many missed run of the schedule will be allowed before giving up on finding the last job. -If the operator’s clocked is skewed and we end-up missing a certain number of jobs, finding the last -job might be very consuming, depending on the frequency of the schedule and the duration during which -the operator’s clock was misbehaving, also depending on how laggy the clock is, we can end-up with thousands -or missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will allow us to give up finding -the last job and simply wait for the next job on the schedule. +

AllowedMissedRuns defines how many missed runs of the schedule are allowed before giving up on finding the last job.
If the operator’s clock is skewed and we end up missing a certain number of jobs, finding the last
job might be very time-consuming, depending on the frequency of the schedule and the duration during which
the operator’s clock was misbehaving. Also, depending on how laggy the clock is, we can end up with thousands
of missed runs. For this reason, AllowedMissedRuns, which is set to 100 by default, short-circuits the search
and simply waits for the next job on the schedule.
Unless you are experiencing issues with missed runs due to a misconfigured clock, we recommend leaving
this field at its default value.
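A hedged sketch (not part of this patch) of why the search is capped: counting missed runs with the same cron library, the loop below would otherwise iterate thousands of times on a badly skewed clock. The schedule and time offsets are invented for the example:

```go
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	sched, err := cron.ParseStandard("* * * * *") // every minute (illustrative)
	if err != nil {
		panic(err)
	}

	now := time.Now()
	lastRun := now.Add(-48 * time.Hour) // pretend the clock was off for two days

	const allowedMissedRuns = 100 // mirrors the default described above
	missed := 0
	var lastMissed time.Time
	for t := sched.Next(lastRun); !t.After(now); t = sched.Next(t) {
		lastMissed = t
		missed++
		if missed > allowedMissedRuns {
			// Short-circuit: stop searching and just wait for the next scheduled run.
			fmt.Println("too many missed runs; giving up and waiting for", sched.Next(now))
			return
		}
	}
	fmt.Println("last missed run:", lastMissed, "after", missed, "missed runs")
}
```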

@@ -2757,7 +2760,7 @@

VitessBackupScheduleTem (Optional) -

JobTimeoutMinute defines after how many minutes a job that has not yet finished should be stopped and removed. +

JobTimeoutMinutes defines after how many minutes a job that has not yet finished should be stopped and removed. Default value is 10 minutes.
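As a rough illustration (not part of this patch, and simplified from the controller's actual check), a job counts as timed out once its start time plus JobTimeoutMinutes falls in the past; the start time and timeout below are hypothetical:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	jobStartTime := time.Now().Add(-25 * time.Minute) // hypothetical: job started 25 minutes ago
	jobTimeoutMinutes := int32(10)                    // default described above

	deadline := jobStartTime.Add(time.Duration(jobTimeoutMinutes) * time.Minute)
	if time.Now().After(deadline) {
		fmt.Println("job ran past its timeout and would be stopped and removed")
	} else {
		fmt.Println("job is still within its timeout; leave it running until", deadline)
	}
}
```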

diff --git a/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go b/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go index 8cc954c6..86026b49 100644 --- a/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go +++ b/pkg/apis/planetscale/v2/vitessbackupschedule_methods.go @@ -46,8 +46,8 @@ const DefaultAllowedMissedRuns = 100 // GetMissedRunsLimit returns the maximum number of missed run we can allow. // Returns DefaultAllowedMissedRuns if the value was not specified by the user. func (vbsc *VitessBackupSchedule) GetMissedRunsLimit() int { - if vbsc.Spec.AllowedMissedRun == nil { + if vbsc.Spec.AllowedMissedRuns == nil { return DefaultAllowedMissedRuns } - return *vbsc.Spec.AllowedMissedRun + return *vbsc.Spec.AllowedMissedRuns } diff --git a/pkg/apis/planetscale/v2/vitessbackupschedule_types.go b/pkg/apis/planetscale/v2/vitessbackupschedule_types.go index b21c1f69..8e1f3cd0 100644 --- a/pkg/apis/planetscale/v2/vitessbackupschedule_types.go +++ b/pkg/apis/planetscale/v2/vitessbackupschedule_types.go @@ -122,14 +122,17 @@ type VitessBackupScheduleTemplate struct { // +kubebuilder:validation:Minimum=0 FailedJobsHistoryLimit *int32 `json:"failedJobsHistoryLimit,omitempty"` - // Suspend enables the suspension of the VitessBackupSchedule, pausing any further scheduled + // Suspend pause the associated backup schedule. Pausing any further scheduled // runs until Suspend is set to false again. This is useful if you want to pause backup without // having to remove the entire VitessBackupSchedule object from the cluster. // +optional Suspend *bool `json:"suspend,omitempty"` - // StartingDeadlineSeconds allows for the VitessBackupSchedule controller to start jobs that late - // by the given amount of seconds. + // StartingDeadlineSeconds enables the VitessBackupSchedule to start a job even though it is late by + // the given amount of seconds. Let's say for some reason the controller process a schedule run on + // second after its scheduled time, if StartingDeadlineSeconds is set to 0, the job will be skipped + // as it's too late, but on the other hand, if StartingDeadlineSeconds is greater than one second, + // the job will be processed as usual. // +optional // +kubebuilder:validation:Minimum=0 StartingDeadlineSeconds *int64 `json:"startingDeadlineSeconds,omitempty"` @@ -143,24 +146,24 @@ type VitessBackupScheduleTemplate struct { // +kubebuilder:example="Forbid" ConcurrencyPolicy ConcurrencyPolicy `json:"concurrencyPolicy,omitempty"` - // AllowedMissedRun defines how many missed run of the schedule will be allowed before giving up on finding the last job. - // If the operator's clocked is skewed and we end-up missing a certain number of jobs, finding the last - // job might be very consuming, depending on the frequency of the schedule and the duration during which - // the operator's clock was misbehaving, also depending on how laggy the clock is, we can end-up with thousands - // or missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will allow us to give up finding - // the last job and simply wait for the next job on the schedule. + // AllowedMissedRuns defines how many missed run of the schedule will be allowed before giving up on finding the last job. + // If the operator's clock is skewed and we end-up missing a certain number of jobs, finding the last + // job might be very time-consuming, depending on the frequency of the schedule and the duration during which + // the operator's clock was misbehaving. 
Also depending on how laggy the clock is, we can end-up with thousands + // of missed runs. For this reason, AllowedMissedRun, which is set to 100 by default, will short circuit the search + // and simply wait for the next job on the schedule. // Unless you are experiencing issue with missed runs due to a misconfiguration of the clock, we recommend leaving // this field to its default value. // +optional // +kubebuilder:validation:Minimum=0 - AllowedMissedRun *int `json:"allowedMissedRun,omitempty"` + AllowedMissedRuns *int `json:"allowedMissedRun,omitempty"` - // JobTimeoutMinute defines after how many minutes a job that has not yet finished should be stopped and removed. + // JobTimeoutMinutes defines after how many minutes a job that has not yet finished should be stopped and removed. // Default value is 10 minutes. // +optional // +kubebuilder:validation:Minimum=0 // +kubebuilder:default=10 - JobTimeoutMinute int32 `json:"jobTimeoutMinute,omitempty"` + JobTimeoutMinutes int32 `json:"jobTimeoutMinute,omitempty"` } // VitessBackupScheduleStrategy defines how we are going to take a backup. diff --git a/pkg/apis/planetscale/v2/zz_generated.deepcopy.go b/pkg/apis/planetscale/v2/zz_generated.deepcopy.go index dbeff8d7..807c99ac 100644 --- a/pkg/apis/planetscale/v2/zz_generated.deepcopy.go +++ b/pkg/apis/planetscale/v2/zz_generated.deepcopy.go @@ -888,8 +888,8 @@ func (in *VitessBackupScheduleTemplate) DeepCopyInto(out *VitessBackupScheduleTe *out = new(int64) **out = **in } - if in.AllowedMissedRun != nil { - in, out := &in.AllowedMissedRun, &out.AllowedMissedRun + if in.AllowedMissedRuns != nil { + in, out := &in.AllowedMissedRuns, &out.AllowedMissedRuns *out = new(int) **out = **in } diff --git a/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go b/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go index 2463f739..135c52f4 100644 --- a/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go +++ b/pkg/controller/vitessbackupschedule/vitessbackupschedule_controller.go @@ -20,6 +20,7 @@ import ( "context" "flag" "fmt" + "strings" "sort" "time" @@ -186,7 +187,7 @@ func (r *ReconcileVitessBackupsSchedule) Reconcile(ctx context.Context, req ctrl r.cleanupJobsWithLimit(ctx, jobs.failed, vbsc.GetFailedJobsLimit()) r.cleanupJobsWithLimit(ctx, jobs.successful, vbsc.GetSuccessfulJobsLimit()) - err = r.removeTimeoutJobs(ctx, jobs.successful, vbsc.Spec.JobTimeoutMinute) + err = r.removeTimeoutJobs(ctx, jobs.successful, vbsc.Spec.JobTimeoutMinutes) if err != nil { // We had an error while removing timed out jobs, we can requeue return resultBuilder.Error(err) @@ -262,36 +263,35 @@ func (r *ReconcileVitessBackupsSchedule) Reconcile(ctx context.Context, req ctrl func getNextSchedule(vbsc planetscalev2.VitessBackupSchedule, now time.Time) (time.Time, time.Time, error) { sched, err := cron.ParseStandard(vbsc.Spec.Schedule) if err != nil { - return time.Time{}, time.Time{}, fmt.Errorf("unparaseable schedule %q: %v", vbsc.Spec.Schedule, err) + return time.Time{}, time.Time{}, fmt.Errorf("unable to parse schedule %q: %v", vbsc.Spec.Schedule, err) } - // for optimization purposes, cheat a bit and start from our last observed run time - // we could reconstitute this here, but there's not much point, since we've - // just updated it. - var earliestTime time.Time + // Set the last scheduled time by either looking at the VitessBackupSchedule's Status or + // by looking at its creation time. 
+ var latestRun time.Time if vbsc.Status.LastScheduledTime != nil { - earliestTime = vbsc.Status.LastScheduledTime.Time + latestRun = vbsc.Status.LastScheduledTime.Time } else { - earliestTime = vbsc.ObjectMeta.CreationTimestamp.Time + latestRun = vbsc.ObjectMeta.CreationTimestamp.Time } if vbsc.Spec.StartingDeadlineSeconds != nil { // controller is not going to schedule anything below this point schedulingDeadline := now.Add(-time.Second * time.Duration(*vbsc.Spec.StartingDeadlineSeconds)) - if schedulingDeadline.After(earliestTime) { - earliestTime = schedulingDeadline + if schedulingDeadline.After(latestRun) { + latestRun = schedulingDeadline } } // Next schedule is later, simply return the next scheduled time. - if earliestTime.After(now) { + if latestRun.After(now) { return time.Time{}, sched.Next(now), nil } var lastMissed time.Time missedRuns := 0 - for t := sched.Next(earliestTime); !t.After(now); t = sched.Next(t) { + for t := sched.Next(latestRun); !t.After(now); t = sched.Next(t) { lastMissed = t missedRuns++ @@ -311,7 +311,7 @@ func (r *ReconcileVitessBackupsSchedule) updateVitessBackupScheduleStatus(ctx co vbsc.Status.LastScheduledTime = nil } - vbsc.Status.Active = nil + vbsc.Status.Active = make([]corev1.ObjectReference, 0, len(activeJobs)) for _, activeJob := range activeJobs { jobRef, err := ref.GetReference(r.scheme, activeJob) if err != nil { @@ -361,12 +361,8 @@ func (r *ReconcileVitessBackupsSchedule) getJobsList(ctx context.Context, req ct log.WithError(err).Errorf("unable to parse schedule time for existing job, found: %s", job.Annotations[scheduledTimeAnnotation]) continue } - if scheduledTimeForJob != nil { - if mostRecentTime == nil { - mostRecentTime = scheduledTimeForJob - } else if mostRecentTime.Before(*scheduledTimeForJob) { - mostRecentTime = scheduledTimeForJob - } + if scheduledTimeForJob != nil && (mostRecentTime == nil || mostRecentTime.Before(*scheduledTimeForJob)) { + mostRecentTime = scheduledTimeForJob } } return jobs, mostRecentTime, nil @@ -407,7 +403,7 @@ func (r *ReconcileVitessBackupsSchedule) removeTimeoutJobs(ctx context.Context, if err != nil { return err } - if jobStartTime.After(time.Now().Add(time.Minute * time.Duration(timeout))) { + if jobStartTime.Add(time.Minute * time.Duration(timeout)).After(time.Now()) { if err := r.client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); (err) != nil { log.WithError(err).Errorf("unable to delete timed out job: %s", job.Name) } else { @@ -495,28 +491,28 @@ func (r *ReconcileVitessBackupsSchedule) createJobPod(ctx context.Context, vbsc // ensures that there will be at least one item in this list. The YAML cannot be applied with // empty list of strategies. args := []string{"/bin/sh", "-c"} - var cmd string + var cmd strings.Builder for i, strategy := range vbsc.Spec.Strategy { if i > 0 { - cmd = fmt.Sprintf("%s && ", cmd) + cmd.WriteString(fmt.Sprintf("%s && ", cmd)) } // At this point, strategy.Name is either BackupShard or BackupTablet, the validation // is made at the CRD level on the YAML directly. 
- cmd = fmt.Sprintf("%s%s %s", cmd, vtctldclientPath, vtctldclientServerArg) + cmd.WriteString(fmt.Sprintf("%s%s %s", cmd, vtctldclientPath, vtctldclientServerArg)) // Add the vtctldclient command switch strategy.Name { case planetscalev2.BackupShard: - cmd = fmt.Sprintf("%s BackupShard", cmd) + cmd.WriteString(fmt.Sprintf("%s BackupShard", cmd)) case planetscalev2.BackupTablet: - cmd = fmt.Sprintf("%s Backup", cmd) + cmd.WriteString(fmt.Sprintf("%s Backup", cmd)) } // Add any flags for key, value := range strategy.ExtraFlags { - cmd = fmt.Sprintf("%s --%s=%s", cmd, key, value) + cmd.WriteString(fmt.Sprintf("%s --%s=%s", cmd, key, value)) } // Add keyspace/shard or tablet alias @@ -525,15 +521,15 @@ func (r *ReconcileVitessBackupsSchedule) createJobPod(ctx context.Context, vbsc if strategy.KeyspaceShard == "" { return pod, fmt.Errorf("the KeyspaceShard field is missing from VitessBackupScheduleStrategy %s", planetscalev2.BackupShard) } - cmd = fmt.Sprintf("%s %s", cmd, strategy.KeyspaceShard) + cmd.WriteString(fmt.Sprintf("%s %s", cmd, strategy.KeyspaceShard)) case planetscalev2.BackupTablet: if strategy.TabletAlias == "" { return pod, fmt.Errorf("the TabletAlias field is missing from VitessBackupScheduleStrategy %s", planetscalev2.BackupTablet) } - cmd = fmt.Sprintf("%s %s", cmd, strategy.TabletAlias) + cmd.WriteString(fmt.Sprintf("%s %s", cmd, strategy.TabletAlias)) } } - args = append(args, cmd) + args = append(args, cmd.String()) pod = corev1.PodSpec{ Containers: []corev1.Container{{ @@ -572,7 +568,7 @@ func (r *ReconcileVitessBackupsSchedule) getVtctldServiceName(ctx context.Contex } if svcName == "" || svcPort == 0 { - return "", 0, fmt.Errorf("no vtctld service in %q found", vbsc.Namespace) + return "", 0, fmt.Errorf("no vtctld service found in %q namespace", vbsc.Namespace) } return svcName, svcPort, nil } diff --git a/pkg/controller/vitesscluster/reconcile_backup_schedule.go b/pkg/controller/vitesscluster/reconcile_backup_schedule.go index f8d777b3..77216534 100644 --- a/pkg/controller/vitesscluster/reconcile_backup_schedule.go +++ b/pkg/controller/vitesscluster/reconcile_backup_schedule.go @@ -34,8 +34,8 @@ func (r *ReconcileVitessCluster) reconcileBackupSchedule(ctx context.Context, vt // Generate keys (object names) for all desired cells. // Keep a map back from generated names to the specs. - var keys []client.ObjectKey - scheduleMap := make(map[client.ObjectKey]*planetscalev2.VitessBackupScheduleTemplate) + keys := make([]client.ObjectKey, 0, len(vt.Spec.Backup.Schedules)) + scheduleMap := make(map[client.ObjectKey]*planetscalev2.VitessBackupScheduleTemplate, len(vt.Spec.Backup.Schedules)) if vt.Spec.Backup != nil { for i := range vt.Spec.Backup.Schedules { schedule := &vt.Spec.Backup.Schedules[i] diff --git a/test/endtoend/backup_restore_test.sh b/test/endtoend/backup_restore_test.sh index e79dde58..69d4bf4a 100755 --- a/test/endtoend/backup_restore_test.sh +++ b/test/endtoend/backup_restore_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -30,14 +32,14 @@ function resurrectShard() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 @@ -76,7 +78,7 @@ EOF } function setupKindConfig() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we can't mount the current # working directory to kind. The current directory in the docker is workdir # So if we try and mount that, we get an error. Instead we need to mount the @@ -99,7 +101,7 @@ docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Setting up the kind config" setupKindConfig echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/backup_schedule_test.sh b/test/endtoend/backup_schedule_test.sh index 6710cddb..050b699d 100755 --- a/test/endtoend/backup_schedule_test.sh +++ b/test/endtoend/backup_schedule_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -15,9 +17,8 @@ function checkVitessBackupScheduleStatusWithTimeout() { regex=$1 for i in {1..1200} ; do - out=$(kubectl get VitessBackupSchedule) - echo "$out" | grep -E "$regex" | wc -l | grep "1" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + kubectl get VitessBackupSchedule | grep -E "$regex" | wc -l | grep "1" > /dev/null 2>&1 + if [[ $? -eq 0 ]]; then echo "$regex found" return fi @@ -38,7 +39,7 @@ function verifyListBackupsOutputWithSchedule() { sleep 360 backupCount=$(kubectl get vtb --no-headers | wc -l) - if [ "${backupCount}" -lt 7 ]; then + if [[ "${backupCount}" -lt 7 ]]; then echo "Did not find at least 7 backups" return 0 fi @@ -49,7 +50,7 @@ function verifyListBackupsOutputWithSchedule() { } function setupKindConfig() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we can't mount the current # working directory to kind. The current directory in the docker is workdir # So if we try and mount that, we get an error. Instead we need to mount the @@ -72,7 +73,7 @@ docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Setting up the kind config" setupKindConfig echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --config ./vtdataroot/config.yaml --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/upgrade_test.sh b/test/endtoend/upgrade_test.sh index 22d6e1cc..d3033b58 100755 --- a/test/endtoend/upgrade_test.sh +++ b/test/endtoend/upgrade_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -236,7 +238,7 @@ EOF echo "Building the docker image" docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . 
echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/test/endtoend/utils.sh b/test/endtoend/utils.sh index d58a5eab..21624ea4 100644 --- a/test/endtoend/utils.sh +++ b/test/endtoend/utils.sh @@ -20,7 +20,7 @@ function checkSemiSyncWithRetry() { vttablet=$1 for i in {1..600} ; do kubectl exec "$vttablet" -c mysqld -- mysql -S "/vt/socket/mysql.sock" -u root -e "show variables like 'rpl_semi_sync_%_enabled'" | grep "ON" - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi sleep 1 @@ -44,7 +44,7 @@ function runSQLWithRetry() { query=$1 for i in {1..600} ; do mysql -e "$query" - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi echo "failed to run query $query, retrying (attempt #$i) ..." @@ -94,7 +94,7 @@ function takeBackup() { for i in {1..600} ; do out=$(kubectl get vtb --no-headers | wc -l) echo "$out" | grep "$finalBackupCount" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "Backup created" return 0 fi @@ -109,7 +109,7 @@ function verifyListBackupsOutput() { for i in {1..600} ; do out=$(vtctldclient LegacyVtctlCommand -- ListBackups "$keyspaceShard" | wc -l) echo "$out" | grep "$backupCount" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "ListBackupsOutputCorrect" return 0 fi @@ -134,7 +134,7 @@ function checkPodStatusWithTimeout() { nb=$2 # Number of pods to match defaults to one - if [ -z "$nb" ]; then + if [[ -z "$nb" ]]; then nb=1 fi @@ -143,7 +143,7 @@ function checkPodStatusWithTimeout() { for i in {1..1200} ; do out=$(kubectl get pods) echo "$out" | grep -E "$regex" | wc -l | grep "$nb" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$regex found" return fi @@ -151,7 +151,7 @@ function checkPodStatusWithTimeout() { done echo -e "ERROR: checkPodStatusWithTimeout timeout to find pod matching:\ngot:\n$out\nfor regex: $regex" echo "$regex" | grep "vttablet" > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then printMysqlErrorFiles fi exit 1 @@ -171,7 +171,7 @@ function ensurePodResourcesSet() { numContainers=$(echo "$out" | grep -E "$regex" | awk '{print $2}' | awk -F ',' '{print NF}') numContainersWithResources=$(echo "$out" | grep -E "$regex" | awk '{print $3}' | awk -F ',' '{print NF}') - if [ $numContainers != $numContainersWithResources ]; then + if [[ $numContainers != $numContainersWithResources ]]; then echo "one or more containers in pods with $regex do not have $resource set" exit 1 fi @@ -181,7 +181,7 @@ function ensurePodResourcesSet() { function insertWithRetry() { for i in {1..600} ; do mysql --table < ../common/delete_commerce_data.sql && mysql --table < ../common/insert_commerce_data.sql - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi echo "failed to insert commerce data, retrying (attempt #$i) ..." @@ -194,7 +194,7 @@ function verifyVtGateVersion() { podName=$(kubectl get pods --no-headers -o custom-columns=":metadata.name" | grep "vtgate") data=$(kubectl logs "$podName" | head) echo "$data" | grep "$version" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo -e "The vtgate version is incorrect, expected: $version, got:\n$data" exit 1 fi @@ -208,7 +208,7 @@ function verifyDurabilityPolicy() { durabilityPolicy=$2 data=$(vtctldclient LegacyVtctlCommand -- GetKeyspace "$keyspace") echo "$data" | grep "\"durability_policy\": \"$durabilityPolicy\"" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The durability policy in $keyspace is incorrect, got:\n$data" exit 1 fi @@ -221,7 +221,7 @@ function waitForKeyspaceToBeServing() { for i in {1..600} ; do out=$(mysql --table --execute="show vitess_tablets") numtablets=$(echo "$out" | grep -E "$ks(.*)$shard(.*)PRIMARY(.*)SERVING|$ks(.*)$shard(.*)REPLICA(.*)SERVING" | wc -l) - if [[ $numtablets -ge $((nb_of_replica+1)) ]]; then + if [[[ $numtablets -ge $((nb_of_replica+1)) ]]]; then echo "Shard $ks/$shard is serving" return fi @@ -237,10 +237,10 @@ function applySchemaWithRetry() { drop_sql=$3 for i in {1..600} ; do vtctldclient ApplySchema --sql-file="$schema" $ks - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then return fi - if [ -n "$drop_sql" ]; then + if [[ -n "$drop_sql" ]]; then mysql --table < $drop_sql fi echo "failed to apply schema $schema, retrying (attempt #$i) ..." @@ -254,14 +254,14 @@ function assertSelect() { expected=$3 data=$(mysql --table < $sql) echo "$data" | grep "$expected" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $shard's tables is incorrect, got:\n$data" exit 1 fi } function setupKubectlAccessForCI() { - if [ "$BUILDKITE_BUILD_ID" != "0" ]; then + if [[ "$BUILDKITE_BUILD_ID" != "0" ]]; then # The script is being run from buildkite, so we need to do stuff # https://github.com/kubernetes-sigs/kind/issues/1846#issuecomment-691565834 # Since kind is running in a sibling container, communicating with it through kubectl is not trivial. @@ -301,7 +301,7 @@ function get_started() { applySchemaWithRetry create_commerce_schema.sql commerce drop_all_commerce_tables.sql vtctldclient ApplyVSchema --vschema-file="vschema_commerce_initial.json" commerce - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "ApplySchema failed for initial commerce" printMysqlErrorFiles exit 1 @@ -309,14 +309,14 @@ function get_started() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 diff --git a/test/endtoend/vtorc_vtadmin_test.sh b/test/endtoend/vtorc_vtadmin_test.sh index de515671..4104a452 100755 --- a/test/endtoend/vtorc_vtadmin_test.sh +++ b/test/endtoend/vtorc_vtadmin_test.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -euo pipefail + source ./tools/test.env source ./test/endtoend/utils.sh @@ -31,7 +33,7 @@ function get_started_vtorc_vtadmin() { applySchemaWithRetry create_commerce_schema.sql commerce drop_all_commerce_tables.sql vtctldclient ApplyVSchema --vschema-file="vschema_commerce_initial.json" commerce - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "ApplySchema failed for initial commerce" printMysqlErrorFiles exit 1 @@ -39,14 +41,14 @@ function get_started_vtorc_vtadmin() { sleep 5 echo "show databases;" | mysql | grep "commerce" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? 
-ne 0 ]]; then echo "Could not find commerce database" printMysqlErrorFiles exit 1 fi echo "show tables;" | mysql commerce | grep -E 'corder|customer|product' | wc -l | grep 3 > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo "Could not find commerce's tables" printMysqlErrorFiles exit 1 @@ -159,9 +161,9 @@ function chromiumHeadlessRequest() { for i in {1..600} ; do chromiumBinary=$(getChromiumBinaryName) res=$($chromiumBinary --headless --no-sandbox --disable-gpu --enable-logging --dump-dom --virtual-time-budget=900000000 "$url") - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $url is incorrect, got:\n$res, retrying" sleep 1 continue @@ -175,12 +177,12 @@ function chromiumHeadlessRequest() { function getChromiumBinaryName() { which chromium-browser > /dev/null - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "chromium-browser" return fi which chromium > /dev/null - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "chromium" return fi @@ -191,9 +193,9 @@ function curlGetRequestWithRetry() { dataToAssert=$2 for i in {1..600} ; do res=$(curl "$url") - if [ $? -eq 0 ]; then + if [[ $? -eq 0 ]]; then echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in $url is incorrect, got:\n$res" exit 1 fi @@ -208,12 +210,12 @@ function curlDeleteRequest() { url=$1 dataToAssert=$2 res=$(curl -X DELETE "$url") - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The DELETE request to $url failed\n" exit 1 fi echo "$res" | grep "$dataToAssert" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The data in delete request to $url is incorrect, got:\n$res" exit 1 fi @@ -223,7 +225,7 @@ function curlPostRequest() { url=$1 data=$2 curl -X POST -d "$data" "$url" - if [ $? -ne 0 ]; then + if [[ $? -ne 0 ]]; then echo -e "The POST request to $url with data $data failed\n" exit 1 fi @@ -233,7 +235,7 @@ function curlPostRequest() { echo "Building the docker image" docker build -f build/Dockerfile.release -t vitess-operator-pr:latest . echo "Creating Kind cluster" -kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image kindest/node:v1.28.0 +kind create cluster --wait 30s --name kind-${BUILDKITE_BUILD_ID} --image ${KIND_VERSION} echo "Loading docker image into Kind cluster" kind load docker-image vitess-operator-pr:latest --name kind-${BUILDKITE_BUILD_ID} diff --git a/tools/test.env b/tools/test.env index c427317c..6532efd6 100755 --- a/tools/test.env +++ b/tools/test.env @@ -3,3 +3,5 @@ # We add the tools/_bin directory to PATH variable # since this is where we install the binaries that are needed export PATH="$PATH:$PWD/tools/_bin" + +export KIND_VERSION=kindest/node:v1.28.0 \ No newline at end of file