From f3d88e2278fd16adf91200bcfafd6f1baab4db90 Mon Sep 17 00:00:00 2001 From: devthejo Date: Sat, 17 Dec 2022 01:02:23 +0100 Subject: [PATCH] fix(sts)+feat(retry-limit)+feat(pending-deadline) --- cmd/main.go | 7 ++++- pkg/config/options.go | 4 ++- pkg/status/container_status.go | 13 ++++++---- pkg/status/job.go | 10 ++++--- pkg/status/pod.go | 13 +++++++--- pkg/status/replicaset.go | 2 +- pkg/status/resource_type.go | 9 +++++++ pkg/status/statefulset.go | 2 +- tests/poc-manifests/dep.yaml | 44 +++++++++++++++++++++++++++++++ tests/poc-manifests/job.yaml | 34 ++++++++++++++++++++++++ tests/poc-manifests/sts.yaml | 46 +++++++++++++++++++++++++++++++++ tests/poc-manifests/test-dep.sh | 5 ++++ tests/poc-manifests/test-job.sh | 5 ++++ tests/poc-manifests/test-sts.sh | 5 ++++ 14 files changed, 182 insertions(+), 17 deletions(-) create mode 100644 pkg/status/resource_type.go create mode 100644 tests/poc-manifests/dep.yaml create mode 100644 tests/poc-manifests/job.yaml create mode 100644 tests/poc-manifests/sts.yaml create mode 100755 tests/poc-manifests/test-dep.sh create mode 100755 tests/poc-manifests/test-job.sh create mode 100755 tests/poc-manifests/test-sts.sh diff --git a/cmd/main.go b/cmd/main.go index 9e1b838..e83bfab 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -20,6 +20,9 @@ func main() { selector := flag.String("selector", "", "Label selector to watch, kubectl format such as release=foo,component=frontend") kubecontext := flag.String("kubecontext", "", "Kubeconfig context to use") interval := flag.String("interval", "5s", "Interval between status checks") + retryLimit := flag.Int64("retry-limit", 6, "Retry limit for deployments and statefulsets, default 6, -1 to disable") + pendingDeadLineSeconds := flag.Int("pending-deadline", 180, "Pending deadLine in seconds, default 180, -1 to disable") + ignoreSecretNotFound := flag.Bool("ignore-secret-not-found", false, "Ignore secret not found error") var kubeconfig *string @@ -33,7 +36,9 @@ func main() { flag.Parse() 
options := &config.Options{ - IgnoreSecretNotFound: *ignoreSecretNotFound, + IgnoreSecretNotFound: *ignoreSecretNotFound, + RetryLimit: int32(*retryLimit), + PendingDeadLineSeconds: *pendingDeadLineSeconds, } clientset := makeClientset(*kubeconfig, *kubecontext) diff --git a/pkg/config/options.go b/pkg/config/options.go index 12c2867..b8dca8c 100644 --- a/pkg/config/options.go +++ b/pkg/config/options.go @@ -1,5 +1,7 @@ package config type Options struct { - IgnoreSecretNotFound bool + IgnoreSecretNotFound bool + RetryLimit int32 + PendingDeadLineSeconds int } diff --git a/pkg/status/container_status.go b/pkg/status/container_status.go index d092a55..409bb5e 100644 --- a/pkg/status/container_status.go +++ b/pkg/status/container_status.go @@ -7,7 +7,7 @@ import ( v1 "k8s.io/api/core/v1" ) -func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) RolloutStatus { +func TestContainerStatus(status *v1.ContainerStatus, options *config.Options, resourceType ResourceType) RolloutStatus { // https://github.com/kubernetes/kubernetes/blob/4fda1207e347af92e649b59d60d48c7021ba0c54/pkg/kubelet/container/sync_result.go#L37 if status.State.Waiting != nil { reason := status.State.Waiting.Reason @@ -19,11 +19,15 @@ func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) Ro return RolloutErrorProgressing(err) case "CrashLoopBackOff": - // TODO this should retry but have a deadline, all restarts fall to CrashLoopBackOff err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q: %v", status.Name, reason, status.State.Waiting.Message) - return RolloutFatal(err) + if ((resourceType == ResourceTypeDeployment || resourceType == ResourceTypeStatefulSet) && + (status.RestartCount <= options.RetryLimit || options.RetryLimit == -1)) || + resourceType == ResourceTypeJob { + return RolloutErrorProgressing(err) + } else { + return RolloutFatal(err) + } case "RunContainerError": - // TODO this should retry but have a deadline, all restarts 
fall to CrashLoopBackOff err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q: %v", status.Name, reason, status.State.Waiting.Message) return RolloutErrorMaybeProgressing(err) @@ -48,7 +52,6 @@ func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) Ro reason := status.State.Terminated.Reason switch reason { case "Error": - // TODO this should retry but have a deadline, all restarts fall to CrashLoopBackOff err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q", status.Name, reason) return RolloutErrorMaybeProgressing(err) case "OOMKilled": diff --git a/pkg/status/job.go b/pkg/status/job.go index f665433..371d29b 100644 --- a/pkg/status/job.go +++ b/pkg/status/job.go @@ -17,7 +17,7 @@ func TestJobStatus(wrapper client.Kubernetes, job batchv1.Job, options *config.O aggr := Aggregator{} for _, pod := range podList.Items { - status := TestPodStatus(&pod, options) + status := TestPodStatus(&pod, options, ResourceTypeJob) aggr.Add(status) if fatal := aggr.Fatal(); fatal != nil { @@ -38,7 +38,11 @@ func JobStatus(wrapper client.Kubernetes, job *batchv1.Job, options *config.Opti } if condition.Type == batchv1.JobFailed && condition.Status == v1.ConditionTrue { status := TestJobStatus(wrapper, *job, options) - aggr.Add(status) + err := status.Error + if err == nil { + err = errors.New("") + } + aggr.Add(RolloutFatal(err)) return aggr.Resolve() } } @@ -47,8 +51,6 @@ func JobStatus(wrapper client.Kubernetes, job *batchv1.Job, options *config.Opti if status.Error != nil { if status.MaybeContinue { aggr.Add(RolloutErrorProgressing(status.Error)) - } else { - aggr.Add(status) } } else { err := errors.New("") diff --git a/pkg/status/pod.go b/pkg/status/pod.go index e9137db..609851e 100644 --- a/pkg/status/pod.go +++ b/pkg/status/pod.go @@ -8,10 +8,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus { +func TestPodStatus(pod *v1.Pod, options 
*config.Options, resourceType ResourceType) RolloutStatus { aggr := Aggregator{} for _, initStatus := range pod.Status.InitContainerStatuses { - status := TestContainerStatus(&initStatus, options) + status := TestContainerStatus(&initStatus, options, resourceType) if status.Error != nil { if !status.Continue { if re, ok := status.Error.(RolloutError); ok { @@ -30,7 +30,7 @@ func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus { } for _, containerStatus := range pod.Status.ContainerStatuses { - status := TestContainerStatus(&containerStatus, options) + status := TestContainerStatus(&containerStatus, options, resourceType) if status.Error != nil { if !status.Continue { if re, ok := status.Error.(RolloutError); ok { @@ -56,9 +56,14 @@ func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus { for _, condition := range pod.Status.Conditions { // fail if the pod is pending for X time if condition.Type == v1.PodScheduled { - deadline := metav1.NewTime(time.Now().Add(time.Minute * -3)) // TODO configure err := MakeRolloutError(FailureScheduling, "Failed to schedule pod: %v", condition.Message) + if options.PendingDeadLineSeconds == -1 { + return RolloutErrorProgressing(err) + } + + deadline := metav1.NewTime(time.Now().Add(time.Second * -1 * time.Duration(options.PendingDeadLineSeconds))) // cutoff: now minus options.PendingDeadLineSeconds (set by the -pending-deadline flag) + if condition.LastTransitionTime.Before(&deadline) { return RolloutFatal(err) } else { diff --git a/pkg/status/replicaset.go b/pkg/status/replicaset.go index c4ca09a..3eef9f3 100644 --- a/pkg/status/replicaset.go +++ b/pkg/status/replicaset.go @@ -22,7 +22,7 @@ func TestReplicaSetStatus(wrapper client.Kubernetes, replicaSet appsv1.ReplicaSe aggr := Aggregator{} for _, pod := range podList.Items { - status := TestPodStatus(&pod, options) + status := TestPodStatus(&pod, options, ResourceTypeDeployment) aggr.Add(status) if fatal := aggr.Fatal(); fatal != nil { return *fatal diff --git a/pkg/status/resource_type.go
b/pkg/status/resource_type.go new file mode 100644 index 0000000..46bff4c --- /dev/null +++ b/pkg/status/resource_type.go @@ -0,0 +1,9 @@ +package status + +type ResourceType int + +const ( + ResourceTypeDeployment ResourceType = iota + ResourceTypeStatefulSet + ResourceTypeJob +) diff --git a/pkg/status/statefulset.go b/pkg/status/statefulset.go index ae74d50..ec573d9 100644 --- a/pkg/status/statefulset.go +++ b/pkg/status/statefulset.go @@ -16,7 +16,7 @@ func TestStatefulSetStatus(wrapper client.Kubernetes, statefulSet appsv1.Statefu aggr := Aggregator{} for _, pod := range podList.Items { - status := TestPodStatus(&pod, options) + status := TestPodStatus(&pod, options, ResourceTypeStatefulSet) aggr.Add(status) if fatal := aggr.Fatal(); fatal != nil { return *fatal diff --git a/tests/poc-manifests/dep.yaml b/tests/poc-manifests/dep.yaml new file mode 100644 index 0000000..9920c3f --- /dev/null +++ b/tests/poc-manifests/dep.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dep + labels: + type: dep +spec: + replicas: 1 + selector: + matchLabels: + app: test + template: + metadata: + labels: + app: test + spec: + containers: + - name: dep-pi + image: perl + command: + - /bin/bash + - -c + - | + random() { + min="$1" + max="$2" + range=$((max - min + 1)) + rand=$((min + (RANDOM % range))) + echo "$rand" + } + + sleep 2 + if [ "`random 0 2`" = "2" ]; then + touch /tmp/success + tail -f + fi + echo Failed!!! 
+ exit 1 + readinessProbe: + exec: + command: + - sh + - -c + - test -S /tmp/success diff --git a/tests/poc-manifests/job.yaml b/tests/poc-manifests/job.yaml new file mode 100644 index 0000000..0eef139 --- /dev/null +++ b/tests/poc-manifests/job.yaml @@ -0,0 +1,34 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: job + labels: + type: job +spec: + backoffLimit: 2 + template: + spec: + containers: + - name: job-pi + image: perl + command: + - /bin/bash + - -c + - | + sleep 1 + + random() { + min="$1" + max="$2" + range=$((max - min + 1)) + rand=$((min + (RANDOM % range))) + echo "$rand" + } + + if [ "`random 1 5`" = "5" ]; then + echo Success!!! + exit 0 + fi + echo Failed!!! + exit 1 + restartPolicy: Never diff --git a/tests/poc-manifests/sts.yaml b/tests/poc-manifests/sts.yaml new file mode 100644 index 0000000..b96f463 --- /dev/null +++ b/tests/poc-manifests/sts.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: sts + labels: + type: sts +spec: + replicas: 1 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: sts-rand + image: perl + command: + - /bin/bash + - -c + - | + random() { + min="$1" + max="$2" + range=$((max - min + 1)) + rand=$((min + (RANDOM % range))) + echo "$rand" + } + + sleep 2 + if [ "`random 0 2`" = "2" ]; then + touch /tmp/success + tail -f + fi + echo Failed!!! 
+ exit 1 + readinessProbe: + exec: + command: + - sh + - -c + - test -S /tmp/success + restartPolicy: Always + serviceName: nginx \ No newline at end of file diff --git a/tests/poc-manifests/test-dep.sh b/tests/poc-manifests/test-dep.sh new file mode 100755 index 0000000..8f5fc5e --- /dev/null +++ b/tests/poc-manifests/test-dep.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +kubectl delete -f dep.yaml || true && kubectl apply -f dep.yaml +sleep 5 +../../rollout-status -selector type=dep -retry-limit 2 \ No newline at end of file diff --git a/tests/poc-manifests/test-job.sh b/tests/poc-manifests/test-job.sh new file mode 100755 index 0000000..43ad9aa --- /dev/null +++ b/tests/poc-manifests/test-job.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +kubectl delete -f job.yaml || true && kubectl apply -f job.yaml +sleep 5 +../../rollout-status -selector type=job \ No newline at end of file diff --git a/tests/poc-manifests/test-sts.sh b/tests/poc-manifests/test-sts.sh new file mode 100755 index 0000000..9eea6a8 --- /dev/null +++ b/tests/poc-manifests/test-sts.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +kubectl delete -f sts.yaml || true && kubectl apply -f sts.yaml +sleep 5 +../../rollout-status -selector type=sts -retry-limit 3 \ No newline at end of file