Skip to content

Commit

Permalink
fix(sts)+feat(retry-limit)+feat(pending-deadline)
Browse files Browse the repository at this point in the history
  • Loading branch information
devthejo committed Dec 17, 2022
1 parent 4213ddd commit f3d88e2
Show file tree
Hide file tree
Showing 14 changed files with 182 additions and 17 deletions.
7 changes: 6 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ func main() {
selector := flag.String("selector", "", "Label selector to watch, kubectl format such as release=foo,component=frontend")
kubecontext := flag.String("kubecontext", "", "Kubeconfig context to use")
interval := flag.String("interval", "5s", "Interval between status checks")
retryLimit := flag.Int64("retry-limit", 6, "Retry limit for deployments and statefulsets, default 6, -1 to disable")
pendingDeadLineSeconds := flag.Int("pending-deadline", 180, "Pending deadLine in seconds, default 180, -1 to disable")

ignoreSecretNotFound := flag.Bool("ignore-secret-not-found", false, "Ignore secret not found error")

var kubeconfig *string
Expand All @@ -33,7 +36,9 @@ func main() {
flag.Parse()

options := &config.Options{
IgnoreSecretNotFound: *ignoreSecretNotFound,
IgnoreSecretNotFound: *ignoreSecretNotFound,
RetryLimit: int32(*retryLimit),
PendingDeadLineSeconds: *pendingDeadLineSeconds,
}

clientset := makeClientset(*kubeconfig, *kubecontext)
Expand Down
4 changes: 3 additions & 1 deletion pkg/config/options.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package config

// Options carries the settings that main builds from the command-line flags
// and hands to the status checks.
type Options struct {
// IgnoreSecretNotFound is set from the -ignore-secret-not-found flag
// ("Ignore secret not found error").
// NOTE(review): this field line appears twice in this view; presumably the
// removed and re-added lines of the diff — confirm against the real file.
IgnoreSecretNotFound bool
IgnoreSecretNotFound bool
// RetryLimit is set from the -retry-limit flag. The container status check
// reports a CrashLoopBackOff container of a Deployment or StatefulSet as
// still progressing while its restart count is <= RetryLimit; -1 removes
// the limit.
RetryLimit int32
// PendingDeadLineSeconds is set from the -pending-deadline flag. The pod
// status check uses it as the maximum age, in seconds, of the PodScheduled
// condition's last transition before the scheduling error becomes fatal;
// -1 disables the deadline.
PendingDeadLineSeconds int
}
13 changes: 8 additions & 5 deletions pkg/status/container_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
v1 "k8s.io/api/core/v1"
)

func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) RolloutStatus {
func TestContainerStatus(status *v1.ContainerStatus, options *config.Options, resourceType ResourceType) RolloutStatus {
// https://github.com/kubernetes/kubernetes/blob/4fda1207e347af92e649b59d60d48c7021ba0c54/pkg/kubelet/container/sync_result.go#L37
if status.State.Waiting != nil {
reason := status.State.Waiting.Reason
Expand All @@ -19,11 +19,15 @@ func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) Ro
return RolloutErrorProgressing(err)

case "CrashLoopBackOff":
// TODO this should retry but have a deadline, all restarts fall to CrashLoopBackOff
err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q: %v", status.Name, reason, status.State.Waiting.Message)
return RolloutFatal(err)
if ((resourceType == ResourceTypeDeployment || resourceType == ResourceTypeStatefulSet) &&
(status.RestartCount <= options.RetryLimit || options.RetryLimit == -1)) ||
resourceType == ResourceTypeJob {
return RolloutErrorProgressing(err)
} else {
return RolloutFatal(err)
}
case "RunContainerError":
// TODO this should retry but have a deadline, all restarts fall to CrashLoopBackOff
err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q: %v", status.Name, reason, status.State.Waiting.Message)
return RolloutErrorMaybeProgressing(err)

Expand All @@ -48,7 +52,6 @@ func TestContainerStatus(status *v1.ContainerStatus, options *config.Options) Ro
reason := status.State.Terminated.Reason
switch reason {
case "Error":
// TODO this should retry but have a deadline, all restarts fall to CrashLoopBackOff
err := MakeRolloutError(FailureProcessCrashing, "Container %q is in %q", status.Name, reason)
return RolloutErrorMaybeProgressing(err)
case "OOMKilled":
Expand Down
10 changes: 6 additions & 4 deletions pkg/status/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func TestJobStatus(wrapper client.Kubernetes, job batchv1.Job, options *config.O

aggr := Aggregator{}
for _, pod := range podList.Items {
status := TestPodStatus(&pod, options)
status := TestPodStatus(&pod, options, ResourceTypeJob)

aggr.Add(status)
if fatal := aggr.Fatal(); fatal != nil {
Expand All @@ -38,7 +38,11 @@ func JobStatus(wrapper client.Kubernetes, job *batchv1.Job, options *config.Opti
}
if condition.Type == batchv1.JobFailed && condition.Status == v1.ConditionTrue {
status := TestJobStatus(wrapper, *job, options)
aggr.Add(status)
err := status.Error
if err == nil {
err = errors.New("")
}
aggr.Add(RolloutFatal(err))
return aggr.Resolve()
}
}
Expand All @@ -47,8 +51,6 @@ func JobStatus(wrapper client.Kubernetes, job *batchv1.Job, options *config.Opti
if status.Error != nil {
if status.MaybeContinue {
aggr.Add(RolloutErrorProgressing(status.Error))
} else {
aggr.Add(status)
}
} else {
err := errors.New("")
Expand Down
13 changes: 9 additions & 4 deletions pkg/status/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus {
func TestPodStatus(pod *v1.Pod, options *config.Options, resourceType ResourceType) RolloutStatus {
aggr := Aggregator{}
for _, initStatus := range pod.Status.InitContainerStatuses {
status := TestContainerStatus(&initStatus, options)
status := TestContainerStatus(&initStatus, options, resourceType)
if status.Error != nil {
if !status.Continue {
if re, ok := status.Error.(RolloutError); ok {
Expand All @@ -30,7 +30,7 @@ func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus {
}

for _, containerStatus := range pod.Status.ContainerStatuses {
status := TestContainerStatus(&containerStatus, options)
status := TestContainerStatus(&containerStatus, options, resourceType)
if status.Error != nil {
if !status.Continue {
if re, ok := status.Error.(RolloutError); ok {
Expand All @@ -56,9 +56,14 @@ func TestPodStatus(pod *v1.Pod, options *config.Options) RolloutStatus {
for _, condition := range pod.Status.Conditions {
// fail if the pod is pending for X time
if condition.Type == v1.PodScheduled {
deadline := metav1.NewTime(time.Now().Add(time.Minute * -3)) // TODO configure
err := MakeRolloutError(FailureScheduling, "Failed to schedule pod: %v", condition.Message)

if options.PendingDeadLineSeconds == -1 {
return RolloutErrorProgressing(err)
}

deadline := metav1.NewTime(time.Now().Add(time.Second * -1 * time.Duration(options.PendingDeadLineSeconds))) // TODO configure

if condition.LastTransitionTime.Before(&deadline) {
return RolloutFatal(err)
} else {
Expand Down
2 changes: 1 addition & 1 deletion pkg/status/replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func TestReplicaSetStatus(wrapper client.Kubernetes, replicaSet appsv1.ReplicaSe

aggr := Aggregator{}
for _, pod := range podList.Items {
status := TestPodStatus(&pod, options)
status := TestPodStatus(&pod, options, ResourceTypeDeployment)
aggr.Add(status)
if fatal := aggr.Fatal(); fatal != nil {
return *fatal
Expand Down
9 changes: 9 additions & 0 deletions pkg/status/resource_type.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package status

// ResourceType tells the pod and container status checks which kind of
// workload owns the pod being inspected.
type ResourceType int

// Known resource types, numbered from zero in declaration order.
const (
	// ResourceTypeDeployment is passed for pods listed via a ReplicaSet.
	ResourceTypeDeployment = ResourceType(iota)
	// ResourceTypeStatefulSet is passed for pods listed via a StatefulSet.
	ResourceTypeStatefulSet
	// ResourceTypeJob is passed for pods listed via a Job.
	ResourceTypeJob
)
2 changes: 1 addition & 1 deletion pkg/status/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func TestStatefulSetStatus(wrapper client.Kubernetes, statefulSet appsv1.Statefu

aggr := Aggregator{}
for _, pod := range podList.Items {
status := TestPodStatus(&pod, options)
status := TestPodStatus(&pod, options, ResourceTypeStatefulSet)
aggr.Add(status)
if fatal := aggr.Fatal(); fatal != nil {
return *fatal
Expand Down
44 changes: 44 additions & 0 deletions tests/poc-manifests/dep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# PoC Deployment for exercising rollout-status retry handling.
# The container sleeps 2s, then succeeds when `random 0 2` yields 2 (it marks
# /tmp/success and blocks forever); otherwise it prints "Failed!!!" and
# exits 1 so the pod restarts.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dep
  labels:
    type: dep # matched by `-selector type=dep` in test-dep.sh
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test
  template:
    metadata:
      labels:
        app: test
    spec:
      containers:
        - name: dep-pi
          image: perl
          command:
            - /bin/bash
            - -c
            - |
              random() {
                min="$1"
                max="$2"
                range=$((max - min + 1))
                rand=$((min + (RANDOM % range)))
                echo "$rand"
              }
              sleep 2
              if [ "`random 0 2`" = "2" ]; then
                touch /tmp/success
                tail -f /dev/null
              fi
              echo Failed!!!
              exit 1
          # Ready once the success marker exists. `touch` creates a regular
          # file, so the probe must use -f; -S only matches sockets and would
          # never pass.
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - test -f /tmp/success
34 changes: 34 additions & 0 deletions tests/poc-manifests/job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# PoC Job for exercising rollout-status against Jobs.
# The container sleeps 1s, then exits 0 when `random 1 5` yields 5 and
# exits 1 otherwise.
apiVersion: batch/v1
kind: Job
metadata:
  name: job
  labels:
    type: job # matched by `-selector type=job` in test-job.sh
spec:
  # Retry budget for failed pods (see the Kubernetes Job documentation).
  backoffLimit: 2
  template:
    spec:
      containers:
        - name: job-pi
          image: perl
          command:
            - /bin/bash
            - -c
            - |
              sleep 1
              random() {
              min="$1"
              max="$2"
              range=$((max - min + 1))
              rand=$((min + (RANDOM % range)))
              echo "$rand"
              }
              if [ "`random 1 5`" = "5" ]; then
              echo Success!!!
              exit 0
              fi
              echo Failed!!!
              exit 1
      restartPolicy: Never
46 changes: 46 additions & 0 deletions tests/poc-manifests/sts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# PoC StatefulSet for exercising rollout-status retry handling.
# The container sleeps 2s, then succeeds when `random 0 2` yields 2 (it marks
# /tmp/success and blocks forever); otherwise it prints "Failed!!!" and
# exits 1 so the pod restarts.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: sts
  labels:
    type: sts # matched by `-selector type=sts` in test-sts.sh
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: sts-rand
          image: perl
          command:
            - /bin/bash
            - -c
            - |
              random() {
                min="$1"
                max="$2"
                range=$((max - min + 1))
                rand=$((min + (RANDOM % range)))
                echo "$rand"
              }
              sleep 2
              if [ "`random 0 2`" = "2" ]; then
                touch /tmp/success
                tail -f /dev/null
              fi
              echo Failed!!!
              exit 1
          # Ready once the success marker exists. `touch` creates a regular
          # file, so the probe must use -f; -S only matches sockets and would
          # never pass.
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - test -f /tmp/success
      restartPolicy: Always
  serviceName: nginx
5 changes: 5 additions & 0 deletions tests/poc-manifests/test-dep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# PoC check for Deployment handling: recreates the Deployment from dep.yaml,
# then runs rollout-status on it with a retry limit of 2.

# dep.yaml and ../../rollout-status are relative to this script, so run from
# its directory regardless of where the caller invoked it.
cd "$(dirname "$0")" || exit 1

# Drop any previous instance; a failed delete is deliberately ignored.
kubectl delete -f dep.yaml || true
# Without the resource there is nothing to watch, so stop if apply fails.
kubectl apply -f dep.yaml || exit 1
# Presumably gives the cluster time to create the first pod before polling.
sleep 5
../../rollout-status -selector type=dep -retry-limit 2
5 changes: 5 additions & 0 deletions tests/poc-manifests/test-job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# PoC check for Job handling: recreates the Job from job.yaml, then runs
# rollout-status on it with default options.

# job.yaml and ../../rollout-status are relative to this script, so run from
# its directory regardless of where the caller invoked it.
cd "$(dirname "$0")" || exit 1

# Drop any previous instance; a failed delete is deliberately ignored.
kubectl delete -f job.yaml || true
# Without the resource there is nothing to watch, so stop if apply fails.
kubectl apply -f job.yaml || exit 1
# Presumably gives the cluster time to create the first pod before polling.
sleep 5
../../rollout-status -selector type=job
5 changes: 5 additions & 0 deletions tests/poc-manifests/test-sts.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# PoC check for StatefulSet handling: recreates the StatefulSet from sts.yaml,
# then runs rollout-status on it with a retry limit of 3.

# sts.yaml and ../../rollout-status are relative to this script, so run from
# its directory regardless of where the caller invoked it.
cd "$(dirname "$0")" || exit 1

# Drop any previous instance; a failed delete is deliberately ignored.
kubectl delete -f sts.yaml || true
# Without the resource there is nothing to watch, so stop if apply fails.
kubectl apply -f sts.yaml || exit 1
# Presumably gives the cluster time to create the first pod before polling.
sleep 5
../../rollout-status -selector type=sts -retry-limit 3

0 comments on commit f3d88e2

Please sign in to comment.