Skip to content

Commit

Permalink
feat: add a buffer period for ReplicaSet health evaluation
Browse files Browse the repository at this point in the history
  • Loading branch information
adityathebe committed Oct 18, 2024
1 parent 4d2b366 commit 41f9e05
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 8 deletions.
34 changes: 26 additions & 8 deletions pkg/health/health_replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import (
"k8s.io/apimachinery/pkg/runtime"
)

// replicaSetBufferPeriod is the duration after the creation of a ReplicaSet
// within which we deem its health to be Unknown. While a ReplicaSet is
// younger than (or exactly as old as) this period, getAppsv1ReplicaSetHealth
// returns HealthUnknown with status HealthStatusStarting before evaluating
// anything else.
const replicaSetBufferPeriod = time.Minute * 10

func getReplicaSetHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
gvk := obj.GroupVersionKind()
switch gvk {
Expand All @@ -27,10 +31,19 @@ func getReplicaSetHealth(obj *unstructured.Unstructured) (*HealthStatus, error)
}

func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, error) {
if time.Since(replicaSet.CreationTimestamp.Time) <= replicaSetBufferPeriod {
return &HealthStatus{
Health: HealthUnknown,
Status: HealthStatusStarting,
}, nil
}

var containersWaitingForReadiness []string
for _, container := range replicaSet.Spec.Template.Spec.Containers {
if container.ReadinessProbe != nil && container.ReadinessProbe.InitialDelaySeconds > 0 {
deadline := replicaSet.CreationTimestamp.Add(time.Second * time.Duration(container.ReadinessProbe.InitialDelaySeconds))
deadline := replicaSet.CreationTimestamp.Add(
time.Second * time.Duration(container.ReadinessProbe.InitialDelaySeconds),
)
if time.Now().Before(deadline) {
containersWaitingForReadiness = append(containersWaitingForReadiness, container.Name)
}
Expand All @@ -39,9 +52,12 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er

if len(containersWaitingForReadiness) > 0 {
return &HealthStatus{
Health: HealthUnknown,
Status: HealthStatusStarting,
Message: fmt.Sprintf("Container(s) %s is waiting for readiness probe", strings.Join(containersWaitingForReadiness, ",")),
Health: HealthUnknown,
Status: HealthStatusStarting,
Message: fmt.Sprintf(
"Container(s) %s is waiting for readiness probe",
strings.Join(containersWaitingForReadiness, ","),
),
}, nil
}

Expand All @@ -62,7 +78,8 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er
health = HealthUnhealthy
}

if replicaSet.Generation == replicaSet.Status.ObservedGeneration && replicaSet.Status.ReadyReplicas == *replicaSet.Spec.Replicas {
if replicaSet.Generation == replicaSet.Status.ObservedGeneration &&
replicaSet.Status.ReadyReplicas == *replicaSet.Spec.Replicas {
return &HealthStatus{
Health: health,
Status: HealthStatusRunning,
Expand All @@ -84,7 +101,6 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er
Health: health,
Status: HealthStatusScalingUp,
Message: fmt.Sprintf("%d of %d pods ready", replicaSet.Status.ReadyReplicas, *replicaSet.Spec.Replicas),
Ready: true,
}, nil
}

Expand All @@ -93,7 +109,6 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er
Health: health,
Status: HealthStatusScalingDown,
Message: fmt.Sprintf("%d pods terminating", replicaSet.Status.ReadyReplicas-*replicaSet.Spec.Replicas),
Ready: true,
}, nil
}

Expand All @@ -103,7 +118,10 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er
}, nil
}

func getAppsv1ReplicaSetCondition(status appsv1.ReplicaSetStatus, condType appsv1.ReplicaSetConditionType) *appsv1.ReplicaSetCondition {
func getAppsv1ReplicaSetCondition(
status appsv1.ReplicaSetStatus,
condType appsv1.ReplicaSetConditionType,
) *appsv1.ReplicaSetCondition {
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == condType {
Expand Down
6 changes: 6 additions & 0 deletions pkg/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,12 @@ func TestHPA(t *testing.T) {
assertAppHealth(t, "./testdata/hpa-v1-progressing-with-no-annotations.yaml", health.HealthStatusProgressing, health.HealthHealthy, false)
}

// TestReplicaSet checks that a ReplicaSet created only two minutes ago is
// reported as HealthUnknown with status HealthStatusStarting, even though the
// fixture's status shows all replicas ready.
func TestReplicaSet(t *testing.T) {
	// Rewrite the fixture's fixed creationTimestamp so that the ReplicaSet
	// appears to have been created two minutes before the test runs.
	recentCreation := time.Now().Add(-2 * time.Minute).UTC().Format("2006-01-02T15:04:05Z")
	overwrites := map[string]string{
		"2024-08-03T06:06:18Z": recentCreation,
	}

	assertAppHealthWithOverwrite(
		t,
		"./testdata/replicaset-ittools.yml",
		overwrites,
		health.HealthStatusStarting,
		health.HealthUnknown,
		false,
	)
}

func TestPod(t *testing.T) {
assertAppHealth(t, "./testdata/terminating-stuck.yaml", "TerminatingStalled", health.HealthUnhealthy, false)
assertAppHealth(t, "./testdata/terminating-namespace.yaml", "TerminatingStalled", health.HealthUnhealthy, false)
Expand Down
70 changes: 70 additions & 0 deletions pkg/health/testdata/replicaset-ittools.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# ReplicaSet fixture used by TestReplicaSet.
# It describes a ReplicaSet owned by the "ittools" Deployment whose status
# reports every desired replica as ready.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
annotations:
deployment.kubernetes.io/desired-replicas: "1"
deployment.kubernetes.io/max-replicas: "1"
deployment.kubernetes.io/revision: "2"
meta.helm.sh/release-name: ittools
meta.helm.sh/release-namespace: default
# TestReplicaSet overwrites this value with a timestamp two minutes in the past.
creationTimestamp: "2024-08-03T06:06:18Z"
generation: 52
labels:
app.kubernetes.io/component: main
app.kubernetes.io/instance: ittools
app.kubernetes.io/name: ittools
pod-template-hash: 5fbf458f49
name: ittools-5fbf458f49
namespace: default
ownerReferences:
- apiVersion: apps/v1
blockOwnerDeletion: true
controller: true
kind: Deployment
name: ittools
uid: d2beccff-8da9-42e8-8459-e7ff938b2ffd
resourceVersion: "96413911"
uid: c044b250-2445-4813-b00f-22b696c5fcf2
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/component: main
app.kubernetes.io/instance: ittools
app.kubernetes.io/name: ittools
pod-template-hash: 5fbf458f49
template:
metadata:
labels:
app.kubernetes.io/component: main
app.kubernetes.io/instance: ittools
app.kubernetes.io/name: ittools
pod-template-hash: 5fbf458f49
spec:
automountServiceAccountToken: true
# Single container; no readinessProbe is declared for it.
containers:
- image: corentinth/it-tools:latest
imagePullPolicy: Always
name: it-tools
resources:
limits:
memory: 50Mi
requests:
cpu: 25m
memory: 10Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
enableServiceLinks: false
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
# Reported state: 1 ready replica out of 1 desired, and observedGeneration
# matches metadata.generation (52).
status:
availableReplicas: 1
fullyLabeledReplicas: 1
observedGeneration: 52
readyReplicas: 1
replicas: 1

0 comments on commit 41f9e05

Please sign in to comment.