diff --git a/pkg/health/health_replicaset.go b/pkg/health/health_replicaset.go index e4caa67..bc0591b 100644 --- a/pkg/health/health_replicaset.go +++ b/pkg/health/health_replicaset.go @@ -11,8 +11,8 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -// duration after the creation of a resource -// within which we deem the health to be Unknown +// duration after the creation of a replica set +// within which we never deem the it to be unhealthy. const replicaSetBufferPeriod = time.Minute * 10 func getReplicaSetHealth(obj *unstructured.Unstructured) (*HealthStatus, error) { @@ -31,36 +31,23 @@ func getReplicaSetHealth(obj *unstructured.Unstructured) (*HealthStatus, error) } func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, error) { - if time.Since(replicaSet.CreationTimestamp.Time) <= replicaSetBufferPeriod { - return &HealthStatus{ - Health: HealthUnknown, - Status: HealthStatusStarting, - }, nil - } + isWithinBufferPeriod := replicaSet.CreationTimestamp.Add(replicaSetBufferPeriod).After(time.Now()) var containersWaitingForReadiness []string for _, container := range replicaSet.Spec.Template.Spec.Containers { - bufferPeriod := replicaSet.CreationTimestamp.Add(replicaSetBufferPeriod) - if container.ReadinessProbe != nil && container.ReadinessProbe.InitialDelaySeconds > 0 { - bufferPeriod = replicaSet.CreationTimestamp.Add( - time.Second * time.Duration(container.ReadinessProbe.InitialDelaySeconds), - ) - } - - if time.Now().Before(bufferPeriod) { - containersWaitingForReadiness = append(containersWaitingForReadiness, container.Name) + deadline := replicaSet.CreationTimestamp.Add(time.Second * time.Duration(container.ReadinessProbe.InitialDelaySeconds)) + if time.Now().Before(deadline) { + containersWaitingForReadiness = append(containersWaitingForReadiness, container.Name) + } } } if len(containersWaitingForReadiness) > 0 { return &HealthStatus{ - Health: HealthUnknown, - Status: HealthStatusStarting, - Message: fmt.Sprintf( - "Container(s) %s is waiting for readiness probe", - strings.Join(containersWaitingForReadiness, ","), - ), + Health: HealthUnknown, + Status: HealthStatusStarting, + Message: fmt.Sprintf("Container(s) %s is waiting for readiness probe", strings.Join(containersWaitingForReadiness, ",")), }, nil } @@ -81,8 +68,12 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er health = HealthUnhealthy } - if replicaSet.Generation == replicaSet.Status.ObservedGeneration && - replicaSet.Status.ReadyReplicas == *replicaSet.Spec.Replicas { + if (health == HealthUnhealthy || health == HealthWarning) && isWithinBufferPeriod { + // within the buffer period, we don't mark a ReplicaSet as unhealthy + health = HealthUnknown + } + + if replicaSet.Generation == replicaSet.Status.ObservedGeneration && replicaSet.Status.ReadyReplicas == *replicaSet.Spec.Replicas { return &HealthStatus{ Health: health, Status: HealthStatusRunning, @@ -121,10 +112,7 @@ func getAppsv1ReplicaSetHealth(replicaSet *appsv1.ReplicaSet) (*HealthStatus, er }, nil } -func getAppsv1ReplicaSetCondition( - status appsv1.ReplicaSetStatus, - condType appsv1.ReplicaSetConditionType, -) *appsv1.ReplicaSetCondition { +func getAppsv1ReplicaSetCondition(status appsv1.ReplicaSetStatus, condType appsv1.ReplicaSetConditionType) *appsv1.ReplicaSetCondition { for i := range status.Conditions { c := status.Conditions[i] if c.Type == condType { diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index ba4bcae..3da44e8 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -166,7 +166,12 @@ func TestHPA(t *testing.T) { func TestReplicaSet(t *testing.T) { assertAppHealthWithOverwrite(t, "./testdata/replicaset-ittools.yml", map[string]string{ "2024-08-03T06:06:18Z": time.Now().Add(-time.Minute * 2).UTC().Format("2006-01-02T15:04:05Z"), - }, health.HealthStatusStarting, health.HealthUnknown, false) + }, health.HealthStatusRunning, health.HealthHealthy, true) + + assertAppHealthWithOverwrite(t, "./testdata/replicaset-unhealthy-pods.yaml", map[string]string{ + "2024-10-21T11:20:19Z": time.Now().Add(-time.Minute * 2).UTC().Format("2006-01-02T15:04:05Z"), + }, health.HealthStatusScalingUp, health.HealthUnknown, false) + } func TestPod(t *testing.T) { diff --git a/pkg/health/testdata/replicaset-unhealthy-pods.yaml b/pkg/health/testdata/replicaset-unhealthy-pods.yaml new file mode 100644 index 0000000..42bb8e1 --- /dev/null +++ b/pkg/health/testdata/replicaset-unhealthy-pods.yaml @@ -0,0 +1,53 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + uid: f6579017-448f-425a-9645-ea3c93700948 + name: failing-deployment-866585899d + labels: + app: failing-app + pod-template-hash: 866585899d + namespace: default + annotations: + deployment.kubernetes.io/revision: "1" + deployment.kubernetes.io/max-replicas: "2" + deployment.kubernetes.io/desired-replicas: "1" + ownerReferences: + - uid: 1ab20b2b-e2c8-4e85-b7b6-5709ba594c0d + kind: Deployment + name: failing-deployment + apiVersion: apps/v1 + controller: true + blockOwnerDeletion: true + creationTimestamp: 2024-10-21T11:20:19Z +spec: + replicas: 1 + selector: + matchLabels: + app: failing-app + pod-template-hash: 866585899d + template: + spec: + dnsPolicy: ClusterFirst + containers: + - args: + - -c + - sleep 5 && exit 1 + name: failing-container + image: busybox + command: + - /bin/sh + resources: {} + imagePullPolicy: Always + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + metadata: + labels: + app: failing-app + pod-template-hash: 866585899d +status: + replicas: 1 + fullyLabeledReplicas: 1