Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Status API support with BackOffLimit API #21

Merged
merged 4 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 42 additions & 7 deletions apis/apps/v1alpha1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 27 additions & 4 deletions apis/apps/v1alpha1/sidekick_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ const (
PodSelectionPolicyLast LeaderSelectionPolicy = "Last"
)

// SideKickPhase is the string-typed phase reported in SidekickStatus.Phase.
// The kubebuilder marker below restricts the accepted values to the four
// constants declared in this file.
// +kubebuilder:validation:Enum=Pending;Current;Failed;Succeeded
type SideKickPhase string

// Allowed values of SideKickPhase.
// NOTE(review): SidekickPhaseSucceeded is spelled "Sidekick" (lowercase k)
// while the sibling constants use "SideKick"; confirm whether this is
// intentional before relying on a naming pattern.
const (
// SideKickPhaseCurrent is the "Current" phase value.
SideKickPhaseCurrent SideKickPhase = "Current"
// SideKickPhaseFailed is the "Failed" phase value.
SideKickPhaseFailed SideKickPhase = "Failed"
// SidekickPhaseSucceeded is the "Succeeded" phase value.
SidekickPhaseSucceeded SideKickPhase = "Succeeded"
// SideKickPhasePending is the "Pending" phase value.
SideKickPhasePending SideKickPhase = "Pending"
)

type LeaderSpec struct {
Name string `json:"name,omitempty"`

Expand Down Expand Up @@ -100,8 +110,17 @@ type SidekickSpec struct {
// One of Always, OnFailure, Never.
// Default to Always.
// More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy
// If your sidekick has restartPolicy = "OnFailure", keep in mind that your Pod running the Job will be
// terminated once the job backoff limit has been reached. This can make debugging the Job's executable
// more difficult. We suggest setting restartPolicy = "Never" when debugging the Job or using a logging
// system to ensure output from failed Jobs is not lost inadvertently.
// +kubebuilder:validation:Enum=Never;Always;OnFailure
// +optional
RestartPolicy core.RestartPolicy `json:"restartPolicy,omitempty"`
// Specifies the number of retries before marking this job failed.
// +optional
BackoffLimit *int32 `json:"backoffLimit,omitempty"`

// Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request.
// Value must be non-negative integer. The value zero indicates stop immediately via
// the kill signal (no opportunity to shut down).
Expand Down Expand Up @@ -519,24 +538,28 @@ type VolumeMount struct {
}

type LeaderStatus struct {
Name string `json:"name"`
Name string `json:"name,omitempty"`
}

// SidekickStatus defines the observed state of Sidekick
type SidekickStatus struct {
Leader LeaderStatus `json:"leader"`
Pod core.PodPhase `json:"pod"`
Leader LeaderStatus `json:"leader,omitempty"`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need review here. Removing this field as required.

Pod core.PodPhase `json:"pod,omitempty"`

// Specifies the current phase of the sidekick CR
// +optional
Phase string `json:"phase,omitempty"`
Phase SideKickPhase `json:"phase,omitempty"`
// observedGeneration is the most recent generation observed for this resource. It corresponds to the
// resource's generation, which is updated on mutation by the API Server.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Conditions applied to the database, such as approval or denial.
// +optional
Conditions []kmapi.Condition `json:"conditions,omitempty"`
// ContainerRestartCountsPerPod stores the sum of all container restart counts of a pod
ContainerRestartCountsPerPod map[string]int32 `json:"containerRestartCountsPerPod,omitempty"`
// FailureCount tracks the total number of failed pods
FailureCount map[string]bool `json:"failureCount,omitempty"`
}

// +genclient
Expand Down
19 changes: 19 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 33 additions & 6 deletions crds/apps.k8s.appscode.com_sidekicks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1068,6 +1068,11 @@ spec:
description: AutomountServiceAccountToken indicates whether a service
account token should be automatically mounted.
type: boolean
backoffLimit:
description: Specifies the number of retries before marking this job
failed.
format: int32
type: integer
containers:
description: List of containers belonging to the pod. Containers cannot
currently be added or removed. There must be at least one container
Expand Down Expand Up @@ -5399,7 +5404,17 @@ spec:
type: array
restartPolicy:
description: 'Restart policy for all containers within the pod. One
of Always, OnFailure, Never. Default to Always. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy'
of Always, OnFailure, Never. Default to Always. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy
If your sidekick has restartPolicy = "OnFailure", keep in mind that
your Pod running the Job will be terminated once the job backoff
limit has been reached. This can make debugging the Job''s executable
more difficult. We suggest setting restartPolicy = "Never" when
debugging the Job or using a logging system to ensure output from
failed Jobs is not lost inadvertently.'
enum:
- Never
- Always
- OnFailure
type: string
runtimeClassName:
description: 'RuntimeClassName refers to a RuntimeClass object in
Expand Down Expand Up @@ -7659,12 +7674,22 @@ spec:
- type
type: object
type: array
containerRestartCountsPerPod:
additionalProperties:
format: int32
type: integer
description: ContainerRestartCountsPerPod stores the sum of all container
restart counts of a pod
type: object
failureCount:
additionalProperties:
type: boolean
description: FailureCount tracks the total number of failed pods
type: object
leader:
properties:
name:
type: string
required:
- name
type: object
observedGeneration:
description: observedGeneration is the most recent generation observed
Expand All @@ -7674,14 +7699,16 @@ spec:
type: integer
phase:
description: Specifies the current phase of the sidekick CR
enum:
- Pending
- Current
- Failed
- Succeeded
type: string
pod:
description: PodPhase is a label for the condition of a pod at the
current time.
type: string
required:
- leader
- pod
type: object
type: object
served: true
Expand Down
123 changes: 123 additions & 0 deletions pkg/controllers/apps/pod.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
Copyright AppsCode Inc. and Contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package apps

import (
"context"

appsv1alpha1 "kubeops.dev/sidekick/apis/apps/v1alpha1"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
cu "kmodules.xyz/client-go/client"
core_util "kmodules.xyz/client-go/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
// deletionInitiatorKey is the pod annotation key written by
// setDeletionInitiatorAnnotation before the operator deletes a pod.
// removePodFinalizerIfMarkedForDeletion treats a terminating pod that lacks
// this key as having been deleted externally.
deletionInitiatorKey = "sidekick.appscode.com/deletion-initiator"
// deletionInitiatesBySidekickOperator is the value stored under
// deletionInitiatorKey when the deletion is issued through deletePod.
deletionInitiatesBySidekickOperator = "sidekick-operator"
)

// removePodFinalizerIfMarkedForDeletion looks up the pod named by req and, when
// that pod is terminating, releases the sidekick finalizer from it.
//
// If the terminating pod does not carry the deletionInitiatorKey annotation the
// deletion is treated as external: the pod UID is recorded in the owning
// Sidekick's Status.FailureCount, unless the Sidekick is missing or is itself
// being deleted.
//
// It returns true only when a finalizer was actually removed. A pod that does
// not exist, is not terminating, or has no sidekick finalizer yields
// (false, nil).
func (r *SidekickReconciler) removePodFinalizerIfMarkedForDeletion(ctx context.Context, req ctrl.Request) (bool, error) {
	pod := &corev1.Pod{}
	if err := r.Get(ctx, req.NamespacedName, pod); err != nil {
		if errors.IsNotFound(err) {
			return false, nil
		}
		return false, err
	}
	if pod.DeletionTimestamp == nil {
		// Pod is alive; nothing to clean up.
		return false, nil
	}

	// A missing deletion-initiator annotation means something other than the
	// operator asked for this pod to go away, so count it as a failure.
	if _, byOperator := pod.ObjectMeta.Annotations[deletionInitiatorKey]; !byOperator {
		sidekick := &appsv1alpha1.Sidekick{}
		getErr := r.Get(ctx, req.NamespacedName, sidekick)
		switch {
		case getErr == nil:
			// Skip the bookkeeping when the sidekick itself is being deleted.
			if sidekick.DeletionTimestamp == nil {
				if sidekick.Status.FailureCount == nil {
					sidekick.Status.FailureCount = make(map[string]bool)
				}
				sidekick.Status.FailureCount[string(pod.GetUID())] = true
				if updErr := r.updateSidekickStatus(ctx, sidekick); updErr != nil && !errors.IsNotFound(updErr) {
					return false, updErr
				}
			}
		case !errors.IsNotFound(getErr):
			return false, getErr
		}
	}

	// The finalizer was attached when the pod was created; drop it so the
	// API server can finish deleting the pod.
	if !core_util.HasFinalizer(pod.ObjectMeta, getFinalizerName()) {
		return false, nil
	}
	if err := r.removePodFinalizer(ctx, pod); err != nil {
		return false, err
	}
	return true, nil
}

// removePodFinalizer patches the given pod so that the finalizer returned by
// getFinalizerName is no longer present. A NotFound error from the patch is
// swallowed; any other error is returned to the caller.
func (r *SidekickReconciler) removePodFinalizer(ctx context.Context, pod *corev1.Pod) error {
	stripFinalizer := func(obj client.Object, _ bool) client.Object {
		target := obj.(*corev1.Pod)
		target.ObjectMeta = core_util.RemoveFinalizer(target.ObjectMeta, getFinalizerName())
		return target
	}

	if _, err := cu.CreateOrPatch(ctx, r.Client, pod, stripFinalizer); err != nil {
		return client.IgnoreNotFound(err)
	}
	return nil
}

// deletePod first annotates the pod with the deletion-initiator marker and
// then issues the delete. If the annotation step fails the pod is not deleted
// and that error is returned.
func (r *SidekickReconciler) deletePod(ctx context.Context, pod *corev1.Pod) error {
	if markErr := r.setDeletionInitiatorAnnotation(ctx, pod); markErr != nil {
		return markErr
	}
	return r.Delete(ctx, pod)
}

// setDeletionInitiatorAnnotation patches the pod so that its annotations
// contain deletionInitiatorKey = deletionInitiatesBySidekickOperator.
//
// The annotation map is allocated when the pod has none: assigning into a nil
// map panics in Go, and a pod is not guaranteed to carry any annotations.
// Any error from the patch call is returned unchanged.
func (r *SidekickReconciler) setDeletionInitiatorAnnotation(ctx context.Context, pod *corev1.Pod) error {
	_, err := cu.CreateOrPatch(ctx, r.Client, pod,
		func(in client.Object, createOp bool) client.Object {
			po := in.(*corev1.Pod)
			if po.ObjectMeta.Annotations == nil {
				// Guard against a nil map before writing the marker.
				po.ObjectMeta.Annotations = make(map[string]string, 1)
			}
			po.ObjectMeta.Annotations[deletionInitiatorKey] = deletionInitiatesBySidekickOperator
			return po
		},
	)
	return err
}

// getContainerRestartCounts returns the sum of RestartCount over every entry in
// the pod's ContainerStatuses and InitContainerStatuses.
func getContainerRestartCounts(pod *corev1.Pod) int32 {
	var total int32

	regular := pod.Status.ContainerStatuses
	for i := range regular {
		total += regular[i].RestartCount
	}

	initial := pod.Status.InitContainerStatuses
	for i := range initial {
		total += initial[i].RestartCount
	}

	return total
}
Loading
Loading