Skip to content

Commit

Permalink
address feedback: 1. add grace period for resyn. 2. read non cache fi…
Browse files Browse the repository at this point in the history
…rst. 3. watch update comp event
  • Loading branch information
chihshenghuang committed Nov 27, 2024
1 parent 2a04893 commit 90e68bb
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 34 deletions.
20 changes: 11 additions & 9 deletions cmd/eno-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,15 @@ func main() {
func runController() error {
ctx := ctrl.SetupSignalHandler()
var (
debugLogging bool
watchdogThres time.Duration
rolloutCooldown time.Duration
dispatchCooldown time.Duration
taintToleration string
nodeAffinity string
concurrencyLimit int
synconf = &synthesis.Config{}
debugLogging bool
watchdogThres time.Duration
rolloutCooldown time.Duration
dispatchCooldown time.Duration
selfHealingGracePeriod time.Duration
taintToleration string
nodeAffinity string
concurrencyLimit int
synconf = &synthesis.Config{}

mgrOpts = &manager.Options{
Rest: ctrl.GetConfigOrDie(),
Expand All @@ -74,6 +75,7 @@ func runController() error {
flag.StringVar(&taintToleration, "taint-toleration", "", "Node NoSchedule taint to be tolerated by synthesizer pods e.g. taintKey=taintValue to match on value, just taintKey to match on presence of the taint")
flag.StringVar(&nodeAffinity, "node-affinity", "", "Synthesizer pods will be created with this required node affinity expression e.g. labelKey=labelValue to match on value, just labelKey to match on presence of the label")
flag.IntVar(&concurrencyLimit, "concurrency-limit", 10, "Upper bound on active syntheses. This effectively limits the number of running synthesizer pods spawned by Eno.")
flag.DurationVar(&selfHealingGracePeriod, "self-healing-grace-period", time.Minute*5, "How long before the self-healing controllers are allowed to start the resynthesis process.")
mgrOpts.Bind(flag.CommandLine)
flag.Parse()

Expand Down Expand Up @@ -114,7 +116,7 @@ func runController() error {
return fmt.Errorf("constructing rollout controller: %w", err)
}

err = selfhealing.NewSliceController(mgr)
err = selfhealing.NewSliceController(mgr, selfHealingGracePeriod)
if err != nil {
return fmt.Errorf("constructing self healing resource slice controller: %w", err)
}
Expand Down
2 changes: 2 additions & 0 deletions internal/controllers/reconciliation/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/Azure/eno/internal/controllers/liveness"
"github.com/Azure/eno/internal/controllers/replication"
"github.com/Azure/eno/internal/controllers/rollout"
"github.com/Azure/eno/internal/controllers/selfhealing"
"github.com/Azure/eno/internal/controllers/synthesis"
"github.com/Azure/eno/internal/controllers/watch"
"github.com/Azure/eno/internal/controllers/watchdog"
Expand All @@ -32,6 +33,7 @@ func registerControllers(t *testing.T, mgr *testutil.Manager) {
require.NoError(t, flowcontrol.NewSynthesisConcurrencyLimiter(mgr.Manager, 10, 0))
require.NoError(t, liveness.NewNamespaceController(mgr.Manager, 3, time.Second))
require.NoError(t, watch.NewController(mgr.Manager))
require.NoError(t, selfhealing.NewSliceController(mgr.Manager, time.Minute*5))
}

func writeGenericComposition(t *testing.T, client client.Client) (*apiv1.Synthesizer, *apiv1.Composition) {
Expand Down
91 changes: 66 additions & 25 deletions internal/controllers/selfhealing/slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,23 @@ import (
"github.com/go-logr/logr"
)

const (
PodTimeout = time.Minute * 2
)

// sliceController check if the resource slice is deleted but it is still present in the composition current synthesis status.
// If yes, it will update the composition PendingResynthesis status to trigger re-synthesis process.
type sliceController struct {
client client.Client
noCacheReader client.Reader
client client.Client
noCacheReader client.Reader
selfHealingGracePeriod time.Duration
}

func NewSliceController(mgr ctrl.Manager) error {
func NewSliceController(mgr ctrl.Manager, selfHealingGracePeriod time.Duration) error {
s := &sliceController{
client: mgr.GetClient(),
noCacheReader: mgr.GetAPIReader(),
client: mgr.GetClient(),
noCacheReader: mgr.GetAPIReader(),
selfHealingGracePeriod: selfHealingGracePeriod,
}
return ctrl.NewControllerManagedBy(mgr).
Named("selfHealingSliceController").
Watches(&apiv1.Composition{}, newCompositionHandler()).
Watches(&apiv1.ResourceSlice{}, newSliceHandler()).
WithLogConstructor(manager.NewLogConstructor(mgr, "selfHealingSliceController")).
Complete(s)
Expand Down Expand Up @@ -68,31 +67,38 @@ func (s *sliceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
// Skip if the composition is not eligible for resynthesis, and check the synthesis result later
if notEligibleForResynthesis(comp) {
logger.V(1).Info("not eligible for resynthesis when checking the missing resource slice")
// Use default pod timeout
if syn.Spec.PodTimeout == nil {
return ctrl.Result{Requeue: true, RequeueAfter: PodTimeout}, nil
// Use default grace period
if comp.Status.CurrentSynthesis == nil || comp.Status.CurrentSynthesis.Synthesized == nil {
return ctrl.Result{Requeue: true, RequeueAfter: s.selfHealingGracePeriod}, nil
}
return ctrl.Result{Requeue: true, RequeueAfter: syn.Spec.PodTimeout.Duration}, nil
return ctrl.Result{Requeue: true, RequeueAfter: s.selfHealingGracePeriod - time.Since(comp.Status.CurrentSynthesis.Synthesized.Time)}, nil
}

// Check if any resource slice referenced by the composition is deleted.
for _, ref := range comp.Status.CurrentSynthesis.ResourceSlices {
slice := &apiv1.ResourceSlice{}
slice.Name = ref.Name
slice.Namespace = comp.Namespace
err := s.noCacheReader.Get(ctx, client.ObjectKeyFromObject(slice), slice)
err := s.client.Get(ctx, client.ObjectKeyFromObject(slice), slice)
if errors.IsNotFound(err) {
// The resource slice should not be deleted if it is still referenced by the composition.
// Update the composition status to trigger re-synthesis process.
logger.V(1).Info("found missing resource slice and start resynthesis", "compositionName", comp.Name, "resourceSliceName", ref.Name)
comp.Status.PendingResynthesis = ptr.To(metav1.Now())
err = s.client.Status().Update(ctx, comp)
// Ensure the resource slice is missing by checking the resource from api-server
isMissing, err := s.isSliceMissing(ctx, slice)
if err != nil {
return ctrl.Result{}, fmt.Errorf("updating composition pending resynthesis: %w", err)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}

if isMissing {
// The resource slice should not be deleted if it is still referenced by the composition.
// Update the composition status to trigger re-synthesis process.
logger.V(1).Info("found missing resource slice and start resynthesis", "compositionName", comp.Name, "resourceSliceName", ref.Name)
comp.Status.PendingResynthesis = ptr.To(metav1.Now())
err := s.client.Status().Update(ctx, comp)
if err != nil {
return ctrl.Result{}, fmt.Errorf("updating composition pending resynthesis: %w", err)
}
return ctrl.Result{}, nil
}
}
if err != nil {
return ctrl.Result{}, fmt.Errorf("getting resource slice: %w", err)
}
Expand All @@ -101,20 +107,55 @@ func (s *sliceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}

// Compositions aren't eligible to receive an updated synthesizer when:
func (s *sliceController) isSliceMissing(ctx context.Context, slice *apiv1.ResourceSlice) (bool, error) {
err := s.noCacheReader.Get(ctx, client.ObjectKeyFromObject(slice), slice)
if errors.IsNotFound(err) {
return true, nil
}
if err != nil {
return false, fmt.Errorf("getting resource slice from non cache reader: %w", err)
}

return false, nil
}

// Compositions aren't eligible to trigger resynthesis when:
// - They haven't ever been synthesized (they'll use the latest inputs anyway)
// - They are currently being synthesized or deleted
// - They are already pending resynthesis
//
// Composition should be resynthesized when the referenced resource slice is deleted even
// the composition should ignore side effect.
// Composition should be resynthesized when the referenced resource slice is deleted
func notEligibleForResynthesis(comp *apiv1.Composition) bool {
return comp.Status.CurrentSynthesis == nil ||
comp.Status.CurrentSynthesis.Synthesized == nil ||
comp.DeletionTimestamp != nil ||
comp.Status.PendingResynthesis != nil
}

func newCompositionHandler() handler.EventHandler {
apply := func(ctx context.Context, rli workqueue.RateLimitingInterface, obj client.Object) {
comp, ok := obj.(*apiv1.Composition)
if !ok {
logr.FromContextOrDiscard(ctx).V(0).Info("unexpected type given to newCompositionHandler")
return
}

rli.Add(reconcile.Request{NamespacedName: types.NamespacedName{Namespace: comp.Namespace, Name: comp.Name}})
}
return &handler.Funcs{
CreateFunc: func(ctx context.Context, ce event.CreateEvent, rli workqueue.RateLimitingInterface) {
// No need to handle composition creation event for now
},
UpdateFunc: func(ctx context.Context, ue event.UpdateEvent, rli workqueue.RateLimitingInterface) {
// Check the updated composition only
apply(ctx, rli, ue.ObjectNew)
},
DeleteFunc: func(ctx context.Context, de event.DeleteEvent, rli workqueue.RateLimitingInterface) {
// No need to handle composition deletion event for now
},
}
}

func newSliceHandler() handler.EventHandler {
apply := func(rli workqueue.RateLimitingInterface, obj client.Object) {
owner := metav1.GetControllerOf(obj)
Expand Down

0 comments on commit 90e68bb

Please sign in to comment.