Skip to content

Commit

Permalink
Allow symphonies to progress when their namespace is missing (#182)
Browse files Browse the repository at this point in the history
This fixes a weird edge case in which namespaces are ripped out from
under symphony resources.

In this case all updates will fail - the only option is to briefly
recreate the namespace, delete it, and remove any finalizers while it is
deleting.

Co-authored-by: Jordan Olshevski <[email protected]>
  • Loading branch information
jveski and Jordan Olshevski authored Jul 30, 2024
1 parent c312bcd commit 808406e
Show file tree
Hide file tree
Showing 6 changed files with 319 additions and 4 deletions.
6 changes: 6 additions & 0 deletions cmd/eno-reconciler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"

"github.com/Azure/eno/internal/controllers/aggregation"
"github.com/Azure/eno/internal/controllers/liveness"
"github.com/Azure/eno/internal/controllers/reconciliation"
"github.com/Azure/eno/internal/controllers/synthesis"
"github.com/Azure/eno/internal/flowcontrol"
Expand Down Expand Up @@ -95,6 +96,11 @@ func run() error {
return fmt.Errorf("constructing resource slice cleanup controller: %w", err)
}

err = liveness.NewNamespaceController(mgr)
if err != nil {
return fmt.Errorf("constructing namespace liveness controller: %w", err)
}

remoteConfig := mgr.GetConfig()
if remoteKubeconfigFile != "" {
if remoteConfig, err = k8s.GetRESTConfig(remoteKubeconfigFile); err != nil {
Expand Down
10 changes: 10 additions & 0 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,13 @@ spec:
name: a-different-input
namespace: default
```

### Deletion Behavior

Symphonies are high-level resources designed to always converge, even in the face of rare split-brain states.

Force deleting namespaces leaves resources in a strange state in which they exist but cannot be updated.
Symphonies recover from this state by carefully recreating the namespace and forcibly removing internal finalizers.
Because of this it's possible that managed resources will not be cleaned up if they exist outside of the orphaned namespace.
Worst case, managed resources might be recreated by the Eno reconciler if it outpaces kube-controller-manager's namespace controller.
Beware of this if you plan to use symphony resources to manage resources outside of the symphony's own namespace.
130 changes: 130 additions & 0 deletions internal/controllers/liveness/namespace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package liveness

import (
"context"
"fmt"
"time"

apiv1 "github.com/Azure/eno/api/v1"
"github.com/Azure/eno/internal/manager"
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// namespaceController is responsible for progressing symphony deletion when its namespace is forcibly deleted.
// This can happen if clients get tricky with the /finalize API.
// Without this controller Eno resources will never be deleted since updates to remove the finalizers will fail.
type namespaceController struct {
client client.Client
}

func NewNamespaceController(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&corev1.Namespace{}).
Watches(&apiv1.Symphony{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) []reconcile.Request {
return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: o.GetNamespace()}}}
})).
WithLogConstructor(manager.NewLogConstructor(mgr, "namespaceController")).
Complete(&namespaceController{
client: mgr.GetClient(),
})
}

func (c *namespaceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
ns := &corev1.Namespace{}
err := c.client.Get(ctx, req.NamespacedName, ns)
if client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, fmt.Errorf("getting namespace: %w", err)
}

const annoKey = "eno.azure.io/recreation-reason"
const annoValue = "OrphanedResources"

// Delete the recreated namespace immediately.
// Its finalizers will keep it around until we've had time to remove our finalizers.
logger := logr.FromContextOrDiscard(ctx).WithValues("symphonyNamespace", ns.Name)
if ns.Annotations != nil && ns.Annotations[annoKey] == annoValue {
if ns.DeletionTimestamp != nil {
return ctrl.Result{}, c.cleanup(ctx, req.Name)
}
err := c.client.Delete(ctx, ns)
if err != nil {
return ctrl.Result{}, fmt.Errorf("deleting namespace: %w", err)
}
logger.V(0).Info("deleting recreated namespace")
return ctrl.Result{}, nil
}
if err == nil { // important that this is the GET error
return ctrl.Result{}, nil // namespace exists, nothing to do
}

// Nothing to do if the namespace doesn't have any symphonies
list := &apiv1.SymphonyList{}
err = c.client.List(ctx, list, client.InNamespace(req.Name))
if err != nil {
return ctrl.Result{}, fmt.Errorf("listing symphonies: %w", err)
}
if len(list.Items) == 0 {
return ctrl.Result{}, nil // no orphaned resources, nothing to do
}
if time.Since(mostRecentCreation(list)) < time.Second {
return ctrl.Result{RequeueAfter: time.Second}, nil // namespace probably just hasn't hit the cache yet
}

// Recreate the namespace briefly so we can remove the finalizers.
// Any updates (including finalizer updates) will fail if the namespace doesn't exist.
ns.Name = req.Name
ns.Annotations = map[string]string{annoKey: annoValue}
err = c.client.Create(ctx, ns)
if err != nil {
return ctrl.Result{}, fmt.Errorf("creating namespace: %w", err)
}
logger.V(0).Info("recreating missing namespace to free orphaned symphony")
return ctrl.Result{}, nil
}

const removeFinalizersPatch = `[{ "op": "remove", "path": "/metadata/finalizers" }]`

func (c *namespaceController) cleanup(ctx context.Context, ns string) error {
logger := logr.FromContextOrDiscard(ctx).WithValues("symphonyNamespace", ns)
logger.V(0).Info("deleting any remaining symphonies in orphaned namespace")
err := c.client.DeleteAllOf(ctx, &apiv1.Symphony{}, client.InNamespace(ns))
if err != nil {
return fmt.Errorf("deleting symphonies: %w", err)
}

list := &apiv1.ResourceSliceList{}
err = c.client.List(ctx, list, client.InNamespace(ns))
if err != nil {
return fmt.Errorf("listing resource slices: %w", err)
}

for _, item := range list.Items {
if len(item.Finalizers) == 0 {
continue
}
err = c.client.Patch(ctx, &item, client.RawPatch(types.JSONPatchType, []byte(removeFinalizersPatch)))
if err != nil {
return fmt.Errorf("removing finalizers from resource slice %q: %w", item.Name, err)
}
logger := logger.WithValues("resourceSliceName", item.Name)
logger.V(0).Info("forcibly removed finalizers")
}

return nil
}

func mostRecentCreation(list *apiv1.SymphonyList) time.Time {
var max time.Time
for _, item := range list.Items {
if item.CreationTimestamp.After(max) {
max = item.CreationTimestamp.Time
}
}
return max
}
86 changes: 86 additions & 0 deletions internal/controllers/liveness/namespace_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package liveness

import (
"testing"

apiv1 "github.com/Azure/eno/api/v1"
"github.com/Azure/eno/internal/testutil"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/client-go/rest"
"k8s.io/client-go/util/retry"
"k8s.io/kubectl/pkg/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func TestMissingNamespace(t *testing.T) {
ctx := testutil.NewContext(t)
mgr := testutil.NewManager(t)
cli := mgr.GetClient()

require.NoError(t, NewNamespaceController(mgr.Manager))
mgr.Start(t)

ns := &corev1.Namespace{}
ns.Name = "test"
require.NoError(t, cli.Create(ctx, ns))

sym := &apiv1.Symphony{}
sym.Name = "test-symphony"
sym.Namespace = ns.Name
sym.Finalizers = []string{"eno.azure.io/cleanup"} // this would normally be set by another controller
require.NoError(t, cli.Create(ctx, sym))

// Force delete the namespace
require.NoError(t, cli.Delete(ctx, ns))

conf := rest.CopyConfig(mgr.RestConfig)
conf.GroupVersion = &schema.GroupVersion{Version: "v1"}
conf.NegotiatedSerializer = serializer.WithoutConversionCodecFactory{CodecFactory: scheme.Codecs}
rc, err := rest.RESTClientFor(conf)
require.NoError(t, err)

err = retry.RetryOnConflict(testutil.Backoff, func() error {
cli.Get(ctx, client.ObjectKeyFromObject(ns), ns)
ns.Spec.Finalizers = nil

_, err = rc.Put().
AbsPath("/api/v1/namespaces", ns.Name, "/finalize").
Body(ns).
Do(ctx).Raw()
return err
})
require.NoError(t, err)

// The namespace should be completely gone
testutil.Eventually(t, func() bool {
return errors.IsNotFound(cli.Get(ctx, client.ObjectKeyFromObject(ns), ns))
})

// But we should still be able to eventually remove the symphony's finalizer
require.NoError(t, cli.Delete(ctx, sym))
testutil.Eventually(t, func() bool {
if sym.Finalizers != nil {
sym.Finalizers = nil
err = cli.Update(ctx, sym)
if err != nil {
t.Logf("error while removing finalizer from symphony: %s", err)
}
}

missing := errors.IsNotFound(cli.Get(ctx, client.ObjectKeyFromObject(sym), sym))
if !missing {
t.Logf("symphony still exists")
}
return missing
})

// Namespace should end up being deleted
testutil.Eventually(t, func() bool {
cli.Get(ctx, client.ObjectKeyFromObject(ns), ns)
return ns.DeletionTimestamp != nil
})
}
2 changes: 2 additions & 0 deletions internal/controllers/reconciliation/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
apiv1 "github.com/Azure/eno/api/v1"
"github.com/Azure/eno/internal/controllers/aggregation"
"github.com/Azure/eno/internal/controllers/flowcontrol"
"github.com/Azure/eno/internal/controllers/liveness"
"github.com/Azure/eno/internal/controllers/replication"
"github.com/Azure/eno/internal/controllers/rollout"
"github.com/Azure/eno/internal/controllers/synthesis"
Expand All @@ -28,6 +29,7 @@ func registerControllers(t *testing.T, mgr *testutil.Manager) {
require.NoError(t, rollout.NewController(mgr.Manager, time.Millisecond))
require.NoError(t, rollout.NewSynthesizerController(mgr.Manager))
require.NoError(t, flowcontrol.NewSynthesisConcurrencyLimiter(mgr.Manager, 10, 0))
require.NoError(t, liveness.NewNamespaceController(mgr.Manager))
}

func writeGenericComposition(t *testing.T, client client.Client) (*apiv1.Synthesizer, *apiv1.Composition) {
Expand Down
89 changes: 85 additions & 4 deletions internal/controllers/reconciliation/symphony_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ import (
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/client-go/rest"
"k8s.io/client-go/util/retry"
"k8s.io/kubectl/pkg/scheme"
ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -22,9 +25,6 @@ import (

// TestSymphonyIntegration proves that a basic symphony creation/deletion workflow works.
func TestSymphonyIntegration(t *testing.T) {
scheme := runtime.NewScheme()
corev1.SchemeBuilder.AddToScheme(scheme)

ctx := testutil.NewContext(t)
mgr := testutil.NewManager(t, testutil.WithCompositionNamespace(ctrlcache.AllNamespaces))
upstream := mgr.GetClient()
Expand Down Expand Up @@ -155,3 +155,84 @@ func TestSymphonyIntegration(t *testing.T) {
require.NoError(t, err)
assert.Len(t, comps.Items, 0)
}

// TestOrphanedNamespaceSymphony covers a special behavior of symphonies: resilience against missing namespaces.
// Ripping the namespace out from under a symphony and its associated compositions/resource slices shouldn't prevent them from being deleted.
func TestOrphanedNamespaceSymphony(t *testing.T) {
ctx := testutil.NewContext(t)
mgr := testutil.NewManager(t, testutil.WithCompositionNamespace(ctrlcache.AllNamespaces))
upstream := mgr.GetClient()

// Create test namespace.
ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "test"}}
require.NoError(t, upstream.Create(ctx, ns))

registerControllers(t, mgr)
testutil.WithFakeExecutor(t, mgr, func(ctx context.Context, s *apiv1.Synthesizer, input *krmv1.ResourceList) (*krmv1.ResourceList, error) {
output := &krmv1.ResourceList{}
output.Items = []*unstructured.Unstructured{{
Object: map[string]any{
"apiVersion": "v1",
"kind": "ConfigMap",
"metadata": map[string]string{
"name": "test-obj",
"namespace": "default",
},
},
}}
return output, nil
})

// Test subject
setupTestSubject(t, mgr)
mgr.Start(t)

syn := &apiv1.Synthesizer{}
syn.Name = "test-syn"
syn.Spec.Image = "anything"
require.NoError(t, upstream.Create(ctx, syn))

symph := &apiv1.Symphony{}
symph.Name = "test-comp"
symph.Namespace = ns.Name
symph.Spec.Variations = []apiv1.Variation{{Synthesizer: apiv1.SynthesizerRef{Name: syn.Name}}}
require.NoError(t, upstream.Create(ctx, symph))

testutil.Eventually(t, func() bool {
upstream.Get(ctx, client.ObjectKeyFromObject(symph), symph)
return symph.Status.Reconciled != nil && symph.Status.ObservedGeneration == symph.Generation
})

// Establish that resource slices did at one point exist
slices := &apiv1.ResourceSliceList{}
require.NoError(t, upstream.List(ctx, slices))
require.True(t, len(slices.Items) > 0)

// Force delete the namespace
require.NoError(t, upstream.Delete(ctx, ns))

conf := rest.CopyConfig(mgr.RestConfig)
conf.GroupVersion = &schema.GroupVersion{Version: "v1"}
conf.NegotiatedSerializer = serializer.WithoutConversionCodecFactory{CodecFactory: scheme.Codecs}
rc, err := rest.RESTClientFor(conf)
require.NoError(t, err)

err = retry.RetryOnConflict(testutil.Backoff, func() error {
upstream.Get(ctx, client.ObjectKeyFromObject(ns), ns)
ns.Spec.Finalizers = nil

_, err = rc.Put().
AbsPath("/api/v1/namespaces", ns.Name, "/finalize").
Body(ns).
Do(ctx).Raw()
return err
})
require.NoError(t, err)

// The symphony and its associated resources should eventually be cleaned up
testutil.Eventually(t, func() bool {
return errors.IsNotFound(upstream.Get(ctx, client.ObjectKeyFromObject(symph), symph))
})
require.NoError(t, upstream.List(ctx, slices))
require.Empty(t, slices.Items)
}

0 comments on commit 808406e

Please sign in to comment.