-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: effective tls policies reconciler #927
base: main
Are you sure you want to change the base?
Changes from all commits
6cb9d26
332f7f8
19a55d0
8f5fc77
8f8c0d1
530a34d
c076078
bc3f71b
59ab5ed
b75f287
bd7fc23
6c7b249
d8d388d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,396 @@ | ||
package controllers | ||
|
||
import ( | ||
"context" | ||
"reflect" | ||
"sync" | ||
|
||
certmanv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" | ||
"github.com/kuadrant/policy-machinery/controller" | ||
"github.com/kuadrant/policy-machinery/machinery" | ||
"github.com/samber/lo" | ||
corev1 "k8s.io/api/core/v1" | ||
apierrors "k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/apimachinery/pkg/util/validation/field" | ||
"k8s.io/client-go/dynamic" | ||
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" | ||
crlog "sigs.k8s.io/controller-runtime/pkg/log" | ||
gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1" | ||
|
||
kuadrantv1alpha1 "github.com/kuadrant/kuadrant-operator/api/v1alpha1" | ||
"github.com/kuadrant/kuadrant-operator/pkg/library/utils" | ||
) | ||
|
||
type EffectiveTLSPoliciesReconciler struct { | ||
client *dynamic.DynamicClient | ||
scheme *runtime.Scheme | ||
} | ||
|
||
func NewEffectiveTLSPoliciesReconciler(client *dynamic.DynamicClient, scheme *runtime.Scheme) *EffectiveTLSPoliciesReconciler { | ||
return &EffectiveTLSPoliciesReconciler{client: client, scheme: scheme} | ||
} | ||
|
||
func (t *EffectiveTLSPoliciesReconciler) Subscription() *controller.Subscription { | ||
return &controller.Subscription{ | ||
Events: []controller.ResourceEventMatcher{ | ||
{Kind: &machinery.GatewayGroupKind}, | ||
{Kind: &kuadrantv1alpha1.TLSPolicyGroupKind}, | ||
{Kind: &CertManagerCertificateKind}, | ||
}, | ||
ReconcileFunc: t.Reconcile, | ||
} | ||
} | ||
|
||
//+kubebuilder:rbac:groups=kuadrant.io,resources=tlspolicies,verbs=get;list;watch;update;patch;delete | ||
//+kubebuilder:rbac:groups=kuadrant.io,resources=tlspolicies/status,verbs=get;update;patch | ||
//+kubebuilder:rbac:groups=kuadrant.io,resources=tlspolicies/finalizers,verbs=update | ||
//+kubebuilder:rbac:groups="cert-manager.io",resources=issuers,verbs=get;list;watch; | ||
//+kubebuilder:rbac:groups="cert-manager.io",resources=clusterissuers,verbs=get;list;watch; | ||
//+kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch | ||
//+kubebuilder:rbac:groups="cert-manager.io",resources=certificates,verbs=get;list;watch;create;update;patch;delete | ||
|
||
func (t *EffectiveTLSPoliciesReconciler) Reconcile(ctx context.Context, _ []controller.ResourceEvent, topology *machinery.Topology, _ error, s *sync.Map) error { | ||
logger := controller.LoggerFromContext(ctx).WithName("EffectiveTLSPoliciesReconciler").WithName("Reconcile") | ||
|
||
listeners := topology.Targetables().Items(func(object machinery.Object) bool { | ||
_, ok := object.(*machinery.Listener) | ||
return ok | ||
}) | ||
|
||
// Get all certs in topology for comparison with expected certs to determine orphaned certs later | ||
certs := lo.FilterMap(topology.Objects().Items(), func(item machinery.Object, index int) (*certmanv1.Certificate, bool) { | ||
r, ok := item.(*controller.RuntimeObject) | ||
if !ok { | ||
return nil, false | ||
} | ||
c, ok := r.Object.(*certmanv1.Certificate) | ||
if !ok { | ||
return nil, false | ||
} | ||
|
||
// Only want certs owned by TLSPolicies | ||
if isObjectOwnedByGroupKind(c, kuadrantv1alpha1.TLSPolicyGroupKind) { | ||
return c, true | ||
} | ||
|
||
return nil, false | ||
}) | ||
|
||
var expectedCerts []*certmanv1.Certificate | ||
|
||
for _, listener := range listeners { | ||
l := listener.(*machinery.Listener) | ||
|
||
policies := l.Policies() | ||
if len(policies) == 0 { | ||
policies = l.Gateway.Policies() | ||
} | ||
|
||
for _, p := range policies { | ||
policy := p.(*kuadrantv1alpha1.TLSPolicy) | ||
|
||
// Policy is deleted | ||
if policy.DeletionTimestamp != nil { | ||
logger.V(1).Info("policy is marked for deletion, nothing to do", "name", policy.Name, "namespace", policy.Namespace, "uid", policy.GetUID()) | ||
continue | ||
} | ||
|
||
// Policy is not valid | ||
isValid, _ := IsTLSPolicyValid(ctx, s, policy) | ||
if !isValid { | ||
logger.V(1).Info("deleting certs for invalid policy", "name", policy.Name, "namespace", policy.Namespace, "uid", policy.GetUID()) | ||
if err := t.deleteCertificatesForPolicy(ctx, topology, policy); err != nil { | ||
logger.Error(err, "unable to delete certs for invalid policy", "name", policy.Name, "namespace", policy.Namespace, "uid", policy.GetUID()) | ||
} | ||
continue | ||
} | ||
|
||
// Policy is valid | ||
// Need to use Gateway as listener hosts can be merged into a singular cert if using the same cert reference | ||
expectedCertificates := expectedCertificatesForGateway(ctx, l.Gateway.Gateway, policy) | ||
|
||
for _, cert := range expectedCertificates { | ||
resource := t.client.Resource(CertManagerCertificatesResource).Namespace(cert.GetNamespace()) | ||
|
||
// Check is cert already in topology | ||
objs := topology.Objects().Children(l) | ||
obj, ok := lo.Find(objs, func(o machinery.Object) bool { | ||
return o.GroupVersionKind().GroupKind() == CertManagerCertificateKind && o.GetNamespace() == cert.GetNamespace() && o.GetName() == cert.GetName() | ||
}) | ||
|
||
// Create | ||
if !ok { | ||
expectedCerts = append(expectedCerts, cert) | ||
if err := controllerutil.SetControllerReference(policy, cert, t.scheme); err != nil { | ||
logger.Error(err, "failed to set owner reference on certificate", "name", policy.Name, "namespace", policy.Namespace, "uid", policy.GetUID()) | ||
continue | ||
} | ||
|
||
un, err := controller.Destruct(cert) | ||
if err != nil { | ||
logger.Error(err, "unable to destruct cert") | ||
continue | ||
} | ||
_, err = resource.Create(ctx, un, metav1.CreateOptions{}) | ||
if err != nil && !apierrors.IsAlreadyExists(err) { | ||
logger.Error(err, "unable to create certificate", "name", policy.Name, "namespace", policy.Namespace, "uid", policy.GetUID()) | ||
} | ||
|
||
continue | ||
} | ||
|
||
// Update | ||
tCert := obj.(*controller.RuntimeObject).Object.(*certmanv1.Certificate) | ||
expectedCerts = append(expectedCerts, tCert) | ||
if reflect.DeepEqual(tCert.Spec, cert.Spec) { | ||
logger.V(1).Info("skipping update, cert specs are the same, nothing to do") | ||
continue | ||
} | ||
|
||
tCert.Spec = cert.Spec | ||
un, err := controller.Destruct(tCert) | ||
if err != nil { | ||
logger.Error(err, "unable to destruct cert") | ||
continue | ||
} | ||
_, err = resource.Update(ctx, un, metav1.UpdateOptions{}) | ||
if err != nil { | ||
logger.Error(err, "unable to update certificate", "policy", policy.Name) | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Clean up orphaned certs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The cleaning up of orphaned certs gives me an odd feeling, which I have yet to put my finger on. Had the same feeling with the DNS records. The question I ask find asking my self is what does the cleaning up of orphaned certs have to do with TLS polices and their reconcile. I have this feeling that this clean up should be structured as a Task in the Workflow over being part of the TLS policy reconcile. There seems to be a mixing of concerns. I understand how this mixing of concerns is ingrained in us from using the controller runtime. This currently a feeling that this is a code smell, but yet I don't have a good though on what to do about it. One thing that I don't know at time of writing is how do certs become orphaned. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this is because there is a indirect link between policies and the subresouce (certificates in this case) and so gives the expected state of the cluster. Though true, this probably can be done as a separate task, that I can look into if you feel strongly about this. A cert can be orphaned when a gateway listener is removed, or is the target ref of a tls policy is changed to another gateway. In both of these cases the cert(s) is created and will be orphaned since the policy still exists and still valid |
||
uniqueExpectedCerts := lo.UniqBy(expectedCerts, func(item *certmanv1.Certificate) types.UID { | ||
return item.GetUID() | ||
}) | ||
orphanedCerts, _ := lo.Difference(certs, uniqueExpectedCerts) | ||
for _, orphanedCert := range orphanedCerts { | ||
resource := t.client.Resource(CertManagerCertificatesResource).Namespace(orphanedCert.GetNamespace()) | ||
if err := resource.Delete(ctx, orphanedCert.Name, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) { | ||
logger.Error(err, "unable to delete orphaned certificate", "name", orphanedCert.GetName(), "namespace", orphanedCert.GetNamespace(), "uid", orphanedCert.GetUID()) | ||
continue | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (t *EffectiveTLSPoliciesReconciler) deleteCertificatesForPolicy(ctx context.Context, topology *machinery.Topology, p *kuadrantv1alpha1.TLSPolicy) error { | ||
certs := lo.FilterMap(topology.Objects().Items(), func(item machinery.Object, index int) (*certmanv1.Certificate, bool) { | ||
r, ok := item.(*controller.RuntimeObject) | ||
if !ok { | ||
return nil, false | ||
} | ||
c, ok := r.Object.(*certmanv1.Certificate) | ||
if !ok { | ||
return nil, false | ||
} | ||
|
||
// Only want certs owned by this policy | ||
return c, utils.IsOwnedBy(c, p) | ||
}) | ||
|
||
for _, cert := range certs { | ||
resource := t.client.Resource(CertManagerCertificatesResource).Namespace(cert.GetNamespace()) | ||
|
||
if err := resource.Delete(ctx, cert.Name, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) { | ||
return err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want this returning an error at this stage? There may be more certs to delete and if there is an error on the first one. We would never try to delete the rest. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, I'll log the error here instead 👍 |
||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func expectedCertificatesForGateway(ctx context.Context, gateway *gatewayapiv1.Gateway, tlsPolicy *kuadrantv1alpha1.TLSPolicy) []*certmanv1.Certificate { | ||
log := crlog.FromContext(ctx) | ||
|
||
tlsHosts := make(map[corev1.ObjectReference][]string) | ||
for i, l := range gateway.Spec.Listeners { | ||
hostname := "*" | ||
if l.Hostname != nil { | ||
hostname = string(*l.Hostname) | ||
} | ||
|
||
err := validateGatewayListenerBlock(field.NewPath("spec", "listeners").Index(i), l, gateway).ToAggregate() | ||
if err != nil { | ||
log.Info("Skipped a listener block: " + err.Error()) | ||
continue | ||
} | ||
|
||
for _, certRef := range l.TLS.CertificateRefs { | ||
secretRef := corev1.ObjectReference{ | ||
Name: string(certRef.Name), | ||
} | ||
if certRef.Namespace != nil { | ||
secretRef.Namespace = string(*certRef.Namespace) | ||
} else { | ||
secretRef.Namespace = gateway.GetNamespace() | ||
} | ||
// Gateway API hostname explicitly disallows IP addresses, so this | ||
// should be OK. | ||
tlsHosts[secretRef] = append(tlsHosts[secretRef], hostname) | ||
} | ||
} | ||
|
||
certs := make([]*certmanv1.Certificate, 0, len(tlsHosts)) | ||
for secretRef, hosts := range tlsHosts { | ||
certs = append(certs, buildCertManagerCertificate(tlsPolicy, secretRef, hosts)) | ||
} | ||
return certs | ||
} | ||
|
||
func expectedCertificatesForListener(l *machinery.Listener, tlsPolicy *kuadrantv1alpha1.TLSPolicy) []*certmanv1.Certificate { | ||
tlsHosts := make(map[corev1.ObjectReference][]string) | ||
|
||
hostname := "*" | ||
if l.Hostname != nil { | ||
hostname = string(*l.Hostname) | ||
} | ||
|
||
for _, certRef := range l.TLS.CertificateRefs { | ||
secretRef := corev1.ObjectReference{ | ||
Name: string(certRef.Name), | ||
} | ||
if certRef.Namespace != nil { | ||
secretRef.Namespace = string(*certRef.Namespace) | ||
} else { | ||
secretRef.Namespace = l.GetNamespace() | ||
} | ||
// Gateway API hostname explicitly disallows IP addresses, so this | ||
// should be OK. | ||
tlsHosts[secretRef] = append(tlsHosts[secretRef], hostname) | ||
} | ||
|
||
certs := make([]*certmanv1.Certificate, 0, len(tlsHosts)) | ||
for secretRef, hosts := range tlsHosts { | ||
certs = append(certs, buildCertManagerCertificate(tlsPolicy, secretRef, hosts)) | ||
} | ||
return certs | ||
} | ||
|
||
func buildCertManagerCertificate(tlsPolicy *kuadrantv1alpha1.TLSPolicy, secretRef corev1.ObjectReference, hosts []string) *certmanv1.Certificate { | ||
crt := &certmanv1.Certificate{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: secretRef.Name, | ||
Namespace: secretRef.Namespace, | ||
}, | ||
TypeMeta: metav1.TypeMeta{ | ||
Kind: certmanv1.CertificateKind, | ||
APIVersion: certmanv1.SchemeGroupVersion.String(), | ||
}, | ||
Spec: certmanv1.CertificateSpec{ | ||
DNSNames: hosts, | ||
SecretName: secretRef.Name, | ||
IssuerRef: tlsPolicy.Spec.IssuerRef, | ||
Usages: certmanv1.DefaultKeyUsages(), | ||
}, | ||
} | ||
translatePolicy(crt, tlsPolicy.Spec) | ||
return crt | ||
} | ||
|
||
// https://cert-manager.io/docs/usage/gateway/#supported-annotations | ||
// Helper functions largely based on cert manager https://github.com/cert-manager/cert-manager/blob/master/pkg/controller/certificate-shim/sync.go | ||
|
||
func validateGatewayListenerBlock(path *field.Path, l gatewayapiv1.Listener, ingLike metav1.Object) field.ErrorList { | ||
var errs field.ErrorList | ||
|
||
if l.Hostname == nil || *l.Hostname == "" { | ||
errs = append(errs, field.Required(path.Child("hostname"), "the hostname cannot be empty")) | ||
} | ||
|
||
if l.TLS == nil { | ||
errs = append(errs, field.Required(path.Child("tls"), "the TLS block cannot be empty")) | ||
return errs | ||
} | ||
|
||
if len(l.TLS.CertificateRefs) == 0 { | ||
errs = append(errs, field.Required(path.Child("tls").Child("certificateRef"), | ||
"listener has no certificateRefs")) | ||
} else { | ||
// check that each CertificateRef is valid | ||
for i, secretRef := range l.TLS.CertificateRefs { | ||
if *secretRef.Group != "core" && *secretRef.Group != "" { | ||
errs = append(errs, field.NotSupported(path.Child("tls").Child("certificateRef").Index(i).Child("group"), | ||
*secretRef.Group, []string{"core", ""})) | ||
} | ||
|
||
if *secretRef.Kind != "Secret" && *secretRef.Kind != "" { | ||
errs = append(errs, field.NotSupported(path.Child("tls").Child("certificateRef").Index(i).Child("kind"), | ||
*secretRef.Kind, []string{"Secret", ""})) | ||
} | ||
|
||
if secretRef.Namespace != nil && string(*secretRef.Namespace) != ingLike.GetNamespace() { | ||
errs = append(errs, field.Invalid(path.Child("tls").Child("certificateRef").Index(i).Child("namespace"), | ||
*secretRef.Namespace, "cross-namespace secret references are not allowed in listeners")) | ||
} | ||
} | ||
} | ||
|
||
if l.TLS.Mode == nil { | ||
errs = append(errs, field.Required(path.Child("tls").Child("mode"), | ||
"the mode field is required")) | ||
} else { | ||
if *l.TLS.Mode != gatewayapiv1.TLSModeTerminate { | ||
errs = append(errs, field.NotSupported(path.Child("tls").Child("mode"), | ||
*l.TLS.Mode, []string{string(gatewayapiv1.TLSModeTerminate)})) | ||
} | ||
} | ||
|
||
return errs | ||
} | ||
|
||
// translatePolicy updates the Certificate spec using the TLSPolicy spec | ||
// converted from https://github.com/cert-manager/cert-manager/blob/master/pkg/controller/certificate-shim/helper.go#L63 | ||
func translatePolicy(crt *certmanv1.Certificate, tlsPolicy kuadrantv1alpha1.TLSPolicySpec) { | ||
if tlsPolicy.CommonName != "" { | ||
crt.Spec.CommonName = tlsPolicy.CommonName | ||
} | ||
|
||
if tlsPolicy.Duration != nil { | ||
crt.Spec.Duration = tlsPolicy.Duration | ||
} | ||
|
||
if tlsPolicy.RenewBefore != nil { | ||
crt.Spec.RenewBefore = tlsPolicy.RenewBefore | ||
} | ||
|
||
if tlsPolicy.RenewBefore != nil { | ||
crt.Spec.RenewBefore = tlsPolicy.RenewBefore | ||
} | ||
|
||
if tlsPolicy.Usages != nil { | ||
crt.Spec.Usages = tlsPolicy.Usages | ||
} | ||
|
||
if tlsPolicy.RevisionHistoryLimit != nil { | ||
crt.Spec.RevisionHistoryLimit = tlsPolicy.RevisionHistoryLimit | ||
} | ||
|
||
if tlsPolicy.PrivateKey != nil { | ||
if crt.Spec.PrivateKey == nil { | ||
crt.Spec.PrivateKey = &certmanv1.CertificatePrivateKey{} | ||
} | ||
|
||
if tlsPolicy.PrivateKey.Algorithm != "" { | ||
crt.Spec.PrivateKey.Algorithm = tlsPolicy.PrivateKey.Algorithm | ||
} | ||
|
||
if tlsPolicy.PrivateKey.Encoding != "" { | ||
crt.Spec.PrivateKey.Encoding = tlsPolicy.PrivateKey.Encoding | ||
} | ||
|
||
if tlsPolicy.PrivateKey.Size != 0 { | ||
crt.Spec.PrivateKey.Size = tlsPolicy.PrivateKey.Size | ||
} | ||
|
||
if tlsPolicy.PrivateKey.RotationPolicy != "" { | ||
crt.Spec.PrivateKey.RotationPolicy = tlsPolicy.PrivateKey.RotationPolicy | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we be more positive in our checks, doing `if ok` instead of `if !ok`? I also wonder if it is better to have the more common action listed first. So if we do more creates, create should be listed first, but if we do more updates, update should be.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, but whether to check `!ok` or `ok` first also depends on whether we create or update first, so if we want to be more positive in our check here (`if ok`), this would mean update comes first 🤔 Not sure which action we expect more of. Maybe @mikenairn can answer this.