Skip to content

Commit

Permalink
Add Alertmanager controller (#201)
Browse files Browse the repository at this point in the history
Reconciles the Alertmanager secret created by the observability-operator Helm chart and loads the configuration and templates from the secret into Mimir Alertmanager.
It also watches the Mimir Alertmanager pod and re-queues events to the controller when the pod is restarted, so the configuration is reloaded.

Predicates are being used to filter the secret and pods being watched to only act on Alertmanager related resources.
Finalizers are not used by this controller as the configuration is never deleted, only updated.

---------

Co-authored-by: Taylor Bot <[email protected]>
Co-authored-by: Quentin Bisson <[email protected]>
  • Loading branch information
3 people authored Dec 16, 2024
1 parent fbfd0f1 commit b16ca41
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Add Alertmanager controller

## [0.10.1] - 2024-12-12

### Fixed
Expand Down
3 changes: 3 additions & 0 deletions helm/observability-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,15 @@ spec:
- --management-cluster-pipeline={{ $.Values.managementCluster.pipeline }}
- --management-cluster-region={{ $.Values.managementCluster.region }}
# Monitoring configuration
- --alertmanager-enabled={{ $.Values.alerting.enabled }}
- --alertmanager-secret-name={{ include "alertmanager-secret.name" . }}
- --alertmanager-url={{ $.Values.alerting.alertmanagerURL }}
- --monitoring-enabled={{ $.Values.monitoring.enabled }}
- --monitoring-agent={{ $.Values.monitoring.agent }}
- --monitoring-sharding-scale-up-series-count={{ $.Values.monitoring.sharding.scaleUpSeriesCount }}
- --monitoring-sharding-scale-down-percentage={{ $.Values.monitoring.sharding.scaleDownPercentage }}
- --monitoring-wal-truncate-frequency={{ $.Values.monitoring.wal.truncateFrequency }}
- --operator-namespace={{ include "resource.default.namespace" . }}
{{- if .Values.monitoring.prometheusVersion }}
- --prometheus-version={{ $.Values.monitoring.prometheusVersion }}
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions helm/observability-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ managementCluster:
region: region

alerting:
enabled: false
alertmanagerURL: ""
grafanaAddress: ""
proxyURL: ""
Expand Down
95 changes: 95 additions & 0 deletions internal/controller/alertmanager_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package controller

import (
"context"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/pkg/errors"

"github.com/giantswarm/observability-operator/internal/controller/predicates"
"github.com/giantswarm/observability-operator/pkg/alertmanager"
"github.com/giantswarm/observability-operator/pkg/config"
)

// AlertmanagerReconciler reconciles the Alertmanager secret created by the observability-operator Helm chart
// and configures the Alertmanager instance with the configuration stored in the secret.
// This controller does not make use of finalizers, as the configuration is not removed from Alertmanager when the secret is deleted.
type AlertmanagerReconciler struct {
// client is the Kubernetes client used to fetch the secret being reconciled.
client client.Client

// alertmanagerService is called with the fetched secret to configure Alertmanager.
alertmanagerService alertmanager.Service
}

// SetupAlertmanagerReconciler registers with mgr a controller that reconciles the Alertmanager secret.
//
// The controller's primary resource is v1.Secret, narrowed by the Alertmanager secret predicate.
// It additionally watches v1.Pod, narrowed by the Alertmanager pod predicate; every pod event
// that passes the predicate is mapped to a reconcile request for the Alertmanager secret.
//
// It returns the error reported by the controller builder, if any.
func SetupAlertmanagerReconciler(mgr ctrl.Manager, conf config.Config) error {
	reconciler := &AlertmanagerReconciler{
		client:              mgr.GetClient(),
		alertmanagerService: alertmanager.New(conf),
	}

	// Only the Alertmanager secret created by the observability-operator Helm chart is reconciled.
	onlyAlertmanagerSecret := builder.WithPredicates(predicates.NewAlertmanagerSecretPredicate(conf))

	// Only the Mimir Alertmanager pod triggers a requeue of that secret.
	onlyAlertmanagerPod := builder.WithPredicates(predicates.NewAlertmanagerPodPredicate())
	requeueSecretOnPodEvent := podEventHandler(conf)

	controllerBuilder := ctrl.NewControllerManagedBy(mgr).
		For(&v1.Secret{}, onlyAlertmanagerSecret).
		Watches(&v1.Pod{}, requeueSecretOnPodEvent, onlyAlertmanagerPod)

	return controllerBuilder.Complete(reconciler)
}

// podEventHandler builds an event handler that maps any received object to a single
// reconcile request targeting the Alertmanager secret.
// The target is fixed because, for now, there is only one Alertmanager secret to reconcile:
// its name and namespace are read from conf.
func podEventHandler(conf config.Config) handler.EventHandler {
	secretKey := types.NamespacedName{
		Namespace: conf.OperatorNamespace,
		Name:      conf.Monitoring.AlertmanagerSecretName,
	}

	// The triggering object is ignored: the request is the same whatever it is.
	toSecretRequest := func(_ context.Context, _ client.Object) []reconcile.Request {
		return []reconcile.Request{{NamespacedName: secretKey}}
	}

	return handler.EnqueueRequestsFromMapFunc(toSecretRequest)
}

// Reconcile fetches the secret referenced by req and hands it to the Alertmanager
// service so that the configuration it contains is applied.
//
// It returns an empty result and a nil error when:
//   - the secret does not exist (nothing to configure, and retrying would not help),
//   - the secret is being deleted (configuration is intentionally left in place),
//   - the configuration was applied successfully.
//
// Any other failure to read the secret or to configure Alertmanager is returned
// (with a stack trace attached) so that the request is retried.
func (r AlertmanagerReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	logger.Info("Started reconciling")

	// Retrieve the secret being reconciled
	secret := &v1.Secret{}
	if err := r.client.Get(ctx, req.NamespacedName, secret); err != nil {
		// Pod events always enqueue the secret key, whether or not the secret exists,
		// and the secret may also have been deleted since the request was queued.
		// A NotFound error is therefore not a failure: returning it would only make
		// the request be retried with backoff until the secret shows up.
		// errors.WithStack returns nil when given a nil error.
		return ctrl.Result{}, errors.WithStack(client.IgnoreNotFound(err))
	}

	if !secret.DeletionTimestamp.IsZero() {
		// Nothing to do if the secret is being deleted
		// Configuration is not removed from Alertmanager when the secret is deleted.
		return ctrl.Result{}, nil
	}

	if err := r.alertmanagerService.Configure(ctx, secret); err != nil {
		return ctrl.Result{}, errors.WithStack(err)
	}

	logger.Info("Finished reconciling")

	return ctrl.Result{}, nil
}
78 changes: 78 additions & 0 deletions internal/controller/predicates/alertmanager_predicates.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package predicates

import (
v1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/giantswarm/observability-operator/pkg/config"
)

// NewAlertmanagerSecretPredicate returns a predicate that lets through only the Alertmanager secret
// created by the observability-operator Helm chart.
//
// An object is accepted when it is a *v1.Secret that is not being deleted, whose name and namespace
// match conf.Monitoring.AlertmanagerSecretName and conf.OperatorNamespace, and which carries the label
// app.kubernetes.io/name=observability-operator. Everything else is rejected.
func NewAlertmanagerSecretPredicate(conf config.Config) predicate.Predicate {
	return predicate.NewPredicateFuncs(func(obj client.Object) bool {
		// The comma-ok assertion also rejects a nil object.
		secret, isSecret := obj.(*v1.Secret)
		if !isSecret {
			return false
		}

		// Secrets being deleted are ignored.
		if !secret.DeletionTimestamp.IsZero() {
			return false
		}

		if secret.GetName() != conf.Monitoring.AlertmanagerSecretName {
			return false
		}

		if secret.GetNamespace() != conf.OperatorNamespace {
			return false
		}

		// Reading from a nil label map yields the empty string, which never matches.
		return secret.GetLabels()["app.kubernetes.io/name"] == "observability-operator"
	})
}

// Values used by NewAlertmanagerPodPredicate to recognize the Mimir Alertmanager pod.
const (
// mimirNamespace is the namespace the pod must be in.
mimirNamespace = "mimir"
// mimirInstance is the expected value of the pod's app.kubernetes.io/instance label.
mimirInstance = "mimir"
// mimirAlertmanagerComponent is the expected value of the pod's app.kubernetes.io/component label.
mimirAlertmanagerComponent = "alertmanager"
)

// NewAlertmanagerPodPredicate returns a predicate that lets through only the Mimir Alertmanager pod.
//
// An object is accepted when it is a *v1.Pod that is not being deleted, lives in the Mimir namespace,
// carries the expected app.kubernetes.io/component and app.kubernetes.io/instance labels, and is
// reported ready by isPodReady. Everything else is rejected.
func NewAlertmanagerPodPredicate() predicate.Predicate {
	return predicate.NewPredicateFuncs(func(obj client.Object) bool {
		// The comma-ok assertion also rejects a nil object.
		pod, isPod := obj.(*v1.Pod)
		if !isPod {
			return false
		}

		// Pods being deleted are ignored.
		if !pod.DeletionTimestamp.IsZero() {
			return false
		}

		if pod.GetNamespace() != mimirNamespace {
			return false
		}

		// Reading from a nil label map yields the empty string, which never matches.
		podLabels := pod.GetLabels()
		if podLabels["app.kubernetes.io/component"] != mimirAlertmanagerComponent {
			return false
		}

		if podLabels["app.kubernetes.io/instance"] != mimirInstance {
			return false
		}

		// Readiness is checked last, and only for pods that matched everything above.
		return isPodReady(pod)
	})
}
15 changes: 15 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ func main() {
"If set the metrics endpoint is served securely")
flag.BoolVar(&conf.EnableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.StringVar(&conf.OperatorNamespace, "operator-namespace", "",
"The namespace where the observability-operator is running.")

// Management cluster configuration flags.
flag.StringVar(&conf.ManagementCluster.BaseDomain, "management-cluster-base-domain", "",
Expand All @@ -90,6 +92,10 @@ func main() {
"The region of the management cluster.")

// Monitoring configuration flags.
flag.BoolVar(&conf.Monitoring.AlertmanagerEnabled, "alertmanager-enabled", false,
"Enable Alertmanager controller.")
flag.StringVar(&conf.Monitoring.AlertmanagerSecretName, "alertmanager-secret-name", "",
"The name of the secret containing the Alertmanager configuration.")
flag.StringVar(&conf.Monitoring.AlertmanagerURL, "alertmanager-url", "",
"The URL of the Alertmanager API.")
flag.StringVar(&conf.Monitoring.MonitoringAgent, "monitoring-agent", commonmonitoring.MonitoringAgentAlloy,
Expand Down Expand Up @@ -184,6 +190,15 @@ func main() {
setupLog.Error(err, "unable to setup controller", "controller", "GrafanaOrganizationReconciler")
os.Exit(1)
}

if conf.Monitoring.AlertmanagerEnabled {
// Setup controller for Alertmanager
err = controller.SetupAlertmanagerReconciler(mgr, conf)
if err != nil {
setupLog.Error(err, "unable to setup controller", "controller", "AlertmanagerReconciler")
os.Exit(1)
}
}
//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ type Config struct {
ProbeAddr string
SecureMetrics bool
EnableHTTP2 bool
OperatorNamespace string

ManagementCluster common.ManagementCluster

Expand Down
4 changes: 3 additions & 1 deletion pkg/monitoring/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ const MonitoringLabel = "giantswarm.io/monitoring"
type Config struct {
Enabled bool

AlertmanagerURL string
AlertmanagerSecretName string
AlertmanagerURL string
AlertmanagerEnabled bool

MonitoringAgent string
DefaultShardingStrategy sharding.Strategy
Expand Down

0 comments on commit b16ca41

Please sign in to comment.