From 9dbf9aafbf571198a8c6e98cb5a6b482c9603093 Mon Sep 17 00:00:00 2001
From: Hugo Bollon
Date: Mon, 22 Jan 2024 13:49:24 +0100
Subject: [PATCH] feat: add variable for additional alerts labels

---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Line breaks follow the unified-diff structure. The indentation of context
and added lines, and the position of the blank context line in the last
prometheus-rules.yaml hunk, were reconstructed by convention and are
unverified. Confirm against the target tree (or apply with
`git apply --ignore-whitespace`) before relying on it.

Summary of the change: a new Terraform variable `additional_alert_labels`
(map(string), default {}) is passed to the chart values as
`servicemonitor.additionalAlertLabels`, and each of the eight Longhorn alert
rules appends that map under its `labels:` key when it is non-empty.

 .../longhorn/templates/prometheus-rules.yaml  | 24 +++++++++++++++++++
 locals.tf                                     |  1 +
 variables.tf                                  |  6 +++++
 3 files changed, 31 insertions(+)

diff --git a/charts/longhorn/templates/prometheus-rules.yaml b/charts/longhorn/templates/prometheus-rules.yaml
index 636f1cd..c014fd6 100644
--- a/charts/longhorn/templates/prometheus-rules.yaml
+++ b/charts/longhorn/templates/prometheus-rules.yaml
@@ -19,6 +19,9 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornVolumeStatusCritical
       annotations:
         description: {{"Longhorn volume {{$labels.volume}} on {{$labels.node}} is Fault for more than 2 minutes."}}
@@ -27,6 +30,9 @@ spec:
       for: 5m
       labels:
         severity: critical
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornVolumeStatusWarning
       annotations:
         description: {{"Longhorn volume {{$labels.volume}} on {{$labels.node}} is Degraded for more than 5 minutes."}}
@@ -35,6 +41,9 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornNodeStorageWarning
       annotations:
         description: {{"The used storage of node {{$labels.node}} is at {{$value}}% capacity for more than 5 minutes."}}
@@ -43,6 +52,9 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornDiskStorageWarning
       annotations:
         description: {{"The used storage of disk {{$labels.disk}} on node {{$labels.node}} is at {{$value}}% capacity for more than 5 minutes."}}
@@ -51,6 +63,9 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornNodeDown
       annotations:
         description: {{"There are {{$value}} Longhorn nodes which have been offline for more than 5 minutes."}}
@@ -59,6 +74,9 @@ spec:
       for: 5m
       labels:
         severity: critical
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornIntanceManagerCPUUsageWarning
       annotations:
         description: {{"Longhorn instance manager {{$labels.instance_manager}} on {{$labels.node}} has CPU Usage / CPU request is {{$value}}% for more than 5 minutes."}}
@@ -67,6 +85,9 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
     - alert: LonghornNodeCPUUsageWarning
       annotations:
         description: {{"Longhorn node {{$labels.node}} has CPU Usage / CPU capacity is {{$value}}% for more than 5 minutes."}}
@@ -75,5 +96,8 @@ spec:
       for: 5m
       labels:
         severity: warning
+        {{- with $.Values.servicemonitor.additionalAlertLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
 
 {{- end -}}
diff --git a/locals.tf b/locals.tf
index f7cf0e6..6b4fcb5 100644
--- a/locals.tf
+++ b/locals.tf
@@ -69,6 +69,7 @@ locals {
     }
     servicemonitor = {
       enabled = var.enable_service_monitor
+      additionalAlertLabels = var.additional_alert_labels
     }
     automaticFilesystemTrim = {
       enabled = var.automatic_filesystem_trim.enabled
diff --git a/variables.tf b/variables.tf
index 1924dff..36c134d 100644
--- a/variables.tf
+++ b/variables.tf
@@ -154,6 +154,12 @@ variable "enable_service_monitor" {
   default     = false
 }
 
+variable "additional_alert_labels" {
+  description = "Additional labels to add to Longhorn alerts."
+  type        = map(string)
+  default     = {}
+}
+
 variable "enable_dashboard_ingress" {
   description = "Boolean to enable the creation of an ingress for the Longhorn's dashboard. **If enabled, you must provide a value for `base_domain`.**"
   type        = bool