Skip to content

Commit

Permalink
Merge branch 'main' into badges
Browse files: browse the repository at this point in the history
  • Loading branch information
itay-grudev authored Apr 3, 2024
2 parents e36cf83 + b7e9e3e commit 5b6bf02
Show file tree
Hide file tree
Showing 15 changed files with 44 additions and 10 deletions.
1 change: 1 addition & 0 deletions charts/cloudnative-pg/README.md

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion charts/cloudnative-pg/templates/podmonitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ metadata:
name: {{ include "cloudnative-pg.fullname" . }}
labels:
{{- include "cloudnative-pg.labels" . | nindent 4 }}
{{- with .Values.monitoring.podMonitorAdditionalLabels }}
{{- toYaml . | nindent 4 }}
{{- end}}
{{- with .Values.commonAnnotations }}
annotations:
{{- toYaml . | nindent 4 }}
Expand All @@ -15,4 +18,4 @@ spec:
{{- include "cloudnative-pg.selectorLabels" . | nindent 6 }}
podMetricsEndpoints:
- port: metrics
{{- end }}
{{- end }}
3 changes: 3 additions & 0 deletions charts/cloudnative-pg/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@
},
"podMonitorEnabled": {
"type": "boolean"
},
"podMonitorAdditionalLabels": {
"type": "object"
}
}
},
Expand Down
4 changes: 4 additions & 0 deletions charts/cloudnative-pg/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,12 @@ tolerations: []
affinity: {}

monitoring:

# -- Specifies whether monitoring should be enabled. Requires the Prometheus Operator CRDs.
podMonitorEnabled: false
# -- Additional labels to add to the PodMonitor's metadata.
podMonitorAdditionalLabels: {}

grafanaDashboard:
create: false
# -- Allows overriding the namespace where the ConfigMap will be created, defaulting to the same one as the Release.
Expand Down
2 changes: 2 additions & 0 deletions charts/cluster/prometheus_rules/cluster-ha-critical.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ expr: |
for: 5m
labels:
severity: critical
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
2 changes: 2 additions & 0 deletions charts/cluster/prometheus_rules/cluster-ha-warning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ expr: |
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ alert: {{ $alert }}
annotations:
summary: CNPG Instance maximum number of connections critical!
description: |-
CloudNativePG Cluster "{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of
the maximum number of connections.
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md
expr: |
sum by (pod) (cnpg_backends_total{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) * 100 > 95
for: 5m
labels:
severity: critical
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ alert: {{ $alert }}
annotations:
summary: CNPG Instance is approaching the maximum number of connections.
description: |-
CloudNativePG Cluster "{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of
the maximum number of connections.
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md
expr: |
sum by (pod) (cnpg_backends_total{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) * 100 > 80
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ alert: {{ $alert }}
annotations:
summary: CNPG Cluster high replication lag
description: |-
CloudNativePG Cluster "{{ .cluster }}" is experiencing a high replication lag of
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" is experiencing a high replication lag of
{{ .value }}ms.
High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration.
Expand All @@ -14,4 +14,6 @@ expr: |
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ alert: {{ $alert }}
annotations:
summary: CNPG Cluster instances are located on the same node.
description: |-
CloudNativePG Cluster "{{ .cluster }}" has {{ .value }}
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" has {{ .value }}
instances on the same node {{ .labels.node }}.
A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss.
Expand All @@ -14,4 +14,6 @@ expr: |
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ alert: {{ $alert }}
annotations:
summary: CNPG Instance is running out of disk space!
description: |-
CloudNativePG Cluster "{{ .cluster }}" is running extremely low on disk space. Check attached PVCs!
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" is running extremely low on disk space. Check attached PVCs!
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md
expr: |
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"})) > 0.9 OR
Expand All @@ -19,4 +19,6 @@ expr: |
for: 5m
labels:
severity: critical
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ alert: {{ $alert }}
annotations:
summary: CNPG Instance is running out of disk space.
description: |-
CloudNativePG Cluster "{{ .cluster }}" is running low on disk space. Check attached PVCs.
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" is running low on disk space. Check attached PVCs.
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md
expr: |
max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"})) > 0.7 OR
Expand All @@ -19,4 +19,6 @@ expr: |
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
2 changes: 2 additions & 0 deletions charts/cluster/prometheus_rules/cluster-offline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ expr: |
for: 5m
labels:
severity: critical
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ alert: {{ $alert }}
annotations:
summary: CNPG Cluster instances in the same zone.
description: |-
CloudNativePG Cluster "{{ .cluster }}" has instances in the same availability zone.
CloudNativePG Cluster "{{ .namespace }}/{{ .cluster }}" has instances in the same availability zone.
A disaster in one availability zone will lead to a potential service disruption and/or data loss.
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md
Expand All @@ -13,4 +13,6 @@ expr: |
for: 5m
labels:
severity: warning
namespace: {{ .namespace }}
cnpg_cluster: {{ .cluster }}
{{- end -}}
7 changes: 5 additions & 2 deletions charts/cluster/templates/prometheus-rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ spec:
{{- $dict := dict "excludeRules" .Values.cluster.monitoring.prometheusRule.excludeRules -}}
{{- $_ := set $dict "value" "{{ $value }}" -}}
{{- $_ := set $dict "namespace" .Release.Namespace -}}
{{- $_ := set $dict "cluster" (printf "%s/%s" .Release.Namespace (include "cluster.fullname" .)) -}}
{{- $_ := set $dict "cluster" (include "cluster.fullname" .) -}}
{{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}") -}}
{{- $_ := set $dict "podSelector" (printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .)) -}}
{{- $_ := set $dict "Values" .Values -}}
{{- $_ := set $dict "Template" .Template -}}
{{- range $path, $_ := .Files.Glob "prometheus_rules/**.yaml" }}
- {{ tpl ($.Files.Get $path) $dict | nindent 10 | trim -}}
{{- $tpl := tpl ($.Files.Get $path) $dict | nindent 10 | trim -}}
{{- with $tpl }}
- {{ $tpl }}
{{- end -}}
{{- end -}}
{{ end }}

0 comments on commit 5b6bf02

Please sign in to comment.