From 181e782ae7d6514f30d57c12bbce063291c86e84 Mon Sep 17 00:00:00 2001 From: Josh Date: Tue, 26 Mar 2024 08:26:44 -0700 Subject: [PATCH 1/5] Add config value endpointCA for private S3 such as MinIO (#230) * Add config value endpointCA for private S3 such as MinIO Signed-off-by: PseudoResonance --- charts/cluster/README.md | 4 +++ .../templates/_barman_object_store.tpl | 6 ++++ charts/cluster/templates/ca-bundle.yaml | 9 +++++ charts/cluster/values.schema.json | 34 +++++++++++++++++++ charts/cluster/values.yaml | 14 ++++++++ 5 files changed, 67 insertions(+) create mode 100644 charts/cluster/templates/ca-bundle.yaml diff --git a/charts/cluster/README.md b/charts/cluster/README.md index 85094a2ce..b21feedfe 100644 --- a/charts/cluster/README.md +++ b/charts/cluster/README.md @@ -125,6 +125,8 @@ refer to the [CloudNativePG Documentation](https://cloudnative-pg.io/documentat | backups.data.jobs | int | `2` | Number of data files to be archived or restored in parallel. | | backups.destinationPath | string | `""` | Overrides the provider specific default path. Defaults to: S3: s3:// Azure: https://..core.windows.net/ Google: gs:// | | backups.enabled | bool | `false` | You need to configure backups manually, so backups are disabled by default. | +| backups.endpointCA | object | `{"create":false,"key":"","name":"","value":""}` | Specifies a CA bundle to validate a privately signed certificate. | +| backups.endpointCA.create | bool | `false` | Creates a secret with the given value if true, otherwise uses an existing secret. | | backups.endpointURL | string | `""` | Overrides the provider specific default endpoint. Defaults to: S3: https://s3..amazonaws.com" | | backups.google.applicationCredentials | string | `""` | | | backups.google.bucket | string | `""` | | @@ -190,6 +192,8 @@ refer to the [CloudNativePG Documentation](https://cloudnative-pg.io/documentat | recovery.backupName | string | `""` | Backup Recovery Method | | recovery.clusterName | string | `""` | Object Store Recovery Method | | recovery.destinationPath | string | `""` | Overrides the provider specific default path. Defaults to: S3: s3:// Azure: https://..core.windows.net/ Google: gs:// | +| recovery.endpointCA | object | `{"create":false,"key":"","name":"","value":""}` | Specifies a CA bundle to validate a privately signed certificate. | +| recovery.endpointCA.create | bool | `false` | Creates a secret with the given value if true, otherwise uses an existing secret. | | recovery.endpointURL | string | `""` | Overrides the provider specific default endpoint. 
Defaults to: S3: https://s3..amazonaws.com" Leave empty if using the default S3 endpoint | | recovery.google.applicationCredentials | string | `""` | | | recovery.google.bucket | string | `""` | | diff --git a/charts/cluster/templates/_barman_object_store.tpl b/charts/cluster/templates/_barman_object_store.tpl index 96278f11a..f002800e5 100644 --- a/charts/cluster/templates/_barman_object_store.tpl +++ b/charts/cluster/templates/_barman_object_store.tpl @@ -4,6 +4,12 @@ endpointURL: {{ .scope.endpointURL }} {{- end }} +{{- if or (.scope.endpointCA.create) (.scope.endpointCA.name) }} + endpointCA: + name: {{ .chartFullname }}-ca-bundle + key: ca-bundle.crt +{{- end }} + {{- if .scope.destinationPath }} destinationPath: {{ .scope.destinationPath }} {{- end }} diff --git a/charts/cluster/templates/ca-bundle.yaml b/charts/cluster/templates/ca-bundle.yaml new file mode 100644 index 000000000..12991c163 --- /dev/null +++ b/charts/cluster/templates/ca-bundle.yaml @@ -0,0 +1,9 @@ +{{- if .Values.backups.endpointCA.create }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.backups.endpointCA.name | default (printf "%s-ca-bundle" (include "cluster.fullname" .)) | quote }} +data: + {{ .Values.backups.endpointCA.key | default "ca-bundle.crt" | quote }}: {{ .Values.backups.endpointCA.value }} + +{{- end }} diff --git a/charts/cluster/values.schema.json b/charts/cluster/values.schema.json index 3ee174fdc..5a1c46e28 100644 --- a/charts/cluster/values.schema.json +++ b/charts/cluster/values.schema.json @@ -54,6 +54,23 @@ "enabled": { "type": "boolean" }, + "endpointCA": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + }, + "key": { + "type": "string" + }, + "name": { + "type": "string" + }, + "value": { + "type": "string" + } + } + }, "endpointURL": { "type": "string" }, @@ -335,6 +352,23 @@ "destinationPath": { "type": "string" }, + "endpointCA": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + }, + "key": { + "type": "string" + }, + "name": { + "type": "string" + }, + "value": { + "type": "string" + } + } + }, "endpointURL": { "type": "string" }, diff --git a/charts/cluster/values.yaml b/charts/cluster/values.yaml index 07d979a4c..d4db49e32 100644 --- a/charts/cluster/values.yaml +++ b/charts/cluster/values.yaml @@ -41,6 +41,13 @@ recovery: # S3: https://s3..amazonaws.com" # Leave empty if using the default S3 endpoint endpointURL: "" + # -- Specifies a CA bundle to validate a privately signed certificate. + endpointCA: + # -- Creates a secret with the given value if true, otherwise uses an existing secret. + create: false + name: "" + key: "" + value: "" # -- Overrides the provider specific default path. Defaults to: # S3: s3:// # Azure: https://..core.windows.net/ @@ -184,6 +191,13 @@ backups: # -- Overrides the provider specific default endpoint. Defaults to: # S3: https://s3..amazonaws.com" endpointURL: "" # Leave empty if using the default S3 endpoint + # -- Specifies a CA bundle to validate a privately signed certificate. + endpointCA: + # -- Creates a secret with the given value if true, otherwise uses an existing secret. + create: false + name: "" + key: "" + value: "" # -- Overrides the provider specific default path. 
Defaults to: # S3: s3:// From ac0a34ee73106e0df6f77394b3258920248d4b52 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 20:05:44 +0200 Subject: [PATCH 2/5] chore(deps): update actions/setup-python digest to 82c7e63 (#231) * chore(deps): update actions/setup-python digest to 82c7e63 Signed-off-by: Itay Grudev Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Itay Grudev --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2f836614a..9e0fccaa5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: with: version: v3.4.0 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: python-version: 3.7 From 741234645197be23cef35584f9bc17b6b63c7a80 Mon Sep 17 00:00:00 2001 From: Itay Grudev Date: Tue, 26 Mar 2024 23:01:58 +0200 Subject: [PATCH 3/5] Added the ability to exclude specific PrometheusRules (#232) * Added the ability to exclude specific PrometheusRules Signed-off-by: Itay Grudev --- charts/cluster/README.md | 1 + .../prometheus_rules/cluster-ha-critical.yaml | 24 +++ .../prometheus_rules/cluster-ha-warning.yaml | 22 +++ .../cluster-high_connection-critical.yaml | 15 ++ .../cluster-high_connection-warning.yaml | 15 ++ .../cluster-high_replication_lag.yaml | 17 ++ .../cluster-instances_on_same_node.yaml | 17 ++ .../cluster-low_disk_space-critical.yaml | 22 +++ .../cluster-low_disk_space-warning.yaml | 22 +++ .../prometheus_rules/cluster-offline.yaml | 17 ++ .../cluster-zone_spread-warning.yaml | 16 ++ charts/cluster/templates/prometheus-rule.yaml | 174 ++---------------- charts/cluster/values.schema.json | 3 + charts/cluster/values.yaml | 5 +- 14 files changed, 206 insertions(+), 164 deletions(-) create mode 100644 charts/cluster/prometheus_rules/cluster-ha-critical.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-ha-warning.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-high_connection-critical.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-high_connection-warning.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-high_replication_lag.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-instances_on_same_node.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-low_disk_space-critical.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-low_disk_space-warning.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-offline.yaml create mode 100644 charts/cluster/prometheus_rules/cluster-zone_spread-warning.yaml diff --git a/charts/cluster/README.md b/charts/cluster/README.md index b21feedfe..af047fba9 100644 --- a/charts/cluster/README.md +++ b/charts/cluster/README.md @@ -160,6 +160,7 @@ refer to the [CloudNativePG Documentation](https://cloudnative-pg.io/documentat | cluster.monitoring.enabled | bool | `false` | Whether to enable monitoring | | cluster.monitoring.podMonitor.enabled | bool | `true` | Whether to enable the PodMonitor | | cluster.monitoring.prometheusRule.enabled | bool | `true` | Whether to enable the PrometheusRule automated alerts | +| cluster.monitoring.prometheusRule.excludeRules | list | `[]` | Exclude specified rules | | cluster.postgresGID | int | `26` | The GID of the postgres user inside the image, 
defaults to 26 | | cluster.postgresUID | int | `26` | The UID of the postgres user inside the image, defaults to 26 | | cluster.postgresql | object | `{}` | Configuration of the PostgreSQL server. See: https://cloudnative-pg.io/documentation/current/cloudnative-pg.v1/#postgresql-cnpg-io-v1-PostgresConfiguration | diff --git a/charts/cluster/prometheus_rules/cluster-ha-critical.yaml b/charts/cluster/prometheus_rules/cluster-ha-critical.yaml new file mode 100644 index 000000000..014e9ec44 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-ha-critical.yaml @@ -0,0 +1,24 @@ +{{- $alert := "CNPGClusterHACritical" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster has no standby replicas! + description: |- + CloudNativePG Cluster "{{ .labels.job }}" has no ready standby replicas. Your cluster is at severe + risk of data loss and downtime if the primary instance fails. + + The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint + will fail. The `-r` endpoint is operating at reduced capacity and all traffic is being served by the primary. + + This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or fewer + instances. The replaced instance may need some time to catch up with the cluster primary instance. + + This alarm will always trigger if your cluster is configured to run with only 1 instance. In this + case you may want to silence it. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md +expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="{{ .namespace }}"} - cnpg_pg_replication_is_wal_receiver_up{namespace="{{ .namespace }}"}) < 1 +for: 5m +labels: + severity: critical +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-ha-warning.yaml b/charts/cluster/prometheus_rules/cluster-ha-warning.yaml new file mode 100644 index 000000000..15a5d4d13 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-ha-warning.yaml @@ -0,0 +1,22 @@ +{{- $alert := "CNPGClusterHAWarning" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster has fewer than 2 standby replicas. + description: |- + CloudNativePG Cluster "{{ .labels.job }}" has only {{ .value }} standby replicas, putting + your cluster at risk if another instance fails. The cluster is still able to operate normally, although + the `-ro` and `-r` endpoints operate at reduced capacity. + + This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may + need some time to catch up with the cluster primary instance. + + This alarm will be constantly triggered if your cluster is configured to run with fewer than 3 instances. + In this case you may want to silence it.
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md +expr: | + max by (job) (cnpg_pg_replication_streaming_replicas{namespace="{{ .namespace }}"} - cnpg_pg_replication_is_wal_receiver_up{namespace="{{ .namespace }}"}) < 2 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-high_connection-critical.yaml b/charts/cluster/prometheus_rules/cluster-high_connection-critical.yaml new file mode 100644 index 000000000..ac83376ab --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-high_connection-critical.yaml @@ -0,0 +1,15 @@ +{{- $alert := "CNPGClusterHighConnectionsCritical" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Instance maximum number of connections critical! + description: |- + CloudNativePG Cluster "{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md +expr: | + sum by (pod) (cnpg_backends_total{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) * 100 > 95 +for: 5m +labels: + severity: critical +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-high_connection-warning.yaml b/charts/cluster/prometheus_rules/cluster-high_connection-warning.yaml new file mode 100644 index 000000000..126abd863 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-high_connection-warning.yaml @@ -0,0 +1,15 @@ +{{- $alert := "CNPGClusterHighConnectionsWarning" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Instance is approaching the maximum number of connections. + description: |- + CloudNativePG Cluster "{{ .cluster }}" instance {{ .labels.pod }} is using {{ .value }}% of + the maximum number of connections. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md +expr: | + sum by (pod) (cnpg_backends_total{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) * 100 > 80 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-high_replication_lag.yaml b/charts/cluster/prometheus_rules/cluster-high_replication_lag.yaml new file mode 100644 index 000000000..4cf1610d2 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-high_replication_lag.yaml @@ -0,0 +1,17 @@ +{{- $alert := "CNPGClusterHighReplicationLag" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster high replication lag + description: |- + CloudNativePG Cluster "{{ .cluster }}" is experiencing a high replication lag of + {{ .value }}ms. + + High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md +expr: | + max(cnpg_pg_replication_lag{namespace=~"{{ .namespace }}",pod=~"{{ .podSelector }}"}) * 1000 > 1000 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-instances_on_same_node.yaml b/charts/cluster/prometheus_rules/cluster-instances_on_same_node.yaml new file mode 100644 index 000000000..39900cf20 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-instances_on_same_node.yaml @@ -0,0 +1,17 @@ +{{- $alert := "CNPGClusterInstancesOnSameNode" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster instances are located on the same node. + description: |- + CloudNativePG Cluster "{{ .cluster }}" has {{ .value }} + instances on the same node {{ .labels.node }}. + + A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md +expr: | + count by (node) (kube_pod_info{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"}) > 1 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-low_disk_space-critical.yaml b/charts/cluster/prometheus_rules/cluster-low_disk_space-critical.yaml new file mode 100644 index 000000000..fcacab9be --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-low_disk_space-critical.yaml @@ -0,0 +1,22 @@ +{{- $alert := "CNPGClusterLowDiskSpaceCritical" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Instance is running out of disk space! + description: |- + CloudNativePG Cluster "{{ .cluster }}" is running extremely low on disk space. Check attached PVCs! 
+ runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md +expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"})) > 0.9 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-wal"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-wal"})) > 0.9 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"{{ .podSelector }}"} + ) > 0.9 +for: 5m +labels: + severity: critical +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-low_disk_space-warning.yaml b/charts/cluster/prometheus_rules/cluster-low_disk_space-warning.yaml new file mode 100644 index 000000000..7f36f4351 --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-low_disk_space-warning.yaml @@ -0,0 +1,22 @@ +{{- $alert := "CNPGClusterLowDiskSpaceWarning" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Instance is running out of disk space. + description: |- + CloudNativePG Cluster "{{ .cluster }}" is running low on disk space. Check attached PVCs. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md +expr: | + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}"})) > 0.7 OR + max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-wal"} / kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-wal"})) > 0.7 OR + max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-tbs.*"}) + / + sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="{{ .namespace }}", persistentvolumeclaim=~"{{ .podSelector }}-tbs.*"}) + * + on(namespace, persistentvolumeclaim) group_left(volume) + kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"{{ .podSelector }}"} + ) > 0.7 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-offline.yaml b/charts/cluster/prometheus_rules/cluster-offline.yaml new file mode 100644 index 000000000..75647f7cc --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-offline.yaml @@ -0,0 +1,17 @@ +{{- $alert := "CNPGClusterOffline" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster has no running instances! 
+ description: |- + CloudNativePG Cluster "{{ .labels.job }}" has no ready instances. + + Having an offline cluster means your applications will not be able to access the database, leading to + potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md +expr: | + ({{ .Values.cluster.instances }} - count(cnpg_collector_up{namespace=~"{{ .namespace }}",pod=~"{{ .podSelector }}"}) OR vector(0)) > 0 +for: 5m +labels: + severity: critical +{{- end -}} diff --git a/charts/cluster/prometheus_rules/cluster-zone_spread-warning.yaml b/charts/cluster/prometheus_rules/cluster-zone_spread-warning.yaml new file mode 100644 index 000000000..17183986e --- /dev/null +++ b/charts/cluster/prometheus_rules/cluster-zone_spread-warning.yaml @@ -0,0 +1,16 @@ +{{- $alert := "CNPGClusterZoneSpreadWarning" -}} +{{- if not (has $alert .excludeRules) -}} +alert: {{ $alert }} +annotations: + summary: CNPG Cluster instances in the same zone. + description: |- + CloudNativePG Cluster "{{ .cluster }}" has instances in the same availability zone. + + A disaster in one availability zone will lead to a potential service disruption and/or data loss. + runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md +expr: | + {{ .Values.cluster.instances }} > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace=~"{{ .namespace }}", pod=~"{{ .podSelector }}"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 +for: 5m +labels: + severity: warning +{{- end -}} diff --git a/charts/cluster/templates/prometheus-rule.yaml b/charts/cluster/templates/prometheus-rule.yaml index bc3ffde84..380f90a7d 100644 --- a/charts/cluster/templates/prometheus-rule.yaml +++ b/charts/cluster/templates/prometheus-rule.yaml @@ -1,9 +1,4 @@ {{- if and .Values.cluster.monitoring.enabled .Values.cluster.monitoring.prometheusRule.enabled -}} -{{- $value := "{{ $value }}" -}} -{{- $namespace := .Release.Namespace -}} -{{- $cluster := printf "%s/%s" $namespace (include "cluster.fullname" .)}} -{{- $labels := dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}" -}} -{{- $podSelector := printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .) -}} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: @@ -16,162 +11,15 @@ metadata: spec: groups: - name: cloudnative-pg/{{ include "cluster.fullname" . }} - rules: - - alert: CNPGClusterHAWarning - annotations: - summary: CNPG Cluster less than 2 standby replicas. - description: |- - CloudNativePG Cluster "{{ $labels.job }}" has only {{ $value }} standby replicas, putting - your cluster at risk if another instance fails. The cluster is still able to operate normally, although - the `-ro` and `-r` endpoints operate at reduced capacity. - - This can happen during a normal fail-over or automated minor version upgrades. The replaced instance may - need some time to catch-up with the cluster primary instance. - - This alarm will be constantly triggered if your cluster is configured to run with less than 3 instances. - In this case you may want to silence it. 
- runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHAWarning.md - expr: | - max by (job) (cnpg_pg_replication_streaming_replicas{namespace="{{ $namespace }}"} - cnpg_pg_replication_is_wal_receiver_up{namespace="{{ $namespace }}"}) < 2 - for: 5m - labels: - severity: warning - - alert: CNPGClusterHACritical - annotations: - summary: CNPG Cluster has no standby replicas! - description: |- - CloudNativePG Cluster "{{ $labels.job }}" has no ready standby replicas. Your cluster at a severe - risk of data loss and downtime if the primary instance fails. - - The primary instance is still online and able to serve queries, although connections to the `-ro` endpoint - will fail. The `-r` endpoint os operating at reduced capacity and all traffic is being served by the main. - - This can happen during a normal fail-over or automated minor version upgrades in a cluster with 2 or less - instances. The replaced instance may need some time to catch-up with the cluster primary instance. - - This alarm will be always trigger if your cluster is configured to run with only 1 instance. In this - case you may want to silence it. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHACritical.md - expr: | - max by (job) (cnpg_pg_replication_streaming_replicas{namespace="{{ $namespace }}"} - cnpg_pg_replication_is_wal_receiver_up{namespace="{{ $namespace }}"}) < 1 - for: 5m - labels: - severity: critical - - alert: CNPGClusterOffline - annotations: - summary: CNPG Cluster has no running instances! - description: |- - CloudNativePG Cluster "{{ $labels.job }}" has no ready instances. - - Having an offline cluster means your applications will not be able to access the database, leading to - potential service disruption and/or data loss. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterOffline.md - expr: | - ({{ .Values.cluster.instances }} - count(cnpg_collector_up{namespace=~"{{ $namespace }}",pod=~"{{ $podSelector }}"}) OR vector(0)) > 0 - for: 5m - labels: - severity: critical - - alert: CNPGClusterZoneSpreadWarning - annotations: - summary: CNPG Cluster instances in the same zone. - description: |- - CloudNativePG Cluster "{{ $cluster }}" has instances in the same availability zone. - - A disaster in one availability zone will lead to a potential service disruption and/or data loss. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterZoneSpreadWarning.md - expr: | - {{ .Values.cluster.instances }} > count(count by (label_topology_kubernetes_io_zone) (kube_pod_info{namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"} * on(node,instance) group_left(label_topology_kubernetes_io_zone) kube_node_labels)) < 3 - for: 5m - labels: - severity: warning - - alert: CNPGClusterInstancesOnSameNode - annotations: - summary: CNPG Cluster instances are located on the same node. - description: |- - CloudNativePG Cluster "{{ $cluster }}" has {{ $value }} - instances on the same node {{ $labels.node }}. - - A failure or scheduled downtime of a single node will lead to a potential service disruption and/or data loss. 
- runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterInstancesOnSameNode.md - expr: | - count by (node) (kube_pod_info{namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"}) > 1 - for: 5m - labels: - severity: warning - - alert: CNPGClusterHighReplicationLag - annotations: - summary: CNPG Cluster high replication lag - description: |- - CloudNativePG Cluster "{{ $cluster }}" is experiencing a high replication lag of - {{ "{{ $value }}" }}ms. - - High replication lag indicates network issues, busy instances, slow queries or suboptimal configuration. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighReplicationLag.md - expr: | - max(cnpg_pg_replication_lag{namespace=~"{{ $namespace }}",pod=~"{{ $podSelector }}"}) * 1000 > 1000 - for: 5m - labels: - severity: warning - - alert: CNPGClusterHighConnectionsWarning - annotations: - summary: CNPG Instance is approaching the maximum number of connections. - description: |- - CloudNativePG Cluster "{{ $cluster }}" instance {{ $labels.pod }} is using {{ "{{ $value }}" }}% of - the maximum number of connections. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsWarning.md - expr: | - sum by (pod) (cnpg_backends_total{namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"}) * 100 > 80 - for: 5m - labels: - severity: warning - - alert: CNPGClusterHighConnectionsCritical - annotations: - summary: CNPG Instance maximum number of connections critical! - description: |- - CloudNativePG Cluster "{{ $cluster }}" instance {{ $labels.pod }} is using {{ "{{ $value }}" }}% of - the maximum number of connections. - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterHighConnectionsCritical.md - expr: | - sum by (pod) (cnpg_backends_total{namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"}) / max by (pod) (cnpg_pg_settings_setting{name="max_connections", namespace=~"{{ $namespace }}", pod=~"{{ $podSelector }}"}) * 100 > 95 - for: 5m - labels: - severity: critical - - alert: CNPGClusterLowDiskSpaceWarning - annotations: - summary: CNPG Instance is running out of disk space. - description: |- - CloudNativePG Cluster "{{ $cluster }}" is running low on disk space. Check attached PVCs. 
- runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceWarning.md - expr: | - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}"})) > 0.7 OR - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-wal"} / kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-wal"})) > 0.7 OR - max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-tbs.*"}) - / - sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-tbs.*"}) - * - on(namespace, persistentvolumeclaim) group_left(volume) - kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"{{ $podSelector }}"} - ) > 0.7 - for: 5m - labels: - severity: warning - - alert: CNPGClusterLowDiskSpaceCritical - annotations: - summary: CNPG Instance is running out of disk space! - description: |- - CloudNativePG Cluster "{{ $cluster }}" is running extremely low on disk space. Check attached PVCs! - runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLowDiskSpaceCritical.md - expr: | - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}"} / kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}"})) > 0.9 OR - max(max by(persistentvolumeclaim) (1 - kubelet_volume_stats_available_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-wal"} / kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-wal"})) > 0.9 OR - max(sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-tbs.*"}) - / - sum by (namespace,persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{namespace="{{ $namespace }}", persistentvolumeclaim=~"{{ $podSelector }}-tbs.*"}) - * - on(namespace, persistentvolumeclaim) group_left(volume) - kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~"{{ $podSelector }}"} - ) > 0.9 - for: 5m - labels: - severity: critical + rules: | + {{ $dict := dict "excludeRules" .Values.cluster.monitoring.prometheusRule.excludeRules -}} + {{- $_ := set $dict "value" "{{ $value }}" -}} + {{- $_ := set $dict "namespace" .Release.Namespace -}} + {{- $_ := set $dict "cluster" (printf "%s/%s" .Release.Namespace (include "cluster.fullname" .)) -}} + {{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}") -}} + {{- $_ := set $dict "podSelector" (printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .)) -}} + {{- $_ := set $dict "Values" .Values -}} + {{- range $path, $_ := .Files.Glob "prometheus_rules/**.yaml" }} + - {{ tpl ($.Files.Get $path) $dict | nindent 10 | trim -}} + {{- end -}} {{ end }} diff --git a/charts/cluster/values.schema.json b/charts/cluster/values.schema.json index 5a1c46e28..0c7c1bbe0 100644 --- 
a/charts/cluster/values.schema.json +++ b/charts/cluster/values.schema.json @@ -213,6 +213,9 @@ "properties": { "enabled": { "type": "boolean" + }, + "excludeRules": { + "type": "array" } } } diff --git a/charts/cluster/values.yaml b/charts/cluster/values.yaml index d4db49e32..bff9d2202 100644 --- a/charts/cluster/values.yaml +++ b/charts/cluster/values.yaml @@ -151,8 +151,11 @@ cluster: # -- Whether to enable the PodMonitor enabled: true prometheusRule: - # -- Whether to enable the PrometheusRule automated alerts + # -- Whether to enable the PrometheusRule automated alerts enabled: true + # -- Exclude specified rules + excludeRules: [] + # - CNPGClusterZoneSpreadWarning # -- Custom Prometheus metrics customQueries: [] # - name: "pg_cache_hit_ratio" From f2d6ea7effe8c7220ddcd7f2001345abf46a14a8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:04:26 +0200 Subject: [PATCH 4/5] Release cluster-v0.0.4 (#234) * Added the ability to exclude specific PrometheusRules by @itay-grudev (#232) * Added config value endpointCA for private S3 such as MinIO by @PseudoResonance (#229, #230) * Bug Fix: Severity of CNPGClusterLowDiskSpaceCritical should be critical not warning by @baurmatt (#223) * New `backup.barmanObjectStore.wal` and `backup.barmanObjectStore.data` to support disabling encryption by @itay-grudev (#198, #221) Signed-off-by: Itay Grudev Co-authored-by: Itay Grudev --- RELEASE.md | 4 ++-- charts/cluster/Chart.yaml | 2 +- charts/cluster/README.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 984d1d550..d328344e0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -32,7 +32,7 @@ In order to create a new release of the `cloudnative-pg` chart, follow these ste ``` 3. Create a branch named `release/cloudnative-pg-vX.Y.Z` and switch to it: ```bash - git checkout -b release/cloudnative-pg-v$NEW_VERSION + git switch --create release/cloudnative-pg-v$NEW_VERSION ``` 4. Update the `.version` in the [Chart.yaml](./charts/cloudnative-pg/Chart.yaml) file to `"X.Y.Z"` ```bash @@ -113,7 +113,7 @@ In order to create a new release of the `cluster` chart, follow these steps: ``` 3. Create a branch: named `release/cluster-vX.Y.Z` and switch to it ```bash - git checkout -b release/cluster-v$NEW_VERSION + git switch --create release/cluster-v$NEW_VERSION ``` 4. Update the `.version` in the [Chart.yaml](./charts/cluster/Chart.yaml) file to `"X.Y.Z"` ```bash diff --git a/charts/cluster/Chart.yaml b/charts/cluster/Chart.yaml index c57c3e6a9..dccec1ead 100644 --- a/charts/cluster/Chart.yaml +++ b/charts/cluster/Chart.yaml @@ -18,7 +18,7 @@ name: cluster description: Deploys and manages a CloudNativePG cluster and its associated resources. 
icon: https://raw.githubusercontent.com/cloudnative-pg/artwork/main/cloudnativepg-logo.svg type: application -version: 0.0.3 +version: 0.0.4 sources: - https://github.com/cloudnative-pg/charts keywords: diff --git a/charts/cluster/README.md b/charts/cluster/README.md index af047fba9..ce4d45567 100644 --- a/charts/cluster/README.md +++ b/charts/cluster/README.md @@ -1,6 +1,6 @@ # cluster -![Version: 0.0.3](https://img.shields.io/badge/Version-0.0.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.0.4](https://img.shields.io/badge/Version-0.0.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) > **Warning** > ### This chart is under active development. From 0a85ff4cc0d73f9bd494ae641d74fe29342b77b6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:48:51 +0200 Subject: [PATCH 5/5] Release cluster-v0.0.5 (#236) Bug Fix: PrometheusRules template issue prevents upgrading the chart Signed-off-by: Itay Grudev Co-authored-by: Itay Grudev --- charts/cluster/Chart.yaml | 2 +- charts/cluster/README.md | 2 +- charts/cluster/templates/prometheus-rule.yaml | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/charts/cluster/Chart.yaml b/charts/cluster/Chart.yaml index dccec1ead..fd2be2809 100644 --- a/charts/cluster/Chart.yaml +++ b/charts/cluster/Chart.yaml @@ -18,7 +18,7 @@ name: cluster description: Deploys and manages a CloudNativePG cluster and its associated resources. icon: https://raw.githubusercontent.com/cloudnative-pg/artwork/main/cloudnativepg-logo.svg type: application -version: 0.0.4 +version: 0.0.5 sources: - https://github.com/cloudnative-pg/charts keywords: diff --git a/charts/cluster/README.md b/charts/cluster/README.md index ce4d45567..fb2fdb814 100644 --- a/charts/cluster/README.md +++ b/charts/cluster/README.md @@ -1,6 +1,6 @@ # cluster -![Version: 0.0.4](https://img.shields.io/badge/Version-0.0.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.0.5](https://img.shields.io/badge/Version-0.0.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) > **Warning** > ### This chart is under active development. diff --git a/charts/cluster/templates/prometheus-rule.yaml b/charts/cluster/templates/prometheus-rule.yaml index 380f90a7d..1836f51d9 100644 --- a/charts/cluster/templates/prometheus-rule.yaml +++ b/charts/cluster/templates/prometheus-rule.yaml @@ -11,14 +11,15 @@ metadata: spec: groups: - name: cloudnative-pg/{{ include "cluster.fullname" . 
}} - rules: | - {{ $dict := dict "excludeRules" .Values.cluster.monitoring.prometheusRule.excludeRules -}} + rules: + {{- $dict := dict "excludeRules" .Values.cluster.monitoring.prometheusRule.excludeRules -}} {{- $_ := set $dict "value" "{{ $value }}" -}} {{- $_ := set $dict "namespace" .Release.Namespace -}} {{- $_ := set $dict "cluster" (printf "%s/%s" .Release.Namespace (include "cluster.fullname" .)) -}} {{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}") -}} {{- $_ := set $dict "podSelector" (printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .)) -}} {{- $_ := set $dict "Values" .Values -}} + {{- $_ := set $dict "Template" .Template -}} {{- range $path, $_ := .Files.Glob "prometheus_rules/**.yaml" }} - {{ tpl ($.Files.Get $path) $dict | nindent 10 | trim -}} {{- end -}}
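The `endpointCA` values introduced in PATCH 1/5 are easiest to read from a concrete override file. Below is a minimal sketch, not a configuration shipped with the chart: the MinIO hostname and the certificate value are illustrative placeholders, and bucket, credential, and other provider settings are omitted.

```yaml
# Hypothetical values override for a private S3-compatible store such as MinIO.
backups:
  enabled: true
  # Private S3-compatible endpoint presenting a privately signed certificate
  endpointURL: "https://minio.internal.example:9000"
  endpointCA:
    # true: the chart renders the Secret in templates/ca-bundle.yaml from `value`
    # false: an existing Secret referenced by `name`/`key` is used instead
    create: true
    name: ""  # defaults to <cluster fullname>-ca-bundle
    key: ""   # defaults to ca-bundle.crt
    # Secret `data` entries must already be base64-encoded PEM;
    # truncated placeholder shown here
    value: "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUIuLi4="
```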
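Note that in this revision `_barman_object_store.tpl` always points the cluster at `{{ .chartFullname }}-ca-bundle` with key `ca-bundle.crt`, while `ca-bundle.yaml` honours the `name` and `key` overrides, so overriding either value would leave the cluster referencing a Secret entry that is never created; keeping both at their defaults avoids the mismatch.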
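The exclusion mechanism from PATCH 3/5 is driven purely by alert names: each rule file begins with `{{- if not (has $alert .excludeRules) -}}` and renders nothing when its name is listed. A minimal sketch of values for a single-instance development cluster follows; the scenario is illustrative, and the alert names are the ones declared at the top of the rule files above.

```yaml
cluster:
  instances: 1
  monitoring:
    enabled: true
    prometheusRule:
      enabled: true
      # Alerts that are expected to fire permanently on a 1-instance cluster,
      # per the guidance in their own descriptions, so drop them at render time.
      excludeRules:
        - CNPGClusterHAWarning
        - CNPGClusterHACritical
        - CNPGClusterZoneSpreadWarning
```

Rendering the chart with `helm template` before and after setting `excludeRules` is a quick way to confirm that the corresponding entries disappear from the generated PrometheusRule.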
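The rendering pipeline in `prometheus-rule.yaml` is worth spelling out, since PATCH 5/5 exists only because of it: `tpl` evaluates each rule file against a hand-built dict rather than the root context, so anything the rule files need (`.Values`, plus `.Template`, which `tpl` itself requires on recent Helm versions) must be copied into that dict explicitly, and literal Prometheus placeholders such as `{{ $value }}` are passed through as plain string values that Helm never re-evaluates. PATCH 5/5 also replaces `rules: |`, a block scalar that would have emitted the rules as one opaque string, with a genuine YAML list. A stripped-down sketch of the same pattern outside this chart follows; the file layout and key names are illustrative, not part of the chart.

```yaml
{{- /* templates/rendered-rules.yaml: illustrative standalone example */ -}}
{{- $ctx := dict "excludeRules" (list "ExampleExcludedAlert") -}}
{{- $_ := set $ctx "namespace" .Release.Namespace -}}
{{- /* a quoted string inside a template action is not re-scanned, so this
       survives as a literal Prometheus template expression in the output */ -}}
{{- $_ := set $ctx "value" "{{ $value }}" -}}
{{- $_ := set $ctx "Values" .Values -}}
{{- $_ := set $ctx "Template" .Template -}}
rules:
{{- range $path, $_ := .Files.Glob "rules/**.yaml" }}
  - {{ tpl ($.Files.Get $path) $ctx | nindent 4 | trim }}
{{- end }}
```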