Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding postgresql-ha on obs cluster #597

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ai-telemetry/base/externalsecrets/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ kind: Kustomization

# Manifests from this directory that kustomize builds, listed by file name.
# The two postgresql-ha entries are ExternalSecret definitions.
resources:
- solr-secret.yaml
- postgresql-ha-secret.yaml
- postgresql-ha-pgpool-secret.yaml
15 changes: 15 additions & 0 deletions ai-telemetry/base/externalsecrets/postgresql-ha-pgpool-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ExternalSecret that materialises the Secret `postgresql-ha-pgpool` in the
# postgresql-ha namespace. The pgpool Deployment reads its `admin-password`
# key from that Secret.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: postgresql-ha-pgpool-secret
  namespace: postgresql-ha
spec:
  # Cluster-wide store the secret material is fetched from.
  secretStoreRef:
    kind: ClusterSecretStore
    name: nerc-cluster-secrets
  # NOTE(review): a 15s re-sync is frequent for a rarely-rotated credential;
  # confirm the backing store is fine with that polling rate.
  refreshInterval: 15s
  # Name of the Kubernetes Secret to create. It differs from metadata.name
  # above; workloads must reference this one.
  target:
    name: postgresql-ha-pgpool
  # Copy every property found under the remote key into the target Secret.
  dataFrom:
    - extract:
        # $ENV and $CLUSTER look like placeholders that are presumably
        # substituted before this manifest is applied - TODO confirm.
        key: $ENV/$CLUSTER/ai-telemetry/postgresql-ha-pgpool
15 changes: 15 additions & 0 deletions ai-telemetry/base/externalsecrets/postgresql-ha-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ExternalSecret that materialises the Secret `postgresql-ha-secret` in the
# postgresql-ha namespace. The pgpool Deployment reads the `password` and
# `repmgr-password` keys from that Secret.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: postgresql-ha-secret
  namespace: postgresql-ha
spec:
  # Cluster-wide store the secret material is fetched from.
  secretStoreRef:
    kind: ClusterSecretStore
    name: nerc-cluster-secrets
  # NOTE(review): a 15s re-sync is frequent for a rarely-rotated credential;
  # confirm the backing store is fine with that polling rate.
  refreshInterval: 15s
  # Name of the Kubernetes Secret to create.
  target:
    name: postgresql-ha-secret
  # Copy every property found under the remote key into the target Secret.
  dataFrom:
    - extract:
        # $ENV and $CLUSTER look like placeholders that are presumably
        # substituted before this manifest is applied - TODO confirm.
        key: $ENV/$CLUSTER/ai-telemetry/postgresql-ha
1 change: 1 addition & 0 deletions ai-telemetry/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ resources:
- externalsecrets
- zookeeper
- solr
- postgresql-ha
129 changes: 129 additions & 0 deletions ai-telemetry/base/postgresql-ha/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Hook scripts for the PostgreSQL/repmgr pods: a pre-stop hook and a readiness
# probe. The workload that mounts them is not defined in this file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgresql-ha-postgresql-hooks-scripts
  labels:
    app.kubernetes.io/instance: postgresql-ha
    app.kubernetes.io/name: postgresql-ha
    app.kubernetes.io/component: postgresql
data:
  pre-stop.sh: |-
    #!/bin/bash
    # Pre-stop hook.
    #
    # Usage: pre-stop.sh MIN_DELAY_AFTER_PG_STOP_SECONDS
    #
    # Stops PostgreSQL, waits for a different primary to show up when this
    # node was the primary, and then sleeps so that at least
    # MIN_DELAY_AFTER_PG_STOP_SECONDS elapse after the stop was requested.
    set -o errexit
    set -o pipefail
    set -o nounset

    # Debug section: keep copies of the original stdout/stderr on fd 3 and 4,
    # because fd 1 and 2 are sent to /dev/null below when debug is off.
    exec 3>&1
    exec 4>&2

    # Process input parameters (nounset aborts the script if $1 is missing)
    MIN_DELAY_AFTER_PG_STOP_SECONDS="$1"

    # Load Libraries
    . /opt/bitnami/scripts/liblog.sh
    . /opt/bitnami/scripts/libpostgresql.sh
    . /opt/bitnami/scripts/librepmgr.sh

    # Load PostgreSQL & repmgr environment variables
    . /opt/bitnami/scripts/postgresql-env.sh

    # Auxiliary functions

    # Returns 0 once repmgr reports a primary that is not this node,
    # 1 otherwise. Used as the retry condition further down.
    is_new_primary_ready() {
        local return_value=1
        local current_primary_node current_primary_host
        # Assigned separately from `local` so a failing helper is not masked.
        current_primary_node="$(repmgr_get_primary_node)"
        # The expansion is intentionally left unquoted: word splitting
        # collapses the helper output onto one line before awk/wc see it.
        # NOTE(review): the helper appears to emit one field per line (compare
        # the readarray usage below) - quoting here would change the result.
        current_primary_host="$(echo $current_primary_node | awk '{print $1}')"

        info "$current_primary_host != $REPMGR_NODE_NETWORK_NAME"
        # Exactly two words are expected (presumably host and port - confirm).
        if [[ $(echo $current_primary_node | wc -w) -eq 2 ]] && [[ "$current_primary_host" != "$REPMGR_NODE_NETWORK_NAME" ]]; then
            info "New primary detected, leaving the cluster..."
            return_value=0
        else
            info "Waiting for a new primary to be available..."
        fi
        return $return_value
    }

    export MODULE="pre-stop-hook"

    if [[ "${BITNAMI_DEBUG}" == "true" ]]; then
        info "Bash debug is on"
    else
        info "Bash debug is off"
        exec 1>/dev/null
        exec 2>/dev/null
    fi

    postgresql_enable_nss_wrapper

    # Prepare env vars for managing roles
    readarray -t primary_node < <(repmgr_get_upstream_node)
    primary_host="${primary_node[0]}"

    # Stop postgresql for graceful exit.
    PG_STOP_TIME=$EPOCHSECONDS
    postgresql_stop

    # An empty upstream host, or one equal to this node, is treated as
    # "this node was the primary".
    if [[ -z "$primary_host" ]] || [[ "$primary_host" == "$REPMGR_NODE_NETWORK_NAME" ]]; then
        info "Primary node need to wait for a new primary node before leaving the cluster"
        # retry_while is not defined in this script; presumably it comes from
        # one of the sourced libraries and 10/5 are retries/sleep - confirm.
        retry_while is_new_primary_ready 10 5
    else
        info "Standby node doesn't need to wait for a new primary switchover. Leaving the cluster"
    fi

    # Make sure the hook lasts at least MIN_DELAY_AFTER_PG_STOP_SECONDS after
    # the stop of PG, so that PGPOOL has time to detect the node is down.
    # The value passed in should stay below the pod's
    # terminationGracePeriodSeconds (Kubernetes default: 30 seconds).
    PG_STOP_DURATION=$(($EPOCHSECONDS - $PG_STOP_TIME))
    if (( $PG_STOP_DURATION < $MIN_DELAY_AFTER_PG_STOP_SECONDS )); then
        WAIT_TO_PG_POOL_TIME=$(($MIN_DELAY_AFTER_PG_STOP_SECONDS - $PG_STOP_DURATION))
        info "PG stopped including primary switchover in $PG_STOP_DURATION. Waiting additional $WAIT_TO_PG_POOL_TIME seconds for PG pool"
        sleep "$WAIT_TO_PG_POOL_TIME"
    fi

  readiness-probe.sh: |-
    #!/bin/bash
    # Readiness probe.
    #
    # Usage: readiness-probe.sh MIN_DELAY_AFTER_POD_READY_FIRST_TIME
    #
    # Exits 0 only when "SELECT 1" succeeds AND more than
    # MIN_DELAY_AFTER_POD_READY_FIRST_TIME seconds have passed since the first
    # successful check. Marker files under /tmp carry that state between
    # probe invocations.
    set -o errexit
    set -o pipefail
    set -o nounset

    # Debug section
    exec 3>&1
    exec 4>&2

    # Load Libraries
    . /opt/bitnami/scripts/liblog.sh
    . /opt/bitnami/scripts/libpostgresql.sh

    # Load PostgreSQL & repmgr environment variables
    . /opt/bitnami/scripts/postgresql-env.sh

    # Process input parameters (nounset aborts the script if $1 is missing)
    MIN_DELAY_AFTER_POD_READY_FIRST_TIME="$1"
    # Holds the epoch second of the first successful DB check.
    TMP_FIRST_READY_FILE_TS="/tmp/ts-first-ready.mark"
    # Exists once the initial delay has been served; later probes skip it.
    TMP_DELAY_APPLIED_FILE="/tmp/delay-applied.mark"

    # `|| echo "command failed"` keeps errexit from aborting on a failed query
    # and guarantees the result then differs from "1".
    DB_CHECK_RESULT=$(echo "SELECT 1" | postgresql_execute_print_output "$POSTGRESQL_DATABASE" "$POSTGRESQL_USERNAME" "$POSTGRESQL_PASSWORD" "-h 127.0.0.1 -tA" || echo "command failed")
    if [[ "$DB_CHECK_RESULT" == "1" ]]; then
        if [[ ! -f "$TMP_DELAY_APPLIED_FILE" ]]; then
            # DB up, but initial readiness delay not applied
            if [[ -f "$TMP_FIRST_READY_FILE_TS" ]]; then
                # calculate delay from the first readiness success
                FIRST_READY_TS=$(cat "$TMP_FIRST_READY_FILE_TS")
                CURRENT_DELAY_SECONDS=$(($EPOCHSECONDS - $FIRST_READY_TS))
                if (( $CURRENT_DELAY_SECONDS > $MIN_DELAY_AFTER_POD_READY_FIRST_TIME )); then
                    # minimal delay of the first readiness state passed - report success and mark delay as applied
                    touch "$TMP_DELAY_APPLIED_FILE"
                else
                    # minimal delay of the first readiness state not reached yet - report failure
                    exit 1
                fi
            else
                # first ever readiness test success - store timestamp and report failure
                echo "$EPOCHSECONDS" > "$TMP_FIRST_READY_FILE_TS"
                exit 1
            fi
        fi
    else
        # DB test failed - report failure
        exit 1
    fi
143 changes: 143 additions & 0 deletions ai-telemetry/base/postgresql-ha/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# pgpool Deployment sitting in front of the three postgresql-ha-postgresql-N
# backends listed in PGPOOL_BACKEND_NODES. Credentials come from the Secrets
# `postgresql-ha-secret` and `postgresql-ha-pgpool`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgresql-ha-pgpool
  labels:
    app.kubernetes.io/instance: postgresql-ha
    app.kubernetes.io/name: postgresql-ha
    app.kubernetes.io/component: pgpool
spec:
  # NOTE(review): a single replica makes pgpool the only entry point;
  # confirm this is intended for an "HA" setup.
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/instance: postgresql-ha
      app.kubernetes.io/name: postgresql-ha
      app.kubernetes.io/component: pgpool
  template:
    metadata:
      labels:
        app.kubernetes.io/instance: postgresql-ha
        app.kubernetes.io/name: postgresql-ha
        app.kubernetes.io/component: pgpool
    spec:
      automountServiceAccountToken: false
      affinity:
        # Soft preference: spread pgpool pods across different nodes.
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/instance: postgresql-ha
                    app.kubernetes.io/name: postgresql-ha
                    app.kubernetes.io/component: pgpool
                topologyKey: kubernetes.io/hostname
              weight: 1
      serviceAccountName: postgresql-ha
      containers:
        - name: pgpool
          image: docker.io/bitnami/pgpool:4.5.4-debian-12-r2
          imagePullPolicy: "IfNotPresent"
          env:
            - name: BITNAMI_DEBUG
              value: "false"
            # index:host:port triples, one per PostgreSQL pod. The trailing
            # comma is kept exactly as rendered.
            # NOTE(review): confirm the image tolerates the trailing comma
            # before removing it.
            - name: PGPOOL_BACKEND_NODES
              value: "0:postgresql-ha-postgresql-0.postgresql-ha-postgresql-headless:5432,1:postgresql-ha-postgresql-1.postgresql-ha-postgresql-headless:5432,2:postgresql-ha-postgresql-2.postgresql-ha-postgresql-headless:5432,"
            - name: PGPOOL_SR_CHECK_USER
              value: "repmgr"
            - name: PGPOOL_SR_CHECK_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgresql-ha-secret
                  key: repmgr-password
            - name: PGPOOL_SR_CHECK_DATABASE
              value: "postgres"
            - name: PGPOOL_ENABLE_LDAP
              value: "no"
            - name: PGPOOL_POSTGRES_USERNAME
              value: "postgres"
            - name: PGPOOL_POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgresql-ha-secret
                  key: password
            - name: PGPOOL_ADMIN_USERNAME
              value: "admin"
            - name: PGPOOL_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: postgresql-ha-pgpool
                  key: admin-password
            - name: PGPOOL_AUTHENTICATION_METHOD
              value: "scram-sha-256"
            - name: PGPOOL_ENABLE_LOAD_BALANCING
              value: "yes"
            - name: PGPOOL_DISABLE_LOAD_BALANCE_ON_WRITE
              value: "transaction"
            - name: PGPOOL_ENABLE_LOG_CONNECTIONS
              value: "no"
            - name: PGPOOL_ENABLE_LOG_HOSTNAME
              value: "yes"
            - name: PGPOOL_ENABLE_LOG_PER_NODE_STATEMENT
              value: "no"
            - name: PGPOOL_RESERVED_CONNECTIONS
              value: "1"
            # Deliberately an empty string, as in the original manifest.
            - name: PGPOOL_CHILD_LIFE_TIME
              value: ""
            - name: PGPOOL_ENABLE_TLS
              value: "no"
            - name: PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT
              value: "6"
          ports:
            - name: postgresql
              containerPort: 5432
              protocol: TCP
          livenessProbe:
            failureThreshold: 5
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
            exec:
              command:
                - /opt/bitnami/scripts/pgpool/healthcheck.sh
          # Ready once "SELECT 1" succeeds through pgpool; -h points at a
          # directory (the app-tmp-dir mount below), not a TCP host.
          readinessProbe:
            failureThreshold: 5
            initialDelaySeconds: 5
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 5
            exec:
              command:
                - bash
                - -ec
                - 'PGPASSWORD=$PGPOOL_POSTGRES_PASSWORD psql -U "postgres" -d "postgres" -h /opt/bitnami/pgpool/tmp -tA -c "SELECT 1" >/dev/null'
          resources:
            limits:
              cpu: "1"
              ephemeral-storage: 2Gi
              memory: 1Gi
            requests:
              cpu: 500m
              ephemeral-storage: 100Mi
              memory: 100Mi
          # One emptyDir volume, carved into per-purpose sub-paths.
          volumeMounts:
            - name: empty-dir
              mountPath: /tmp
              subPath: tmp-dir
            - name: empty-dir
              mountPath: /opt/bitnami/pgpool/etc
              subPath: app-etc-dir
            - name: empty-dir
              mountPath: /opt/bitnami/pgpool/conf
              subPath: app-conf-dir
            - name: empty-dir
              mountPath: /opt/bitnami/pgpool/tmp
              subPath: app-tmp-dir
            - name: empty-dir
              mountPath: /opt/bitnami/pgpool/logs
              subPath: app-logs-dir
      volumes:
        - name: empty-dir
          emptyDir: {}
16 changes: 16 additions & 0 deletions ai-telemetry/base/postgresql-ha/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Kustomization for the postgresql-ha component. Every resource listed below
# is placed in the postgresql-ha namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: postgresql-ha

# Files are referenced relative to this directory.
resources:
  - configmap.yaml
  - deployment.yaml
  - networkpolicy-pgpool.yaml
  - networkpolicy.yaml
  - pdb-pgpool.yaml
  - pdb-witness.yaml
  - pdb.yaml
  - service-headless.yaml
  - service-pgpool.yaml
  - service.yaml
  - serviceaccount.yaml
  - statefulset.yaml
23 changes: 23 additions & 0 deletions ai-telemetry/base/postgresql-ha/networkpolicy-pgpool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# NetworkPolicy for the pgpool pods: inbound traffic is limited to port 5432,
# outbound traffic is unrestricted.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: postgresql-ha-pgpool
  labels:
    app.kubernetes.io/instance: postgresql-ha
    app.kubernetes.io/name: postgresql-ha
    app.kubernetes.io/component: pgpool
    role: data
spec:
  # Pods this policy applies to.
  podSelector:
    matchLabels:
      app.kubernetes.io/instance: postgresql-ha
      app.kubernetes.io/name: postgresql-ha
      app.kubernetes.io/component: pgpool
  policyTypes:
    - Ingress
    - Egress
  # No `from` clause: any peer may connect, but only on this port.
  ingress:
    - ports:
        - port: 5432
  # A single empty rule matches every destination and port.
  egress:
    - {}
23 changes: 23 additions & 0 deletions ai-telemetry/base/postgresql-ha/networkpolicy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# NetworkPolicy for the PostgreSQL pods: inbound traffic is limited to port
# 5432, outbound traffic is unrestricted.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: postgresql-ha-postgresql
  labels:
    app.kubernetes.io/instance: postgresql-ha
    app.kubernetes.io/name: postgresql-ha
    app.kubernetes.io/component: postgresql
    role: data
spec:
  # Pods this policy applies to.
  podSelector:
    matchLabels:
      app.kubernetes.io/instance: postgresql-ha
      app.kubernetes.io/name: postgresql-ha
      app.kubernetes.io/component: postgresql
  policyTypes:
    - Ingress
    - Egress
  # No `from` clause: any peer may connect, but only on this port.
  ingress:
    - ports:
        - port: 5432
  # A single empty rule matches every destination and port.
  egress:
    - {}
15 changes: 15 additions & 0 deletions ai-telemetry/base/postgresql-ha/pdb-pgpool.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# PodDisruptionBudget for the pgpool pods: at most one of the selected pods
# may be unavailable due to a voluntary disruption at any time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: postgresql-ha-pgpool
  labels:
    app.kubernetes.io/instance: postgresql-ha
    app.kubernetes.io/name: postgresql-ha
    app.kubernetes.io/component: pgpool
spec:
  # Pods covered by this budget.
  selector:
    matchLabels:
      app.kubernetes.io/instance: postgresql-ha
      app.kubernetes.io/name: postgresql-ha
      app.kubernetes.io/component: pgpool
  maxUnavailable: 1
Loading