Skip to content

Commit

Permalink
feat: add startup and liveness probes for bootstrapped nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
puppetninja committed Aug 29, 2024
1 parent 2f66918 commit 7f23405
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 16 deletions.
18 changes: 16 additions & 2 deletions charts/tezos/templates/_containers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,25 @@
name: tezos-net
- containerPort: 9932
name: metrics
{{- if or (not (hasKey $.node_vals "bootstrapped_readiness_probe")) $.node_vals.bootstrapped_readiness_probe }}
{{- if or (not (hasKey $.node_vals "bootstrapped_probe")) $.node_vals.bootstrapped_probe }}
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10
{{- else if or (not (hasKey $.node_vals "rpc_readiness_probe")) $.node_vals.rpc_readiness_probe }}
readinessProbe:
httpGet:
Expand Down Expand Up @@ -259,7 +273,7 @@
{{- end }}

{{- define "tezos.container.sidecar" }}
{{- if or (not (hasKey $.node_vals "bootstrapped_readiness_probe")) $.node_vals.bootstrapped_readiness_probe }}
{{- if or (not (hasKey $.node_vals "bootstrapped_probe")) $.node_vals.bootstrapped_probe }}
{{- $sidecarResources := dict "requests" (dict "memory" "80Mi") "limits" (dict "memory" "100Mi") -}}
{{- include "tezos.generic_container" (dict "root" $
"type" "sidecar"
Expand Down
18 changes: 9 additions & 9 deletions charts/tezos/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,15 @@ accounts: {}
# - `rpc_readiness_probe`: Attach a probe to the node. The probe checks whether
# the RPC service is responsive, which should always be the
# case. Defaults to true.
# - `bootstrapped_readiness_probe`: Checks whether the most recent block is less than
# 600 seconds old.
# Overrides `rpc_readiness_probe`. Defaults to True.
# True is good for RPC nodes, private nodes, and
# self-contained private chains.
# Recommended to set to False when bootstrapping a new
# chain with external bakers, such as a new test chain.
# Otherwise, the chain may become unreachable externally
# while waiting for other nodes to come online.
# - `bootstrapped_probe`: Adds startup, readiness, and liveness probes that check
# whether the most recent block is less than 600 seconds old.
# Overrides `rpc_readiness_probe`. Defaults to True.
# True is good for RPC nodes, private nodes, and
# self-contained private chains.
# Recommended to set to False when bootstrapping a new
# chain with external bakers, such as a new test chain.
# Otherwise, the chain may become unreachable externally
# while waiting for other nodes to come online.
# - `instances`: A list of nodes to fire up, each is a dictionary defining:
# - `bake_using_accounts`: List of account names that should be used for baking.
# - `authorized_keys`: List of account names that should be used as keys to
Expand Down
16 changes: 15 additions & 1 deletion test/charts/mainnet.expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10
- name: sidecar
image: "ghcr.io/tacoinfra/tezos-k8s-utils:main"
imagePullPolicy: IfNotPresent
Expand Down
30 changes: 29 additions & 1 deletion test/charts/mainnet2.expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10
- name: logger
image: "ghcr.io/tacoinfra/tezos-k8s-utils:main"
imagePullPolicy: IfNotPresent
Expand Down Expand Up @@ -622,10 +636,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10
resources:
limits:
memory: 16192Mi
Expand Down
48 changes: 45 additions & 3 deletions test/charts/private-chain.expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10
- name: sidecar
image: "ghcr.io/tacoinfra/tezos-k8s-utils:main"
imagePullPolicy: IfNotPresent
Expand Down Expand Up @@ -771,10 +785,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10

- name: baker-0-013-ptjakart
image: "tezos/tezos:v15-release"
Expand Down Expand Up @@ -1404,10 +1432,24 @@ spec:
name: tezos-net
- containerPort: 9932
name: metrics
startupProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 180
periodSeconds: 10
readinessProbe:
httpGet:
path: /is_synced
port: 31732
port: 31732
successThreshold: 1
periodSeconds: 10
livenessProbe:
httpGet:
path: /is_synced
port: 31732
failureThreshold: 30
periodSeconds: 10

- name: sidecar
image: "ghcr.io/tacoinfra/tezos-k8s-utils:main"
Expand Down

0 comments on commit 7f23405

Please sign in to comment.