Skip to content

Commit

Permalink
helm/manifest: Sync HPA related K8S probe settings
Browse files Browse the repository at this point in the history
Sync the HPA-related Kubernetes probe settings between the Helm charts and the GMC manifests, fixing the mismatch introduced by PR #386.

Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao committed Oct 8, 2024
1 parent bd6f76c commit b052180
Show file tree
Hide file tree
Showing 18 changed files with 169 additions and 28 deletions.
17 changes: 17 additions & 0 deletions helm-charts/chatqna/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ tei:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1

# To override values in subchart tgi
tgi:
Expand All @@ -24,3 +28,16 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
30 changes: 30 additions & 0 deletions helm-charts/chatqna/guardrails-gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ tei:
habana.ai/gaudi: 1
securityContext:
readOnlyRootFilesystem: false
livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1

tgi:
accelDevice: "gaudi"
Expand All @@ -34,6 +38,19 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120

tgi-guardrails:
accelDevice: "gaudi"
Expand All @@ -47,3 +64,16 @@ tgi-guardrails:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
13 changes: 13 additions & 0 deletions helm-charts/chatqna/nv-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,16 @@ tgi:
resources:
limits:
nvidia.com/gpu: 1
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
13 changes: 13 additions & 0 deletions helm-charts/codegen/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,16 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
13 changes: 13 additions & 0 deletions helm-charts/codetrans/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,16 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
5 changes: 5 additions & 0 deletions helm-charts/common/tei/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ securityContext:
resources:
limits:
habana.ai/gaudi: 1

livenessProbe:
timeoutSeconds: 1
readinessProbe:
timeoutSeconds: 1
2 changes: 2 additions & 0 deletions helm-charts/common/tei/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,14 @@ livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
timeoutSeconds: 2
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 2
startupProbe:
httpGet:
path: /health
Expand Down
10 changes: 6 additions & 4 deletions helm-charts/common/teirerank/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,17 @@ livenessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
initialDelaySeconds: 8
periodSeconds: 8
timeoutSeconds: 4
failureThreshold: 24
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
initialDelaySeconds: 8
periodSeconds: 8
timeoutSeconds: 4
startupProbe:
httpGet:
path: /health
Expand Down
14 changes: 14 additions & 0 deletions helm-charts/common/tgi/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,17 @@ CUDA_GRAPHS: ""
resources:
limits:
habana.ai/gaudi: 1

livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
14 changes: 14 additions & 0 deletions helm-charts/common/tgi/nv-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,17 @@ resources:
nvidia.com/gpu: 1

CUDA_GRAPHS: ""

livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
15 changes: 9 additions & 6 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,23 @@ resources: {}
livenessProbe:
tcpSocket:
port: http
initialDelaySeconds: 5
periodSeconds: 5
initialDelaySeconds: 8
periodSeconds: 8
timeoutSeconds: 4
failureThreshold: 24
readinessProbe:
tcpSocket:
port: http
initialDelaySeconds: 5
periodSeconds: 5
initialDelaySeconds: 16
periodSeconds: 8
timeoutSeconds: 4
startupProbe:
tcpSocket:
port: http
initialDelaySeconds: 5
initialDelaySeconds: 10
periodSeconds: 5
failureThreshold: 120
failureThreshold: 180
timeoutSeconds: 2
#livenessProbe:
# httpGet:
# path: /health
Expand Down
13 changes: 13 additions & 0 deletions helm-charts/docsum/gaudi-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,16 @@ tgi:
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
CUDA_GRAPHS: ""
livenessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
failureThreshold: 120
6 changes: 2 additions & 4 deletions microservices-connector/config/manifests/tei.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ metadata:
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only of HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tei
Expand Down Expand Up @@ -138,10 +136,10 @@ spec:
sizeLimit: 1Gi
- name: tmp
emptyDir: {}
# extra time to finish processing buffered requests before pod is forcibly terminated
# extra time to finish processing buffered requests on CPU before pod is forcibly terminated
terminationGracePeriodSeconds: 60
---
# Source: tei/templates/horizontalPodAutoscaler.yaml
# Source: tei/templates/horizontal-pod-autoscaler.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
Expand Down
6 changes: 3 additions & 3 deletions microservices-connector/config/manifests/tei_gaudi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ metadata:
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only of HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tei
Expand Down Expand Up @@ -110,12 +108,14 @@ spec:
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 1
startupProbe:
failureThreshold: 120
httpGet:
Expand All @@ -138,7 +138,7 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: tei/templates/horizontalPodAutoscaler.yaml
# Source: tei/templates/horizontal-pod-autoscaler.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
Expand Down
6 changes: 2 additions & 4 deletions microservices-connector/config/manifests/teirerank.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ metadata:
app.kubernetes.io/version: "cpu-1.5"
app.kubernetes.io/managed-by: Helm
spec:
# use explicit replica counts only of HorizontalPodAutoscaler is disabled
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: teirerank
Expand Down Expand Up @@ -137,10 +135,10 @@ spec:
sizeLimit: 1Gi
- name: tmp
emptyDir: {}
# extra time to finish processing buffered requests before pod is forcibly terminated
# extra time to finish processing buffered requests on CPU before pod is forcibly terminated
terminationGracePeriodSeconds: 60
---
# Source: teirerank/templates/horizontalPodAutoscaler.yaml
# Source: teirerank/templates/horizontal-pod-autoscaler.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
Expand Down
10 changes: 5 additions & 5 deletions microservices-connector/config/manifests/tgi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,22 @@ spec:
failureThreshold: 24
initialDelaySeconds: 8
periodSeconds: 8
timeoutSeconds: 4
tcpSocket:
port: http
timeoutSeconds: 4
readinessProbe:
initialDelaySeconds: 16
periodSeconds: 8
timeoutSeconds: 4
tcpSocket:
port: http
timeoutSeconds: 4
startupProbe:
failureThreshold: 180
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 2
tcpSocket:
port: http
timeoutSeconds: 2
resources:
{}
volumes:
Expand All @@ -133,10 +133,10 @@ spec:
sizeLimit: 1Gi
- name: tmp
emptyDir: {}
# extra time to finish processing buffered requests before pod is forcibly terminated
# extra time to finish processing buffered requests on CPU before pod is forcibly terminated
terminationGracePeriodSeconds: 120
---
# Source: tgi/templates/horizontalPorAutoscaler.yaml
# Source: tgi/templates/horizontal-pod-autoscaler.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
---
Expand Down
Loading

0 comments on commit b052180

Please sign in to comment.