
Commit

Merge branch 'main' into switche2e
zhlsunshine authored Aug 9, 2024
2 parents 564cc01 + a270726 commit 3e550a3
Showing 11 changed files with 31 additions and 34 deletions.
helm-charts/chatqna/gaudi-values.yaml (4 additions, 5 deletions)

@@ -8,6 +8,8 @@ tei:
   resources:
     limits:
       habana.ai/gaudi: 1
+  securityContext:
+    readOnlyRootFilesystem: false
 
 # To override values in subchart tgi
 tgi:
@@ -17,8 +19,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
helm-charts/codegen/gaudi-values.yaml (2 additions, 5 deletions)

@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
helm-charts/codetrans/gaudi-values.yaml (2 additions, 5 deletions)

@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
helm-charts/common/tei/gaudi-values.yaml (3 additions, 0 deletions)

@@ -9,6 +9,9 @@ image:
   repository: ghcr.io/huggingface/tei-gaudi
   tag: synapse_1.16
 
+securityContext:
+  readOnlyRootFilesystem: false
+
 resources:
   limits:
     habana.ai/gaudi: 1
helm-charts/common/tgi/gaudi-values.yaml (2 additions, 5 deletions)

@@ -9,11 +9,8 @@ image:
   repository: ghcr.io/huggingface/tgi-gaudi
   tag: "2.0.1"
 
-extraArgs:
-  - "--max-input-length"
-  - "1024"
-  - "--max-total-tokens"
-  - "2048"
+MAX_INPUT_LENGTH: "1024"
+MAX_TOTAL_TOKENS: "2048"
 
 resources:
   limits:
helm-charts/common/tgi/templates/configmap.yaml (6 additions, 0 deletions)

@@ -18,3 +18,9 @@ data:
   NUMBA_CACHE_DIR: "/tmp"
   TRANSFORMERS_CACHE: "/tmp/transformers_cache"
   HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.MAX_INPUT_LENGTH }}
+  MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }}
+  {{- end }}
+  {{- if .Values.MAX_TOTAL_TOKENS }}
+  MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
+  {{- end }}
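The if-guards drop MAX_INPUT_LENGTH and MAX_TOTAL_TOKENS from the rendered ConfigMap whenever the corresponding value is left at its empty-string default. A minimal sketch of the rendered data block, assuming the "1024"/"2048" values set in the Gaudi values files in this commit (other keys unchanged):

  # Rendered ConfigMap data (sketch); values taken from gaudi-values.yaml above
  data:
    NUMBA_CACHE_DIR: "/tmp"
    TRANSFORMERS_CACHE: "/tmp/transformers_cache"
    HF_HOME: "/tmp/.cache/huggingface"
    MAX_INPUT_LENGTH: "1024"
    MAX_TOTAL_TOKENS: "2048"

The deployment presumably injects this ConfigMap as container environment variables (the existing NUMBA_CACHE_DIR/HF_HOME keys already work that way), so TGI reads the limits from its environment instead of the removed --max-input-length/--max-total-tokens args.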
helm-charts/common/tgi/templates/deployment.yaml (0 additions, 4 deletions)

@@ -45,10 +45,6 @@ spec:
           {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
-          {{- if .Values.extraArgs }}
-          args:
-            {{- toYaml .Values.extraArgs | nindent 12}}
-          {{- end }}
           volumeMounts:
             - mountPath: /data
               name: model-volume
helm-charts/common/tgi/values.yaml (3 additions, 0 deletions)

@@ -98,6 +98,9 @@ affinity: {}
 
 LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
 
+MAX_INPUT_LENGTH: ""
+MAX_TOTAL_TOKENS: ""
+
 global:
   http_proxy: ""
   https_proxy: ""
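With the tgi subchart now exposing MAX_INPUT_LENGTH and MAX_TOTAL_TOKENS as plain values (empty by default), parent charts override them under their tgi: key rather than via extraArgs. A minimal sketch of a user-supplied override file mirroring the Gaudi values in this commit (the file name and the numbers are only illustrative):

  # my-values.yaml (hypothetical override file, passed with `helm install -f`)
  tgi:
    MAX_INPUT_LENGTH: "1024"
    MAX_TOTAL_TOKENS: "2048"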
helm-charts/docsum/gaudi-values.yaml (2 additions, 5 deletions)

@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
helm-charts/docsum/values.yaml (5 additions, 0 deletions)

@@ -36,6 +36,11 @@ tolerations: []
 
 affinity: {}
 
+# To override values in subchart llm-uservice
+llm-uservice:
+  image:
+    repository: opea/llm-docsum-tgi
+
 # To override values in subchart tgi
 tgi:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
microservices-connector/config/manifests/tgi_gaudi.yaml (2 additions, 5 deletions)

@@ -24,6 +24,8 @@ data:
   NUMBA_CACHE_DIR: "/tmp"
   TRANSFORMERS_CACHE: "/tmp/transformers_cache"
   HF_HOME: "/tmp/.cache/huggingface"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
 ---
 # Source: tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
@@ -90,11 +92,6 @@ spec:
             {}
           image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
          imagePullPolicy: IfNotPresent
-          args:
-            - --max-input-length
-            - "1024"
-            - --max-total-tokens
-            - "2048"
           volumeMounts:
             - mountPath: /data
               name: model-volume
