opea-project · lianhao · Aug 9, 2024 · Aug 9, 2024 · Aug 9, 2024 · Aug 9, 2024
@@ -8,6 +8,8 @@ tei:
   resources:
     limits:
       habana.ai/gaudi: 1
+  securityContext:
+    readOnlyRootFilesystem: false
 
 # To override values in subchart tgi
 tgi:
@@ -17,8 +19,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
@@ -9,6 +9,9 @@ image:
   repository: ghcr.io/huggingface/tei-gaudi
   tag: synapse_1.16
 
+securityContext:
+  readOnlyRootFilesystem: false
+
 resources:
   limits:
     habana.ai/gaudi: 1
@@ -9,11 +9,8 @@ image:
   repository: ghcr.io/huggingface/tgi-gaudi
   tag: "2.0.1"
 
-extraArgs:
-  - "--max-input-length"
-  - "1024"
-  - "--max-total-tokens"
-  - "2048"
+MAX_INPUT_LENGTH: "1024"
+MAX_TOTAL_TOKENS: "2048"
 
 resources:
   limits:

@@ -18,3 +18,9 @@ data:
   NUMBA_CACHE_DIR: "/tmp"
   TRANSFORMERS_CACHE: "/tmp/transformers_cache"
   HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.MAX_INPUT_LENGTH }}
+  MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }}
+  {{- end }}
+  {{- if .Values.MAX_TOTAL_TOKENS }}
+  MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
+  {{- end }}
@@ -45,10 +45,6 @@ spec:
             {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
-          {{- if .Values.extraArgs }}
-          args:
-            {{- toYaml .Values.extraArgs | nindent 12}}
-          {{- end }}
           volumeMounts:
             - mountPath: /data
               name: model-volume

@@ -98,6 +98,9 @@ affinity: {}
 
 LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
 
+MAX_INPUT_LENGTH: ""
+MAX_TOTAL_TOKENS: ""
+
 global:
   http_proxy: ""
   https_proxy: ""

@@ -8,8 +8,5 @@ tgi:
   resources:
     limits:
       habana.ai/gaudi: 1
-  extraArgs:
-    - "--max-input-length"
-    - "1024"
-    - "--max-total-tokens"
-    - "2048"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
@@ -36,6 +36,11 @@ tolerations: []
 
 affinity: {}
 
+# To override values in subchart llm-uservice
+llm-uservice:
+  image:
+    repository: opea/llm-docsum-tgi
+
 # To override values in subchart tgi
 tgi:
   LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

@@ -24,6 +24,8 @@ data:
   NUMBA_CACHE_DIR: "/tmp"
   TRANSFORMERS_CACHE: "/tmp/transformers_cache"
   HF_HOME: "/tmp/.cache/huggingface"
+  MAX_INPUT_LENGTH: "1024"
+  MAX_TOTAL_TOKENS: "2048"
 ---
 # Source: tgi/templates/service.yaml
 # Copyright (C) 2024 Intel Corporation
@@ -90,11 +92,6 @@ spec:
             {}
           image: "ghcr.io/huggingface/tgi-gaudi:2.0.1"
           imagePullPolicy: IfNotPresent
-          args:
-            - --max-input-length
-            - "1024"
-            - --max-total-tokens
-            - "2048"
           volumeMounts:
             - mountPath: /data
               name: model-volume