opea-project · yongfengdu · Nov 14, 2024 · Nov 13, 2024 · Nov 13, 2024
@@ -8,7 +8,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4

@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -49,7 +49,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -81,7 +81,7 @@ tgi-guardrails:
   LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4

@@ -9,7 +9,7 @@ accelDevice: "gaudi"
 
 image:
   repository: ghcr.io/huggingface/tgi-gaudi
-  tag: "2.0.5"
+  tag: "2.0.6"
 
 MAX_INPUT_LENGTH: "1024"
 MAX_TOTAL_TOKENS: "2048"

@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -18,8 +18,10 @@ Open another terminal and run the following command to verify the service if wor
 
 ```console
 curl http://localhost:8888/v1/faqgen \
-    -H "Content-Type: application/json" \
-    -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+      -H "Content-Type: multipart/form-data" \
+      -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+      -F "max_tokens=32" \
+      -F "stream=false"
 ```
 
 ### Verify the workload through UI

@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -20,8 +20,10 @@ spec:
           max_retry=20;
           for ((i=1; i<=max_retry; i++)); do
             curl http://{{ include "faqgen.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
-            -H "Content-Type: application/json" \
-            -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' && break;
+            -H "Content-Type: multipart/form-data" \
+            -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+            -F "max_tokens=32" \
+            -F "stream=false" && break;
             curlcode=$?
             if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
           done;

@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1

@@ -88,7 +88,7 @@ spec:
                 optional: true
           securityContext:
             {}
-          image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+          image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data

@@ -26,7 +26,7 @@ Should you desire to use the Gaudi accelerator, two alternate images are used fo
 For Gaudi:
 
 - tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 
 ## Deploy ChatQnA pipeline