From 028ee4f1bbb22eb5a0bab7c0b14d44466ed5f045 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Wed, 13 Nov 2024 08:49:04 +0800
Subject: [PATCH 1/2] Upgrade tgi-gaudi to version 2.0.6

Signed-off-by: Lianhao Lu
---
 helm-charts/agentqna/gaudi-values.yaml                      | 2 +-
 helm-charts/audioqna/gaudi-values.yaml                      | 2 +-
 helm-charts/chatqna/gaudi-values.yaml                       | 2 +-
 helm-charts/chatqna/guardrails-gaudi-values.yaml            | 4 ++--
 helm-charts/codegen/gaudi-values.yaml                       | 2 +-
 helm-charts/codetrans/gaudi-values.yaml                     | 2 +-
 helm-charts/common/agent/gaudi-values.yaml                  | 2 +-
 helm-charts/common/tgi/gaudi-values.yaml                    | 2 +-
 helm-charts/docsum/gaudi-values.yaml                        | 2 +-
 helm-charts/faqgen/gaudi-values.yaml                        | 2 +-
 helm-charts/visualqna/gaudi-values.yaml                     | 2 +-
 microservices-connector/config/manifests/tgi_gaudi.yaml     | 2 +-
 microservices-connector/config/samples/ChatQnA/use_cases.md | 2 +-
 13 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/helm-charts/agentqna/gaudi-values.yaml b/helm-charts/agentqna/gaudi-values.yaml
index fa7277ed..0759ddb5 100644
--- a/helm-charts/agentqna/gaudi-values.yaml
+++ b/helm-charts/agentqna/gaudi-values.yaml
@@ -8,7 +8,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4
diff --git a/helm-charts/audioqna/gaudi-values.yaml b/helm-charts/audioqna/gaudi-values.yaml
index 97848f9a..c6a18dfa 100644
--- a/helm-charts/audioqna/gaudi-values.yaml
+++ b/helm-charts/audioqna/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml
index a3086a24..47df99fc 100644
--- a/helm-charts/chatqna/gaudi-values.yaml
+++ b/helm-charts/chatqna/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/chatqna/guardrails-gaudi-values.yaml b/helm-charts/chatqna/guardrails-gaudi-values.yaml
index 9ee715cd..2ef3d129 100644
--- a/helm-charts/chatqna/guardrails-gaudi-values.yaml
+++ b/helm-charts/chatqna/guardrails-gaudi-values.yaml
@@ -49,7 +49,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
@@ -81,7 +81,7 @@ tgi-guardrails:
   LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml
index b37ccc6b..e5367383 100644
--- a/helm-charts/codegen/gaudi-values.yaml
+++ b/helm-charts/codegen/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/codetrans/gaudi-values.yaml b/helm-charts/codetrans/gaudi-values.yaml
index b37ccc6b..e5367383 100644
--- a/helm-charts/codetrans/gaudi-values.yaml
+++ b/helm-charts/codetrans/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/common/agent/gaudi-values.yaml b/helm-charts/common/agent/gaudi-values.yaml
index 9ac96115..91ef5d10 100644
--- a/helm-charts/common/agent/gaudi-values.yaml
+++ b/helm-charts/common/agent/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 4
diff --git a/helm-charts/common/tgi/gaudi-values.yaml b/helm-charts/common/tgi/gaudi-values.yaml
index a97a9125..9c46415c 100644
--- a/helm-charts/common/tgi/gaudi-values.yaml
+++ b/helm-charts/common/tgi/gaudi-values.yaml
@@ -9,7 +9,7 @@ accelDevice: "gaudi"
 
 image:
   repository: ghcr.io/huggingface/tgi-gaudi
-  tag: "2.0.5"
+  tag: "2.0.6"
 
 MAX_INPUT_LENGTH: "1024"
 MAX_TOTAL_TOKENS: "2048"
diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml
index b37ccc6b..e5367383 100644
--- a/helm-charts/docsum/gaudi-values.yaml
+++ b/helm-charts/docsum/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/faqgen/gaudi-values.yaml b/helm-charts/faqgen/gaudi-values.yaml
index dcfb159e..d14729c4 100644
--- a/helm-charts/faqgen/gaudi-values.yaml
+++ b/helm-charts/faqgen/gaudi-values.yaml
@@ -5,7 +5,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/helm-charts/visualqna/gaudi-values.yaml b/helm-charts/visualqna/gaudi-values.yaml
index 901d5861..5a0e95c3 100644
--- a/helm-charts/visualqna/gaudi-values.yaml
+++ b/helm-charts/visualqna/gaudi-values.yaml
@@ -9,7 +9,7 @@ tgi:
   accelDevice: "gaudi"
   image:
     repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.0.5"
+    tag: "2.0.6"
   resources:
     limits:
       habana.ai/gaudi: 1
diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml
index c090507c..cad97ad6 100644
--- a/microservices-connector/config/manifests/tgi_gaudi.yaml
+++ b/microservices-connector/config/manifests/tgi_gaudi.yaml
@@ -88,7 +88,7 @@ spec:
                 optional: true
           securityContext:
             {}
-          image: "ghcr.io/huggingface/tgi-gaudi:2.0.5"
+          image: "ghcr.io/huggingface/tgi-gaudi:2.0.6"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/microservices-connector/config/samples/ChatQnA/use_cases.md b/microservices-connector/config/samples/ChatQnA/use_cases.md
index 4b793e24..0199a2f3 100644
--- a/microservices-connector/config/samples/ChatQnA/use_cases.md
+++ b/microservices-connector/config/samples/ChatQnA/use_cases.md
@@ -26,7 +26,7 @@ Should you desire to use the Gaudi accelerator, two alternate images are used fo
 
 For Gaudi:
 
 - tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0
-- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
 
 ## Deploy ChatQnA pipeline

From 27f4b677a8176d64eea42bf80304946e9ce8dd72 Mon Sep 17 00:00:00 2001
From: Lianhao Lu
Date: Wed, 13 Nov 2024 10:21:14 +0800
Subject: [PATCH 2/2] Fix faqgen test to align with GenAIExamples

Signed-off-by: Lianhao Lu
---
 helm-charts/faqgen/README.md                     | 6 ++++--
 helm-charts/faqgen/templates/tests/test-pod.yaml | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/helm-charts/faqgen/README.md b/helm-charts/faqgen/README.md
index 764e89d9..0bedfeff 100644
--- a/helm-charts/faqgen/README.md
+++ b/helm-charts/faqgen/README.md
@@ -18,8 +18,10 @@ Open another terminal and run the following command to verify the service if wor
 
 ```console
 curl http://localhost:8888/v1/faqgen \
-  -H "Content-Type: application/json" \
-  -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+  -H "Content-Type: multipart/form-data" \
+  -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+  -F "max_tokens=32" \
+  -F "stream=false"
 ```
 
 ### Verify the workload through UI
diff --git a/helm-charts/faqgen/templates/tests/test-pod.yaml b/helm-charts/faqgen/templates/tests/test-pod.yaml
index 711571bc..a9b84d34 100644
--- a/helm-charts/faqgen/templates/tests/test-pod.yaml
+++ b/helm-charts/faqgen/templates/tests/test-pod.yaml
@@ -20,8 +20,10 @@ spec:
           max_retry=20;
           for ((i=1; i<=max_retry; i++)); do
             curl http://{{ include "faqgen.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
-              -H "Content-Type: application/json" \
-              -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' && break;
+              -H "Content-Type: multipart/form-data" \
+              -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \
+              -F "max_tokens=32" \
+              -F "stream=false" && break;
             curlcode=$?
             if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
          done;