diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
index a0ef81d17..ea3c45b91 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
index d8ca1d7f8..3e20dbc4a 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
index bd7677483..6856d2b87 100644
--- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
@@ -247,7 +247,7 @@ spec:
       - envFrom:
           - configMapRef:
               name: audio-qna-config
-        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+        image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index aa6f49bf8..2496b11e8 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -42,7 +42,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 4598c07ec..3f2766ec5 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -195,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
    export HF_TOKEN=${your_hf_token}
    export HF_ENDPOINT="https://hf-mirror.com"
    model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
    ```

 2. Offline
@@ -209,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
    ```bash
    export HF_TOKEN=${your_hf_token}
    export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
    ```

 ### Setup Environment Variables
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 14794e8d4..0c290b868 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
index 8d37bb83a..ad7df8fa7 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
@@ -72,7 +72,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "6042:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
index e497985f8..938a6690d 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
@@ -57,7 +57,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md
index dab86381f..860bae720 100644
--- a/ChatQnA/kubernetes/intel/README_gmc.md
+++ b/ChatQnA/kubernetes/intel/README_gmc.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - retriever: opea/retriever-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest

 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
index 3d5f367d0..70aa65bd6 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
@@ -1100,7 +1100,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 69e33b873..744f09591 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -922,7 +922,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
index 90db7043c..b18285117 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
@@ -925,7 +925,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh
index f906dfabb..3535159b3 100644
--- a/ChatQnA/tests/test_compose_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

     docker images && sleep 1s
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index ab1e4150c..64b74db71 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8028:80"
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 4e6d8f91c..d0070dc96 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
index 5d77fb8cc..a155af13a 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 0821cd3cb..b184c00f3 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
index 122028b56..16c05cf36 100644
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
index 442908343..a778a8529 100644
--- a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
+++ b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh
index d1f55c9a3..63fe74f05 100644
--- a/CodeTrans/tests/test_compose_on_xeon.sh
+++ b/CodeTrans/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
index 620ee3657..35e673563 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md
index b33229211..6046ca4dc 100644
--- a/DocSum/kubernetes/intel/README_gmc.md
+++ b/DocSum/kubernetes/intel/README_gmc.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application.
 Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
 The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.

 [NOTE]
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 1416bdbcb..9199888a1 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
index 61e8799b0..560e34a21 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
index 8c5c894ae..59df3093e 100644
--- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-xeon-server
     ports:
       - "8008:80"
diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
index 845ba5041..53b2d541f 100644
--- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
+++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 43de640ad..c3a65e92b 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -993,7 +993,7 @@ spec:
                 name: chatqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 6c52c5d92..5eb3cd6eb 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -229,7 +229,7 @@ spec:
                 name: codegen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 0fda41f5e..44d16ee9a 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -229,7 +229,7 @@ spec:
                 name: docsum-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
index 749d98408..2c0b3bffc 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
@@ -138,7 +138,7 @@ spec:
             - configMapRef:
                 name: faqgen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 0b7995118..53be5846e 100644
--- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_compose_on_xeon.sh b/SearchQnA/tests/test_compose_on_xeon.sh
index 5436cc1c5..6c73833ac 100644
--- a/SearchQnA/tests/test_compose_on_xeon.sh
+++ b/SearchQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
index 108a5086d..39ea18d46 100644
--- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
index e30fee338..9cc8c2798 100644
--- a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
+++ b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
@@ -361,7 +361,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh
index 2d0c5306d..b7fc6acb3 100644
--- a/Translation/tests/test_compose_on_xeon.sh
+++ b/Translation/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
    service_list="translation translation-ui llm-tgi nginx"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-   docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+   docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

    docker images && sleep 1s
 }
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
index 8f0d5b6b3..eb1ef817b 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
@@ -67,12 +67,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 ### 4. Pull TGI Xeon Image

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 ```

 Then run the command `docker images`, you will have the following 5 Docker Images:

-1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
 2. `opea/lvm-tgi:latest`
 3. `opea/visualqna:latest`
 4. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 553b13908..33b5e189b 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "8399:80"
diff --git a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
index 4d3ee3bf2..1f1b2d316 100644
--- a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
+++ b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
@@ -216,7 +216,7 @@ spec:
                 name: visualqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/VisualQnA/tests/test_compose_on_xeon.sh b/VisualQnA/tests/test_compose_on_xeon.sh
index 882989638..4d9c19483 100644
--- a/VisualQnA/tests/test_compose_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_on_xeon.sh
@@ -21,7 +21,7 @@ function build_docker_images() {
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

-   docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+   docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

    docker images && sleep 1s
 }
diff --git a/VisualQnA/ui/svelte/package.json b/VisualQnA/ui/svelte/package.json
index 6444d18c1..e2a39a2c4 100644
--- a/VisualQnA/ui/svelte/package.json
+++ b/VisualQnA/ui/svelte/package.json
@@ -15,8 +15,7 @@
     "@fortawesome/free-solid-svg-icons": "6.2.0",
     "@playwright/test": "^1.33.0",
     "@sveltejs/adapter-auto": "1.0.0-next.75",
-    "@sveltejs/adapter-static": "^3.0.0",
-    "@sveltejs/kit": "^2.0.0",
+    "@sveltejs/kit": "^1.30.4",
     "@tailwindcss/typography": "0.5.7",
     "@types/debug": "4.1.7",
     "@types/node": "^20.12.13",
@@ -29,20 +28,21 @@
     "eslint": "^8.16.0",
     "eslint-config-prettier": "^8.3.0",
     "eslint-plugin-neverthrow": "1.1.4",
+    "eslint-plugin-svelte3": "^4.0.0",
     "postcss": "^8.4.31",
     "postcss-load-config": "^4.0.1",
     "postcss-preset-env": "^8.3.2",
     "prettier": "^2.8.8",
     "prettier-plugin-svelte": "^2.7.0",
     "prettier-plugin-tailwindcss": "^0.3.0",
-    "svelte": "^4.0.0",
-    "svelte-check": "^3.0.0",
+    "svelte": "^3.59.1",
+    "svelte-check": "^2.7.1",
     "svelte-fa": "3.0.3",
-    "svelte-preprocess": "^6.0.2",
+    "svelte-preprocess": "^4.10.7",
     "tailwindcss": "^3.1.5",
     "tslib": "^2.3.1",
-    "typescript": "^5.0.0",
-    "vite": "^5.0.0"
+    "typescript": "^4.7.4",
+    "vite": "^4.5.2"
   },
   "type": "module",
   "dependencies": {