diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
index a0ef81d17..ea3c45b91 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
index d8ca1d7f8..3e20dbc4a 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
index bd7677483..6856d2b87 100644
--- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
@@ -247,7 +247,7 @@ spec:
       - envFrom:
           - configMapRef:
               name: audio-qna-config
-        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+        image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index aa6f49bf8..2496b11e8 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -42,7 +42,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 4598c07ec..3f2766ec5 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -195,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
    export HF_TOKEN=${your_hf_token}
    export HF_ENDPOINT="https://hf-mirror.com"
    model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
    ```

 2. Offline
@@ -209,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
    ```bash
    export HF_TOKEN=${your_hf_token}
    export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
    ```

 ### Setup Environment Variables
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 14794e8d4..0c290b868 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
index 8d37bb83a..ad7df8fa7 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
@@ -72,7 +72,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "6042:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
index e497985f8..938a6690d 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
@@ -57,7 +57,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md
index dab86381f..860bae720 100644
--- a/ChatQnA/kubernetes/intel/README_gmc.md
+++ b/ChatQnA/kubernetes/intel/README_gmc.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - retriever: opea/retriever-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest

 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
index 3d5f367d0..70aa65bd6 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
@@ -1100,7 +1100,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 69e33b873..744f09591 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -922,7 +922,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
index 90db7043c..b18285117 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
@@ -925,7 +925,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh
index f906dfabb..3535159b3 100644
--- a/ChatQnA/tests/test_compose_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

     docker images && sleep 1s
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index ab1e4150c..64b74db71 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8028:80"
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 4e6d8f91c..d0070dc96 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
index 5d77fb8cc..a155af13a 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 0821cd3cb..b184c00f3 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
index 122028b56..16c05cf36 100644
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
index 442908343..a778a8529 100644
--- a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
+++ b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh
index d1f55c9a3..63fe74f05 100644
--- a/CodeTrans/tests/test_compose_on_xeon.sh
+++ b/CodeTrans/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
index 620ee3657..35e673563 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md
index b33229211..6046ca4dc 100644
--- a/DocSum/kubernetes/intel/README_gmc.md
+++ b/DocSum/kubernetes/intel/README_gmc.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application.
 Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.
 The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.

 [NOTE]
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 1416bdbcb..9199888a1 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
index 61e8799b0..560e34a21 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
index 8c5c894ae..59df3093e 100644
--- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-xeon-server
     ports:
       - "8008:80"
diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
index 845ba5041..53b2d541f 100644
--- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
+++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 43de640ad..c3a65e92b 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -993,7 +993,7 @@ spec:
                 name: chatqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 6c52c5d92..5eb3cd6eb 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -229,7 +229,7 @@ spec:
                 name: codegen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 0fda41f5e..44d16ee9a 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -229,7 +229,7 @@ spec:
                 name: docsum-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
index 749d98408..2c0b3bffc 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
@@ -138,7 +138,7 @@ spec:
             - configMapRef:
                 name: faqgen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 0b7995118..53be5846e 100644
--- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_compose_on_xeon.sh b/SearchQnA/tests/test_compose_on_xeon.sh
index 5436cc1c5..6c73833ac 100644
--- a/SearchQnA/tests/test_compose_on_xeon.sh
+++ b/SearchQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
index 108a5086d..39ea18d46 100644
--- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
index e30fee338..9cc8c2798 100644
--- a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
+++ b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
@@ -361,7 +361,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh
index 2d0c5306d..b7fc6acb3 100644
--- a/Translation/tests/test_compose_on_xeon.sh
+++ b/Translation/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
    service_list="translation translation-ui llm-tgi nginx"
    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-   docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+   docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

    docker images && sleep 1s
 }
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
index 8f0d5b6b3..eb1ef817b 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
@@ -67,12 +67,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 ### 4. Pull TGI Xeon Image

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 ```

 Then run the command `docker images`, you will have the following 5 Docker Images:

-1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
 2. `opea/lvm-tgi:latest`
 3. `opea/visualqna:latest`
 4. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 553b13908..33b5e189b 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "8399:80"
diff --git a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
index 4d3ee3bf2..1f1b2d316 100644
--- a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
+++ b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
@@ -216,7 +216,7 @@ spec:
                 name: visualqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/VisualQnA/tests/test_compose_on_xeon.sh b/VisualQnA/tests/test_compose_on_xeon.sh
index 882989638..4d9c19483 100644
--- a/VisualQnA/tests/test_compose_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_on_xeon.sh
@@ -21,7 +21,7 @@ function build_docker_images() {
    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
    docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

-   docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+   docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

    docker images && sleep 1s
 }
diff --git a/VisualQnA/ui/svelte/package.json b/VisualQnA/ui/svelte/package.json
index 6444d18c1..e2a39a2c4 100644
--- a/VisualQnA/ui/svelte/package.json
+++ b/VisualQnA/ui/svelte/package.json
@@ -15,8 +15,7 @@
     "@fortawesome/free-solid-svg-icons": "6.2.0",
     "@playwright/test": "^1.33.0",
     "@sveltejs/adapter-auto": "1.0.0-next.75",
-    "@sveltejs/adapter-static": "^3.0.0",
-    "@sveltejs/kit": "^2.0.0",
+    "@sveltejs/kit": "^1.30.4",
     "@tailwindcss/typography": "0.5.7",
     "@types/debug": "4.1.7",
     "@types/node": "^20.12.13",
@@ -29,20 +28,21 @@
     "eslint": "^8.16.0",
     "eslint-config-prettier": "^8.3.0",
     "eslint-plugin-neverthrow": "1.1.4",
+    "eslint-plugin-svelte3": "^4.0.0",
     "postcss": "^8.4.31",
     "postcss-load-config": "^4.0.1",
     "postcss-preset-env": "^8.3.2",
     "prettier": "^2.8.8",
     "prettier-plugin-svelte": "^2.7.0",
     "prettier-plugin-tailwindcss": "^0.3.0",
-    "svelte": "^4.0.0",
-    "svelte-check": "^3.0.0",
+    "svelte": "^3.59.1",
+    "svelte-check": "^2.7.1",
     "svelte-fa": "3.0.3",
-    "svelte-preprocess": "^6.0.2",
+    "svelte-preprocess": "^4.10.7",
     "tailwindcss": "^3.1.5",
     "tslib": "^2.3.1",
-    "typescript": "^5.0.0",
-    "vite": "^5.0.0"
+    "typescript": "^4.7.4",
+    "vite": "^4.5.2"
   },
   "type": "module",
   "dependencies": {