diff --git a/.github/workflows/manual-freeze-requirements.yml b/.github/workflows/manual-freeze-requirements.yml new file mode 100644 index 000000000..8f00ca423 --- /dev/null +++ b/.github/workflows/manual-freeze-requirements.yml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Freeze-requirements + +on: + workflow_dispatch: + +jobs: + freeze-requirements: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.ref }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Set up Git + run: | + git config --global user.name "NeuralChatBot" + git config --global user.email "grp_neural_chat_bot@intel.com" + git remote set-url origin https://NeuralChatBot:"${{ secrets.ACTION_TOKEN }}"@github.com/opea-project/GenAIComps.git + + - name: Run script + run: | + bash .github/workflows/scripts/freeze_requirements.sh + + - name: Commit changes + run: | + git add . + git commit -s -m "Freeze requirements" + git push diff --git a/.github/workflows/pr-examples-test.yml b/.github/workflows/pr-examples-test.yml index 92354c5c2..302d6f8e1 100644 --- a/.github/workflows/pr-examples-test.yml +++ b/.github/workflows/pr-examples-test.yml @@ -41,26 +41,25 @@ jobs: env: HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} run: | + cd ../ && sudo rm -rf GenAIExamples git clone https://github.com/opea-project/GenAIExamples.git - cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi - sed -i "s#:latest#:comps#g" compose.yaml - cat compose.yaml + cd GenAIExamples/ChatQnA/docker + cp -r ${{ github.workspace }}/../GenAIComps . - cd ${{ github.workspace }}/GenAIExamples/ChatQnA/tests - GenAIComps_dir=${{github.workspace}} + cd ../tests sed -i '/GenAIComps.git/d' test_chatqna_on_gaudi.sh - sed -i "s#cd GenAIComps#cd ${GenAIComps_dir}#g" test_chatqna_on_gaudi.sh - sed -i "s#docker build -t#docker build --no-cache -q -t#g" test_chatqna_on_gaudi.sh - sed -i "s#:latest#:comps#g" test_chatqna_on_gaudi.sh cat test_chatqna_on_gaudi.sh echo "Run test..." 
+ export IMAGE_TAG="comps" timeout 50m bash test_chatqna_on_gaudi.sh + echo "LOG_PATH=$(pwd)/*.log" >> $GITHUB_ENV + - name: Clean up container if: cancelled() || failure() run: | - cd ${{ github.workspace }}/GenAIExamples/ChatQnA/docker/gaudi + cd ${{ github.workspace }}/../GenAIExamples/ChatQnA/docker/gaudi docker compose stop && docker compose rm -f docker system prune -f @@ -69,4 +68,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: "Examples-Test-Logs" - path: ${{ github.workspace }}/GenAIExamples/ChatQnA/tests/*.log + path: ${{ env.LOG_PATH }} diff --git a/.github/workflows/pr-microservice-test.yml b/.github/workflows/pr-microservice-test.yml index a9a6cb26c..8dd5e6e6c 100644 --- a/.github/workflows/pr-microservice-test.yml +++ b/.github/workflows/pr-microservice-test.yml @@ -33,6 +33,7 @@ jobs: run: | sudo rm -rf ${{github.workspace}}/* docker system prune -f + docker rmi $(docker images --filter reference="*/*:comps" -q) || true - name: Checkout out Repo uses: actions/checkout@v4 diff --git a/.github/workflows/scripts/freeze_requirements.sh b/.github/workflows/scripts/freeze_requirements.sh new file mode 100644 index 000000000..431dadaef --- /dev/null +++ b/.github/workflows/scripts/freeze_requirements.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +function freeze() { + local file=$1 + local folder=$(dirname "$file") + local keep_origin_packages="true" + echo "::group::Check $file ..." + pip-compile \ + --no-upgrade \ + --no-annotate \ + --no-header \ + --output-file "$folder/freeze.txt" \ + "$file" + echo "::endgroup::" + + if [[ -e "$folder/freeze.txt" ]]; then + if [[ "$keep_origin_packages" == "true" ]]; then + # fix corner cases + sed -i '/^\s*#/d; s/#.*//; /^\s*$/d; s/ //g' "$file" + sed -i '/^\s*#/d; s/#.*//; /^\s*$/d; s/ //g; s/huggingface-hub\[inference\]/huggingface-hub/g; s/uvicorn\[standard\]/uvicorn/g' "$folder/freeze.txt" + if grep -q '^transformers$' $file && ! grep -q '^transformers\[sentencepiece\]$' $file; then + sed -i "s/transformers\[sentencepiece\]/transformers/" "$folder/freeze.txt" + fi + packages1=$(tr '><' '=' <"$file" | cut -d'=' -f1 | tr '[:upper:]' '[:lower:]' | sed 's/[-_]/-/g') + packages2=$(cut -d'=' -f1 "$folder/freeze.txt" | tr '[:upper:]' '[:lower:]' | sed 's/[-_]/-/g') + common_packages=$(comm -12 <(echo "$packages2" | sort) <(echo "$packages1" | sort)) + grep '^git\+' "$file" >temp_file || touch temp_file + rm -rf "$file" && mv temp_file "$file" + while IFS= read -r line; do + package=$(echo "$line" | cut -d'=' -f1) + package_transformed=$(echo "$package" | tr '[:upper:]' '[:lower:]' | sed 's/[_-]/-/g') + pattern=$(echo "$package_transformed" | sed 's/\[/\\\[/g; s/\]/\\\]/g') + if echo "$common_packages" | grep -q "^$pattern$"; then + echo "$line" >>"$file" + fi + done <"$folder/freeze.txt" + rm "$folder/freeze.txt" + else + mv "$folder/freeze.txt" "$file" + fi + fi +} + +function check_branch_name() { + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + echo "$GITHUB_REF_NAME is protected branch" + exit 0 + else + echo "branch name is $GITHUB_REF_NAME" + fi +} + +function main() { + check_branch_name + echo "::group::pip install pip-tools" && pip install pip-tools --upgrade && echo "::endgroup::" + export -f freeze + find . 
-name "requirements.txt" | xargs -n 1 -I {} bash -c 'freeze "$@"' _ {} +} + +main diff --git a/.gitignore b/.gitignore index bee8a64b7..567994ceb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__ +*.egg-info/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ea9c494bb..475f5433a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,6 +14,8 @@ repos: - id: check-json - id: check-yaml - id: debug-statements + - id: mixed-line-ending + args: [--fix=lf] - id: requirements-txt-fixer - id: trailing-whitespace files: (.*\.(py|rst|cmake|yaml|yml|json|ts|js|html|svelte|sh))$ diff --git a/comps/agent/langchain/README.md b/comps/agent/langchain/README.md index 286e95508..b48e393eb 100644 --- a/comps/agent/langchain/README.md +++ b/comps/agent/langchain/README.md @@ -4,32 +4,32 @@ The langchain agent model refers to a framework that integrates the reasoning ca ![Architecture Overview](agent_arch.jpg) -# 🚀1. Start Microservice with Python(Option 1) +## 🚀1. Start Microservice with Python(Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash cd comps/agent/langchain/ pip install -r requirements.txt ``` -## 1.2 Start Microservice with Python Script +### 1.2 Start Microservice with Python Script ```bash cd comps/agent/langchain/ python agent.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## Build Microservices +### Build Microservices ```bash cd GenAIComps/ # back to GenAIComps/ folder docker build -t opea/comps-agent-langchain:latest -f comps/agent/langchain/docker/Dockerfile . ``` -## start microservices +### start microservices ```bash export ip_address=$(hostname -I | awk '{print $1}') @@ -56,7 +56,7 @@ docker logs comps-langchain-agent-endpoint > docker run --rm --runtime=runc --name="comps-langchain-agent-endpoint" -v ./comps/agent/langchain/:/home/user/comps/agent/langchain/ -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} --env-file ${agent_env} opea/comps-agent-langchain:latest > ``` -# 🚀3. Validate Microservice +## 🚀3. Validate Microservice Once microservice starts, user can use below script to invoke. @@ -73,7 +73,7 @@ data: [DONE] ``` -# 🚀4. Provide your own tools +## 🚀4. Provide your own tools - Define tools diff --git a/comps/asr/README.md b/comps/asr/README.md index 4cf78cbf1..10cac9421 100644 --- a/comps/asr/README.md +++ b/comps/asr/README.md @@ -2,17 +2,17 @@ ASR (Audio-Speech-Recognition) microservice helps users convert speech to text. When building a talking bot with LLM, users will need to convert their audio inputs (What they talk, or Input audio from other sources) to text, so the LLM is able to tokenize the text and generate an answer. This microservice is built for that conversion stage. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the ASR microservice with Python, you need to first install python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start Whisper Service/Test +### 1.2 Start Whisper Service/Test - Xeon CPU @@ -40,7 +40,7 @@ nohup python whisper_server.py --device=hpu & python check_whisper_server.py ``` -## 1.3 Start ASR Service/Test +### 1.3 Start ASR Service/Test ```bash cd ../ @@ -54,13 +54,13 @@ While the Whisper service is running, you can start the ASR service. 
If the ASR {'id': '0e686efd33175ce0ebcf7e0ed7431673', 'text': 'who is pat gelsinger'} ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) Alternatively, you can also start the ASR microservice with Docker. -## 2.1 Build Images +### 2.1 Build Images -### 2.1.1 Whisper Server Image +#### 2.1.1 Whisper Server Image - Xeon CPU @@ -76,15 +76,15 @@ cd ../.. docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile_hpu . ``` -### 2.1.2 ASR Service Image +#### 2.1.2 ASR Service Image ```bash docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/Dockerfile . ``` -## 2.2 Start Whisper and ASR Service +### 2.2 Start Whisper and ASR Service -### 2.2.1 Start Whisper Server +#### 2.2.1 Start Whisper Server - Xeon @@ -98,7 +98,7 @@ docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper-gaudi:latest ``` -### 2.2.2 Start ASR service +#### 2.2.2 Start ASR service ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -106,7 +106,7 @@ ip_address=$(hostname -I | awk '{print $1}') docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest ``` -### 2.2.3 Test +#### 2.2.3 Test ```bash # Use curl or python diff --git a/comps/asr/whisper/Dockerfile b/comps/asr/whisper/Dockerfile index 86737e7d8..57e186a4e 100644 --- a/comps/asr/whisper/Dockerfile +++ b/comps/asr/whisper/Dockerfile @@ -27,10 +27,9 @@ RUN pip install --no-cache-dir --upgrade pip && \ else \ pip install --no-cache-dir -r /home/user/comps/asr/requirements.txt ; \ fi - pip list ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/asr/whisper -ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] \ No newline at end of file +ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/whisper/whisper_model.py b/comps/asr/whisper/whisper_model.py index 0af9ebfcb..85d4126cd 100644 --- a/comps/asr/whisper/whisper_model.py +++ b/comps/asr/whisper/whisper_model.py @@ -16,7 +16,7 @@ class WhisperModel: """Convert audio to text.""" - def __init__(self, model_name_or_path="openai/whisper-small", language="english", device="cpu"): + def __init__(self, model_name_or_path="openai/whisper-small", language="english", device="cpu", hpu_max_len=8192): if device == "hpu": # Explicitly link HPU with Torch from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -31,12 +31,11 @@ def __init__(self, model_name_or_path="openai/whisper-small", language="english" self.model.eval() self.language = language + self.hpu_max_len = hpu_max_len if device == "hpu": - # do hpu graph warmup with a long enough input audio - # whisper has a receptive field of 30 seconds - # here we select a relatively long audio (~15 sec) to quickly warmup - self._warmup_whisper_hpu_graph("https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav") + self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_60s_audio.wav") + self._warmup_whisper_hpu_graph("https://github.com/Spycsh/assets/raw/main/ljspeech_30s_audio.wav") def _audiosegment_to_librosawav(self, 
audiosegment): # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples @@ -59,11 +58,54 @@ def _warmup_whisper_hpu_graph(self, url): print("[ASR] warmup...") waveform = AudioSegment.from_file("warmup.wav").set_frame_rate(16000) waveform = self._audiosegment_to_librosawav(waveform) - # pylint: disable=E1101 - inputs = self.processor.feature_extractor( - waveform, return_tensors="pt", sampling_rate=16_000 - ).input_features.to(self.device) - _ = self.model.generate(inputs, language="chinese") + + try: + processed_inputs = self.processor( + waveform, + return_tensors="pt", + truncation=False, + padding="longest", + return_attention_mask=True, + sampling_rate=16000, + ) + except RuntimeError as e: + if "Padding size should be less than" in str(e): + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + raise e + + if processed_inputs.input_features.shape[-1] < 3000: + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + processed_inputs["input_features"] = torch.nn.functional.pad( + processed_inputs.input_features, + (0, self.hpu_max_len - processed_inputs.input_features.size(-1)), + value=-1.5, + ) + processed_inputs["attention_mask"] = torch.nn.functional.pad( + processed_inputs.attention_mask, + (0, self.hpu_max_len + 1 - processed_inputs.attention_mask.size(-1)), + value=0, + ) + + _ = self.model.generate( + **( + processed_inputs.to( + self.device, + ) + ), + language=self.language, + ) def audio2text(self, audio_path): """Convert audio to text. @@ -80,11 +122,52 @@ def audio2text(self, audio_path): audio_dataset = Dataset.from_dict({"audio": [audio_path]}).cast_column("audio", Audio(sampling_rate=16000)) waveform = audio_dataset[0]["audio"]["array"] - # pylint: disable=E1101 - inputs = self.processor.feature_extractor( - waveform, return_tensors="pt", sampling_rate=16_000 - ).input_features.to(self.device) - predicted_ids = self.model.generate(inputs, language=self.language) + try: + processed_inputs = self.processor( + waveform, + return_tensors="pt", + truncation=False, + padding="longest", + return_attention_mask=True, + sampling_rate=16000, + ) + except RuntimeError as e: + if "Padding size should be less than" in str(e): + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + else: + raise e + if processed_inputs.input_features.shape[-1] < 3000: + # short-form + processed_inputs = self.processor( + waveform, + return_tensors="pt", + sampling_rate=16000, + ) + elif self.device == "hpu" and processed_inputs.input_features.shape[-1] > 3000: + processed_inputs["input_features"] = torch.nn.functional.pad( + processed_inputs.input_features, + (0, self.hpu_max_len - processed_inputs.input_features.size(-1)), + value=-1.5, + ) + processed_inputs["attention_mask"] = torch.nn.functional.pad( + processed_inputs.attention_mask, + (0, self.hpu_max_len + 1 - processed_inputs.attention_mask.size(-1)), + value=0, + ) + + predicted_ids = self.model.generate( + **( + processed_inputs.to( + self.device, + ) + ), + language=self.language, + ) # pylint: disable=E1101 result = self.processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=True)[0] if self.language in ["chinese", "mandarin"]: @@ -96,20 +179,23 @@ def audio2text(self, audio_path): if __name__ == "__main__": - asr = WhisperModel(language="english") + asr = 
WhisperModel(model_name_or_path="openai/whisper-small", language="english", device="cpu") # Test multilanguage asr + asr.language = "chinese" urllib.request.urlretrieve( "https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav", "sample.wav", ) - asr.language = "chinese" text = asr.audio2text("sample.wav") + asr.language = "english" urllib.request.urlretrieve( "https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav", "sample.wav", ) text = asr.audio2text("sample.wav") - os.remove("sample.wav") + for i in [5, 10, 30, 60]: + urllib.request.urlretrieve(f"https://github.com/Spycsh/assets/raw/main/ljspeech_{i}s_audio.wav", "sample.wav") + text = asr.audio2text("sample.wav") diff --git a/comps/chathistory/mongo/README.md b/comps/chathistory/mongo/README.md index 2eaa62e55..6f3f7a93a 100644 --- a/comps/chathistory/mongo/README.md +++ b/comps/chathistory/mongo/README.md @@ -17,16 +17,16 @@ export DB_NAME=${DB_NAME} export COLLECTION_NAME=${COLLECTION_NAME} ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI - Run mongoDB image @@ -40,7 +40,7 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest docker run -d --name="chathistory-mongo-server" -p 6013:6013 -p 6012:6012 -p 6014:6014 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/chathistory-mongo-server:latest ``` -# Invoke Microservice +## Invoke Microservice Once chathistory service is up and running, users can update the database by using the below API endpoint. The API returns a unique UUID for the saved conversation. 
diff --git a/comps/chathistory/mongo/requirements.txt b/comps/chathistory/mongo/requirements.txt index b0dec78c0..aa08f761a 100644 --- a/comps/chathistory/mongo/requirements.txt +++ b/comps/chathistory/mongo/requirements.txt @@ -1 +1 @@ -motor==3.4.0 +motor==3.4.0 diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index bd52d7274..382982d27 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -160,7 +160,7 @@ class ChatCompletionRequest(BaseModel): logit_bias: Optional[Dict[str, float]] = None logprobs: Optional[bool] = False top_logprobs: Optional[int] = 0 - max_tokens: Optional[int] = 16 # use https://platform.openai.com/docs/api-reference/completions/create + max_tokens: Optional[int] = 1024 # use https://platform.openai.com/docs/api-reference/completions/create n: Optional[int] = 1 presence_penalty: Optional[float] = 0.0 response_format: Optional[ResponseFormat] = None diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md index 46e5e3c0f..7c3873794 100644 --- a/comps/dataprep/README.md +++ b/comps/dataprep/README.md @@ -17,22 +17,22 @@ Occasionally unstructured data will contain image data, to convert the image dat export SUMMARIZE_IMAGE_VIA_LVM=1 ``` -# Dataprep Microservice with Redis +## Dataprep Microservice with Redis For details, please refer to this [readme](redis/README.md) -# Dataprep Microservice with Milvus +## Dataprep Microservice with Milvus For details, please refer to this [readme](milvus/README.md) -# Dataprep Microservice with Qdrant +## Dataprep Microservice with Qdrant For details, please refer to this [readme](qdrant/README.md) -# Dataprep Microservice with Pinecone +## Dataprep Microservice with Pinecone For details, please refer to this [readme](pinecone/README.md) -# Dataprep Microservice with PGVector +## Dataprep Microservice with PGVector For details, please refer to this [readme](pgvector/README.md) diff --git a/comps/dataprep/milvus/README.md b/comps/dataprep/milvus/README.md index 738869a82..9941dbaa6 100644 --- a/comps/dataprep/milvus/README.md +++ b/comps/dataprep/milvus/README.md @@ -1,8 +1,8 @@ # Dataprep Microservice with Milvus -# 🚀Start Microservice with Python +## 🚀Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt @@ -11,11 +11,11 @@ apt-get install libtesseract-dev -y apt-get install poppler-utils -y ``` -## Start Milvus Server +### Start Milvus Server Please refer to this [readme](../../../vectorstores/langchain/milvus/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -27,7 +27,7 @@ export COLLECTION_NAME=${your_collection_name} export MOSEC_EMBEDDING_ENDPOINT=${your_embedding_endpoint} ``` -## Start Document Preparation Microservice for Milvus with Python Script +### Start Document Preparation Microservice for Milvus with Python Script Start document preparation microservice for Milvus with below command. @@ -35,22 +35,22 @@ Start document preparation microservice for Milvus with below command. python prepare_doc_milvus.py ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/dataprep-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg no_proxy=$no_proxy -f comps/dataprep/milvus/docker/Dockerfile . 
``` -## Run Docker with CLI +### Run Docker with CLI ```bash docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MOSEC_EMBEDDING_ENDPOINT=${your_embedding_endpoint} -e MILVUS=${your_milvus_host_ip} opea/dataprep-milvus:latest ``` -# Invoke Microservice +## Invoke Microservice Once document preparation microservice for Milvus is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. diff --git a/comps/dataprep/milvus/config.py b/comps/dataprep/milvus/config.py index 0f8c57139..b4cb72233 100644 --- a/comps/dataprep/milvus/config.py +++ b/comps/dataprep/milvus/config.py @@ -12,7 +12,7 @@ MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag_milvus") -MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/root/bce-embedding-base_v1") +MOSEC_EMBEDDING_MODEL = os.environ.get("MOSEC_EMBEDDING_MODEL", "/home/user/bce-embedding-base_v1") MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" diff --git a/comps/dataprep/pgvector/README.md b/comps/dataprep/pgvector/README.md index af25ae56d..1a7772eb8 100644 --- a/comps/dataprep/pgvector/README.md +++ b/comps/dataprep/pgvector/README.md @@ -1,14 +1,14 @@ # Dataprep Microservice with PGVector -# 🚀1. Start Microservice with Python(Option 1) +## 🚀1. Start Microservice with Python(Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Setup Environment Variables +### 1.2 Setup Environment Variables ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb @@ -18,11 +18,11 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" ``` -## 1.3 Start PGVector +### 1.3 Start PGVector Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). -## 1.4 Start Document Preparation Microservice for PGVector with Python Script +### 1.4 Start Document Preparation Microservice for PGVector with Python Script Start document preparation microservice for PGVector with below command. @@ -30,13 +30,13 @@ Start document preparation microservice for PGVector with below command. python prepare_doc_pgvector.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Start PGVector +### 2.1 Start PGVector Please refer to this [readme](../../vectorstores/langchain/pgvector/README.md). -## 2.2 Setup Environment Variables +### 2.2 Setup Environment Variables ```bash export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb @@ -46,29 +46,29 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/dataprep" ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image ```bash cd GenAIComps docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pgvector/langchain/docker/Dockerfile . 
``` -## 2.4 Run Docker with CLI (Option A) +### 2.4 Run Docker with CLI (Option A) ```bash docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-pgvector:latest ``` -## 2.5 Run with Docker Compose (Option B) +### 2.5 Run with Docker Compose (Option B) ```bash cd comps/dataprep/langchain/pgvector/docker docker compose -f docker-compose-dataprep-pgvector.yaml up -d ``` -# 🚀3. Consume Microservice +## 🚀3. Consume Microservice -## 3.1 Consume Upload API +### 3.1 Consume Upload API Once document preparation microservice for PGVector is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -79,7 +79,7 @@ curl -X POST \ http://localhost:6007/v1/dataprep ``` -## 3.2 Consume get_file API +### 3.2 Consume get_file API To get uploaded file structures, use the following command: @@ -108,7 +108,7 @@ Then you will get the response JSON like this: ] ``` -## 4.3 Consume delete_file API +### 4.3 Consume delete_file API To delete uploaded file/link, use the following command. diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt index 87680d503..84fd48e52 100644 --- a/comps/dataprep/pgvector/langchain/requirements.txt +++ b/comps/dataprep/pgvector/langchain/requirements.txt @@ -22,6 +22,7 @@ psycopg2-binary pymupdf pyspark python-docx +python-multipart python-pptx sentence_transformers shortuuid diff --git a/comps/dataprep/pinecone/README.md b/comps/dataprep/pinecone/README.md index 3a9f6fc30..42e3d048a 100644 --- a/comps/dataprep/pinecone/README.md +++ b/comps/dataprep/pinecone/README.md @@ -1,18 +1,18 @@ # Dataprep Microservice with Pinecone -# 🚀Start Microservice with Python +## 🚀Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt ``` -## Start Pinecone Server +### Start Pinecone Server Please refer to this [readme](../../../vectorstores/langchain/pinecone/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -21,7 +21,7 @@ export PINECONE_API_KEY=${PINECONE_API_KEY} export PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME} ``` -## Start Document Preparation Microservice for Pinecone with Python Script +### Start Document Preparation Microservice for Pinecone with Python Script Start document preparation microservice for Pinecone with below command. @@ -29,22 +29,22 @@ Start document preparation microservice for Pinecone with below command. python prepare_doc_pinecone.py ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/docker/Dockerfile . 
``` -## Run Docker with CLI +### Run Docker with CLI ```bash docker run -d --name="dataprep-pinecone-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest ``` -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} @@ -53,14 +53,14 @@ export PINECONE_API_KEY=${PINECONE_API_KEY} export PINECONE_INDEX_NAME=${PINECONE_INDEX_NAME} ``` -## Run Docker with Docker Compose +### Run Docker with Docker Compose ```bash cd comps/dataprep/pinecone/docker docker compose -f docker-compose-dataprep-pinecone.yaml up -d ``` -# Invoke Microservice +## Invoke Microservice Once document preparation microservice for Pinecone is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. diff --git a/comps/dataprep/qdrant/README.md b/comps/dataprep/qdrant/README.md index 24f58fc09..4b52eaeeb 100644 --- a/comps/dataprep/qdrant/README.md +++ b/comps/dataprep/qdrant/README.md @@ -1,8 +1,8 @@ # Dataprep Microservice with Qdrant -# 🚀Start Microservice with Python +## 🚀Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt @@ -11,11 +11,11 @@ apt-get install libtesseract-dev -y apt-get install poppler-utils -y ``` -## Start Qdrant Server +### Start Qdrant Server Please refer to this [readme](../../vectorstores/langchain/qdrant/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -27,7 +27,7 @@ export COLLECTION_NAME=${your_collection_name} export PYTHONPATH=${path_to_comps} ``` -## Start Document Preparation Microservice for Qdrant with Python Script +### Start Document Preparation Microservice for Qdrant with Python Script Start document preparation microservice for Qdrant with below command. @@ -35,50 +35,58 @@ Start document preparation microservice for Qdrant with below command. python prepare_doc_qdrant.py ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../../../ docker build -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI ```bash -docker run -d --name="dataprep-qdrant-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest +docker run -d --name="dataprep-qdrant-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-qdrant:latest ``` -## Setup Environment Variables +### Setup Environment Variables ```bash export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export QDRANT=${host_ip} +export QDRANT_HOST=${host_ip} export QDRANT_PORT=6333 export COLLECTION_NAME=${your_collection_name} ``` -## Run Docker with Docker Compose +### Run Docker with Docker Compose ```bash cd comps/dataprep/qdrant/docker docker compose -f docker-compose-dataprep-qdrant.yaml up -d ``` -# Invoke Microservice +## Invoke Microservice Once document preparation microservice for Qdrant is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. 
```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + http://localhost:6007/v1/dataprep ``` You can specify chunk_size and chunk_overlap by the following commands. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","chunk_size":1500,"chunk_overlap":100}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./file1.txt" \ + -F "chunk_size=1500" \ + -F "chunk_overlap=100" \ + http://localhost:6007/v1/dataprep ``` We support table extraction from pdf documents. You can specify process_table and table_strategy by the following commands. "table_strategy" refers to the strategies to understand tables for table retrieval. As the setting progresses from "fast" to "hq" to "llm," the focus shifts towards deeper table understanding at the expense of processing speed. The default strategy is "fast". @@ -86,5 +94,10 @@ We support table extraction from pdf documents. You can specify process_table an Note: If you specify "table_strategy=llm", you should first start TGI Service, please refer to 1.2.1, 1.3.1 in https://github.com/opea-project/GenAIComps/tree/main/comps/llms/README.md, and then `export TGI_LLM_ENDPOINT="http://${your_ip}:8008"`. ```bash -curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document","process_table":true,"table_strategy":"hq"}' http://localhost:6000/v1/dataprep +curl -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./your_file.pdf" \ + -F "process_table=true" \ + -F "table_strategy=hq" \ + http://localhost:6007/v1/dataprep ``` diff --git a/comps/dataprep/qdrant/config.py b/comps/dataprep/qdrant/config.py index 2b30a3682..7cf37f404 100644 --- a/comps/dataprep/qdrant/config.py +++ b/comps/dataprep/qdrant/config.py @@ -7,7 +7,7 @@ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2") # Qdrant configuration -QDRANT_HOST = os.getenv("QDRANT", "localhost") +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333)) COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant") diff --git a/comps/dataprep/qdrant/docker/Dockerfile b/comps/dataprep/qdrant/docker/Dockerfile index bdf0315e2..ff9f6b253 100644 --- a/comps/dataprep/qdrant/docker/Dockerfile +++ b/comps/dataprep/qdrant/docker/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missin build-essential \ libgl1-mesa-glx \ libjemalloc-dev \ + default-jre \ vim RUN useradd -m -s /bin/bash user && \ @@ -22,13 +23,18 @@ USER user COPY comps /home/user/comps -RUN pip install --no-cache-dir --upgrade pip && \ - if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + if [ ${ARCH} = "cpu" ]; then pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \ pip install --no-cache-dir -r /home/user/comps/dataprep/qdrant/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user +USER root + +RUN mkdir -p /home/user/comps/dataprep/qdrant/uploaded_files && chown -R user /home/user/comps/dataprep/qdrant/uploaded_files + +USER user + WORKDIR /home/user/comps/dataprep/qdrant ENTRYPOINT ["python", "prepare_doc_qdrant.py"] - diff --git a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml
b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml index e86dc2c4e..aaf2a17dd 100644 --- a/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml +++ b/comps/dataprep/qdrant/docker/docker-compose-dataprep-qdrant.yaml @@ -9,19 +9,36 @@ services: ports: - "6333:6333" - "6334:6334" + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate dataprep-qdrant: image: opea/gen-ai-comps:dataprep-qdrant-xeon-server container_name: dataprep-qdrant-server + depends_on: + - qdrant-vector-db + - tei-embedding-service ports: - - "6000:6000" + - "6007:6007" ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - QDRANT: ${QDRANT} + QDRANT_HOST: ${QDRANT_HOST} QDRANT_PORT: ${QDRANT_PORT} COLLECTION_NAME: ${COLLECTION_NAME} + TEI_ENDPOINT: ${TEI_ENDPOINT} restart: unless-stopped networks: diff --git a/comps/dataprep/qdrant/prepare_doc_qdrant.py b/comps/dataprep/qdrant/prepare_doc_qdrant.py index 422854eec..fb8d66571 100644 --- a/comps/dataprep/qdrant/prepare_doc_qdrant.py +++ b/comps/dataprep/qdrant/prepare_doc_qdrant.py @@ -1,30 +1,31 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import os +import json +from typing import List, Optional, Union -from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT +from config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT +from fastapi import File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Qdrant +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from comps import DocPath, opea_microservices, opea_telemetry, register_microservice -from comps.dataprep.utils import document_loader, get_separators, get_tables_result +from comps import DocPath, opea_microservices, register_microservice +from comps.dataprep.utils import ( + document_loader, + encode_filename, + get_separators, + get_tables_result, + parse_html, + save_content_to_local_disk, +) -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +upload_folder = "./uploaded_files/" -@register_microservice( - name="opea_service@prepare_doc_qdrant", - endpoint="/v1/dataprep", - host="0.0.0.0", - port=6000, - input_datatype=DocPath, - output_datatype=None, -) -@opea_telemetry -def ingest_documents(doc_path: DocPath): +def ingest_data_to_qdrant(doc_path: DocPath): """Ingest document to Qdrant.""" path = doc_path.path print(f"Parsing document {path}.") @@ -38,23 +39,30 @@ def ingest_documents(doc_path: DocPath): text_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) else: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=doc_path.chunk_size, chunk_overlap=100, add_start_index=True, separators=get_separators() + chunk_size=doc_path.chunk_size, + chunk_overlap=doc_path.chunk_overlap, + add_start_index=True, + separators=get_separators(), ) content = document_loader(path) + chunks = 
text_splitter.split_text(content) if doc_path.process_table and path.endswith(".pdf"): table_chunks = get_tables_result(path, doc_path.table_strategy) chunks = chunks + table_chunks print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf") + # Create vectorstore - if tei_embedding_endpoint: + if TEI_EMBEDDING_ENDPOINT: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + print("embedder created.") + # Batch size batch_size = 32 num_chunks = len(chunks) @@ -71,6 +79,78 @@ def ingest_documents(doc_path: DocPath): ) print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}") + return True + + +@register_microservice( + name="opea_service@prepare_doc_qdrant", + endpoint="/v1/dataprep", + host="0.0.0.0", + port=6007, + input_datatype=DocPath, + output_datatype=None, +) +async def ingest_documents( + files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), + link_list: Optional[str] = Form(None), + chunk_size: int = Form(1500), + chunk_overlap: int = Form(100), + process_table: bool = Form(False), + table_strategy: str = Form("fast"), +): + print(f"files:{files}") + print(f"link_list:{link_list}") + + if files: + if not isinstance(files, list): + files = [files] + uploaded_files = [] + for file in files: + encode_file = encode_filename(file.filename) + save_path = upload_folder + encode_file + await save_content_to_local_disk(save_path, file) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + uploaded_files.append(save_path) + print(f"Successfully saved file {save_path}") + + return {"status": 200, "message": "Data preparation succeeded"} + + if link_list: + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail="link_list should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + try: + await save_content_to_local_disk(save_path, content) + ingest_data_to_qdrant( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, + ) + ) + except json.JSONDecodeError: + raise HTTPException(status_code=500, detail="Fail to ingest data into qdrant.") + + print(f"Successfully saved link {link}") + + return {"status": 200, "message": "Data preparation succeeded"} + + raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") + if __name__ == "__main__": opea_microservices["opea_service@prepare_doc_qdrant"].start() diff --git a/comps/dataprep/qdrant/requirements.txt b/comps/dataprep/qdrant/requirements.txt index eb92e628f..e5bcf80b3 100644 --- a/comps/dataprep/qdrant/requirements.txt +++ b/comps/dataprep/qdrant/requirements.txt @@ -8,6 +8,7 @@ huggingface_hub langchain langchain-community langchain-text-splitters +langchain_huggingface markdown numpy opentelemetry-api diff --git a/comps/dataprep/redis/README.md b/comps/dataprep/redis/README.md index 1afb6e8e0..4617dfa25 100644 --- a/comps/dataprep/redis/README.md +++ b/comps/dataprep/redis/README.md @@ 
-4,9 +4,9 @@ For dataprep microservice, we provide two frameworks: `Langchain` and `LlamaInde We organized these two folders in the same way, so you can use either framework for dataprep microservice with the following constructions. -# 🚀1. Start Microservice with Python(Option 1) +## 🚀1. Start Microservice with Python(Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements - option 1: Install Single-process version (for 1-10 files processing) @@ -29,22 +29,19 @@ pip install -r requirements.txt cd langchain_ray; pip install -r requirements_ray.txt ``` -## 1.2 Start Redis Stack Server +### 1.2 Start Redis Stack Server Please refer to this [readme](../../vectorstores/langchain/redis/README.md). -## 1.3 Setup Environment Variables +### 1.3 Setup Environment Variables ```bash export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:dataprep" export PYTHONPATH=${path_to_comps} ``` -## 1.4 Start Embedding Service +### 1.4 Start Embedding Service First, you need to start a TEI service. @@ -70,7 +67,7 @@ After checking that it works, set up environment variables. export TEI_ENDPOINT="http://localhost:$your_port" ``` -## 1.4 Start Document Preparation Microservice for Redis with Python Script +### 1.4 Start Document Preparation Microservice for Redis with Python Script Start document preparation microservice for Redis with below command. @@ -86,27 +83,23 @@ python prepare_doc_redis.py python prepare_doc_redis_on_ray.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Start Redis Stack Server +### 2.1 Start Redis Stack Server Please refer to this [readme](../../vectorstores/langchain/redis/README.md). -## 2.2 Setup Environment Variables +### 2.2 Setup Environment Variables ```bash export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export TEI_ENDPOINT="http://${your_ip}:6006" -export REDIS_HOST=${your_ip} -export REDIS_PORT=6379 export REDIS_URL="redis://${your_ip}:6379" export INDEX_NAME=${your_index_name} -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/dataprep" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} ``` -## 2.3 Build Docker Image +### 2.3 Build Docker Image - Build docker image with langchain @@ -131,21 +124,21 @@ cd ../../../../ docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . 
``` -## 2.4 Run Docker with CLI (Option A) +### 2.4 Run Docker with CLI (Option A) - option 1: Start single-process version (for 1-10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/dataprep-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep-redis:latest ``` - option 2: Start multi-process version (for >10 files processing) ```bash -docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest +docker run -d --name="dataprep-redis-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest ``` -## 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) +### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future) ```bash # for langchain @@ -155,15 +148,15 @@ cd comps/dataprep/redis/llama_index/docker docker compose -f docker-compose-dataprep-redis.yaml up -d ``` -# 🚀3. Status Microservice +## 🚀3. Status Microservice ```bash docker container logs -f dataprep-redis-server ``` -# 🚀4. Consume Microservice +## 🚀4. Consume Microservice -## 4.1 Consume Upload API +### 4.1 Consume Upload API Once document preparation microservice for Redis is started, user can use below command to invoke the microservice to convert the document to embedding and save to the database. @@ -243,7 +236,7 @@ except requests.exceptions.RequestException as e: print("An error occurred:", e) ``` -## 4.2 Consume get_file API +### 4.2 Consume get_file API To get uploaded file structures, use the following command: @@ -272,7 +265,7 @@ Then you will get the response JSON like this: ] ``` -## 4.3 Consume delete_file API +### 4.3 Consume delete_file API To delete uploaded file/link, use the following command. 
diff --git a/comps/dataprep/redis/langchain/config.py b/comps/dataprep/redis/langchain/config.py index b441f80d8..75715912c 100644 --- a/comps/dataprep/redis/langchain/config.py +++ b/comps/dataprep/redis/langchain/config.py @@ -62,3 +62,5 @@ def format_redis_conn_from_env(): KEY_INDEX_NAME = os.getenv("KEY_INDEX_NAME", "file-keys") TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", 600)) + +SEARCH_BATCH_SIZE = int(os.getenv("SEARCH_BATCH_SIZE", 10)) diff --git a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml index 74e2bb78f..0ef8a1f1a 100644 --- a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml @@ -27,8 +27,6 @@ services: container_name: dataprep-redis-server ports: - "6007:6007" - - "6008:6008" - - "6009:6009" ipc: host environment: no_proxy: ${no_proxy} @@ -39,7 +37,7 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_ENDPOINT} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped networks: diff --git a/comps/dataprep/redis/langchain/prepare_doc_redis.py b/comps/dataprep/redis/langchain/prepare_doc_redis.py index 13af980ab..a749cd557 100644 --- a/comps/dataprep/redis/langchain/prepare_doc_redis.py +++ b/comps/dataprep/redis/langchain/prepare_doc_redis.py @@ -10,13 +10,13 @@ # from pyspark import SparkConf, SparkContext import redis -from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL +from config import EMBED_MODEL, INDEX_NAME, KEY_INDEX_NAME, REDIS_URL, SEARCH_BATCH_SIZE from fastapi import Body, File, Form, HTTPException, UploadFile from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings +from langchain_community.embeddings import HuggingFaceBgeEmbeddings from langchain_community.vectorstores import Redis +from langchain_huggingface import HuggingFaceEndpointEmbeddings from langchain_text_splitters import HTMLHeaderTextSplitter -from langsmith import traceable from redis.commands.search.field import TextField from redis.commands.search.indexDefinition import IndexDefinition, IndexType @@ -25,7 +25,7 @@ create_upload_folder, document_loader, encode_filename, - get_file_structure, + format_search_results, get_separators, get_tables_result, parse_html, @@ -76,7 +76,7 @@ def search_by_id(client, doc_id): print(f"[ search by id ] searching docs of {doc_id}") try: results = client.load_document(doc_id) - print(f"[ search by id ] search success of {doc_id}") + print(f"[ search by id ] search success of {doc_id}: {results}") return results except Exception as e: print(f"[ search by id ] fail to search docs of {doc_id}: {e}") @@ -109,7 +109,7 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): # Create vectorstore if tei_embedding_endpoint: # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + embedder = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint) else: # create embeddings using local embedding model embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) @@ -139,8 +139,12 @@ def ingest_chunks_to_redis(file_name: str, chunks: List): client = r.ft(KEY_INDEX_NAME) if not check_index_existance(client): assert create_index(client) - assert store_by_id(client, key=file_name, 
value="#".join(file_ids)) + try: + assert store_by_id(client, key=file_name, value="#".join(file_ids)) + except Exception as e: + print(f"[ ingest chunks ] {e}. Fail to store chunks of file {file_name}.") + raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.") return True @@ -177,7 +181,6 @@ def ingest_data_to_redis(doc_path: DocPath): @register_microservice(name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep", host="0.0.0.0", port=6007) -@traceable(run_type="tool") async def ingest_documents( files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), link_list: Optional[str] = Form(None), @@ -189,12 +192,30 @@ async def ingest_documents( print(f"files:{files}") print(f"link_list:{link_list}") + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + if files: if not isinstance(files, list): files = [files] uploaded_files = [] + for file in files: encode_file = encode_filename(file.filename) + doc_id = "file:" + encode_file + + # check whether the file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + print(f"[ upload file ] File {file.filename} already exists.") + except Exception as e: + print(f"[ upload file ] File {file.filename} does not exist.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded file {file.filename} already exists. Please change file name." + ) + save_path = upload_folder + encode_file await save_content_to_local_disk(save_path, file) ingest_data_to_redis( @@ -234,28 +255,39 @@ async def ingest_documents( return {"status": 200, "message": "Data preparation succeeded"} if link_list: - try: - link_list = json.loads(link_list) # Parse JSON string to list - if not isinstance(link_list, list): - raise HTTPException(status_code=400, detail="link_list should be a list.") - for link in link_list: - encoded_link = encode_filename(link) - save_path = upload_folder + encoded_link + ".txt" - content = parse_html([link])[0][0] - await save_content_to_local_disk(save_path, content) - ingest_data_to_redis( - DocPath( - path=save_path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - process_table=process_table, - table_strategy=table_strategy, - ) + link_list = json.loads(link_list) # Parse JSON string to list + if not isinstance(link_list, list): + raise HTTPException(status_code=400, detail=f"Link_list {link_list} should be a list.") + for link in link_list: + encoded_link = encode_filename(link) + doc_id = "file:" + encoded_link + ".txt" + + # check whether the link file already exists + key_ids = None + try: + key_ids = search_by_id(client, doc_id).key_ids + print(f"[ upload file ] Link {link} already exists.") + except Exception as e: + print(f"[ upload file ] Link {link} does not exist. Keep storing.") + if key_ids: + raise HTTPException( + status_code=400, detail=f"Uploaded link {link} already exists. Please change another link." 
+ ) + + save_path = upload_folder + encoded_link + ".txt" + content = parse_html([link])[0][0] + await save_content_to_local_disk(save_path, content) + ingest_data_to_redis( + DocPath( + path=save_path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + process_table=process_table, + table_strategy=table_strategy, ) - print(f"Successfully saved link list {link_list}") - return {"status": 200, "message": "Data preparation succeeded"} - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON format for link_list.") + ) + print(f"Successfully saved link list {link_list}") + return {"status": 200, "message": "Data preparation succeeded"} raise HTTPException(status_code=400, detail="Must provide either a file or a string list.") @@ -263,36 +295,73 @@ async def ingest_documents( @register_microservice( name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def rag_get_file_structure(): print("[ dataprep - get file ] start to get file structure") - if not Path(upload_folder).exists(): - print("No file uploaded, return empty list.") - return [] - - file_content = get_file_structure(upload_folder) - return file_content + # define redis client + r = redis.Redis(connection_pool=redis_pool) + offset = 0 + file_list = [] + while True: + response = r.execute_command("FT.SEARCH", KEY_INDEX_NAME, "*", "LIMIT", offset, offset + SEARCH_BATCH_SIZE) + # no doc retrieved + if len(response) < 2: + break + file_list = format_search_results(response, file_list) + offset += SEARCH_BATCH_SIZE + # last batch + if (len(response) - 1) // 2 < SEARCH_BATCH_SIZE: + break + return file_list @register_microservice( name="opea_service@prepare_doc_redis", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007 ) -@traceable(run_type="tool") async def delete_single_file(file_path: str = Body(..., embed=True)): """Delete file according to `file_path`. `file_path`: - specific file path (e.g. /path/to/file.txt) - - folder path (e.g. /path/to/folder) - "all": delete all files uploaded """ + + # define redis client + r = redis.Redis(connection_pool=redis_pool) + client = r.ft(KEY_INDEX_NAME) + client2 = r.ft(INDEX_NAME) + # delete all uploaded files if file_path == "all": print("[dataprep - del] delete all files") - remove_folder_with_ignore(upload_folder) - assert drop_index(index_name=INDEX_NAME) - assert drop_index(index_name=KEY_INDEX_NAME) + + # drop index KEY_INDEX_NAME + if check_index_existance(client): + try: + assert drop_index(index_name=KEY_INDEX_NAME) + except Exception as e: + print(f"[dataprep - del] {e}. Fail to drop index {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {KEY_INDEX_NAME}.") + else: + print(f"[dataprep - del] Index {KEY_INDEX_NAME} does not exits.") + + # drop index INDEX_NAME + if check_index_existance(client2): + try: + assert drop_index(index_name=INDEX_NAME) + except Exception as e: + print(f"[dataprep - del] {e}. Fail to drop index {INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"Fail to drop index {INDEX_NAME}.") + else: + print(f"[dataprep - del] Index {INDEX_NAME} does not exits.") + + # delete files on local disk + try: + remove_folder_with_ignore(upload_folder) + except Exception as e: + print(f"[dataprep - del] {e}. 
Fail to delete {upload_folder}.") + raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.") + print("[dataprep - del] successfully delete all files.") create_upload_folder(upload_folder) return {"status": True} @@ -300,35 +369,58 @@ async def delete_single_file(file_path: str = Body(..., embed=True)): delete_path = Path(upload_folder + "/" + encode_filename(file_path)) print(f"[dataprep - del] delete_path: {delete_path}") - # partially delete files/folders + # partially delete files if delete_path.exists(): - r = redis.Redis(connection_pool=redis_pool) - client = r.ft(KEY_INDEX_NAME) - client2 = r.ft(INDEX_NAME) doc_id = "file:" + encode_filename(file_path) - objs = search_by_id(client, doc_id).key_ids - file_ids = objs.split("#") + + # determine whether this file exists in db KEY_INDEX_NAME + try: + key_ids = search_by_id(client, doc_id).key_ids + except Exception as e: + print(f"[dataprep - del] {e}, File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {KEY_INDEX_NAME}. Please check file_path." + ) + file_ids = key_ids.split("#") # delete file if delete_path.is_file(): + # delete file keys id in db KEY_INDEX_NAME try: - for file_id in file_ids: - assert delete_by_id(client2, file_id) assert delete_by_id(client, doc_id) - delete_path.unlink() except Exception as e: - print(f"[dataprep - del] fail to delete file {delete_path}: {e}") - return {"status": False} + print(f"[dataprep - del] {e}. File {file_path} delete failed for db {KEY_INDEX_NAME}.") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file content in db INDEX_NAME + for file_id in file_ids: + # determine whether this file exists in db INDEX_NAME + try: + content = search_by_id(client2, file_id).content + except Exception as e: + print(f"[dataprep - del] {e}. File {file_path} does not exists.") + raise HTTPException( + status_code=404, detail=f"File not found in db {INDEX_NAME}. Please check file_path." + ) + + # delete file content + try: + assert delete_by_id(client2, file_id) + except Exception as e: + print(f"[dataprep - del] {e}. File {file_path} delete failed for db {INDEX_NAME}") + raise HTTPException(status_code=500, detail=f"File {file_path} delete failed.") + + # delete file on local disk + delete_path.unlink() + + return {"status": True} + # delete folder else: - try: - shutil.rmtree(delete_path) - except Exception as e: - print(f"[dataprep - del] fail to delete folder {delete_path}: {e}") - return {"status": False} - return {"status": True} + print(f"[dataprep - del] Delete folder {file_path} is not supported for now.") + raise HTTPException(status_code=404, detail=f"Delete folder {file_path} is not supported for now.") else: - raise HTTPException(status_code=404, detail="File/folder not found. Please check del_path.") + raise HTTPException(status_code=404, detail=f"File {file_path} not found. 
Please check file_path.") if __name__ == "__main__": diff --git a/comps/dataprep/redis/langchain/requirements.txt b/comps/dataprep/redis/langchain/requirements.txt index 12d389513..284b9379b 100644 --- a/comps/dataprep/redis/langchain/requirements.txt +++ b/comps/dataprep/redis/langchain/requirements.txt @@ -5,10 +5,10 @@ docx2txt easyocr fastapi huggingface_hub -langchain +langchain==0.2.12 langchain-community langchain-text-splitters -langsmith +langchain_huggingface markdown numpy opentelemetry-api diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt index b16a4ac82..50d57b20e 100644 --- a/comps/dataprep/redis/langchain_ray/requirements.txt +++ b/comps/dataprep/redis/langchain_ray/requirements.txt @@ -19,6 +19,7 @@ pyarrow pymupdf python-bidi==0.4.2 python-docx +python-multipart python-pptx ray redis diff --git a/comps/dataprep/redis/llama_index/requirements.txt b/comps/dataprep/redis/llama_index/requirements.txt index e754a4275..ad75869c1 100644 --- a/comps/dataprep/redis/llama_index/requirements.txt +++ b/comps/dataprep/redis/llama_index/requirements.txt @@ -2,7 +2,7 @@ docarray[full] fastapi huggingface_hub langsmith -llama-index +llama-index llama-index-embeddings-huggingface==0.2.0 llama-index-readers-file llama-index-vector-stores-redis @@ -12,6 +12,7 @@ opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator python-bidi==0.4.2 +python-multipart redis sentence_transformers shortuuid diff --git a/comps/dataprep/utils.py b/comps/dataprep/utils.py index 46acc8f5b..ae8361539 100644 --- a/comps/dataprep/utils.py +++ b/comps/dataprep/utils.py @@ -717,6 +717,19 @@ def get_file_structure(root_path: str, parent_path: str = "") -> List[Dict[str, return result +def format_search_results(response, file_list: list): + for i in range(1, len(response), 2): + file_name = response[i].decode()[5:] + file_dict = { + "name": decode_filename(file_name), + "id": decode_filename(file_name), + "type": "File", + "parent": "", + } + file_list.append(file_dict) + return file_list + + def remove_folder_with_ignore(folder_path: str, except_patterns: List = []): """Remove the specific folder, and ignore some files/folders. diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md index ce4b4fa46..407644ada 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/README.md @@ -14,7 +14,7 @@ Key Features: Users are albe to configure and build embedding-related services according to their actual needs. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) Currently, we provide two ways to implement the embedding service: @@ -24,7 +24,7 @@ Currently, we provide two ways to implement the embedding service: For both of the implementations, you need to install requirements first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash # run with langchain @@ -33,11 +33,11 @@ pip install -r langchain/requirements.txt pip install -r llama_index/requirements.txt ``` -## 1.2 Start Embedding Service +### 1.2 Start Embedding Service You can select one of following ways to start the embedding service: -### Start Embedding Service with TEI +#### Start Embedding Service with TEI First, you need to start a TEI service. @@ -45,7 +45,7 @@ First, you need to start a TEI service. 
your_port=8090 model="BAAI/bge-large-en-v1.5" revision="refs/pr/5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model --revision $revision ``` Then you need to test your TEI service using the following commands: @@ -66,13 +66,10 @@ cd langchain cd llama_index export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" -export LANGCHAIN_TRACING_V2=true -export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings" python embedding_tei.py ``` -### Start Embedding Service with Local Model +#### Start Embedding Service with Local Model ```bash # run with langchain @@ -82,9 +79,9 @@ cd llama_index python local_embedding.py ``` -# 🚀2. Start Microservice with Docker (Optional 2) +## 🚀2. Start Microservice with Docker (Optional 2) -## 2.1 Start Embedding Service with TEI +### 2.1 Start Embedding Service with TEI First, you need to start a TEI service. @@ -92,7 +89,7 @@ First, you need to start a TEI service. your_port=8090 model="BAAI/bge-large-en-v1.5" revision="refs/pr/5" -docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision +docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model --revision $revision ``` Then you need to test your TEI service using the following commands: @@ -111,38 +108,41 @@ export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport" export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image -### Build Langchain Docker (Option a) +#### Build Langchain Docker (Option a) ```bash cd ../../ docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile . ``` -### Build LlamaIndex Docker (Option b) +#### Build LlamaIndex Docker (Option b) ```bash cd ../../ -docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . +docker build -t opea/embedding-tei-llama-index:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . 
``` -## 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI ```bash +# run with langchain docker docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest +# run with llama-index docker +docker run -d --name="embedding-tei-llama-index-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei-llama-index:latest ``` -## 2.4 Run Docker with Docker Compose +### 2.4 Run Docker with Docker Compose ```bash cd docker docker compose -f docker_compose_embedding.yaml up -d ``` -# 🚀3. Consume Embedding Service +## 🚀3. Consume Embedding Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:6000/v1/health_check\ @@ -150,7 +150,7 @@ curl http://localhost:6000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service ```bash curl http://localhost:6000/v1/embeddings\ diff --git a/comps/embeddings/langchain-mosec/README.md b/comps/embeddings/langchain-mosec/README.md index 624fcf6a2..4fd265829 100644 --- a/comps/embeddings/langchain-mosec/README.md +++ b/comps/embeddings/langchain-mosec/README.md @@ -4,26 +4,26 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec-endpoint:latest -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . ``` -# build embedding microservice docker image +## build embedding microservice docker image ``` docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/embedding-langchain-mosec:latest -f comps/embeddings/langchain-mosec/docker/Dockerfile . 
``` -# launch Mosec endpoint docker container +## launch Mosec endpoint docker container ``` docker run -d --name="embedding-langchain-mosec-endpoint" -p 6001:8000 opea/embedding-langchain-mosec-endpoint:latest ``` -# launch embedding microservice docker container +## launch embedding microservice docker container ``` export MOSEC_EMBEDDING_ENDPOINT=http://{mosec_embedding_host_ip}:6001 docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:6000 --ipc=host -e MOSEC_EMBEDDING_ENDPOINT=$MOSEC_EMBEDDING_ENDPOINT opea/embedding-langchain-mosec:latest ``` -# run client test +## run client test ``` curl localhost:6000/v1/embeddings \ diff --git a/comps/embeddings/langchain-mosec/embedding_mosec.py b/comps/embeddings/langchain-mosec/embedding_mosec.py index f34b56a18..f13c23eca 100644 --- a/comps/embeddings/langchain-mosec/embedding_mosec.py +++ b/comps/embeddings/langchain-mosec/embedding_mosec.py @@ -67,7 +67,7 @@ def embedding(input: TextDoc) -> EmbedDoc: MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" - MODEL_ID = "/root/bge-large-zh-v1.5" + MODEL_ID = "/home/user/bge-large-zh-v1.5" embeddings = MosecEmbeddings(model=MODEL_ID) print("Mosec Embedding initialized.") opea_microservices["opea_service@embedding_mosec"].start() diff --git a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile index a8241e04e..945f7b90c 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile +++ b/comps/embeddings/langchain-mosec/mosec-docker/Dockerfile @@ -10,7 +10,6 @@ ARG DEBIAN_FRONTEND=noninteractive ENV GLIBC_TUNABLES glibc.cpu.x86_shstk=permissive RUN apt update && apt install -y python3 python3-pip -USER user COPY comps /home/user/comps RUN pip3 install torch==2.2.2 torchvision --index-url https://download.pytorch.org/whl/cpu @@ -19,7 +18,7 @@ RUN pip3 install transformers RUN pip3 install llmspec mosec RUN cd /home/user/ && export HF_ENDPOINT=https://hf-mirror.com && huggingface-cli download --resume-download BAAI/bge-large-zh-v1.5 --local-dir /home/user/bge-large-zh-v1.5 - +USER user ENV EMB_MODEL="/home/user/bge-large-zh-v1.5/" WORKDIR /home/user/comps/embeddings/langchain-mosec/mosec-docker diff --git a/comps/embeddings/langchain-mosec/mosec-docker/README.md b/comps/embeddings/langchain-mosec/mosec-docker/README.md index e7f59d616..3222a1b1e 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/README.md +++ b/comps/embeddings/langchain-mosec/mosec-docker/README.md @@ -25,13 +25,13 @@ docker run -itd -p 8000:8000 embedding:latest - Restful API by curl ```shell -curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/root/bge-large-zh-v1.5/", "input": "hello world"}' +curl -X POST http://127.0.0.1:8000/v1/embeddings -H "Content-Type: application/json" -d '{ "model": "/home/user/bge-large-zh-v1.5/", "input": "hello world"}' ``` - generate embedding from python ```python -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" 
diff --git a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py index 6db56fb88..9639b424a 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/server-ipex.py @@ -13,7 +13,7 @@ from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage from mosec import ClientError, Runtime, Server, Worker -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" class Embedding(Worker): diff --git a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py index d2d67c836..67a3939e1 100644 --- a/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py +++ b/comps/embeddings/langchain-mosec/mosec-docker/test-embedding.py @@ -4,7 +4,7 @@ from openai import Client -DEFAULT_MODEL = "/root/bge-large-zh-v1.5/" +DEFAULT_MODEL = "/home/user/bge-large-zh-v1.5/" SERVICE_URL = "http://127.0.0.1:8000" INPUT_STR = "Hello world!" diff --git a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml index 62f5870b7..152f5030b 100644 --- a/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml +++ b/comps/embeddings/llama_index/docker/docker_compose_embedding.yaml @@ -5,7 +5,7 @@ version: "3.8" services: embedding: - image: opea/embedding-tei:latest + image: opea/embedding-tei-llama-index:latest container_name: embedding-tei-server ports: - "6000:6000" @@ -16,7 +16,6 @@ services: https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME} - LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} restart: unless-stopped networks: diff --git a/comps/embeddings/llama_index/local_embedding.py b/comps/embeddings/llama_index/local_embedding.py index bccec24ca..f6a69afaf 100644 --- a/comps/embeddings/llama_index/local_embedding.py +++ b/comps/embeddings/llama_index/local_embedding.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from langsmith import traceable -from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding from comps import EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice @@ -24,5 +24,5 @@ def embedding(input: TextDoc) -> EmbedDoc: if __name__ == "__main__": - embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5") + embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-large-en-v1.5") opea_microservices["opea_service@local_embedding"].start() diff --git a/comps/embeddings/llama_index/requirements.txt b/comps/embeddings/llama_index/requirements.txt index b1d2beba9..908c38b06 100644 --- a/comps/embeddings/llama_index/requirements.txt +++ b/comps/embeddings/llama_index/requirements.txt @@ -2,6 +2,7 @@ docarray[full] fastapi huggingface_hub langsmith +llama-index-embeddings-huggingface-api llama-index-embeddings-text-embeddings-inference opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/guardrails/llama_guard/README.md b/comps/guardrails/llama_guard/README.md index 94bdcd952..019aab5e3 100644 --- a/comps/guardrails/llama_guard/README.md +++ b/comps/guardrails/llama_guard/README.md @@ -20,30 +20,30 @@ Any content that is detected in the following categories is determined as unsafe - Regulated or Controlled Substances - Suicide & 
Self Harm -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the Guardrails microservice, you need to install python packages first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TGI Gaudi Service +### 1.2 Start TGI Gaudi Service ```bash export HF_TOKEN=${your_hf_api_token} export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} -export LANGCHAIN_PROJECT="opea/gaurdrails" +export LANGCHAIN_PROJECT="opea/guardrails" volume=$PWD/data model_id="meta-llama/Meta-Llama-Guard-2-8B" docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 docker run -p 8088:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy -e HF_TOKEN=$HF_TOKEN ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 ``` -## 1.3 Verify the TGI Gaudi Service +### 1.3 Verify the TGI Gaudi Service ```bash curl 127.0.0.1:8088/generate \ @@ -52,7 +52,7 @@ curl 127.0.0.1:8088/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start Guardrails Service +### 1.4 Start Guardrails Service Optional: If you have deployed a Guardrails model with TGI Gaudi Service other than default model (i.e., `meta-llama/Meta-Llama-Guard-2-8B`) [from section 1.2](## 1.2 Start TGI Gaudi Service), you will need to add the eviornment variable `SAFETY_GUARD_MODEL_ID` containing the model id. For example, the following informs the Guardrails Service the deployed model used LlamaGuard2: @@ -65,11 +65,11 @@ export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" python langchain/guardrails_tgi.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) If you start an Guardrails microservice with docker, the `docker_compose_guardrails.yaml` file will automatically start a TGI gaudi service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -79,29 +79,29 @@ export SAFETY_GUARD_ENDPOINT="http://${your_ip}:8088" export LLM_MODEL_ID=${your_hf_llm_model} ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . ``` -## 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI ```bash docker run -d --name="guardrails-tgi-server" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/guardrails-tgi:latest ``` -## 2.4 Run Docker with Docker Compose +### 2.4 Run Docker with Docker Compose ```bash cd langchain/docker docker compose -f docker_compose_guardrails.yaml up -d ``` -# 🚀3. Consume Guardrails Service +## 🚀3. 
Consume Guardrails Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:9090/v1/health_check\ @@ -109,7 +109,7 @@ curl http://localhost:9090/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume Guardrails Service +### 3.2 Consume Guardrails Service ```bash curl http://localhost:9090/v1/guardrails\ diff --git a/comps/guardrails/llama_guard/guardrails_tgi.py b/comps/guardrails/llama_guard/guardrails_tgi.py index 96a89b8c8..b415876ed 100644 --- a/comps/guardrails/llama_guard/guardrails_tgi.py +++ b/comps/guardrails/llama_guard/guardrails_tgi.py @@ -2,13 +2,14 @@ # SPDX-License-Identifier: Apache-2.0 import os +from typing import List, Union from langchain_community.utilities.requests import JsonRequestsWrapper from langchain_huggingface import ChatHuggingFace from langchain_huggingface.llms import HuggingFaceEndpoint from langsmith import traceable -from comps import ServiceType, TextDoc, opea_microservices, register_microservice +from comps import GeneratedDoc, ServiceType, TextDoc, opea_microservices, register_microservice DEFAULT_MODEL = "meta-llama/LlamaGuard-7b" @@ -59,12 +60,17 @@ def get_tgi_service_model_id(endpoint_url, default=DEFAULT_MODEL): endpoint="/v1/guardrails", host="0.0.0.0", port=9090, - input_datatype=TextDoc, + input_datatype=Union[GeneratedDoc, TextDoc], output_datatype=TextDoc, ) @traceable(run_type="llm") -def safety_guard(input: TextDoc) -> TextDoc: - response_input_guard = llm_engine_hf.invoke([{"role": "user", "content": input.text}]).content +def safety_guard(input: Union[GeneratedDoc, TextDoc]) -> TextDoc: + if isinstance(input, GeneratedDoc): + messages = [{"role": "user", "content": input.prompt}, {"role": "assistant", "content": input.text}] + else: + messages = [{"role": "user", "content": input.text}] + response_input_guard = llm_engine_hf.invoke(messages).content + if "unsafe" in response_input_guard: unsafe_dict = get_unsafe_dict(llm_engine_hf.model_id) policy_violation_level = response_input_guard.split("\n")[1].strip() @@ -75,7 +81,6 @@ def safety_guard(input: TextDoc) -> TextDoc: ) else: res = TextDoc(text=input.text) - return res diff --git a/comps/guardrails/llama_guard/requirements.txt b/comps/guardrails/llama_guard/requirements.txt index 5fd992e66..5eda60170 100644 --- a/comps/guardrails/llama_guard/requirements.txt +++ b/comps/guardrails/llama_guard/requirements.txt @@ -1,6 +1,7 @@ docarray[full] fastapi -huggingface_hub +# Fix for issue with langchain-huggingface not using InferenceClient `base_url` kwarg +huggingface-hub<=0.24.0 langchain-community langchain-huggingface langsmith diff --git a/comps/guardrails/pii_detection/README.md b/comps/guardrails/pii_detection/README.md index 3c1f1180e..152fa96db 100644 --- a/comps/guardrails/pii_detection/README.md +++ b/comps/guardrails/pii_detection/README.md @@ -19,7 +19,7 @@ The classifiler model is used together with an embedding model to make predictio Currently this strategy can detect both personal sensitive and business sensitive information such as financial figures and performance reviews. Please refer to the [model card](<(https://huggingface.co/Intel/business_safety_logistic_regression_classifier)>) to see the performance of our demo model on the Patronus EnterprisePII dataset. -# Input and output +## Input and output Users can send a list of files, a list of text strings, or a list of urls to the microservice, and the microservice will return a list of True or False for each piece of text following the original sequence. 
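For illustration, a caller that submits a batch of text strings and branches on the returned booleans could look like the sketch below. This is only a sketch: the port matches the Docker run command in section 2.3, while the route `/v1/piidetect` and the payload key `text_list` are placeholders that should be checked against `pii_detection.py` and the Consume Microservice section.

```python
# Sketch only: route and payload key are placeholders; see pii_detection.py
# and the "Consume Microservice" section for the exact request format.
import requests

texts = ["My phone number is 555-0142.", "The weather is nice today."]

resp = requests.post(
    "http://localhost:6357/v1/piidetect",  # placeholder route on the published port
    json={"text_list": texts},             # placeholder payload key
    timeout=60,
)
resp.raise_for_status()
flags = resp.json()  # one True/False per input text, in the original order

# Use the booleans as conditions in a larger pipeline, e.g. keep only PII-free text.
clean_texts = [t for t, has_pii in zip(texts, flags) if not has_pii]
print(clean_texts)
```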
@@ -27,15 +27,15 @@ For a concrete example of what input should look like, please refer to [Consume The output will be a list of booleans, which can be parsed and used as conditions in a bigger application. -# 🚀1. Start Microservice with Python(Option 1) +## 🚀1. Start Microservice with Python(Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start PII Detection Microservice with Python Script +### 1.2 Start PII Detection Microservice with Python Script Start pii detection microservice with below command. @@ -43,24 +43,24 @@ Start pii detection microservice with below command. python pii_detection.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Prepare PII detection model +### 2.1 Prepare PII detection model export HUGGINGFACEHUB_API_TOKEN=${HP_TOKEN} -## 2.1.1 use LLM endpoint (will add later) +#### 2.1.1 use LLM endpoint (will add later) intro placeholder -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../../ # back to GenAIComps/ folder docker build -t opea/guardrails-pii-detection:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/docker/Dockerfile . ``` -## 2.3 Run Docker with CLI +### 2.3 Run Docker with CLI ```bash docker run -d --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-pii-detection:latest @@ -72,13 +72,13 @@ docker run -d --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p docker run --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p 6357:6357 -v ./comps/guardrails/pii_detection/:/home/user/comps/guardrails/pii_detection/ --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-pii-detection:latest ``` -# 🚀3. Get Status of Microservice +## 🚀3. Get Status of Microservice ```bash docker container logs -f guardrails-pii-detection-endpoint ``` -# 🚀4. Consume Microservice +## 🚀4. Consume Microservice Once microservice starts, user can use below script to invoke the microservice for pii detection. diff --git a/comps/guardrails/pii_detection/requirements.txt b/comps/guardrails/pii_detection/requirements.txt index 9ca5116da..e9bb7ba66 100644 --- a/comps/guardrails/pii_detection/requirements.txt +++ b/comps/guardrails/pii_detection/requirements.txt @@ -20,6 +20,7 @@ prometheus-fastapi-instrumentator pyarrow pymupdf python-docx +python-multipart ray redis scikit-learn diff --git a/comps/knowledgegraphs/README.md b/comps/knowledgegraphs/README.md index 248e46eba..db8140b9c 100755 --- a/comps/knowledgegraphs/README.md +++ b/comps/knowledgegraphs/README.md @@ -16,9 +16,9 @@ A prerequisite for using this microservice is that users must have a knowledge g Overall, this microservice provides efficient support for applications related with graph dataset, especially for answering multi-part questions, or any other conditions including comples relationship between entities. -# 🚀1. Start Microservice with Docker +## 🚀1. 
Start Microservice with Docker -## 1.1 Setup Environment Variables +### 1.1 Setup Environment Variables ```bash export NEO4J_ENDPOINT="neo4j://${your_ip}:7687" @@ -30,7 +30,7 @@ export LLM_MODEL="meta-llama/Llama-2-7b-hf" export AGENT_LLM="HuggingFaceH4/zephyr-7b-beta" ``` -## 1.2 Start Neo4j Service +### 1.2 Start Neo4j Service ```bash docker pull neo4j @@ -43,7 +43,7 @@ docker run --rm \ neo4j ``` -## 1.3 Start LLM Service for "rag"/"query" mode +### 1.3 Start LLM Service for "rag"/"query" mode You can start any LLM microserve, here we take TGI as an example. @@ -70,7 +70,7 @@ curl $LLM_ENDPOINT/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start Microservice +### 1.4 Start Microservice ```bash cd ../.. @@ -93,9 +93,9 @@ docker run --rm \ opea/knowledge_graphs:latest ``` -# 🚀2. Consume Knowledge Graph Service +## 🚀2. Consume Knowledge Graph Service -## 2.1 Cypher mode +### 2.1 Cypher mode ```bash curl http://${your_ip}:8060/v1/graphs \ @@ -107,7 +107,7 @@ curl http://${your_ip}:8060/v1/graphs \ Example output: ![Cypher Output](doc/output_cypher.png) -## 2.2 Rag mode +### 2.2 Rag mode ```bash curl http://${your_ip}:8060/v1/graphs \ @@ -119,7 +119,7 @@ curl http://${your_ip}:8060/v1/graphs \ Example output: ![Cypher Output](doc/output_rag.png) -## 2.3 Query mode +### 2.3 Query mode First example: diff --git a/comps/llms/README.md b/comps/llms/README.md index 584f2ba12..d6ea6dbb6 100644 --- a/comps/llms/README.md +++ b/comps/llms/README.md @@ -6,19 +6,19 @@ A prerequisite for using this microservice is that users must have a LLM text ge Overall, this microservice offers a streamlined way to integrate large language model inference into applications, requiring minimal setup from the user beyond initiating a TGI/vLLM/Ray service and configuring the necessary environment variables. This allows for the seamless processing of queries and documents to generate intelligent, context-aware responses. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. 
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service -### 1.2.1 Start TGI Service +#### 1.2.1 Start TGI Service ```bash export HF_TOKEN=${your_hf_api_token} @@ -28,24 +28,24 @@ export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} ``` -### 1.2.2 Start vLLM Service +#### 1.2.2 Start vLLM Service ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} docker run -it --name vllm_service -p 8008:80 -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -v ./data:/data opea/vllm:cpu /bin/bash -c "cd / && export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --model ${your_hf_llm_model} --port 80" ``` -## 1.2.3 Start Ray Service +### 1.2.3 Start Ray Service ```bash export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export TRUST_REMOTE_CODE=True -docker run -it --runtime=habana --name ray_serve_service -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -p 8008:80 -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE ray_serve:habana /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path ${your_hf_llm_model} --chat_processor ${your_hf_chatprocessor}" +docker run -it --runtime=habana --name ray_serve_service -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -p 8008:80 -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e TRUST_REMOTE_CODE=$TRUST_REMOTE_CODE opea/llm-ray:latest /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path ${your_hf_llm_model} --chat_processor ${your_hf_chatprocessor}" ``` -## 1.3 Verify the LLM Service +### 1.3 Verify the LLM Service -### 1.3.1 Verify the TGI Service +#### 1.3.1 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -54,7 +54,7 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -### 1.3.2 Verify the vLLM Service +#### 1.3.2 Verify the vLLM Service ```bash curl http://${your_ip}:8008/v1/completions \ @@ -67,7 +67,7 @@ curl http://${your_ip}:8008/v1/completions \ }' ``` -### 1.3.3 Verify the Ray Service +#### 1.3.3 Verify the Ray Service ```bash curl http://${your_ip}:8008/v1/chat/completions \ @@ -83,34 +83,34 @@ curl http://${your_ip}:8008/v1/chat/completions \ }' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script -### 1.4.1 Start the TGI Service +#### 1.4.1 Start the TGI Service ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/tgi/llm.py ``` -### 1.4.2 Start the vLLM Service +#### 1.4.2 Start the vLLM Service ```bash export vLLM_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/vllm/llm.py ``` -### 1.4.3 Start the Ray Service +#### 1.4.3 Start the Ray Service ```bash export RAY_Serve_ENDPOINT="http://${your_ip}:8008" python text-generation/ray_serve/llm.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. 
@@ -144,16 +144,16 @@ export LANGCHAIN_PROJECT="opea/llms" export CHAT_PROCESSOR="ChatModelLlama" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image -### 2.2.1 TGI +#### 2.2.1 TGI ```bash cd ../../ docker build -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . ``` -### 2.2.2 vLLM +#### 2.2.2 vLLM Build vllm docker. @@ -168,7 +168,7 @@ cd ../../../../ docker build -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice . ``` -### 2.2.3 Ray Serve +#### 2.2.3 Ray Serve Build Ray Serve docker. @@ -190,15 +190,15 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) -### 2.3.1 TGI +#### 2.3.1 TGI ```bash docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-tgi:latest ``` -### 2.3.2 vLLM +#### 2.3.2 vLLM Start vllm endpoint. @@ -212,7 +212,7 @@ Start vllm microservice. docker run --name="llm-vllm-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=${no_proxy} -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e LLM_MODEL_ID=$LLM_MODEL_ID opea/llm-vllm:latest ``` -### 2.3.3 Ray Serve +#### 2.3.3 Ray Serve Start Ray Serve endpoint. @@ -226,32 +226,32 @@ Start Ray Serve microservice. docker run -d --name="llm-ray-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e LLM_MODEL=$LLM_MODEL opea/llm-ray:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) -### 2.4.1 TGI +#### 2.4.1 TGI ```bash cd text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` -### 2.4.2 vLLM +#### 2.4.2 vLLM ```bash cd text-generation/vllm docker compose -f docker_compose_llm.yaml up -d ``` -### 2.4.3 Ray Serve +#### 2.4.3 Ray Serve ```bash cd text-genetation/ray_serve docker compose -f docker_compose_llm.yaml up -d ``` -# 🚀3. Consume LLM Service +## 🚀3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -259,7 +259,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. @@ -279,7 +279,7 @@ curl http://${your_ip}:9000/v1/chat/completions \ -H 'Content-Type: application/json' ``` -## 4. Validated Model +### 4. 
Validated Model | Model | TGI-Gaudi | vLLM-CPU | vLLM-Gaudi | Ray | | ------------------------- | --------- | -------- | ---------- | --- | diff --git a/comps/llms/faq-generation/tgi/README.md b/comps/llms/faq-generation/tgi/README.md index 45f1ad1bd..9673ee7fe 100644 --- a/comps/llms/faq-generation/tgi/README.md +++ b/comps/llms/faq-generation/tgi/README.md @@ -2,11 +2,11 @@ This microservice interacts with the TGI LLM server to generate FAQs from Input Text.[Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# 🚀1. Start Microservice with Docker +## 🚀1. Start Microservice with Docker If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI service with docker. -## 1.1 Setup Environment Variables +### 1.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -18,7 +18,7 @@ export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} ``` -## 1.2 Build Docker Image +### 1.2 Build Docker Image ```bash cd ../../../../ @@ -32,7 +32,7 @@ To start a docker container, you have two options: You can choose one as needed. -## 1.3 Run Docker with CLI (Option A) +### 1.3 Run Docker with CLI (Option A) ```bash docker run -d -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${LLM_MODEL_ID} @@ -42,16 +42,16 @@ docker run -d -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.i docker run -d --name="llm-faqgen-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/llm-faqgen-tgi:latest ``` -## 1.4 Run Docker with Docker Compose (Option B) +### 1.4 Run Docker with Docker Compose (Option B) ```bash cd text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` -# 🚀3. Consume LLM Service +## 🚀3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -59,7 +59,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume FAQGen LLM Service +### 3.2 Consume FAQGen LLM Service ```bash curl http://${your_ip}:9000/v1/faqgen \ diff --git a/comps/llms/summarization/tgi/README.md b/comps/llms/summarization/tgi/README.md index 9e5858b4b..c8cc9c44a 100644 --- a/comps/llms/summarization/tgi/README.md +++ b/comps/llms/summarization/tgi/README.md @@ -3,24 +3,24 @@ In this microservice, we utilize LangChain to implement summarization strategies and facilitate LLM inference using Text Generation Inference on Intel Xeon and Gaudi2 processors. [Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. 
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service ```bash export HF_TOKEN=${your_hf_api_token} docker run -p 8008:80 -v ./data:/data --name llm-docsum-tgi --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} ``` -## 1.3 Verify the TGI Service +### 1.3 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -29,18 +29,18 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python llm.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. @@ -50,7 +50,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008" export LLM_MODEL_ID=${your_hf_llm_model} ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -64,21 +64,21 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="llm-docsum-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-docsum-tgi:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash docker compose -f docker_compose_llm.yaml up -d ``` -# 🚀3. Consume LLM Service +## 🚀3. 
Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -86,7 +86,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service ```bash curl http://${your_ip}:9000/v1/chat/docsum \ diff --git a/comps/llms/text-generation/native/Dockerfile b/comps/llms/text-generation/native/Dockerfile deleted file mode 100644 index 9d7d1e094..000000000 --- a/comps/llms/text-generation/native/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# HABANA environment -FROM vault.habana.ai/gaudi-docker/1.15.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest as hpu - -ENV LANG=en_US.UTF-8 -ARG REPO=https://github.com/huggingface/optimum-habana.git -ARG REPO_VER=v1.11.1 - -RUN apt-get update && \ - apt-get install git-lfs && \ - git-lfs install && \ - apt-get install -y --no-install-recommends --fix-missing \ - libgl1-mesa-glx \ - libjemalloc-dev \ - vim - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -USER user - -COPY comps /home/user/comps -COPY comps/llm/text-generation/qwen2/qwen2.patch /home/user/qwen2.patch - -SHELL ["/bin/bash", "--login", "-c"] -RUN git clone --single-branch -b ${REPO_VER} ${REPO} /optimum-habana - -ENV PYTHONPATH=/root:/home/user - -RUN cd /optimum-habana && git apply /qwen2.patch && \ - cd /optimum-habana/examples/text-generation && pip install -r requirements.txt && \ - cd /optimum-habana && python setup.py install - -WORKDIR /home/user/comps/llms/text-generation/qwen2 - -ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/native/README.md b/comps/llms/text-generation/native/README.md new file mode 100644 index 000000000..a4fcc74c3 --- /dev/null +++ b/comps/llms/text-generation/native/README.md @@ -0,0 +1,61 @@ +# LLM Native Microservice + +LLM Native microservice uses [optimum-habana](https://github.com/huggingface/optimum-habana) for model initialization and warm-up, focusing solely on large language models (LLMs). It operates without frameworks like TGI/VLLM, using PyTorch directly for inference, and supports only non-streaming formats. This streamlined approach optimizes performance on Habana hardware. + +## 🚀1. Start Microservice + +If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a Native LLM service with docker. + +### 1.1 Setup Environment Variables + +In order to start Native LLM service, you need to setup the following environment variables first. + +```bash +export LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" +``` + +### 1.2 Build Docker Image + +```bash +cd ../../../../ +docker build -t opea/llm-native:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/native/docker/Dockerfile . +``` + +To start a docker container, you have two options: + +- A. Run Docker with CLI +- B. Run Docker with Docker Compose + +You can choose one as needed. 
+ +### 1.3 Run Docker with CLI (Option A) + +```bash +docker run -d --runtime=habana --name="llm-native-server" -p 9000:9000 -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e TOKENIZERS_PARALLELISM=false -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} opea/llm-native:latest +``` + +### 1.4 Run Docker with Docker Compose (Option B) + +```bash +cd docker +docker compose -f docker_compose_llm.yaml up -d +``` + +## 🚀2. Consume LLM Service + +### 2.1 Check Service Status + +```bash +curl http://${your_ip}:9000/v1/health_check\ + -X GET \ + -H 'Content-Type: application/json' +``` + +### 2.2 Consume LLM Service + +```bash +curl http://${your_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/llms/text-generation/native/docker/Dockerfile b/comps/llms/text-generation/native/docker/Dockerfile new file mode 100644 index 000000000..3dacf5211 --- /dev/null +++ b/comps/llms/text-generation/native/docker/Dockerfile @@ -0,0 +1,42 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as hpu + +ENV LANG=en_US.UTF-8 +ARG REPO=https://github.com/huggingface/optimum-habana.git +ARG REPO_VER=v1.12.1 + +RUN apt-get update && \ + apt-get install git-lfs && \ + git-lfs install && \ + apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN pip install --upgrade-strategy eager optimum[habana] && \ + pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 + +RUN git clone ${REPO} /home/user/optimum-habana && \ + cd /home/user/optimum-habana && git checkout ${REPO_VER} && \ + cd examples/text-generation && pip install -r requirements.txt && \ + cd /home/user/comps/llms/text-generation/native && pip install -r requirements.txt && \ + pip install --upgrade --force-reinstall pydantic + +ENV PYTHONPATH=/root:/home/user + +WORKDIR /home/user/comps/llms/text-generation/native + +ENTRYPOINT ["python", "llm.py"] diff --git a/comps/llms/text-generation/native/docker/docker_compose_llm.yaml b/comps/llms/text-generation/native/docker/docker_compose_llm.yaml new file mode 100644 index 000000000..f3a36e5bb --- /dev/null +++ b/comps/llms/text-generation/native/docker/docker_compose_llm.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + llm: + image: opea/llm-native:latest + container_name: llm-native-server + ports: + - "9000:9000" + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_NATIVE_MODEL: ${LLM_NATIVE_MODEL} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + TOKENIZERS_PARALLELISM: false + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/llms/text-generation/native/llm.py b/comps/llms/text-generation/native/llm.py index 4f407ccd6..43348670d 100644 --- a/comps/llms/text-generation/native/llm.py +++ b/comps/llms/text-generation/native/llm.py @@ -11,87 +11,156 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sys -import os -from datetime import datetime +sys.path.append("/test/GenAIComps/") + +import logging +import threading +import time import torch -from fastapi.responses import StreamingResponse -from langsmith import traceable +from langchain_core.prompts import PromptTemplate +from template import ChatTemplate, args_dict, input_sentences from utils import initialize_model -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import ( + GeneratedDoc, + LLMParamsDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, +) +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) -def warmup(): - input_sentences = ["DeepSpeed is a machine learning framework", "He is working on", "He has a", "He got all"] - input_tokens = tokenizer.batch_encode_plus(input_sentences, return_tensors="pt", padding=True) - for t in input_tokens: - if torch.is_tensor(input_tokens[t]): - input_tokens[t] = input_tokens[t].to("hpu") - for i in range(3): - print(f"Current time: {datetime.now()}") - print(f"Warming up {i+1}...") - outputs = model.generate( - **input_tokens, - generation_config=generation_config, - lazy_mode=True, - hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, - ).cpu() - res = tokenizer.batch_decode(outputs, skip_special_tokens=True) - print(f"res: {res}") +class Args: + def __init__(self, **entries): + self.__dict__.update(entries) -@register_microservice( - name="opea_service@llm_qwen", - service_type=ServiceType.LLM, - endpoint="/v1/chat/completions", - host="0.0.0.0", - port=8000, -) -@traceable(run_type="llm") -def llm_generate(input: LLMParamsDoc): - input_query = input.query - input_tokens = tokenizer.batch_encode_plus([input_query], return_tensors="pt", padding=True) + +model = None +assistant_model = None +tokenizer = None +generation_config = None +args = Args(**args_dict) +initialization_lock = threading.Lock() +initialized = False + + +def generate( + input_query: list, + device="hpu", + use_lazy_mode=True, + use_hpu_graphs=True, + profiling_steps=0, + profiling_warmup_steps=0, + ignore_eos=True, + profiling_record_shapes=False, +): + """Generates sequences from the input sentences and returns them.""" + logger.info(f"[llm - generate] starting to inference with prompt {input_query}") + encode_t0 = time.perf_counter() + + # Tokenization + input_tokens = tokenizer.batch_encode_plus(input_query, return_tensors="pt", padding=True) + encode_duration = time.perf_counter() - encode_t0 + logger.info(f"[llm - generate] input tokenized: {input_tokens}") + + # Move inputs to target device(s) for t in input_tokens: + logger.info(f"[llm - generate] t: {t}") if torch.is_tensor(input_tokens[t]): - input_tokens[t] = input_tokens[t].to("hpu") + logger.info("[llm - generate] input[t] is tensor") + logger.info(f"[llm - generate] device: {model.device}") + input_tokens[t] = input_tokens[t].to(model.device) - print(f"[llm - qwen] Current time: {datetime.now()}") - output = model.generate( + logger.info("[llm - generate] inputs transferred.") + + iteration_times = [] + outputs = model.generate( **input_tokens, generation_config=generation_config, - lazy_mode=True, - hpu_graphs=True, - profiling_steps=0, - profiling_warmup_steps=0, + 
assistant_model=assistant_model, + lazy_mode=use_lazy_mode, + hpu_graphs=use_hpu_graphs, + profiling_steps=profiling_steps, + profiling_warmup_steps=profiling_warmup_steps, + ignore_eos=ignore_eos, + iteration_times=iteration_times, + profiling_record_shapes=profiling_record_shapes, ).cpu() - res = tokenizer.batch_decode(output, skip_special_tokens=True)[0] - print(f"[llm - qwen] res: {res}") - return res + logger.info("[llm - generate] result generated") + first_token_time = iteration_times[0] + encode_duration + result = tokenizer.batch_decode(outputs, skip_special_tokens=True) + logger.info(f"[llm - generate] result: {result}") + logger.info(f"[llm - generate] Time to first token = {first_token_time*1000}ms") + return result -if __name__ == "__main__": - model, tokenizer, generation_config = initialize_model( - model_name_or_path="Qwen/Qwen1.5-7B-Chat", max_new_tokens=128 - ) - import habana_frameworks.torch.hpu as torch_hpu +def initialize(): + global model, assistant_model, tokenizer, generation_config, initialized + with initialization_lock: + if not initialized: + # initialize model and tokenizer + import habana_frameworks.torch.hpu as torch_hpu + from optimum.habana.utils import HabanaProfile + + model, assistant_model, tokenizer, generation_config = initialize_model(args, logger) + logger.info("[llm] model and tokenizer initialized.") + + # compilation and model warmup + HabanaProfile.disable() + logger.info("[llm - native] Graph compilation...") + for _ in range(args.warmup): + generate(input_sentences) + logger.info("[llm - native] model warm up finished.") + torch_hpu.synchronize() + HabanaProfile.enable() + logger.info("[llm - native] Ready to inference") + res = generate(["What is Deep Learning?"]) + logger.info(f"[llm - native] test result: {res}") + initialized = True + - print("[llm - qwen] model and tokenizer initialized.") +@register_microservice( + name="opea_service@llm_native", + service_type=ServiceType.LLM, + endpoint="/v1/chat/completions", + host="0.0.0.0", + port=9000, +) +@register_statistics(names=["opea_service@llm_native"]) +def llm_generate(input: LLMParamsDoc): + initialize() - from optimum.habana.utils import HabanaProfile + prompt = input.query + prompt_template = None + if input.chat_template: + prompt_template = PromptTemplate.from_template(input.chat_template) + input_variables = prompt_template.input_variables + if prompt_template: + if sorted(input_variables) == ["context", "question"]: + prompt = prompt_template.format(question=input.query, context="\n".join(input.documents)) + elif input_variables == ["question"]: + prompt = prompt_template.format(question=input.query) + else: + logger.info(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + else: + if input.documents: + prompt = ChatTemplate.generate_rag_prompt(input.query, input.documents) + res = generate([prompt]) - # compilation stage disable profiling - HabanaProfile.disable() - # Compilation - print("Graph compilation...") - warmup() - print("[llm - qwen] model warm up finished.") + logger.info(f"[llm - native] inference result: {res}") + return GeneratedDoc(text=res[0], prompt=input.query) - torch_hpu.synchronize() - HabanaProfile.enable() - print("[llm - qwen] Ready to inference") - opea_microservices["opea_service@llm_qwen"].start() +if __name__ == "__main__": + opea_microservices["opea_service@llm_native"].start() diff --git a/comps/llms/text-generation/native/qwen2.patch b/comps/llms/text-generation/native/qwen2.patch deleted file mode 100644 
index 9b5d93567..000000000 --- a/comps/llms/text-generation/native/qwen2.patch +++ /dev/null @@ -1,127 +0,0 @@ -diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py -index b086c80..e0e5a9f 100644 ---- a/examples/text-generation/run_lm_eval.py -+++ b/examples/text-generation/run_lm_eval.py -@@ -75,13 +75,13 @@ class HabanaModelAdapter(lm_eval.base.BaseLM): - self.options = options - self._device = args.device - self.model_inputs = {"use_cache": self.options.use_cache} -- if self.model.config.model_type in ["llama", "falcon"]: -+ if self.model.config.model_type in ["llama", "falcon", "qwen2"]: - self.model_inputs.update( - { - "reuse_cache": self.options.reuse_cache, - } - ) -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama","mistral","qwen2"]: - self.model_inputs.update( - { - "attn_softmax_bf16": self.options.attn_softmax_bf16, -diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py -index 8bce0ae..c29f458 100644 ---- a/examples/text-generation/utils.py -+++ b/examples/text-generation/utils.py -@@ -234,7 +234,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): - - model = deepspeed.init_inference(model, **ds_inference_kwargs) - model = model.module -- if model.config.model_type in ["llama", "falcon"]: -+ if model.config.model_type in ["llama", "falcon","qwen2"]: - patch_scoped_linear_all_reduce(model) - - if args.quant_config: -diff --git a/optimum/habana/transformers/generation/utils.py b/optimum/habana/transformers/generation/utils.py -index 0d50470..94cc7eb 100755 ---- a/optimum/habana/transformers/generation/utils.py -+++ b/optimum/habana/transformers/generation/utils.py -@@ -740,7 +740,7 @@ class GaudiGenerationMixin(GenerationMixin): - ) - model_kwargs["kv_cache_len"] = calculated_max_length - -- if self.config.model_type in ["llama", "falcon"]: -+ if self.config.model_type in ["llama", "falcon","qwen2"]: - if self.config.max_position_embeddings < calculated_max_length: - unwrap_deepspeed_model(self).update_sincos_cache(seq_len=calculated_max_length) - -diff --git a/optimum/habana/transformers/modeling_utils.py b/optimum/habana/transformers/modeling_utils.py -index 6dc40a7..b5044af 100644 ---- a/optimum/habana/transformers/modeling_utils.py -+++ b/optimum/habana/transformers/modeling_utils.py -@@ -55,6 +55,9 @@ from .models import ( - GaudiOPTForCausalLM, - GaudiOPTLearnedPositionalEmbedding, - GaudiPhiForCausalLM, -+ GaudiQwen2Model, -+ GaudiQwen2Attention, -+ GaudiQwen2MLP, - _gaudi_wav2vec2_compute_mask_indices, - _gaudi_wav2vec2_mask_hidden_states, - gaudi_albert_forward, -@@ -118,6 +121,7 @@ from .models import ( - gaudi_phi_attention_forward, - gaudi_phi_decoder_layer_forward, - gaudi_phi_model_forward, -+ gaudi_qwen2_rmsnorm_forward, - gaudi_rot_matmul, - gaudi_rot_vec_mul, - gaudi_SpeechT5Attention_forward, -@@ -367,3 +371,11 @@ def adapt_transformers_to_gaudi(): - transformers.models.speecht5.modeling_speecht5.SpeechT5SpeechDecoderPrenet.forward = ( - gaudi_SpeechT5SpeechDecoderPrenet_forward - ) -+ -+ # Optimization for qwen2 on Gaudi -+ transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM = GaudiQwen2ForCausalLM -+ transformers.models.qwen2.modeling_qwen2.Qwen2Model = GaudiQwen2Model -+ transformers.models.qwen2.modeling_qwen2.Qwen2Attention = GaudiQwen2Attention -+ transformers.models.qwen2.modeling_qwen2.Qwen2MLP = GaudiQwen2MLP -+ transformers.models.qwen2.modeling_qwen2.Qwen2DecoderLayer = GaudiQwen2DecoderLayer -+ 
transformers.models.qwen2.modeling_qwen2.Qwen2RMSNorm.forward = gaudi_qwen2_rmsnorm_forward -diff --git a/optimum/habana/transformers/models/__init__.py b/optimum/habana/transformers/models/__init__.py -index 1582d3f..41fdfdc 100644 ---- a/optimum/habana/transformers/models/__init__.py -+++ b/optimum/habana/transformers/models/__init__.py -@@ -122,6 +122,14 @@ from .phi import ( - gaudi_phi_decoder_layer_forward, - gaudi_phi_model_forward, - ) -+from .qwen2 import ( -+ GaudiQwen2Attention, -+ GaudiQwen2DecoderLayer, -+ GaudiQwen2ForCausalLM, -+ GaudiQwen2MLP, -+ GaudiQwen2Model, -+ gaudi_qwen2_rmsnorm_forward, -+) - from .speecht5 import ( - gaudi_generate_speech, - gaudi_SpeechT5Attention_forward, -diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py -index dc6e136..7dfebaa 100644 ---- a/optimum/habana/transformers/trainer.py -+++ b/optimum/habana/transformers/trainer.py -@@ -916,9 +916,9 @@ class GaudiTrainer(Trainer): - if step % args.gradient_accumulation_steps == 0: - self.control = self.callback_handler.on_step_begin(args, self.state, self.control) - -- # attn_softmax_bf16 and use_flash_attention is enabled only for llama -+ # attn_softmax_bf16 and use_flash_attention is enabled only for llama and qwen2 - if hasattr(self.model, "generation_config") and self.model.generation_config is not None: -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama", "qwen2"]: - if self.model.generation_config.attn_softmax_bf16: - inputs["attn_softmax_bf16"] = True - if self.model.generation_config.use_flash_attention: -@@ -1799,9 +1799,9 @@ class GaudiTrainer(Trainer): - if batch_size is None: - batch_size = observed_batch_size - -- # attn_softmax_bf16 and use_flash_attention are enabled only for llama -+ # attn_softmax_bf16 and use_flash_attention are enabled only for llama and qwen2 - if hasattr(self.model, "generation_config") and self.model.generation_config is not None: -- if self.model.config.model_type == "llama": -+ if self.model.config.model_type in ["llama", "qwen2"]: - if self.model.generation_config.attn_softmax_bf16: - inputs["attn_softmax_bf16"] = True - if self.model.generation_config.use_flash_attention: diff --git a/comps/llms/text-generation/native/requirements.txt b/comps/llms/text-generation/native/requirements.txt index e8473a80c..806f2d29f 100644 --- a/comps/llms/text-generation/native/requirements.txt +++ b/comps/llms/text-generation/native/requirements.txt @@ -1,10 +1,10 @@ -docarray[full] +docarray fastapi -langsmith +httpx +langchain_core opentelemetry-api opentelemetry-exporter-otlp opentelemetry-sdk prometheus-fastapi-instrumentator shortuuid -transformers uvicorn diff --git a/comps/llms/text-generation/native/template.py b/comps/llms/text-generation/native/template.py new file mode 100644 index 000000000..c43205a0a --- /dev/null +++ b/comps/llms/text-generation/native/template.py @@ -0,0 +1,99 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import re + + +class ChatTemplate: + @staticmethod + def generate_rag_prompt(question, documents): + context_str = "\n".join(documents) + if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: + # chinese context + template = """ +### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 +### 搜索结果:{context} +### 问题:{question} +### 回答: +""" + else: + template = """ +### You are a helpful, respectful and honest assistant to help the 
user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. \n +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + return template.format(context=context_str, question=question) + + +input_sentences = [ + "DeepSpeed is a machine learning framework", + "He is working on", + "He has a", + "He got all", + "Everyone is happy and I can", + "The new movie that got Oscar this year", + "In the far far distance from our galaxy,", + "Peace is the only way", +] + + +llm_model = os.getenv("LLM_NATIVE_MODEL", "Qwen/Qwen2-7B-Instruct") +args_dict = { + "device": "hpu", + "model_name_or_path": llm_model, + "bf16": True, + "max_new_tokens": 100, + "max_input_tokens": 0, + "batch_size": 1, + "warmup": 3, + "n_iterations": 5, + "local_rank": 0, + "use_kv_cache": True, + "use_hpu_graphs": True, + "dataset_name": None, + "column_name": None, + "do_sample": False, + "num_beams": 1, + "trim_logits": False, + "seed": 27, + "profiling_warmup_steps": 0, + "profiling_steps": 0, + "profiling_record_shapes": False, + "prompt": None, + "bad_words": None, + "force_words": None, + "assistant_model": None, + "peft_model": None, + "num_return_sequences": 1, + "token": None, + "model_revision": "main", + "attn_softmax_bf16": False, + "output_dir": None, + "bucket_size": -1, + "bucket_internal": False, + "dataset_max_samples": -1, + "limit_hpu_graphs": False, + "reuse_cache": False, + "verbose_workers": False, + "simulate_dyn_prompt": None, + "reduce_recompile": False, + "use_flash_attention": False, + "flash_attention_recompute": False, + "flash_attention_causal_mask": False, + "flash_attention_fast_softmax": False, + "book_source": False, + "torch_compile": False, + "ignore_eos": True, + "temperature": 1.0, + "top_p": 1.0, + "const_serialization_path": None, + "disk_offload": False, + "trust_remote_code": False, + "quant_config": "", + "world_size": 0, +} diff --git a/comps/llms/text-generation/native/utils.py b/comps/llms/text-generation/native/utils.py index 3eef7a6e2..04cebfbd4 100644 --- a/comps/llms/text-generation/native/utils.py +++ b/comps/llms/text-generation/native/utils.py @@ -1,10 +1,11 @@ -# Copyright (c) 2024 Intel Corporation +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -12,11 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +############################################################################### +# Copyright (C) 2020-2021 Habana Labs, Ltd. 
an Intel Company +############################################################################### import copy +import glob import os import shutil +import tempfile import time +from pathlib import Path import torch from optimum.habana.checkpoint_utils import ( @@ -26,66 +33,376 @@ model_on_meta, write_checkpoints_json, ) -from optimum.habana.utils import check_habana_frameworks_version, check_optimum_habana_min_version, set_seed +from optimum.habana.utils import ( + check_habana_frameworks_version, + check_optimum_habana_min_version, + get_habana_frameworks_version, + set_seed, +) from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from transformers.utils import check_min_version -def setup_env(): +def adjust_batch(batch, size): + curr_size = batch["input_ids"].shape[1] + if curr_size >= size: + adjusted_batch = { + "input_ids": batch["input_ids"][:, :size], + "attention_mask": batch["attention_mask"][:, :size], + } + else: + adjusted_batch = {} + for k in batch.keys(): + last_colm = batch[k][:, -1] + expanded = last_colm.tile((size - curr_size, 1)).T + adjusted_batch[k] = torch.concat([batch[k], expanded], 1) + assert adjusted_batch["input_ids"].shape[1] == size + assert adjusted_batch["attention_mask"].shape[1] == size + return adjusted_batch + + +def override_print(enable): + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if force or enable: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def override_logger(logger, enable): + logger_info = logger.info + + def info(*args, **kwargs): + force = kwargs.pop("force", False) + if force or enable: + logger_info(*args, **kwargs) + + logger.info = info + + +def count_hpu_graphs(): + return len(glob.glob(".graph_dumps/*PreGraph*")) + + +def override_prints(enable, logger): + override_print(enable) + override_logger(logger, enable) + + +def setup_distributed(args): + args.local_rank = int(os.getenv("LOCAL_RANK", "0")) + args.world_size = int(os.getenv("WORLD_SIZE", "0")) + args.global_rank = int(os.getenv("RANK", "0")) + + +def setup_inference(args, model): + import habana_frameworks.torch.core as htcore + + habana_version = get_habana_frameworks_version() + + print("Initializing inference mode") + # Keeping the if-else here for back compat. TODO remove later + if habana_version.major >= 1 and habana_version.minor >= 16: + htcore.hpu_initialize(model, mark_only_scales_as_const=True) + else: + const_marking = os.getenv("ENABLE_CONST_MARKING", "True") + if const_marking == "True": + htcore.hpu_initialize(model) + return model + + +def setup_const_serialization(const_serialization_path): + import uuid + + const_serialization_path = os.path.join(const_serialization_path + uuid.uuid4().hex) + os.makedirs(const_serialization_path) + from habana_frameworks.torch.hpu import enable_const_section_serialization + + print("Serializing const params to {}".format(const_serialization_path)) + enable_const_section_serialization(const_serialization_path, True) + + +def setup_env(args): # Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
check_min_version("4.34.0") check_optimum_habana_min_version("1.9.0.dev0") # TODO: SW-167588 - WA for memory issue in hqt prep_model os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") + if args.global_rank == 0 and not args.torch_compile: + os.environ.setdefault("GRAPH_VISUALIZATION", "true") + shutil.rmtree(".graph_dumps", ignore_errors=True) + + if args.world_size > 0: + os.environ.setdefault("PT_HPU_LAZY_ACC_PAR_MODE", "0") + os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") + + if args.use_hpu_graphs and args.limit_hpu_graphs and not args.reuse_cache and args.bucket_internal: + # Based upon above conditions and below env variable, + # we can call HPU graphs clear_inputs(). + os.environ.setdefault("PT_HPUGRAPH_DISABLE_TENSOR_CACHE", "1") + # Tweak generation so that it runs faster on Gaudi from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi adapt_transformers_to_gaudi() -def setup_device(): - import habana_frameworks.torch.core as htcore +def setup_device(args): + if args.device == "hpu": + import habana_frameworks.torch.core as htcore + + if args.quant_config: + htcore.hpu_set_env() + return torch.device(args.device) + - return torch.device("hpu") +# patching LinearAllreduce to use ScopedLinearAllReduce +def patch_scoped_linear_all_reduce(model): + from deepspeed.module_inject.layers import LinearAllreduce + from optimum.habana.transformers.models.modeling_all_models import ScopedLinearAllReduce + + for name, module in model.named_children(): + if type(module) is LinearAllreduce: + SL = ScopedLinearAllReduce(mod=module) + setattr(model, name, SL) + patch_scoped_linear_all_reduce(module) def get_torch_compiled_model(model): - model.model = torch.compile(model.model, backend="hpu_backend") + model.model = torch.compile(model.model, backend="hpu_backend", options={"keep_input_mutations": True}) return model -def setup_model(model_name_or_path, model_dtype, model_kwargs): - model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=model_dtype, **model_kwargs) - model = model.eval().to("hpu") +def setup_model(args, model_dtype, model_kwargs, logger): + logger.info("Single-device run.") + if args.assistant_model is None: + assistant_model = None + else: + logger.info(f"Using asssitant model {args.assistant_model}.") + if args.disk_offload: + from accelerate import infer_auto_device_map, init_empty_weights + + config = AutoConfig.from_pretrained(args.model_name_or_path) + with init_empty_weights(): + model = AutoModelForCausalLM.from_config(config) + max_memory = {"cpu": "10GiB"} + device_map = infer_auto_device_map(model, max_memory=max_memory, dtype=model_dtype) + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + device_map=device_map, + offload_folder="/tmp/offload_folder/", + offload_state_dict=True, + torch_dtype=model_dtype, + **model_kwargs, + ) + else: + if args.assistant_model is not None: + assistant_model = AutoModelForCausalLM.from_pretrained( + args.assistant_model, torch_dtype=model_dtype, **model_kwargs + ) + if args.peft_model is not None: + model = peft_model(args, model_dtype, logger, **model_kwargs) + else: + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs + ) + if args.quant_config: + import habana_quantization_toolkit + + habana_quantization_toolkit.prep_model(model) + if args.assistant_model is not None: + habana_quantization_toolkit.quantize_model(assistant_model) + + model = model.eval().to(args.device) + 
if args.assistant_model is not None: + assistant_model = assistant_model.eval().to(args.device) + + if args.use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + from optimum.habana.transformers.trainer import _is_peft_model + + if check_habana_frameworks_version("1.13.0") and model.config.model_type == "falcon": + model = wrap_in_hpu_graph(model, hash_with_views=False) + else: + model = wrap_in_hpu_graph(model) + if args.assistant_model is not None: + assistant_model = wrap_in_hpu_graph(assistant_model) + if _is_peft_model(model): + model.base_model = wrap_in_hpu_graph(model.base_model) - from habana_frameworks.torch.hpu import wrap_in_hpu_graph + if args.torch_compile and model.config.model_type == "llama": + model = get_torch_compiled_model(model) + # if args.assistant_model is not None: + # assistant_model = get_torch_compiled_model(assistant_model) + return model, assistant_model + + +def setup_distributed_model(args, model_dtype, model_kwargs, logger): + import deepspeed - if check_habana_frameworks_version("1.13.0") and model.config.model_type == "falcon": - model = wrap_in_hpu_graph(model, hash_with_views=False) + logger.info("DeepSpeed is enabled.") + deepspeed.init_distributed(dist_backend="hccl") + config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + load_to_meta = model_on_meta(config) + + if args.assistant_model is None: + assistant_model = None else: - model = wrap_in_hpu_graph(model) + logger.info(f"Using asssitant model {args.assistant_model}.") - if model.config.model_type == "llama": + if load_to_meta: + # Construct model with fake meta tensors, later will be replaced on devices during ds-inference ckpt load + with deepspeed.OnDevice(dtype=model_dtype, device="meta"): + model = AutoModelForCausalLM.from_config(config, torch_dtype=model_dtype) + + # Model loaded to meta is managed differently + checkpoints_json = tempfile.NamedTemporaryFile(suffix=".json", mode="+w") + + # For PEFT models, write the merged model on disk to be able to load it on the meta device + if args.peft_model is not None: + merged_model_dir = "/tmp/text_generation_merged_peft_model" + if args.local_rank == 0: + if Path(merged_model_dir).is_dir(): + shutil.rmtree(merged_model_dir) + peft_model(args, model_dtype, logger, **model_kwargs).save_pretrained(merged_model_dir) + torch.distributed.barrier() + + write_checkpoints_json( + merged_model_dir if args.peft_model is not None else args.model_name_or_path, + args.local_rank, + checkpoints_json, + token=args.token, + ) + else: + # TODO: revisit placement on CPU when auto-injection is possible + with deepspeed.OnDevice(dtype=model_dtype, device="cpu"): + if args.peft_model is not None: + model = peft_model(args, model_dtype, logger, **model_kwargs) + else: + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs + ) + model.eval() + + if args.assistant_model is not None: + assistant_model = AutoModelForCausalLM.from_pretrained( + args.assistant_model, torch_dtype=model_dtype, **model_kwargs + ).eval() + + # Initialize the model + ds_inference_kwargs = {"dtype": model_dtype} + ds_inference_kwargs["tensor_parallel"] = {"tp_size": args.world_size} + ds_inference_kwargs["enable_cuda_graph"] = args.use_hpu_graphs + ds_inference_kwargs["injection_policy"] = get_ds_injection_policy(config) + if load_to_meta: + ds_inference_kwargs["checkpoint"] = checkpoints_json.name + + model = deepspeed.init_inference(model, **ds_inference_kwargs) 
+ model = model.module + if model.config.model_type in ["llama", "falcon", "qwen2"]: + patch_scoped_linear_all_reduce(model) + + if args.quant_config: + import habana_quantization_toolkit + + habana_quantization_toolkit.prep_model(model) + if args.assistant_model is not None: + habana_quantization_toolkit.prep_model(assistant_model) + + if args.torch_compile and model.config.model_type == "llama": model = get_torch_compiled_model(model) + # if args.assistant_model is not None: + # assistant_model = get_torch_compiled_model(assistant_model) + return model, assistant_model - return model +def peft_model(args, model_dtype, logger, **model_kwargs): + import importlib.util + + if importlib.util.find_spec("peft") is None: + raise ImportError("The `peft` package is not installed, please run: `pip install peft`.") + from peft import AutoPeftModelForCausalLM + from peft.config import PeftConfigMixin + + base_model_name = PeftConfigMixin.from_pretrained( + args.peft_model, + token=model_kwargs["token"] if "token" in model_kwargs else None, + ).base_model_name_or_path + + base_model_is_local = Path(base_model_name).is_dir() + if not base_model_is_local: + # Check if the base model path to a remote repository on the HF Hub exists + from huggingface_hub import list_repo_files + + try: + list_repo_files(base_model_name) + base_model_is_remote = True + except Exception: + base_model_is_remote = False + + if base_model_is_local or base_model_is_remote: + model = AutoPeftModelForCausalLM.from_pretrained(args.peft_model, torch_dtype=model_dtype, **model_kwargs) + else: + # Since the base model doesn't exist locally nor remotely, use `args.model_name_or_path` as the base model + logger.warning( + f"The base model `{base_model_name}` of the LoRA configuration associated" + f" to `{args.peft_model}` does not exist locally or remotely. Using " + f"`--model_name_or_path {args.model_name_or_path}` as a fall back for the base model." 
+ ) + from peft import PeftModel + + model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) + model = PeftModel.from_pretrained(model, args.peft_model, torch_dtype=model_dtype, **model_kwargs) + if hasattr(model, "merge_and_unload"): + model = model.merge_and_unload() + if model_dtype == torch.bfloat16: + model = model.to(torch.bfloat16) + return model + else: + from optimum.habana.peft.peft_model import gaudi_generate, gaudi_prepare_inputs_for_generation + + model.__class__.generate = gaudi_generate + model.__class__.prepare_inputs_for_generation = gaudi_prepare_inputs_for_generation + return model -def setup_tokenizer(model_name_or_path, model): + +def setup_tokenizer(args, model, assistant_model): tokenizer_kwargs = { - "revision": "main", - "token": None, + "revision": args.model_revision, + "token": args.token, + "trust_remote_code": args.trust_remote_code, } - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **tokenizer_kwargs) + if args.bad_words is not None or args.force_words is not None: + tokenizer_kwargs["add_prefix_space"] = True + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, **tokenizer_kwargs) if not model.config.is_encoder_decoder: tokenizer.padding_side = "left" - # Some models like GPT2 do not have a PAD token so we have to set it if necessary + if model.config.model_type == "llama": # unwind broken decapoda-research config model.generation_config.pad_token_id = 0 model.generation_config.bos_token_id = 1 model.generation_config.eos_token_id = 2 + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = 0 + assistant_model.generation_config.bos_token_id = 1 + assistant_model.generation_config.eos_token_id = 2 + tokenizer.bos_token_id = model.generation_config.bos_token_id + tokenizer.eos_token_id = model.generation_config.eos_token_id + tokenizer.pad_token_id = model.generation_config.pad_token_id + tokenizer.pad_token = tokenizer.decode(tokenizer.pad_token_id) + tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) + tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) + if model.config.model_type == "persimmon": + model.generation_config.pad_token_id = model.generation_config.eos_token_id + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id tokenizer.bos_token_id = model.generation_config.bos_token_id tokenizer.eos_token_id = model.generation_config.eos_token_id tokenizer.pad_token_id = model.generation_config.pad_token_id @@ -93,54 +410,112 @@ def setup_tokenizer(model_name_or_path, model): tokenizer.eos_token = tokenizer.decode(tokenizer.eos_token_id) tokenizer.bos_token = tokenizer.decode(tokenizer.bos_token_id) + # Some models like GPT2 do not have a PAD token so we have to set it if necessary if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model.generation_config.pad_token_id = model.generation_config.eos_token_id - return tokenizer, model + if assistant_model is not None: + assistant_model.generation_config.pad_token_id = assistant_model.generation_config.eos_token_id + return tokenizer, model, assistant_model -def setup_generation_config(model, tokenizer, max_new_tokens): + +def setup_generation_config(args, model, assistant_model, tokenizer): bad_words_ids = None force_words_ids = None + if args.bad_words is not None: + bad_words_ids = [tokenizer.encode(bad_word, add_special_tokens=False) for bad_word in args.bad_words] + if 
args.force_words is not None: + force_words_ids = [tokenizer.encode(force_word, add_special_tokens=False) for force_word in args.force_words] is_optimized = model_is_optimized(model.config) + # Generation configuration generation_config = copy.deepcopy(model.generation_config) - generation_config.max_new_tokens = max_new_tokens - generation_config.use_cache = True - generation_config.static_shapes = is_optimized - generation_config.bucket_size = -1 - generation_config.bucket_internal = True - generation_config.do_sample = True - generation_config.num_beams = 1 + generation_config.max_new_tokens = args.max_new_tokens + generation_config.use_cache = args.use_kv_cache + generation_config.static_shapes = is_optimized and assistant_model is None + generation_config.bucket_size = args.bucket_size if is_optimized else -1 + generation_config.bucket_internal = args.bucket_internal + generation_config.do_sample = args.do_sample + generation_config.num_beams = args.num_beams generation_config.bad_words_ids = bad_words_ids generation_config.force_words_ids = force_words_ids - generation_config.num_return_sequences = 1 - generation_config.trim_logits = True - generation_config.attn_softmax_bf16 = True - generation_config.limit_hpu_graphs = True - generation_config.reuse_cache = False - generation_config.reduce_recompile = False - generation_config.use_flash_attention = False - generation_config.flash_attention_recompute = True - generation_config.flash_attention_causal_mask = True + generation_config.num_return_sequences = args.num_return_sequences + generation_config.trim_logits = args.trim_logits + generation_config.attn_softmax_bf16 = args.attn_softmax_bf16 + generation_config.limit_hpu_graphs = args.limit_hpu_graphs + generation_config.reuse_cache = args.reuse_cache + generation_config.reduce_recompile = args.reduce_recompile + if generation_config.reduce_recompile: + assert generation_config.bucket_size > 0 + generation_config.use_flash_attention = args.use_flash_attention + generation_config.flash_attention_recompute = args.flash_attention_recompute + generation_config.flash_attention_causal_mask = args.flash_attention_causal_mask + generation_config.flash_attention_fast_softmax = args.flash_attention_fast_softmax + generation_config.trust_remote_code = args.trust_remote_code + return generation_config -def initialize_model(model_name_or_path, max_new_tokens=128): +def exclude_hpu_graph_configs(args): + # Excluded configs for batch size 1 for hpu graph + if args.batch_size == 1 and args.limit_hpu_graphs: + if "falcon-180B" in args.model_name_or_path or "falcon-180b" in args.model_name_or_path: + return False + if args.world_size == 2 or args.world_size == 4 or args.world_size == 8: + if args.quant_config: + if args.max_input_tokens >= 8192 and args.max_new_tokens >= 128: + return False + else: + if args.max_input_tokens >= 4096 and args.max_new_tokens >= 128: + return False + return True + else: + return False + + +def initialize_model(args, logger): init_start = time.perf_counter() - setup_env() - setup_device() - set_seed(17) - get_repo_root(model_name_or_path, local_rank=0, token=None) - model_dtype = torch.bfloat16 + setup_distributed(args) + if exclude_hpu_graph_configs(args): + args.limit_hpu_graphs = False + override_prints(args.global_rank == 0 or args.verbose_workers, logger) + setup_env(args) + setup_device(args) + set_seed(args.seed) + get_repo_root(args.model_name_or_path, local_rank=args.local_rank, token=args.token) + if args.assistant_model is not None: + 
get_repo_root(args.assistant_model, local_rank=args.local_rank, token=args.token) + use_deepspeed = args.world_size > 0 + if use_deepspeed or args.bf16: + model_dtype = torch.bfloat16 + else: + model_dtype = torch.float + args.attn_softmax_bf16 = False - model_kwargs = {"revision": "main", "token": None, "device_map": "auto", "offload_folder": "/tmp/offload_folder/"} + model_kwargs = { + "revision": args.model_revision, + "token": args.token, + "trust_remote_code": args.trust_remote_code, + } + if args.trust_remote_code: + logger.warning("`trust_remote_code` is set, there is no guarantee this model works properly and it may fail") - model = setup_model(model_name_or_path, model_dtype, model_kwargs) - tokenizer, model = setup_tokenizer(model_name_or_path, model) - generation_config = setup_generation_config(model, tokenizer, max_new_tokens) + model, assistant_model = ( + setup_model(args, model_dtype, model_kwargs, logger) + if not use_deepspeed + else setup_distributed_model(args, model_dtype, model_kwargs, logger) + ) + tokenizer, model, assistant_model = setup_tokenizer(args, model, assistant_model) + generation_config = setup_generation_config(args, model, assistant_model, tokenizer) + if args.const_serialization_path: + setup_const_serialization(args.const_serialization_path) + if args.quant_config: + model = setup_inference(args, model) init_end = time.perf_counter() - print(f"Model initialization took {(init_end - init_start):.3f}s") - return model, tokenizer, generation_config + logger.info(f"Args: {args}") + logger.info(f"device: {args.device}, n_hpu: {args.world_size}, bf16: {model_dtype == torch.bfloat16}") + logger.info(f"Model initialization took {(init_end - init_start):.3f}s") + return model, assistant_model, tokenizer, generation_config diff --git a/comps/llms/text-generation/ollama/README.md b/comps/llms/text-generation/ollama/README.md index 1ad636098..5a86e8c61 100644 --- a/comps/llms/text-generation/ollama/README.md +++ b/comps/llms/text-generation/ollama/README.md @@ -2,9 +2,9 @@ [Ollama](https://github.com/ollama/ollama) allows you to run open-source large language models, such as Llama 3, locally. Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile. Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. It's the best choice to deploy large language models on AIPC locally. -# Get Started +## Get Started -## Setup +### Setup Follow [these instructions](https://github.com/ollama/ollama) to set up and run a local Ollama instance. @@ -15,27 +15,35 @@ Follow [these instructions](https://github.com/ollama/ollama) to set up and run Note: Special settings are necessary to pull models behind the proxy. -```bash -sudo vim /etc/systemd/system/ollama.service -``` +- Step1: Modify the ollama service configure file. -Add your proxy to the above configure file. + ```bash + sudo vim /etc/systemd/system/ollama.service + ``` -```markdown -[Service] -Environment="http_proxy=${your_proxy}" -Environment="https_proxy=${your_proxy}" -``` + Add your proxy to the above configure file. + + ```markdown + [Service] + Environment="http_proxy=${your_proxy}" + Environment="https_proxy=${your_proxy}" + ``` + +- Step2: Restart the ollama service. 
+ ```bash + sudo systemctl daemon-reload + sudo systemctl restart ollama + ``` -## Usage +### Usage Here are a few ways to interact with pulled local models: -### In the terminal +#### In the terminal All of your local models are automatically served on localhost:11434. Run ollama run to start interacting via the command line directly. -### API access +#### API access Send an application/json request to the API endpoint of Ollama to interact. @@ -46,20 +54,20 @@ curl http://localhost:11434/api/generate -d '{ }' ``` -# Build Docker Image +## Build Docker Image ```bash cd GenAIComps/ docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/Dockerfile . ``` -# Run the Ollama Microservice +## Run the Ollama Microservice ```bash -docker run --network host opea/llm-ollama:latest +docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llm-ollama:latest ``` -# Consume the Ollama Microservice +## Consume the Ollama Microservice ```bash curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"model": "llama3", "query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' -H 'Content-Type: application/json' diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index 57f476720..cca4d1fa4 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -2,17 +2,17 @@ [Text Generation Inference](https://github.com/huggingface/text-generation-inference) (TGI) is a toolkit for deploying and serving Large Language Models (LLMs). TGI enables high-performance text generation for the most popular open-source LLMs, including Llama, Falcon, StarCoder, BLOOM, GPT-NeoX, and more. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the LLM microservice, you need to install python packages first. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLM Service +### 1.2 Start LLM Service ```bash export HF_TOKEN=${your_hf_api_token} @@ -22,7 +22,7 @@ export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} ``` -## 1.3 Verify the TGI Service +### 1.3 Verify the TGI Service ```bash curl http://${your_ip}:8008/generate \ @@ -31,18 +31,18 @@ curl http://${your_ip}:8008/generate \ -H 'Content-Type: application/json' ``` -## 1.4 Start LLM Service with Python Script +### 1.4 Start LLM Service with Python Script ```bash export TGI_LLM_ENDPOINT="http://${your_ip}:8008" python text-generation/tgi/llm.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) If you start an LLM microservice with docker, the `docker_compose_llm.yaml` file will automatically start a TGI/vLLM service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables In order to start TGI and LLM services, you need to setup the following environment variables first. 
@@ -55,7 +55,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/llms" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -69,22 +69,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="llm-tgi-server" -p 9000:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/llm-tgi:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd text-generation/tgi docker compose -f docker_compose_llm.yaml up -d ``` -# 🚀3. Consume LLM Service +## 🚀3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -92,7 +92,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. @@ -118,7 +118,7 @@ curl http://${your_ip}:9000/v1/chat/completions \ -H 'Content-Type: application/json' ``` -## 4. Validated Model +### 4. Validated Model | Model | TGI | | ------------------------- | --- | diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml index c1ab98dcc..9551979a7 100644 --- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml +++ b/comps/llms/text-generation/tgi/docker_compose_llm.yaml @@ -14,7 +14,7 @@ services: shm_size: 1g command: --model-id ${LLM_MODEL_ID} llm: - image: opea/gen-ai-comps:llm-tgi-server + image: opea/llm-tgi:latest container_name: llm-tgi-server ports: - "9000:9000" diff --git a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh index 8eef92f52..575088876 100755 --- a/comps/llms/text-generation/vllm-openvino/launch_model_server.sh +++ b/comps/llms/text-generation/vllm-openvino/launch_model_server.sh @@ -51,7 +51,7 @@ docker run -d --rm --name="vllm-openvino-server" \ -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - -v $HOME/.cache/huggingface:/root/.cache/huggingface \ + -v $HOME/.cache/huggingface:/home/user/.cache/huggingface \ vllm:openvino /bin/bash -c "\ cd / && \ export VLLM_CPU_KVCACHE_SPACE=50 && \ diff --git a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray index 53e900716..fce3a4ee2 100644 --- a/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray +++ b/comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray @@ -6,10 +6,10 @@ FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installe ENV LANG=en_US.UTF-8 -WORKDIR /root/vllm-ray +WORKDIR /home/user/vllm-ray # copy the source code to the package directory -COPY comps/llms/text-generation/vllm-ray /root/vllm-ray +COPY comps/llms/text-generation/vllm-ray /home/user/vllm-ray RUN pip install --upgrade-strategy eager optimum[habana] && \ pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.1 @@ -21,7 +21,7 @@ RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/ service ssh restart ENV no_proxy=localhost,127.0.0.1 -ENV PYTHONPATH=$PYTHONPATH:/root:/root/vllm-ray 
+ENV PYTHONPATH=$PYTHONPATH:/root:/home/user/vllm-ray # Required by DeepSpeed ENV RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES=1 diff --git a/comps/llms/text-generation/vllm-xft/README.md b/comps/llms/text-generation/vllm-xft/README.md index 68931d4ca..4b39709a8 100644 --- a/comps/llms/text-generation/vllm-xft/README.md +++ b/comps/llms/text-generation/vllm-xft/README.md @@ -1,24 +1,26 @@ +# vLLM-xFT + vLLM-xFT is a fork of vLLM to integrate the xfastertransformer backend, maintaining compatibility with most of the official vLLM's features. For usage of vllm-xFT, please refer to [xFasterTransformer/vllm-xft](https://github.com/intel/xFasterTransformer/blob/main/serving/vllm-xft.md) -# 🚀 Start Microservice with Docker +## 🚀 Start Microservice with Docker -## 1 Build Docker Image +### 1 Build Docker Image ```bash cd ../../../ docker build -t opea/llm-vllm-xft:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm-xft/docker/Dockerfile . ``` -## 2 Run Docker with CLI +### 2 Run Docker with CLI ```bash docker run -it -p 9000:9000 -v /home/sdp/Qwen2-7B-Instruct/:/Qwen2-7B-Instruct/ -e vLLM_LLM_ENDPOINT="http://localhost:18688" -e HF_DATASET_DIR="/Qwen2-7B-Instruct/" -e OUTPUT_DIR="./output" -e TOKEN_PATH="/Qwen2-7B-Instruct/" -e https_proxy=$https_proxy -e http_proxy=$http_proxy -e no_proxy=$no_proxy --ipc=host opea/llm-vllm-xft:latest ``` -# 🚀3. Consume LLM Service +## 🚀3. Consume LLM Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:9000/v1/health_check\ @@ -26,7 +28,7 @@ curl http://${your_ip}:9000/v1/health_check\ -H 'Content-Type: application/json' ``` -## 3.2 Consume LLM Service +### 3.2 Consume LLM Service You can set the following model parameters according to your actual needs, such as `max_new_tokens`, `streaming`. diff --git a/comps/llms/text-generation/vllm-xft/docker/Dockerfile b/comps/llms/text-generation/vllm-xft/docker/Dockerfile index 95cd596d7..3742bcb2f 100644 --- a/comps/llms/text-generation/vllm-xft/docker/Dockerfile +++ b/comps/llms/text-generation/vllm-xft/docker/Dockerfile @@ -58,13 +58,13 @@ RUN cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr/local/oneCCL \ RUN echo "source /usr/local/oneCCL/env/setvars.sh" >> ~/.bashrc -WORKDIR /root/ +WORKDIR /home/user/ RUN rm -rf /tmp/oneCCL RUN git clone https://github.com/intel/xFasterTransformer.git SHELL ["/bin/bash", "-c"] -WORKDIR /root/xFasterTransformer +WORKDIR /home/user/xFasterTransformer RUN git checkout ${TAG} \ && export "LD_LIBRARY_PATH=/usr/local/mklml_lnx_2019.0.5.20190502/lib:$LD_LIBRARY_PATH" \ && export "PATH=/usr/bin/python3.8:$PATH" \ @@ -75,23 +75,23 @@ RUN git checkout ${TAG} \ && pip install --no-cache-dir dist/* RUN mkdir -p /usr/local/xft/lib \ - && cp /root/xFasterTransformer/build/libxfastertransformer.so /usr/local/xft/lib \ - && cp /root/xFasterTransformer/build/libxft_comm_helper.so /usr/local/xft/lib \ - && cp -r /root/xFasterTransformer/include /usr/local/xft/ \ + && cp /home/user/xFasterTransformer/build/libxfastertransformer.so /usr/local/xft/lib \ + && cp /home/user/xFasterTransformer/build/libxft_comm_helper.so /usr/local/xft/lib \ + && cp -r /home/user/xFasterTransformer/include /usr/local/xft/ \ && mkdir -p /usr/local/include/xft/ \ && ln -s /usr/local/xft/include /usr/local/include/xft/include RUN echo "export \$(python -c 'import xfastertransformer as xft; print(xft.get_env())')" >> ~/.bashrc -COPY comps /root/comps +COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /root/comps/llms/text-generation/vllm-xft/requirements.txt + pip install --no-cache-dir -r /home/user/comps/llms/text-generation/vllm-xft/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/root -RUN chmod +x /root/comps/llms/text-generation/vllm-xft/run.sh +RUN chmod +x /home/user/comps/llms/text-generation/vllm-xft/run.sh -WORKDIR /root/comps/llms/text-generation/vllm-xft/ +WORKDIR /home/user/comps/llms/text-generation/vllm-xft/ -ENTRYPOINT ["/root/comps/llms/text-generation/vllm-xft/run.sh"] +ENTRYPOINT ["/home/user/comps/llms/text-generation/vllm-xft/run.sh"] diff --git a/comps/lvms/README.md b/comps/lvms/README.md index 8f8237180..12648e99b 100644 --- a/comps/lvms/README.md +++ b/comps/lvms/README.md @@ -2,15 +2,15 @@ Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start LLaVA Service/Test +### 1.2 Start LLaVA Service/Test - Xeon CPU @@ -37,7 +37,7 @@ nohup python llava_server.py & python check_llava_server.py ``` -## 1.3 Start Image To Text Service/Test +### 1.3 Start Image To Text Service/Test ```bash cd .. @@ -47,11 +47,11 @@ python lvm.py python check_lvm.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Build Images +### 2.1 Build Images -### 2.1.1 LLaVA Server Image +#### 2.1.1 LLaVA Server Image - Xeon CPU @@ -67,16 +67,16 @@ cd ../.. docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile_hpu . ``` -### 2.1.2 LVM Service Image +#### 2.1.2 LVM Service Image ```bash cd ../.. docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/Dockerfile . 
``` -## 2.2 Start LLaVA and LVM Service +### 2.2 Start LLaVA and LVM Service -### 2.2.1 Start LLaVA server +#### 2.2.1 Start LLaVA server - Xeon @@ -90,7 +90,7 @@ docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$htt docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llava:latest ``` -### 2.2.2 Start LVM service +#### 2.2.2 Start LVM service ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -98,7 +98,7 @@ ip_address=$(hostname -I | awk '{print $1}') docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm:latest ``` -### 2.2.3 Test +#### 2.2.3 Test ```bash # Use curl/python diff --git a/comps/prompt_registry/mongo/README.md b/comps/prompt_registry/mongo/README.md index 0cbfd6f99..86baaaf27 100644 --- a/comps/prompt_registry/mongo/README.md +++ b/comps/prompt_registry/mongo/README.md @@ -21,16 +21,16 @@ Start document preparation microservice for Milvus with below command. python prompt.py ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ~/GenAIComps docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI 1. Run mongoDB image @@ -44,7 +44,7 @@ docker run -d -p 27017:27017 --name=mongo mongo:latest docker run -d --name="promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest ``` -## Invoke Microservice +### Invoke Microservice Once prompt_registry service is up and running, users can access the database by using API endpoint below. Each API serves different purpose and return appropriate response. 
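
Looking back at section 2.2.3 of the LVM README above, the hunk only shows the `# Use curl/python` comment; the actual request lies outside the diff context. As a rough illustration only, the sketch below base64-encodes an image and posts it to the LVM service. Port 9399 matches the `docker run` command shown above, but the `/v1/lvm` path and the `image`/`prompt` field names are assumptions not confirmed by this diff.

```python
# Illustrative only: base64-encode a local image and query the LVM service.
# ASSUMPTIONS (not shown in this diff): the service exposes /v1/lvm and
# accepts JSON with a base64 "image" field and a text "prompt" field.
import base64
import requests


def query_lvm(image_path: str, prompt: str, host: str = "http://localhost:9399") -> str:
    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")
    resp = requests.post(
        f"{host}/v1/lvm",  # assumed endpoint path
        json={"image": image_b64, "prompt": prompt},
        timeout=120,
    )
    resp.raise_for_status()
    return resp.text


# Example: query_lvm("cat.jpg", "What is in this image?")
```
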
diff --git a/comps/ragas/tgi/llm.py b/comps/ragas/tgi/llm.py index f31c66657..895705703 100644 --- a/comps/ragas/tgi/llm.py +++ b/comps/ragas/tgi/llm.py @@ -1,86 +1,86 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from datasets import Dataset -from langchain_community.embeddings import ( - HuggingFaceBgeEmbeddings, - HuggingFaceEmbeddings, - HuggingFaceHubEmbeddings, - HuggingFaceInstructEmbeddings, -) -from langchain_community.llms import HuggingFaceEndpoint -from langsmith import traceable -from ragas import evaluate -from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness - -from comps import GeneratedDoc, RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice - -tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") -EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") - - -@register_microservice( - name="opea_service@ragas_tgi_llm", - service_type=ServiceType.RAGAS, - endpoint="/v1/ragas", - host="0.0.0.0", - port=9050, - input_datatype=RAGASParams, - output_datatype=RAGASScores, -) -@traceable(run_type="llm") -def llm_generate(input: RAGASParams): - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - - # Create vectorstore - if tei_embedding_endpoint: - # create embeddings using TEI endpoint service - embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) - else: - # create embeddings using local embedding model - embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) - - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_new_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.streaming, - timeout=600, - ) - - data_collections = { - "question": input.questions, - "answer": input.answers, - "docs": input.docs, - "ground_truth": input.groundtruths, - } - dataset = Dataset.from_dict(data_collections) - - score = evaluate( - dataset, - metrics=[answer_relevancy, faithfulness, context_recall, context_precision], - llm=llm, - embeddings=embedder, - ) - df = score.to_pandas() - answer_relevancy_average = df["answer_relevancy"][:].mean() - faithfulness_average = df["faithfulness"][:].mean() - context_recall_average = df["context_recall"][:].mean() - context_precision_average = df["context_precision"][:].mean() - - return RAGASScores( - answer_relevancy=answer_relevancy_average, - faithfulness=faithfulness_average, - context_recallL=context_recall_average, - context_precision=context_precision_average, - ) - - -if __name__ == "__main__": - opea_microservices["opea_service@llm_tgi"].start() +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from datasets import Dataset +from langchain_community.embeddings import ( + HuggingFaceBgeEmbeddings, + HuggingFaceEmbeddings, + HuggingFaceHubEmbeddings, + HuggingFaceInstructEmbeddings, +) +from langchain_community.llms import HuggingFaceEndpoint +from langsmith import traceable +from ragas import evaluate +from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness + +from comps import GeneratedDoc, RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice + +tei_embedding_endpoint = os.getenv("TEI_ENDPOINT") +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + + +@register_microservice( + name="opea_service@ragas_tgi_llm", + 
service_type=ServiceType.RAGAS, + endpoint="/v1/ragas", + host="0.0.0.0", + port=9050, + input_datatype=RAGASParams, + output_datatype=RAGASScores, +) +@traceable(run_type="llm") +def llm_generate(input: RAGASParams): + llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") + + # Create vectorstore + if tei_embedding_endpoint: + # create embeddings using TEI endpoint service + embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint) + else: + # create embeddings using local embedding model + embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL) + + llm = HuggingFaceEndpoint( + endpoint_url=llm_endpoint, + max_new_tokens=input.max_new_tokens, + top_k=input.top_k, + top_p=input.top_p, + typical_p=input.typical_p, + temperature=input.temperature, + repetition_penalty=input.repetition_penalty, + streaming=input.streaming, + timeout=600, + ) + + data_collections = { + "question": input.questions, + "answer": input.answers, + "docs": input.docs, + "ground_truth": input.groundtruths, + } + dataset = Dataset.from_dict(data_collections) + + score = evaluate( + dataset, + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm=llm, + embeddings=embedder, + ) + df = score.to_pandas() + answer_relevancy_average = df["answer_relevancy"][:].mean() + faithfulness_average = df["faithfulness"][:].mean() + context_recall_average = df["context_recall"][:].mean() + context_precision_average = df["context_precision"][:].mean() + + return RAGASScores( + answer_relevancy=answer_relevancy_average, + faithfulness=faithfulness_average, + context_recallL=context_recall_average, + context_precision=context_precision_average, + ) + + +if __name__ == "__main__": + opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/ragas/tgi/requirements.txt b/comps/ragas/tgi/requirements.txt index 3fa49150e..2c8fad29f 100644 --- a/comps/ragas/tgi/requirements.txt +++ b/comps/ragas/tgi/requirements.txt @@ -1,14 +1,14 @@ -datasets -docarray[full] -fastapi -huggingface_hub -langchain==0.1.16 -langsmith -opentelemetry-api -opentelemetry-exporter-otlp -opentelemetry-sdk -prometheus-fastapi-instrumentator -ragas -shortuuid -transformers -uvicorn +datasets +docarray[full] +fastapi +huggingface_hub +langchain==0.1.16 +langsmith +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +ragas +shortuuid +transformers +uvicorn diff --git a/comps/reranks/README.md b/comps/reranks/README.md index ecec38272..9b5dc9042 100644 --- a/comps/reranks/README.md +++ b/comps/reranks/README.md @@ -2,17 +2,17 @@ The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, either a dense embedding model or a sparse lexical search index is often employed to retrieve relevant text documents based on the input. However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the Reranking microservice, you must first install the required python packages. 
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export HF_TOKEN=${your_hf_api_token} @@ -25,7 +25,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $RERANK_MODEL_ID --revision $revision --hf-api-token $HF_TOKEN ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -34,18 +34,18 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Start Reranking Service with Python Script +### 1.4 Start Reranking Service with Python Script ```bash export TEI_RERANKING_ENDPOINT="http://${your_ip}:6060" python reranking_tei_xeon.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) If you start an Reranking microservice with docker, the `docker_compose_reranking.yaml` file will automatically start a TEI service with docker. -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export HF_TOKEN=${your_hf_api_token} @@ -55,7 +55,7 @@ export LANGCHAIN_PROJECT="opea/reranks" export TEI_RERANKING_ENDPOINT="http://${your_ip}:8808" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -69,22 +69,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="reranking-tei-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN -e LANGCHAIN_API_KEY=$LANGCHAIN_API_KEY opea/reranking-tei:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd langchain/docker docker compose -f docker_compose_reranking.yaml up -d ``` -# 🚀3. Consume Reranking Service +## 🚀3. Consume Reranking Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:8000/v1/health_check \ @@ -92,7 +92,7 @@ curl http://localhost:8000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Reranking Service +### 3.2 Consume Reranking Service ```bash curl http://localhost:8000/v1/reranking \ diff --git a/comps/reranks/fastrag/README.md b/comps/reranks/fastrag/README.md index fac481860..025799d05 100644 --- a/comps/reranks/fastrag/README.md +++ b/comps/reranks/fastrag/README.md @@ -2,17 +2,17 @@ The Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, either a dense embedding model or a sparse lexical search index is often employed to retrieve relevant text documents based on the input. However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the Reranking microservice, you must first install the required python packages. 
-## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Install fastRAG +### 1.2 Install fastRAG ```bash git clone https://github.com/IntelLabs/fastRAG.git @@ -21,37 +21,37 @@ pip install . pip install .[intel] ``` -## 1.3 Start Reranking Service with Python Script +### 1.3 Start Reranking Service with Python Script ```bash export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" python local_reranking.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/reranking-fastrag:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/fastrag/docker/Dockerfile . ``` -## 2.3 Run Docker +### 2.3 Run Docker ```bash docker run -d --name="reranking-fastrag-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:latest ``` -# 🚀3. Consume Reranking Service +## 🚀3. Consume Reranking Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:8000/v1/health_check \ @@ -59,7 +59,7 @@ curl http://localhost:8000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Reranking Service +### 3.2 Consume Reranking Service ```bash curl http://localhost:8000/v1/reranking \ diff --git a/comps/reranks/langchain-mosec/README.md b/comps/reranks/langchain-mosec/README.md index 59592a4ba..cd7e36ce0 100644 --- a/comps/reranks/langchain-mosec/README.md +++ b/comps/reranks/langchain-mosec/README.md @@ -4,26 +4,26 @@ docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec-endpoint:latest -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . ``` -# build reranking microservice docker image +## build reranking microservice docker image ``` docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -t opea/reranking-langchain-mosec:latest -f comps/reranks/langchain-mosec/docker/Dockerfile . ``` -# launch Mosec endpoint docker container +## launch Mosec endpoint docker container ``` docker run -d --name="reranking-langchain-mosec-endpoint" -p 6001:8000 opea/reranking-langchain-mosec-endpoint:latest ``` -# launch embedding microservice docker container +## launch embedding microservice docker container ``` export MOSEC_RERANKING_ENDPOINT=http://127.0.0.1:6001 docker run -d --name="reranking-langchain-mosec-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 6000:8000 --ipc=host -e MOSEC_RERANKING_ENDPOINT=$MOSEC_RERANKING_ENDPOINT opea/reranking-langchain-mosec:latest ``` -# run client test +## run client test ``` curl http://localhost:6000/v1/reranking \ diff --git a/comps/retrievers/haystack/qdrant/README.md b/comps/retrievers/haystack/qdrant/README.md index 70d2845ed..a7653e8ce 100644 --- a/comps/retrievers/haystack/qdrant/README.md +++ b/comps/retrievers/haystack/qdrant/README.md @@ -1,54 +1,59 @@ # Retriever Microservice with Qdrant -# 🚀Start Microservice with Python +## 1. 
🚀Start Microservice with Python (Option 1) -## Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## Start Qdrant Server +### 1.2 Start Qdrant Server Please refer to this [readme](../../../vectorstores/langchain/qdrant/README.md). -## Setup Environment Variables +### 1.3 Setup Environment Variables ```bash -export http_proxy=${your_http_proxy} -export https_proxy=${your_https_proxy} export QDRANT_HOST=${your_qdrant_host_ip} export QDRANT_PORT=6333 export EMBED_DIMENSION=${your_embedding_dimension} export INDEX_NAME=${your_index_name} -export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} ``` -## Start Retriever Service +### 1.4 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python haystack/qdrant/retriever_qdrant.py ``` -# 🚀Start Microservice with Docker +## 2. 🚀Start Microservice with Docker (Option 2) -## Build Docker Image +### 2.1 Setup Environment Variables + +```bash +export QDRANT_HOST=${your_qdrant_host_ip} +export QDRANT_PORT=6333 +export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" +``` + +### 2.2 Build Docker Image ```bash cd ../../ docker build -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile . ``` -## Run Docker with CLI +### 2.3 Run Docker with CLI ```bash -docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint} -e QDRANT_HOST=${your_qdrant_host_ip} -e QDRANT_PORT=${your_qdrant_port} opea/retriever-qdrant:latest +docker run -d --name="retriever-qdrant-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e QDRANT_HOST=$QDRANT_HOST -e QDRANT_PORT=$QDRANT_PORT opea/retriever-qdrant:latest ``` -# 🚀3. Consume Retriever Service +## 🚀3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:7000/v1/health_check \ @@ -56,7 +61,7 @@ curl http://${your_ip}:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. 
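For example, a minimal sketch of that call (the `/v1/retrieval` route, port 7000, and the sample query are illustrative assumptions consistent with the port mapping and health-check endpoint shown above):

```bash
# Build a mock 768-dimensional embedding inline and send it to the retriever.
your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
curl http://${your_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```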
diff --git a/comps/retrievers/haystack/qdrant/retriever_qdrant.py b/comps/retrievers/haystack/qdrant/retriever_qdrant.py index d57232563..c2cc8735b 100644 --- a/comps/retrievers/haystack/qdrant/retriever_qdrant.py +++ b/comps/retrievers/haystack/qdrant/retriever_qdrant.py @@ -31,7 +31,7 @@ def initialize_qdrant_retriever() -> QdrantEmbeddingRetriever: @traceable(run_type="retriever") def retrieve(input: EmbedDoc) -> SearchedDoc: search_res = retriever.run(query_embedding=input.embedding)["documents"] - searched_docs = [TextDoc(text=r.content) for r in search_res] + searched_docs = [TextDoc(text=r.content) for r in search_res if r.content] result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) return result diff --git a/comps/retrievers/langchain/README.md b/comps/retrievers/langchain/README.md index 3de5cab21..300fbc099 100644 --- a/comps/retrievers/langchain/README.md +++ b/comps/retrievers/langchain/README.md @@ -6,14 +6,14 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# Retriever Microservice with Redis +## Retriever Microservice with Redis For details, please refer to this [readme](redis/README.md) -# Retriever Microservice with Milvus +## Retriever Microservice with Milvus For details, please refer to this [readme](milvus/README.md) -# Retriever Microservice with PGVector +## Retriever Microservice with PGVector For details, please refer to this [readme](pgvector/README.md) diff --git a/comps/retrievers/langchain/milvus/README.md b/comps/retrievers/langchain/milvus/README.md index d1bbc80da..6b9fe2045 100644 --- a/comps/retrievers/langchain/milvus/README.md +++ b/comps/retrievers/langchain/milvus/README.md @@ -1,18 +1,18 @@ # Retriever Microservice with Milvus -# 🚀Start Microservice with Python +## 🚀Start Microservice with Python -## Install Requirements +### Install Requirements ```bash pip install -r requirements.txt ``` -## Start Milvus Server +### Start Milvus Server Please refer to this [readme](../../../vectorstores/langchain/milvus/README.md). -## Setup Environment Variables +### Setup Environment Variables ```bash export no_proxy=${your_no_proxy} @@ -24,31 +24,31 @@ export COLLECTION_NAME=${your_collection_name} export MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} ``` -## Start Retriever Service +### Start Retriever Service ```bash export MOSEC_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python langchain/retriever_redis.py ``` -# 🚀Start Microservice with Docker +## 🚀Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../ docker build -t opea/retriever-milvus:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/milvus/docker/Dockerfile . ``` -## Run Docker with CLI +### Run Docker with CLI ```bash docker run -d --name="retriever-milvus-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e MOSEC_EMBEDDING_ENDPOINT=${your_emdding_endpoint} -e MILVUS=${your_milvus_host_ip} opea/retriever-milvus:latest ``` -# 🚀3. Consume Retriever Service +## 🚀3. 
Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://${your_ip}:7000/v1/health_check \ @@ -56,7 +56,7 @@ curl http://${your_ip}:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/langchain/milvus/config.py b/comps/retrievers/langchain/milvus/config.py index dcbe167b5..b7e5ec420 100644 --- a/comps/retrievers/langchain/milvus/config.py +++ b/comps/retrievers/langchain/milvus/config.py @@ -16,4 +16,4 @@ MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "") os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT os.environ["OPENAI_API_KEY"] = "Dummy key" -MODEL_ID = "/root/bce-embedding-base_v1" +MODEL_ID = "/home/user/bce-embedding-base_v1" diff --git a/comps/retrievers/langchain/pgvector/README.md b/comps/retrievers/langchain/pgvector/README.md index a0febb7fb..4914e8cb4 100644 --- a/comps/retrievers/langchain/pgvector/README.md +++ b/comps/retrievers/langchain/pgvector/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export LANGCHAIN_TRACING_V2=true @@ -28,7 +28,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -37,7 +37,7 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Setup VectorDB Service +### 1.4 Setup VectorDB Service You need to setup your own VectorDB service (PGvector in this example), and ingest your knowledge documents into the vector database. @@ -52,16 +52,16 @@ export POSTGRES_DB=vectordb docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v ./init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 ``` -## 1.5 Start Retriever Service +### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python retriever_pgvector.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. 
Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" @@ -73,7 +73,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd comps/retrievers/langchain/pgvector/docker @@ -87,22 +87,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd comps/retrievers/langchain/pgvector/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# 🚀3. Consume Retriever Service +## 🚀3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -110,7 +110,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/langchain/redis/README.md b/comps/retrievers/langchain/redis/README.md index 5330e7870..17c37ed4c 100644 --- a/comps/retrievers/langchain/redis/README.md +++ b/comps/retrievers/langchain/redis/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Start TEI Service +### 1.2 Start TEI Service ```bash export LANGCHAIN_TRACING_V2=true @@ -28,7 +28,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## 1.3 Verify the TEI Service +### 1.3 Verify the TEI Service ```bash curl 127.0.0.1:6060/rerank \ @@ -37,7 +37,7 @@ curl 127.0.0.1:6060/rerank \ -H 'Content-Type: application/json' ``` -## 1.4 Setup VectorDB Service +### 1.4 Setup VectorDB Service You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. @@ -48,16 +48,16 @@ Remember to ingest data into it manually. docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 ``` -## 1.5 Start Retriever Service +### 1.5 Start Retriever Service ```bash export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6060" python langchain/retriever_redis.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. 
Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export RETRIEVE_MODEL_ID="BAAI/bge-base-en-v1.5" @@ -69,7 +69,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -83,22 +83,22 @@ To start a docker container, you have two options: You can choose one as needed. -## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/retriever-redis:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd langchain/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# 🚀3. Consume Retriever Service +## 🚀3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -106,7 +106,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Embedding Service +### 3.2 Consume Embedding Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/retrievers/llamaindex/README.md b/comps/retrievers/llamaindex/README.md index 3f6db8899..28203160c 100644 --- a/comps/retrievers/llamaindex/README.md +++ b/comps/retrievers/llamaindex/README.md @@ -6,17 +6,17 @@ The service primarily utilizes similarity measures in vector space to rapidly re Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. -# 🚀1. Start Microservice with Python (Option 1) +## 🚀1. Start Microservice with Python (Option 1) To start the retriever microservice, you must first install the required python packages. -## 1.1 Install Requirements +### 1.1 Install Requirements ```bash pip install -r requirements.txt ``` -## 1.2 Setup VectorDB Service +### 1.2 Setup VectorDB Service You need to setup your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database. @@ -29,15 +29,15 @@ docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-sta And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice. -## 1.3 Start Retriever Service +### 1.3 Start Retriever Service ```bash python retriever_redis.py ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) -## 2.1 Setup Environment Variables +### 2.1 Setup Environment Variables ```bash export REDIS_URL="redis://${your_ip}:6379" @@ -47,7 +47,7 @@ export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/retrievers" ``` -## 2.2 Build Docker Image +### 2.2 Build Docker Image ```bash cd ../../ @@ -61,22 +61,22 @@ To start a docker container, you have two options: You can choose one as needed. 
-## 2.3 Run Docker with CLI (Option A) +### 2.3 Run Docker with CLI (Option A) ```bash docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis:latest ``` -## 2.4 Run Docker with Docker Compose (Option B) +### 2.4 Run Docker with Docker Compose (Option B) ```bash cd llamaindex/docker docker compose -f docker_compose_retriever.yaml up -d ``` -# 🚀3. Consume Retriever Service +## 🚀3. Consume Retriever Service -## 3.1 Check Service Status +### 3.1 Check Service Status ```bash curl http://localhost:7000/v1/health_check \ @@ -84,7 +84,7 @@ curl http://localhost:7000/v1/health_check \ -H 'Content-Type: application/json' ``` -## 3.2 Consume Retriever Service +### 3.2 Consume Retriever Service To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/comps/tts/README.md b/comps/tts/README.md index 52ab6c92b..5045d5870 100644 --- a/comps/tts/README.md +++ b/comps/tts/README.md @@ -30,13 +30,13 @@ python tts.py curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' ``` -# 🚀2. Start Microservice with Docker (Option 2) +## 🚀2. Start Microservice with Docker (Option 2) Alternatively, you can start the TTS microservice with Docker. -## 2.1 Build Images +### 2.1 Build Images -### 2.1.1 Whisper Server Image +#### 2.1.1 Whisper Server Image - Xeon CPU @@ -52,15 +52,15 @@ cd ../.. docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile_hpu . ``` -### 2.1.2 TTS Service Image +#### 2.1.2 TTS Service Image ```bash docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/Dockerfile . ``` -## 2.2 Start SpeechT5 and TTS Service +### 2.2 Start SpeechT5 and TTS Service -### 2.2.1 Start SpeechT5 Server +#### 2.2.1 Start SpeechT5 Server - Xeon @@ -74,7 +74,7 @@ docker run -p 7055:7055 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$htt docker run -p 7055:7055 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/speecht5-gaudi:latest ``` -### 2.2.2 Start TTS service +#### 2.2.2 Start TTS service ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -82,7 +82,7 @@ ip_address=$(hostname -I | awk '{print $1}') docker run -p 9088:9088 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TTS_ENDPOINT=http://$ip_address:7055 opea/tts:latest ``` -### 2.2.3 Test +#### 2.2.3 Test ```bash # curl diff --git a/comps/vectorstores/README.md b/comps/vectorstores/README.md index 6dd35ee0b..bdccf5bc0 100644 --- a/comps/vectorstores/README.md +++ b/comps/vectorstores/README.md @@ -2,18 +2,18 @@ The Vectorstores Microservice provides convenient way to start various vector database servers. 
-# Vectorstores Microservice with Redis +## Vectorstores Microservice with Redis For details, please refer to this [readme](langchain/redis/README.md) -# Vectorstores Microservice with Qdrant +## Vectorstores Microservice with Qdrant For details, please refer to this [readme](langchain/qdrant/README.md) -# Vectorstores Microservice with PGVector +## Vectorstores Microservice with PGVector For details, please refer to this [readme](langchain/pgvector/README.md) -# Vectorstores Microservice with Pinecone +## Vectorstores Microservice with Pinecone For details, please refer to this [readme](langchain/pinecone/README.md) diff --git a/comps/vectorstores/langchain/chroma/README.md b/comps/vectorstores/langchain/chroma/README.md index d7399b8fb..155593010 100644 --- a/comps/vectorstores/langchain/chroma/README.md +++ b/comps/vectorstores/langchain/chroma/README.md @@ -2,9 +2,9 @@ Chroma is an AI-native open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0. Chroma runs in various modes; we can deploy it as a server running on your local machine or in the cloud. -# Getting Started +## Getting Started -## Start Chroma Server +### Start Chroma Server To start the Chroma server on your local machine, follow these steps: @@ -14,11 +14,11 @@ cd chroma docker compose up -d ``` -## Start Log Output +### Start Log Output Upon starting the server, you should see log outputs similar to the following: -```log +``` server-1 | Starting 'uvicorn chromadb.app:app' with args: --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30 server-1 | INFO: [02-08-2024 07:03:19] Set chroma_server_nofile to 65536 server-1 | INFO: [02-08-2024 07:03:19] Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information. diff --git a/comps/vectorstores/langchain/milvus/README.md b/comps/vectorstores/langchain/milvus/README.md index d02508351..b0f19caf4 100644 --- a/comps/vectorstores/langchain/milvus/README.md +++ b/comps/vectorstores/langchain/milvus/README.md @@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by adjusting c Customize the path used to store data; the default is /volumes ```bash -export DOCKER_VOLUME_DIRECTORY=./your_path +export DOCKER_VOLUME_DIRECTORY=${your_path} ``` ## 2.
Run Milvus service diff --git a/comps/vectorstores/langchain/milvus/docker-compose.yml b/comps/vectorstores/langchain/milvus/docker-compose.yml index 125463752..d6c39d0f0 100644 --- a/comps/vectorstores/langchain/milvus/docker-compose.yml +++ b/comps/vectorstores/langchain/milvus/docker-compose.yml @@ -7,10 +7,6 @@ services: etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 - deploy: - resources: - limits: - cpus: "0.5" environment: - ETCD_AUTO_COMPACTION_MODE=revision - ETCD_AUTO_COMPACTION_RETENTION=1000 @@ -28,10 +24,6 @@ services: minio: container_name: milvus-minio image: minio/minio:RELEASE.2023-03-20T20-16-18Z - deploy: - resources: - limits: - cpus: "0.5" environment: MINIO_ACCESS_KEY: minioadmin MINIO_SECRET_KEY: minioadmin @@ -49,31 +41,25 @@ services: standalone: container_name: milvus-standalone - image: milvusdb/milvus:latest - deploy: - resources: - limits: - cpus: "8" - memory: 32G + image: milvusdb/milvus:v2.4.6 command: ["milvus", "run", "standalone"] security_opt: - seccomp:unconfined environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 - DNNL_ENABLE: 0 volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml + - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"] + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] interval: 30s start_period: 90s timeout: 20s retries: 3 ports: - "19530:19530" - - "9092:9092" + - "9091:9091" depends_on: - "etcd" - "minio" diff --git a/comps/vectorstores/langchain/milvus/milvus.yaml b/comps/vectorstores/langchain/milvus/milvus.yaml index de29dfe3d..b9f22cb3d 100644 --- a/comps/vectorstores/langchain/milvus/milvus.yaml +++ b/comps/vectorstores/langchain/milvus/milvus.yaml @@ -105,7 +105,9 @@ minio: region: # Specify minio storage system location region useVirtualHost: false # Whether use virtual host mode for bucket requestTimeoutMs: 10000 # minio timeout for request time in milliseconds - listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout + # The maximum number of objects requested per batch in minio ListObjects rpc, + # 0 means using oss client by default, decrease these configuration if ListObjects timeout + listObjectsMaxKeys: 0 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. # You can change your mq by setting mq.type field. @@ -120,6 +122,10 @@ mq: pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes mqBufSize: 16 # MQ client consumer buffer length + dispatcher: + mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge + targetBufSize: 16 # the length of channel buffer for targe + maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. 
pulsar: @@ -182,7 +188,7 @@ natsmq: # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests rootCoord: dmlChannelNum: 16 # The number of dml channels created at system startup - maxPartitionNum: 4096 # Maximum number of partitions in a collection + maxPartitionNum: 1024 # Maximum number of partitions in a collection minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed enableActiveStandby: false maxDatabaseNum: 64 # Maximum number of database @@ -200,7 +206,6 @@ rootCoord: proxy: timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick healthCheckTimeout: 3000 # ms, the interval that to do component healthy check - healthCheckTimetout: 3000 # ms, the interval that to do component healthy check msgStream: timeTick: bufSize: 512 @@ -217,6 +222,7 @@ proxy: ginLogging: true ginLogSkipPaths: / # skip url path for gin log maxTaskNum: 1024 # max task number of proxy task queue + mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection accessLog: enable: false # if use access log minioEnable: false # if upload sealed access log file to minio @@ -244,7 +250,7 @@ proxy: port: # high-level restful api acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64 enablePprof: true # Whether to enable pprof middleware on the metrics port - ip: 0.0.0.0 # if not specified, use the first unicastable address + ip: # if not specified, use the first unicastable address port: 19530 internalPort: 19529 grpc: @@ -282,6 +288,8 @@ queryCoord: channelTaskTimeout: 60000 # 1 minute segmentTaskTimeout: 120000 # 2 minute distPullInterval: 500 + collectionObserverInterval: 200 + checkExecutedFlagInterval: 100 heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds @@ -298,6 +306,7 @@ queryCoord: checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session gracefulStopTimeout: 5 # seconds. force stop node without graceful stop enableStoppingBalance: true # whether enable stopping balance + channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds ip: # if not specified, use the first unicastable address port: 19531 @@ -320,6 +329,7 @@ queryNode: nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist memExpansionRate: 1.15 # extra memory needed by building interim index buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 @@ -327,17 +337,22 @@ queryNode: enabled: true memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` - # options: async, sync, off. + # options: async, sync, disable. 
# Specifies the necessity for warming up the chunk cache. - # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the + # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage; - # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query. - warmup: async + # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query. + warmup: disable mmap: mmapEnabled: false # Enable mmap for loading data - mmapEnabled: false # Enable mmap for loading data - lazyloadEnabled: false # Enable lazyload for loading data + lazyload: + enabled: false # Enable lazyload for loading data + waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve + requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default + requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default + maxRetryTimes: 1 # max retry times for lazy load, 1 by default + maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default grouping: enabled: true maxNQ: 1000 @@ -403,9 +418,11 @@ indexNode: dataCoord: channel: watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer. + balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch + legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing balanceInterval: 360 # The interval with which the channel manager check dml channel balance status - checkInterval: 10 # The interval in seconds with which the channel manager advances channel states + checkInterval: 1 # The interval in seconds with which the channel manager advances channel states notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds). segment: maxSize: 1024 # Maximum size of a segment in MB @@ -485,7 +502,7 @@ dataNode: coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds segment: insertBufSize: 16777216 # Max buffer size to flush for a single segment. - deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB + deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB syncPeriod: 600 # The period to sync segments if buffer is not empty. memory: forceSyncEnable: true # Set true to force sync if memory usage is too high @@ -536,8 +553,6 @@ log: grpc: log: level: WARNING - serverMaxSendSize: 536870912 - serverMaxRecvSize: 268435456 gracefulStopTimeout: 10 # second, time to wait graceful stop finish client: compressionEnabled: false @@ -550,8 +565,6 @@ grpc: minResetInterval: 1000 maxCancelError: 32 minSessionCheckInterval: 200 - clientMaxSendSize: 268435456 - clientMaxRecvSize: 536870912 # Configure the proxy tls enable. 
tls: @@ -560,18 +573,6 @@ tls: caPemPath: configs/cert/ca.pem common: - chanNamePrefix: - cluster: by-dev - rootCoordTimeTick: rootcoord-timetick - rootCoordStatistics: rootcoord-statistics - rootCoordDml: rootcoord-dml - replicateMsg: replicate-msg - queryTimeTick: queryTimeTick - dataCoordTimeTick: datacoord-timetick-channel - dataCoordSegmentInfo: segment-info-channel - subNamePrefix: - dataCoordSubNamePrefix: dataCoord - dataNodeSubNamePrefix: dataNode defaultPartitionName: _default # default partition name for a collection defaultIndexName: _default_idx # default index name entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire @@ -617,7 +618,7 @@ common: ttMsgEnabled: true # Whether the instance disable sending ts messages traceLogMode: 0 # trace request info bloomFilterSize: 100000 # bloom filter initial size - maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter + maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: @@ -631,7 +632,7 @@ common: # 4. DQL result rate protection; # If necessary, you can also manually force to deny RW requests. quotaAndLimits: - enabled: false # `true` to enable quota and limits, `false` to disable. + enabled: true # `true` to enable quota and limits, `false` to disable. # quotaCenterCollectInterval is the time interval that quotaCenter # collects metrics from Proxies, Query cluster and Data cluster. # seconds, (0 ~ 65536) @@ -649,10 +650,10 @@ quotaAndLimits: db: max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex flushRate: - enabled: false + enabled: true max: -1 # qps, default no limit, rate for flush collection: - max: -1 # qps, default no limit, rate for flush at collection level. + max: 0.1 # qps, default no limit, rate for flush at collection level. db: max: -1 # qps of db level, default no limit, rate for flush compactionRate: @@ -719,6 +720,7 @@ quotaAndLimits: limits: maxCollectionNum: 65536 maxCollectionNumPerDB: 65536 + maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes limitWriting: # forceDeny false means dml requests are allowed (except for some @@ -786,8 +788,8 @@ quotaAndLimits: trace: # trace exporter type, default is stdout, - # optional values: ['stdout', 'jaeger', 'otlp'] - exporter: stdout + # optional values: ['noop','stdout', 'jaeger', 'otlp'] + exporter: noop # fraction of traceID based sampler, # optional values: [0, 1] # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. diff --git a/comps/web_retrievers/langchain/chroma/README.md b/comps/web_retrievers/langchain/chroma/README.md index 47a308837..563a04965 100644 --- a/comps/web_retrievers/langchain/chroma/README.md +++ b/comps/web_retrievers/langchain/chroma/README.md @@ -2,16 +2,16 @@ The Web Retriever Microservice is designed to efficiently search web pages relevant to the prompt, save them into the VectorDB, and retrieve the matched documents with the highest similarity. The retrieved documents will be used as context in the prompt to LLMs. Different from the normal RAG process, a web retriever can leverage advanced search engines for more diverse demands, such as real-time news, verifiable sources, and diverse sources. 
-# Start Microservice with Docker +## Start Microservice with Docker -## Build Docker Image +### Build Docker Image ```bash cd ../../ docker build -t opea/web-retriever-chroma:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/langchain/chroma/docker/Dockerfile . ``` -## Start TEI Service +### Start TEI Service ```bash model=BAAI/bge-base-en-v1.5 @@ -20,7 +20,7 @@ volume=$PWD/data docker run -d -p 6060:80 -v $volume:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision ``` -## Start Web Retriever Service +### Start Web Retriever Service ```bash # set TEI endpoint @@ -35,7 +35,7 @@ export GOOGLE_CSE_ID=xxx docker run -d --name="web-retriever-chroma-server" -p 7078:7077 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/web-retriever-chroma:latest ``` -## Consume Web Retriever Service +### Consume Web Retriever Service To consume the Web Retriever Microservice, you can generate a mock embedding vector of length 768 with Python. diff --git a/tests/test_agent_langchain.sh b/tests/test_agent_langchain.sh index ad9aae145..0c4db337e 100644 --- a/tests/test_agent_langchain.sh +++ b/tests/test_agent_langchain.sh @@ -12,8 +12,13 @@ function build_docker_images() { echo "Building the docker images" cd $WORKPATH echo $WORKPATH - docker build -t opea/comps-agent-langchain:latest -f comps/agent/langchain/docker/Dockerfile . - + docker build --no-cache -t opea/comps-agent-langchain:comps -f comps/agent/langchain/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/comps-agent-langchain built fail" + exit 1 + else + echo "opea/comps-agent-langchain built successful" + fi } function start_service() { @@ -24,19 +29,19 @@ function start_service() { #single card echo "start tgi gaudi service" - docker run -d --runtime=habana --name "comps-tgi-gaudi-service" -p 8080:80 -v ./data:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8092 + docker run -d --runtime=habana --name "test-comps-tgi-gaudi-service" -p 8080:80 -v ./data:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8092 sleep 5s - docker logs comps-tgi-gaudi-service + docker logs test-comps-tgi-gaudi-service echo "Starting agent microservice" - docker run -d --runtime=runc --name="comps-langchain-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:latest + docker run -d --runtime=runc --name="test-comps-langchain-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e 
llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps sleep 5s - docker logs comps-langchain-agent-endpoint + docker logs test-comps-langchain-agent-endpoint echo "Waiting tgi gaudi ready" n=0 until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs comps-tgi-gaudi-service > ${WORKPATH}/tests/tgi-gaudi-service.log + docker logs test-comps-tgi-gaudi-service > ${WORKPATH}/tests/tgi-gaudi-service.log n=$((n+1)) if grep -q Connected ${WORKPATH}/tests/tgi-gaudi-service.log; then break @@ -44,7 +49,7 @@ function start_service() { sleep 5s done sleep 5s - docker logs comps-tgi-gaudi-service + docker logs test-comps-tgi-gaudi-service echo "Service started successfully" } @@ -64,7 +69,7 @@ function validate() { function validate_microservice() { echo "Testing agent service" - local CONTENT=$(curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + local CONTENT=$(curl http://${ip_address}:5042/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ "query": "What is Intel OPEA project?" }' | tee ${LOG_PATH}/test-agent-langchain.log) local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent-langchain") @@ -72,17 +77,17 @@ function validate_microservice() { local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then - docker logs comps-tgi-gaudi-service &> ${LOG_PATH}/test-comps-tgi-gaudi-service.log - docker logs comps-langchain-agent-endpoint &> ${LOG_PATH}/test-comps-langchain-agent-endpoint.log + docker logs test-comps-tgi-gaudi-service &> ${LOG_PATH}/test-comps-tgi-gaudi-service.log + docker logs test-comps-langchain-agent-endpoint &> ${LOG_PATH}/test-comps-langchain-agent-endpoint.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=comps-tgi-gaudi-service") + cid=$(docker ps -aq --filter "name=test-comps-tgi-gaudi-service") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi - cid=$(docker ps -aq --filter "name=comps-langchain-agent-endpoint") + cid=$(docker ps -aq --filter "name=test-comps-langchain-agent-endpoint") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi echo "Docker containers stopped successfully" diff --git a/tests/test_asr.sh b/tests/test_asr.sh index 1cfc4a093..7334a18f1 100644 --- a/tests/test_asr.sh +++ b/tests/test_asr.sh @@ -10,8 +10,20 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/whisper:comps -f comps/asr/whisper/Dockerfile . - docker build -t opea/asr:comps -f comps/asr/Dockerfile . + docker build --no-cache -t opea/whisper:comps -f comps/asr/whisper/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/whisper built fail" + exit 1 + else + echo "opea/whisper built successful" + fi + docker build --no-cache -t opea/asr:comps -f comps/asr/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/asr built fail" + exit 1 + else + echo "opea/asr built successful" + fi } function start_service() { diff --git a/tests/test_chathistory_mongo.sh b/tests/test_chathistory_mongo.sh index 005a1a6ef..c821fc05e 100755 --- a/tests/test_chathistory_mongo.sh +++ b/tests/test_chathistory_mongo.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -18,6 +18,12 @@ function build_docker_images() { docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest docker build --no-cache -t opea/chathistory-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/chathistory-mongo-server built fail" + exit 1 + else + echo "opea/chathistory-mongo-server built successful" + fi } function start_service() { @@ -42,6 +48,7 @@ function validate_microservice() { echo "Result correct." else echo "Result wrong." + docker logs test-comps-chathistory-mongo-server exit 1 fi diff --git a/tests/test_dataprep_milvus.sh b/tests/test_dataprep_milvus.sh index e379882d5..727ef81e1 100644 --- a/tests/test_dataprep_milvus.sh +++ b/tests/test_dataprep_milvus.sh @@ -13,8 +13,20 @@ function build_docker_images() { echo $(pwd) # langchain mosec embedding image docker build --no-cache -t opea/langchain-mosec:comps --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/langchain-mosec built fail" + exit 1 + else + echo "opea/langchain-mosec built successful" + fi # dataprep milvus image docker build --no-cache -t opea/dataprep-milvus:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/milvus/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-milvus built fail" + exit 1 + else + echo "opea/dataprep-milvus built successful" + fi } function start_service() { diff --git a/tests/test_dataprep_pgvector.sh b/tests/test_dataprep_pgvector.sh index c4c892ee9..ca5649fe9 100755 --- a/tests/test_dataprep_pgvector.sh +++ b/tests/test_dataprep_pgvector.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -16,7 +16,13 @@ function build_docker_images() { docker pull pgvector/pgvector:0.7.0-pg16 # build dataprep image for pgvector - docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . + docker build --no-cache -t opea/dataprep-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/dataprep-pgvector built fail" + exit 1 + else + echo "opea/dataprep-pgvector built successful" + fi } function start_service() { @@ -24,12 +30,11 @@ function start_service() { export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - - docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 + docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 sleep 10s - docker run -d --name="dataprep-pgvector" -p ${dataprep_service_port}:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest + docker run -d --name="test-comps-dataprep-pgvector" -p ${dataprep_service_port}:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:comps sleep 3m } @@ -50,12 +55,12 @@ function validate_microservice() { echo "[ dataprep ] Content is as expected." else echo "[ dataprep ] Content does not match the expected result: $CONTENT" - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log exit 1 fi else echo "[ dataprep ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep.log exit 1 fi @@ -70,12 +75,12 @@ function validate_microservice() { echo "[ dataprep - file ] Content is as expected." else echo "[ dataprep - file ] Content does not match the expected result: $CONTENT" - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log exit 1 fi else echo "[ dataprep - file ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_file.log exit 1 fi @@ -84,19 +89,19 @@ function validate_microservice() { HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"file_path": "dataprep_file.txt"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ dataprep - del ] HTTP status is 200." - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log else echo "[ dataprep - del ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log + docker logs test-comps-dataprep-pgvector >> ${LOG_PATH}/dataprep_del.log exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-postgres*") + cid=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=dataprep-pgvector*") + cid=$(docker ps -aq --filter "name=test-comps-dataprep-pgvector*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_dataprep_pinecone.sh b/tests/test_dataprep_pinecone.sh index a92a86c64..4d0f64fac 100755 --- a/tests/test_dataprep_pinecone.sh +++ b/tests/test_dataprep_pinecone.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,7 +10,13 @@ function build_docker_images() { cd $WORKPATH # build dataprep image for pinecone - docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pinecone/docker/Dockerfile . + docker build --no-cache -t opea/dataprep-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pinecone/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-pinecone built fail" + exit 1 + else + echo "opea/dataprep-pinecone built successful" + fi } function start_service() { @@ -18,27 +24,40 @@ function start_service() { export PINECONE_INDEX_NAME="test-index" export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN - docker run -d --name="dataprep-pinecone" -p 6007:6007 -p 6008:6008 -p 6009:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:latest + docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 -p 5040:6008 -p 5041:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:comps sleep 1m } function validate_microservice() { - URL="http://$ip_address:6007/v1/dataprep" + URL="http://$ip_address:5039/v1/dataprep" echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > ./dataprep_file.txt - curl --noproxy $ip_address --location --request POST \ - --form 'files=@./dataprep_file.txt' $URL - - DELETE_URL="http://$ip_address:6009/v1/dataprep/delete_file" - curl --noproxy $ip_address --location --request POST \ - -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL + result=$(curl --noproxy $ip_address --location --request POST \ + --form 'files=@./dataprep_file.txt' $URL) + if [[ $result == *"200"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-dataprep-pinecone + exit 1 + fi + DELETE_URL="http://$ip_address:5041/v1/dataprep/delete_file" + result=$(curl --noproxy $ip_address --location --request POST \ + -d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL) + if [[ $result == *"true"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-dataprep-pinecone + exit 1 + fi } function stop_docker() { cid=$(docker ps -aq --filter "name=vectorstore-pinecone*") if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - cid=$(docker ps -aq --filter "name=dataprep-pinecone*") + cid=$(docker ps -aq --filter "name=test-comps-dataprep-pinecone*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_dataprep_qdrant_langchain.sh b/tests/test_dataprep_qdrant_langchain.sh new file mode 100644 index 000000000..632d7e06f --- /dev/null +++ b/tests/test_dataprep_qdrant_langchain.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + + # dataprep qdrant image + docker build --no-cache -t opea/dataprep-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-qdrant built fail" + exit 1 + else + echo "opea/dataprep-qdrant built successful" + fi +} + +function start_service() { + QDRANT_PORT=6360 + docker run -d --name="test-comps-dataprep-qdrant-langchain" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $QDRANT_PORT:6333 -p 6334:6334 --ipc=host qdrant/qdrant + tei_embedding_port=6361 + model="BAAI/bge-base-en-v1.5" + docker run -d --name="test-comps-dataprep-qdrant-langchain-tei" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p $tei_embedding_port:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model + dataprep_service_port=6362 + TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_embedding_port}" + COLLECTION_NAME="rag-qdrant" + docker run -d --name="test-comps-dataprep-qdrant-langchain-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e QDRANT_HOST=$ip_address -e QDRANT_PORT=$QDRANT_PORT -e COLLECTION_NAME=$COLLECTION_NAME -e TEI_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -p ${dataprep_service_port}:6007 --ipc=host opea/dataprep-qdrant:comps + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-qdrant-langchain + docker logs test-comps-dataprep-qdrant-langchain-tei + docker logs test-comps-dataprep-qdrant-langchain-server + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-qdrant-langchain + docker logs test-comps-dataprep-qdrant-langchain-tei + docker logs test-comps-dataprep-qdrant-langchain-server + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservice() { + # tei for embedding service + validate_services \ + "${ip_address}:6361/embed" \ + "[[" \ + "tei_embedding" \ + "test-comps-dataprep-qdrant-langchain-tei" \ + '{"inputs":"What is Deep Learning?"}' + + # dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "test-comps-dataprep-qdrant-langchain-server" + + # dataprep upload link + validate_services \ + "${ip_address}:6362/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_link" \ + "test-comps-dataprep-qdrant-langchain-server" + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-dataprep-qdrant-langchain*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + + rm $LOG_PATH/dataprep_file.txt +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_dataprep_redis_langchain.sh b/tests/test_dataprep_redis_langchain.sh index 1a4b06ef7..82c25b9d3 100644 --- a/tests/test_dataprep_redis_langchain.sh +++ b/tests/test_dataprep_redis_langchain.sh @@ -12,6 +12,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/dataprep-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-redis built fail" + exit 1 + else + echo "opea/dataprep-redis built successful" + fi } function start_service() { @@ -33,16 +39,17 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - upload - file" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." 
@@ -54,16 +61,18 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - upload - link" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *"Data preparation succeeded"* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_upload_link.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -75,16 +84,17 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - get" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi if [[ "$RESPONSE_BODY" != *'{"name":'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_file.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." @@ -96,11 +106,11 @@ function validate_microservice() { HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="dataprep - del" - docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -108,6 +118,7 @@ function validate_microservice() { # check response body if [[ "$RESPONSE_BODY" != *'{"status":true}'* ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-dataprep-redis-langchain-server >> ${LOG_PATH}/dataprep_del.log exit 1 else echo "[ $SERVICE_NAME ] Content is as expected." diff --git a/tests/test_dataprep_redis_langchain_ray.sh b/tests/test_dataprep_redis_langchain_ray.sh index f377d9ef5..9b1303d90 100644 --- a/tests/test_dataprep_redis_langchain_ray.sh +++ b/tests/test_dataprep_redis_langchain_ray.sh @@ -11,20 +11,25 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Building the docker images" cd $WORKPATH - docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . 
- echo "Docker image built successfully" + docker build --no-cache -t opea/dataprep-on-ray-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-on-ray-redis built fail" + exit 1 + else + echo "opea/dataprep-on-ray-redis built successful" + fi } function start_service() { echo "Starting redis microservice" # redis endpoint - docker run -d --name="test-comps-dataprep-redis-ray" --runtime=runc -p 6382:6379 -p 8004:8001 redis/redis-stack:7.2.0-v9 + docker run -d --name="test-comps-dataprep-redis-ray" --runtime=runc -p 5038:6379 -p 8004:8001 redis/redis-stack:7.2.0-v9 # dataprep-redis-server endpoint - export REDIS_URL="redis://${ip_address}:6382" + export REDIS_URL="redis://${ip_address}:5038" export INDEX_NAME="rag-redis" echo "Starting dataprep-redis-server" - docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 6009:6007 -p 6010:6008 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:latest + docker run -d --name="test-comps-dataprep-redis-ray-server" --runtime=runc -p 5037:6007 -p 6010:6008 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e TIMEOUT_SECONDS=600 opea/dataprep-on-ray-redis:comps sleep 10 echo "Service started successfully" @@ -33,7 +38,7 @@ function start_service() { function validate_microservice() { cd $LOG_PATH - dataprep_service_port=6009 + dataprep_service_port=5037 export URL="http://${ip_address}:$dataprep_service_port/v1/dataprep" echo "Starting validating the microservice" diff --git a/tests/test_dataprep_redis_llama_index.sh b/tests/test_dataprep_redis_llama_index.sh index db959d821..a7d20160b 100644 --- a/tests/test_dataprep_redis_llama_index.sh +++ b/tests/test_dataprep_redis_llama_index.sh @@ -12,6 +12,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/dataprep-redis-llama-index:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/llama_index/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/dataprep-redis-llama-index built fail" + exit 1 + else + echo "opea/dataprep-redis-llama-index built successful" + fi } function start_service() { diff --git a/tests/test_embeddings_langchain-mosec.sh b/tests/test_embeddings_langchain-mosec.sh index a2f9aeb2a..95858118b 100644 --- a/tests/test_embeddings_langchain-mosec.sh +++ b/tests/test_embeddings_langchain-mosec.sh @@ -11,12 +11,24 @@ function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec-endpoint:comps -f comps/embeddings/langchain-mosec/mosec-docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding-langchain-mosec-endpoint built fail" + exit 1 + else + echo "opea/embedding-langchain-mosec-endpoint built successful" + fi } function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/embedding-langchain-mosec:comps -f comps/embeddings/langchain-mosec/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/embedding-langchain-mosec built fail" + exit 1 + else + echo "opea/embedding-langchain-mosec built successful" + fi } function start_service() { diff --git a/tests/test_embeddings_langchain.sh b/tests/test_embeddings_langchain.sh index 65f29b023..6c6241226 100644 --- a/tests/test_embeddings_langchain.sh +++ b/tests/test_embeddings_langchain.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --no-cache -t opea/embedding-tei:comps -f comps/embeddings/langchain/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/embedding-tei built fail" + exit 1 + else + echo "opea/embedding-tei built successful" + fi } function start_service() { @@ -27,10 +33,18 @@ function start_service() { function validate_microservice() { tei_service_port=5002 - http_proxy="" curl http://${ip_address}:$tei_service_port/v1/embeddings \ + result=$(http_proxy="" curl http://${ip_address}:$tei_service_port/v1/embeddings \ -X POST \ -d '{"text":"What is Deep Learning?"}' \ - -H 'Content-Type: application/json' + -H 'Content-Type: application/json') + if [[ $result == *"embedding"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-embedding-tei-endpoint + docker logs test-comps-embedding-tei-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_embeddings_llama_index.sh b/tests/test_embeddings_llama_index.sh index 006a2c259..048726044 100644 --- a/tests/test_embeddings_llama_index.sh +++ b/tests/test_embeddings_llama_index.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -11,24 +11,30 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/embedding-tei:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . + docker build --no-cache -t opea/embedding-tei-llamaindex:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/embedding-tei-llamaindex built fail" + exit 1 + else + echo "opea/embedding-tei-llamaindex built successful" + fi } function start_service() { tei_endpoint=5001 model="BAAI/bge-large-en-v1.5" revision="refs/pr/5" - docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision + docker run -d --name="test-comps-embedding-tei-llamaindex-endpoint" -p $tei_endpoint:80 -v ./data:/data -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.2 --model-id $model --revision $revision export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" tei_service_port=5010 - docker run -d --name="test-comps-embedding-tei-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps + docker run -d --name="test-comps-embedding-tei-llamaindex-server" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei-llamaindex:comps sleep 3m } function validate_microservice() { tei_service_port=5010 URL="http://${ip_address}:$tei_service_port/v1/embeddings" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llamaindex-server >> ${LOG_PATH}/embedding.log HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d '{"text":"What is Deep Learning?"}' -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ embedding - llama_index ] HTTP status is 200. Checking content..." @@ -38,12 +44,12 @@ function validate_microservice() { echo "[ embedding - llama_index ] Content is as expected." else echo "[ embedding - llama_index ] Content does not match the expected result: $CONTENT" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llamaindex-server >> ${LOG_PATH}/embedding.log exit 1 fi else echo "[ embedding - llama_index ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs test-comps-embedding-tei-server >> ${LOG_PATH}/embedding.log + docker logs test-comps-embedding-tei-llamaindex-server >> ${LOG_PATH}/embedding.log exit 1 fi } diff --git a/tests/test_guardrails_llama_guard.sh b/tests/test_guardrails_llama_guard.sh index 1462611aa..0e7980384 100644 --- a/tests/test_guardrails_llama_guard.sh +++ b/tests/test_guardrails_llama_guard.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,49 +11,55 @@ function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1 - docker build --no-cache -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . - echo "Docker images built" + docker build --no-cache -t opea/guardrails-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/guardrails-tgi built fail" + exit 1 + else + echo "opea/guardrails-tgi built successful" + fi } function start_service() { echo "Starting microservice" export model_id="meta-llama/Meta-Llama-Guard-2-8B" export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" - export SAFETY_GUARD_ENDPOINT=http://${ip_address}:8088/v1/chat/completions + export SAFETY_GUARD_ENDPOINT=http://${ip_address}:5035/v1/chat/completions - docker run -d --name="test-guardrails-langchain-tgi-server" -p 8088:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 + docker run -d --name="test-comps-guardrails-langchain-tgi-server" -p 5035:80 --runtime=habana -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e HTTPS_PROXY=$https_proxy -e HTTP_PROXY=$https_proxy ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id $model_id --max-input-length 1024 --max-total-tokens 2048 sleep 4m - docker run -d --name="test-guardrails-langchain-service" -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:latest + docker run -d --name="test-comps-guardrails-langchain-service" -p 5036:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e SAFETY_GUARD_MODEL_ID=$SAFETY_GUARD_MODEL_ID -e SAFETY_GUARD_ENDPOINT=$SAFETY_GUARD_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN opea/guardrails-tgi:comps sleep 10s - echo "Microservice started" } function validate_microservice() { echo "Validate microservice started" echo "test 1 - violated policies" - docker logs test-guardrails-langchain-tgi-server - docker logs test-guardrails-langchain-service - result=$(http_proxy= curl http://localhost:9090/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"Violated"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-guardrails-langchain-tgi-server + docker logs test-comps-guardrails-langchain-service exit 1 fi echo "test 2 - safe" - result=$(http_proxy= curl http://localhost:9090/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') + result=$(http_proxy= curl http://localhost:5036/v1/guardrails -X POST -d '{"text":"How do you buy a car in the US?","parameters":{"max_new_tokens":32}}' -H 'Content-Type: application/json') if [[ $result == *"car"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-guardrails-langchain-tgi-server + docker logs test-comps-guardrails-langchain-service exit 1 fi } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-guardrails-langchain*") + cid=$(docker ps -aq --filter "name=test-comps-guardrails-langchain*") echo "Shutdown legacy containers "$cid if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_guardrails_pii_detection.sh b/tests/test_guardrails_pii_detection.sh index 1ba8202f6..178b6ea23 100644 --- a/tests/test_guardrails_pii_detection.sh +++ b/tests/test_guardrails_pii_detection.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,13 +10,18 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { echo "Start building docker images for microservice" cd $WORKPATH - docker build -t opea/guardrails-pii-detection:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/docker/Dockerfile . - echo "Docker images built" + docker build --no-cache -t opea/guardrails-pii-detection:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/pii_detection/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/guardrails-pii-detection built fail" + exit 1 + else + echo "opea/guardrails-pii-detection built successful" + fi } function start_service() { echo "Starting microservice" - docker run -d --runtime=runc --name="test-guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-pii-detection:latest + docker run -d --runtime=runc --name="test-comps-guardrails-pii-detection-endpoint" -p 6357:6357 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/guardrails-pii-detection:comps sleep 5 echo "Microservice started" } @@ -26,19 +31,46 @@ function validate_microservice() { export PATH="${HOME}/miniforge3/bin:$PATH" source activate echo "test 1 - single task - ner" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ner + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ner) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 2 - 20 tasks in parallel - ner" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ner + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ner) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 3 - single task - ml" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ml + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 1 --ip_addr $ip_address --strategy ml) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "test 4 - 20 tasks in parallel - ml" - python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ml + result=$(python comps/guardrails/pii_detection/test.py --test_text --batch_size 20 --ip_addr $ip_address --strategy ml) + if [[ $result == *"An error occurred"* ]]; then + echo "Result wrong. 
Received was $result" + docker logs test-comps-guardrails-pii-detection-endpoint + exit 1 + else + echo "Result correct." + fi echo "Validate microservice completed" - docker logs test-guardrails-pii-detection-endpoint } function stop_docker() { - cid=$(docker ps -aq --filter "name=test-guardrails-pii-detection-endpoint") + cid=$(docker ps -aq --filter "name=test-comps-guardrails-pii-detection-endpoint") echo "Shutdown legacy containers "$cid if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi } diff --git a/tests/test_llms_faq-generation_tgi.sh b/tests/test_llms_faq-generation_tgi.sh index 9be561cf9..de5ec3466 100755 --- a/tests/test_llms_faq-generation_tgi.sh +++ b/tests/test_llms_faq-generation_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ LOG_PATH="$WORKPATH/tests" function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/llm-faqgen-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-faqgen-tgi built fail" + exit 1 + else + echo "opea/llm-faqgen-tgi built successful" + fi } function start_service() { @@ -44,7 +50,6 @@ function validate_microservice() { -d '{"query":"Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data."}' \ -H 'Content-Type: application/json' docker logs test-comps-llm-tgi-endpoint - docker logs test-comps-llm-tgi-server cd $LOG_PATH tei_service_port=5015 diff --git a/tests/test_llms_summarization_tgi.sh b/tests/test_llms_summarization_tgi.sh index adffb3b28..9d463d321 100644 --- a/tests/test_llms_summarization_tgi.sh +++ b/tests/test_llms_summarization_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,6 +11,12 @@ LOG_PATH="$WORKPATH/tests" function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/llm-tgi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llm-tgi built fail" + exit 1 + else + echo "opea/llm-tgi built successful" + fi } function start_service() { diff --git a/tests/test_llms_text-generation_native.sh b/tests/test_llms_text-generation_native.sh new file mode 100644 index 000000000..f1e7fff63 --- /dev/null +++ b/tests/test_llms_text-generation_native.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + docker build --no-cache \ + --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ + -t opea/llm-native:comps \ + -f comps/llms/text-generation/native/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-native built fail" + exit 1 + else + echo "opea/llm-native built successful" + fi +} + +function start_service() { + LLM_NATIVE_MODEL="Qwen/Qwen2-7B-Instruct" + llm_native_service_port=5070 + docker run -d \ + --name="test-comps-llm-native-server" \ + -p ${llm_native_service_port}:9000 \ + --runtime=habana \ + --cap-add=SYS_NICE \ + --ipc=host \ + -e http_proxy=${http_proxy} \ + -e https_proxy=${https_proxy} \ + -e LLM_NATIVE_MODEL=${LLM_NATIVE_MODEL} \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e TOKENIZERS_PARALLELISM=false \ + --restart unless-stopped \ + --network bridge \ + opea/llm-native:comps + + sleep 5s +} + +function validate_microservice() { + llm_native_service_port=5070 + URL="http://${ip_address}:${llm_native_service_port}/v1/chat/completions" + INPUT_DATA='{"query":"What is Deep Learning?"}' + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + SERVICE_NAME="llm-native" + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *'"text":"What'* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + docker logs test-comps-llm-native-server >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-llm-native*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + build_docker_images + start_service + validate_microservice + stop_docker + + echo y | docker system prune + +} + +main diff --git a/tests/test_llms_text-generation_ray_serve.sh b/tests/test_llms_text-generation_ray_serve.sh deleted file mode 100644 index 823971ecc..000000000 --- a/tests/test_llms_text-generation_ray_serve.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - ## Build VLLM Ray docker - cd $WORKPATH - docker build \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.rayserve \ - --network=host \ - -t ray_serve:habana . - - ## Build OPEA microservice docker - cd $WORKPATH - docker build \ - -t opea/llm-ray:comps \ - -f comps/llms/text-generation/ray_serve/docker/Dockerfile.microservice . 
-} - -function start_service() { - export LLM_MODEL="facebook/opt-125m" - port_number=8008 - - docker run -d --rm \ - --runtime=habana \ - --name="test-comps-ray-service" \ - -v $PWD/data:/data \ - -e HABANA_VISIBLE_DEVICES=all \ - -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ - --cap-add=sys_nice \ - --ipc=host \ - -p $port_number:80 \ - -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - -e TRUST_REMOTE_CODE=True \ - ray_serve:habana \ - /bin/bash -c "ray start --head && python api_server_openai.py --port_number 80 --model_id_or_path $LLM_MODEL --chat_processor ChatModelLlama --num_cpus_per_worker 8 --num_hpus_per_worker 1" - - export RAY_Serve_ENDPOINT="http://${ip_address}:${port_number}" - docker run -d --rm \ - --name="test-comps-ray-microserve" \ - -p 9000:9000 \ - --ipc=host \ - -e RAY_Serve_ENDPOINT=$RAY_Serve_ENDPOINT \ - -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ - -e LLM_MODEL=$LLM_MODEL \ - opea/llm-ray:comps - - # check whether ray is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-ray-service > ${WORKPATH}/tests/test-comps-ray-service.log - n=$((n+1)) - if grep -q Connected ${WORKPATH}/tests/test-comps-ray-service.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - http_proxy="" curl http://${ip_address}:8008/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model": "opt-125m", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 32 }' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ - -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-ray-service - docker logs test-comps-ray-microserve - } - - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-ray*") - if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - # echo y | docker system prune - -} - -main diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index 6b6c17c19..b2956b12b 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -10,6 +10,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm-tgi:comps -f comps/llms/text-generation/tgi/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/llm-tgi built fail" + exit 1 + else + echo "opea/llm-tgi built successful" + fi } function start_service() { @@ -39,12 +45,18 @@ function start_service() { function validate_microservice() { tei_service_port=5005 - http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/completions \ + result=$(http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?", "max_new_tokens": 128}' \ - -H 'Content-Type: application/json' - docker logs test-comps-llm-tgi-endpoint - docker logs test-comps-llm-tgi-server + -H 'Content-Type: application/json') + if [[ $result == *"DONE"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-llm-tgi-endpoint + docker logs test-comps-llm-tgi-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_llms_text-generation_vllm-openvino.sh b/tests/test_llms_text-generation_vllm-openvino.sh index f2df98584..ac57b29d8 100755 --- a/tests/test_llms_text-generation_vllm-openvino.sh +++ b/tests/test_llms_text-generation_vllm-openvino.sh @@ -2,25 +2,31 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH="$( cd "$( dirname "$0" )" && pwd )" # Define variables -port=8123 +port=5033 HF_CACHE_DIR=$HOME/.cache/huggingface -DOCKER_IMAGE="vllm:openvino" -CONTAINER_NAME="vllm-openvino-container" +DOCKER_IMAGE="vllm-openvino:comps" +CONTAINER_NAME="test-comps-vllm-openvino-container" function build_container() { cd $WORKPATH git clone https://github.com/vllm-project/vllm.git vllm-openvino cd ./vllm-openvino/ - docker build -t $DOCKER_IMAGE \ + docker build --no-cache -t $DOCKER_IMAGE \ -f Dockerfile.openvino \ . \ --build-arg https_proxy=$https_proxy \ --build-arg http_proxy=$http_proxy + if [ $? -ne 0 ]; then + echo "vllm-openvino built fail" + exit 1 + else + echo "vllm-openvino built successful" + fi cd $WORKPATH rm -rf vllm-openvino } @@ -34,7 +40,7 @@ start_container() { -e HTTPS_PROXY=$https_proxy \ -e HTTP_PROXY=$https_proxy \ -v $HF_CACHE_DIR:/root/.cache/huggingface \ - vllm:openvino /bin/bash -c "\ + vllm-openvino:comps /bin/bash -c "\ cd / && \ export VLLM_CPU_KVCACHE_SPACE=50 && \ python3 -m vllm.entrypoints.openai.api_server \ @@ -95,6 +101,8 @@ function test_api_endpoint { echo "PASS: $endpoint returned expected status code: $expected_status" else echo "FAIL: $endpoint returned unexpected status code: $response (expected: $expected_status)" + docker logs $CONTAINER_NAME + exit 1 fi } # Main function diff --git a/tests/test_llms_text-generation_vllm-ray.sh b/tests/test_llms_text-generation_vllm-ray.sh index 7ab235a93..41433b27f 100644 --- a/tests/test_llms_text-generation_vllm-ray.sh +++ b/tests/test_llms_text-generation_vllm-ray.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -12,18 +12,30 @@ function build_docker_images() { cd $WORKPATH docker build \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.vllmray \ - -t opea/vllm_ray:habana --network=host . + --no-cache -t opea/vllm_ray-habana:comps --network=host . + if [ $? 
-ne 0 ]; then + echo "opea/vllm_ray-habana built fail" + exit 1 + else + echo "opea/vllm_ray-habana built successful" + fi ## Build OPEA microservice docker cd $WORKPATH docker build \ - -t opea/llm-vllm-ray:comps \ + --no-cache -t opea/llm-vllm-ray:comps \ -f comps/llms/text-generation/vllm-ray/docker/Dockerfile.microservice . + if [ $? -ne 0 ]; then + echo "opea/llm-vllm-ray built fail" + exit 1 + else + echo "opea/llm-vllm-ray built successful" + fi } function start_service() { export LLM_MODEL="facebook/opt-125m" - port_number=8006 + port_number=5031 docker run -d --rm \ --name="test-comps-vllm-ray-service" \ --runtime=habana \ @@ -34,13 +46,13 @@ function start_service() { --ipc=host \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ -p $port_number:8000 \ - opea/vllm_ray:habana \ + opea/vllm_ray-habana:comps \ /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL --tensor_parallel_size 2 --enforce_eager False" export vLLM_RAY_ENDPOINT="http://${ip_address}:${port_number}" docker run -d --rm\ --name="test-comps-vllm-ray-microservice" \ - -p 9000:9000 \ + -p 5032:9000 \ --ipc=host \ -e vLLM_RAY_ENDPOINT=$vLLM_RAY_ENDPOINT \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ @@ -61,15 +73,29 @@ function start_service() { } function validate_microservice() { - http_proxy="" curl http://${ip_address}:8006/v1/chat/completions \ + result=$(http_proxy="" curl http://${ip_address}:5031/v1/chat/completions \ -H "Content-Type: application/json" \ - -d '{"model": "facebook/opt-125m", "messages": [{"role": "user", "content": "How are you?"}]}' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ + -d '{"model": "facebook/opt-125m", "messages": [{"role": "user", "content": "How are you?"}]}') + if [[ $result == *"message"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-ray-service + docker logs test-comps-vllm-ray-microservice + exit 1 + fi + result=$(http_proxy="" curl http://${ip_address}:5032/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-vllm-ray-service - docker logs test-comps-vllm-ray-microservice + -H 'Content-Type: application/json') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-ray-service + docker logs test-comps-vllm-ray-microservice + exit 1 + fi } function stop_docker() { diff --git a/tests/test_llms_text-generation_vllm.sh b/tests/test_llms_text-generation_vllm.sh index 48bee9ae8..0210f5075 100644 --- a/tests/test_llms_text-generation_vllm.sh +++ b/tests/test_llms_text-generation_vllm.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -12,19 +12,31 @@ function build_docker_images() { cd $WORKPATH/comps/llms/text-generation/vllm docker build \ -f docker/Dockerfile.hpu \ - -t opea/vllm:hpu \ + --no-cache -t opea/vllm-hpu:comps \ --shm-size=128g . + if [ $? 
-ne 0 ]; then + echo "opea/vllm-hpu built fail" + exit 1 + else + echo "opea/vllm-hpu built successful" + fi ## Build OPEA microservice docker cd $WORKPATH docker build \ - -t opea/llm-vllm:comps \ + --no-cache -t opea/llm-vllm:comps \ -f comps/llms/text-generation/vllm/docker/Dockerfile.microservice . + if [ $? -ne 0 ]; then + echo "opea/llm-vllm built fail" + exit 1 + else + echo "opea/llm-vllm built successful" + fi } function start_service() { export LLM_MODEL="facebook/opt-125m" - port_number=8008 + port_number=5025 docker run -d --rm \ --runtime=habana \ --name="test-comps-vllm-service" \ @@ -35,13 +47,13 @@ function start_service() { --cap-add=sys_nice \ --ipc=host \ -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ - opea/vllm:hpu \ + opea/vllm-hpu:comps \ /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048" export vLLM_ENDPOINT="http://${ip_address}:${port_number}" docker run -d --rm \ --name="test-comps-vllm-microservice" \ - -p 9000:9000 \ + -p 5030:9000 \ --ipc=host \ -e vLLM_ENDPOINT=$vLLM_ENDPOINT \ -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \ @@ -62,21 +74,35 @@ function start_service() { } function validate_microservice() { - http_proxy="" curl http://${ip_address}:8008/v1/completions \ + result=$(http_proxy="" curl http://${ip_address}:5025/v1/completions \ -H "Content-Type: application/json" \ -d '{ "model": "facebook/opt-125m", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0 - }' - http_proxy="" curl http://${ip_address}:9000/v1/chat/completions \ + }') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-service + docker logs test-comps-vllm-microservice + exit 1 + fi + result=$(http_proxy="" curl http://${ip_address}:5030/v1/chat/completions \ -X POST \ -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_p":0.95,"temperature":0.01,"streaming":false}' \ - -H 'Content-Type: application/json' - docker logs test-comps-vllm-service - docker logs test-comps-vllm-microservice - } + -H 'Content-Type: application/json') + if [[ $result == *"text"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vllm-service + docker logs test-comps-vllm-microservice + exit 1 + fi +} function stop_docker() { cid=$(docker ps -aq --filter "name=test-comps-vllm*") diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh index d9d4258e7..08f138e2f 100644 --- a/tests/test_lvms_llava.sh +++ b/tests/test_lvms_llava.sh @@ -10,20 +10,32 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/llava:comps -f comps/lvms/llava/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/llava built fail" + exit 1 + else + echo "opea/llava built successful" + fi docker build --no-cache -t opea/lvm:comps -f comps/lvms/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/lvm built fail" + exit 1 + else + echo "opea/lvm built successful" + fi } function start_service() { unset http_proxy - docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:comps - docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:comps + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5029:8399 --ipc=host opea/llava:comps + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:5029 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm:comps sleep 8m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else diff --git a/tests/test_lvms_tgi_llava_next.sh b/tests/test_lvms_tgi_llava_next.sh index 970e3004f..c9b28f6d5 100644 --- a/tests/test_lvms_tgi_llava_next.sh +++ b/tests/test_lvms_tgi_llava_next.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -11,25 +11,40 @@ function build_docker_images() { cd $WORKPATH echo $(pwd) git clone https://github.com/yuanwu2017/tgi-gaudi.git && cd tgi-gaudi && git checkout v2.0.4 - docker build -t opea/llava-tgi:latest . + docker build --no-cache -t opea/llava-tgi:comps . + if [ $? -ne 0 ]; then + echo "opea/llava-tgi built fail" + exit 1 + else + echo "opea/llava-tgi built successful" + fi + cd .. - docker build --no-cache -t opea/lvm-tgi:latest -f comps/lvms/Dockerfile_tgi . + docker build --no-cache -t opea/lvm-tgi:comps -f comps/lvms/Dockerfile_tgi . + if [ $? 
-ne 0 ]; then + echo "opea/lvm-tgi built fail" + exit 1 + else + echo "opea/lvm-tgi built successful" + fi } function start_service() { unset http_proxy model="llava-hf/llava-v1.6-mistral-7b-hf" - docker run -d --name="test-comps-lvm-llava-tgi" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/llava-tgi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 - docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm-tgi:latest + docker run -d --name="test-comps-lvm-llava-tgi" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5027:80 --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e SKIP_TOKENIZER_IN_TGI=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host opea/llava-tgi:comps --model-id $model --max-input-tokens 4096 --max-total-tokens 8192 + docker run -d --name="test-comps-lvm-tgi" -e LVM_ENDPOINT=http://$ip_address:5027 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5028:9399 --ipc=host opea/lvm-tgi:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:5028/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') if [[ $result == *"yellow"* ]]; then echo "Result correct." else echo "Result wrong." + docker logs test-comps-lvm-llava-tgi + docker logs test-comps-lvm-tgi exit 1 fi diff --git a/tests/test_prompt_registry_mongo.sh b/tests/test_prompt_registry_mongo.sh index e91bf225c..b5d976999 100644 --- a/tests/test_prompt_registry_mongo.sh +++ b/tests/test_prompt_registry_mongo.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') @@ -17,12 +17,18 @@ function build_docker_images() { echo $(pwd) docker run -d -p 27017:27017 --name=test-comps-mongo mongo:latest - docker build --no-cache -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . + docker build --no-cache -t opea/promptregistry-mongo-server:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/promptregistry-mongo-server built fail" + exit 1 + else + echo "opea/promptregistry-mongo-server built successful" + fi } function start_service() { - docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:latest + docker run -d --name="test-comps-promptregistry-mongo-server" -p 6012:6012 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e MONGO_HOST=${MONGO_HOST} -e MONGO_PORT=${MONGO_PORT} -e DB_NAME=${DB_NAME} -e COLLECTION_NAME=${COLLECTION_NAME} opea/promptregistry-mongo-server:comps sleep 10s } @@ -40,6 +46,7 @@ function validate_microservice() { echo "Correct result." else echo "Incorrect result." + docker logs test-comps-promptregistry-mongo-server exit 1 fi diff --git a/tests/test_reranks_fastrag.sh b/tests/test_reranks_fastrag.sh index d423d19d5..7b0575523 100644 --- a/tests/test_reranks_fastrag.sh +++ b/tests/test_reranks_fastrag.sh @@ -2,30 +2,42 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/reranking-fastrag:comps -f comps/reranks/fastrag/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-fastrag built fail" + exit 1 + else + echo "opea/reranking-fastrag built successful" + fi } function start_service() { export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" - fastrag_service_port=8000 + fastrag_service_port=5020 unset http_proxy docker run -d --name="test-comps-reranking-fastrag-server" -p ${fastrag_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:comps sleep 3m } function validate_microservice() { - fastrag_service_port=8000 - http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking\ + fastrag_service_port=5020 + result=$(http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking\ -X POST \ -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ - -H 'Content-Type: application/json' - docker logs test-comps-reranking-fastrag-server + -H 'Content-Type: application/json') + if [[ $result == *"reranked_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-reranking-fastrag-server + exit 1 + fi } function stop_docker() { diff --git a/tests/test_reranks_langchain-mosec.sh b/tests/test_reranks_langchain-mosec.sh index ba675bccf..d34957a4c 100644 --- a/tests/test_reranks_langchain-mosec.sh +++ b/tests/test_reranks_langchain-mosec.sh @@ -11,12 +11,24 @@ function build_mosec_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec-endpoint:comps -f comps/reranks/langchain-mosec/mosec-docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/reranking-langchain-mosec-endpoint built fail" + exit 1 + else + echo "opea/reranking-langchain-mosec-endpoint built successful" + fi } function build_docker_images() { cd $WORKPATH echo $(pwd) docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --no-cache -t opea/reranking-langchain-mosec:comps -f comps/reranks/langchain-mosec/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-langchain-mosec built fail" + exit 1 + else + echo "opea/reranking-langchain-mosec built successful" + fi } function start_service() { diff --git a/tests/test_reranks_tei.sh b/tests/test_reranks_tei.sh index 4a8c77aad..0b146d81e 100644 --- a/tests/test_reranks_tei.sh +++ b/tests/test_reranks_tei.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/reranking-tei:comps -f comps/reranks/tei/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/reranking-tei built fail" + exit 1 + else + echo "opea/reranking-tei built successful" + fi } function start_service() { diff --git a/tests/test_retrievers_haystack_qdrant.sh b/tests/test_retrievers_haystack_qdrant.sh index b1f8a02e8..4fdfb13d6 100644 --- a/tests/test_retrievers_haystack_qdrant.sh +++ b/tests/test_retrievers_haystack_qdrant.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-qdrant:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/haystack/qdrant/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-qdrant built fail" + exit 1 + else + echo "opea/retriever-qdrant built successful" + fi } function start_service() { @@ -39,12 +45,18 @@ function validate_microservice() { export PATH="${HOME}/miniforge3/bin:$PATH" source activate test_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ -X POST \ -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-comps-retriever-qdrant-server - docker logs test-comps-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. 
Received was $result" + docker logs test-comps-retriever-qdrant-server + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi } function stop_docker() { diff --git a/tests/test_retrievers_langchain_pgvector.sh b/tests/test_retrievers_langchain_pgvector.sh index 4c5b08963..41295eb5e 100755 --- a/tests/test_retrievers_langchain_pgvector.sh +++ b/tests/test_retrievers_langchain_pgvector.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-pgvector:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pgvector/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/retriever-pgvector built fail" + exit 1 + else + echo "opea/retriever-pgvector built successful" + fi } function start_service() { @@ -17,7 +23,7 @@ function start_service() { export POSTGRES_PASSWORD=testpwd export POSTGRES_DB=vectordb - docker run --name test-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 + docker run --name test-comps-vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql -p 5432:5432 pgvector/pgvector:0.7.0-pg16 sleep 10s # tei endpoint @@ -28,21 +34,27 @@ function start_service() { export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}" # pgvector retriever - docker run -d --name="test-retriever-pgvector" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:comps + docker run -d --name="test-retriever-pgvector" -p 5003:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT opea/retriever-pgvector:comps sleep 3m } function validate_microservice() { - retriever_port=7000 + retriever_port=5003 test_embedding="[0.3212316218862614, 0.05284697028105079, 0.792736615029739, -0.01450667589035648, -0.7358454555705813, -0.5159104761926909, 0.3535153166047822, -0.6465310827905328, -0.3260418169245214, 0.5427377177268364, 0.839674125021304, 0.27459120894125255, -0.9833857616143291, 0.4763752586395751, 0.7048355150785723, 0.4935209825796325, -0.09655411499027178, -0.5739389241976944, 0.34450497876796815, -0.03401327136919208, -0.8247080270670755, -0.9430721851019634, 0.4702688485035773, 0.3872526674852217, -0.13436894777006136, 0.27166203983338266, 0.7724679346611174, 0.49524109590526666, 0.9810730976435518, 0.2143402533230332, 0.35235793217357947, -0.3199320624935764, -0.3535996110405917, 0.1982603781951089, -0.37547349902996063, -0.6148649695355071, 0.388521078627599, 0.7073360849235228, 0.1768845283243352, -0.38289339223361885, 0.36390326284734775, -0.4790146416310761, 
-0.5412301982310956, 0.33793186533237507, -0.7028178009236765, -0.6850965350085609, -0.519584428926227, 0.07610032557230206, 0.8173990245819258, 0.6620078274633294, 0.9159029345791101, -0.6353085978752564, 0.5816911666251467, -0.03007583916355916, 0.7405029634324471, 0.43720248036100817, -0.8588961125219283, -0.5267610831146254, 0.17242810571201828, -0.5958637989986995, -0.9424146892733949, 0.593549429279222, -0.6516554787902789, -0.5666971591678356, -0.942676397097636, -0.7754876202156127, 0.4981071621118629, 0.3479716647812874, -0.20905562164787628, -0.01239748867059931, -0.39282697259470645, -0.682776727276128, 0.8490471472078613, 0.9407846472878745, 0.38429459825058054, -0.6217288222979798, 0.7017039943902317, 0.2666859825508645, -0.8350624589077213, -0.6844099142855995, 0.7150220289787632, 0.6172753342426756, 0.3411977212235433, -0.6885106120374, -0.9063819220399785, -0.8409372842391187, -0.8297926800281972, -0.7209991962325382, -0.10750064217958677, 0.3293914797165298, -0.7839812511866298, 0.3413595850264284, 0.9251256529601857, -0.7129635996889019, 0.2032168270911272, -0.744174955251268, 0.7691350055313244, -0.20065548721684312, 0.8869269473893813, -0.02043469943990095, 0.6747773545635596, -0.08840723444251264, 0.29835753335664084, -0.06410433319206965, 0.6915278973312651, 0.35470936730145075, -0.8143883316077478, 0.3700125242841532, 0.21752822647915626, -0.8620510146349405, -0.9872766671960136, -0.4418160577207253, -0.22054594310628928, -0.12301077500821433, -0.32532691454130314, -0.13151154223491113, -0.11476973253362455, -0.6347877217496254, -0.7764229239974911, 0.8494414471799672, -0.8096141861298036, -0.126108099532108, -0.3910538453811505, 0.7416491690145808, -0.9147820237179922, -0.09053536925720418, 0.6536341825563443, 0.655602583013402, 0.1757558598054938, -0.2501459855449637, 0.23414048418314914, -0.2944157385030681, 0.9386472406881659, -0.18806566910431344, -0.29109490690006345, -0.06582041104197667, -0.24458043176038613, 0.22893907834264082, -0.6322528508563678, -0.7885667746432836, 0.10383516801892911, 0.25661930212021256, 0.48395546864077654, 0.25074187080653787, 0.7878158493705165, 0.23874513474134984, -0.18963037155323526, 0.6768315857746809, 0.5323731821887652, 0.23324330999046516, -0.738289178845237, 0.8231931441360549, -0.5243106029457096, 0.21804967641989204, 0.3707592922049536, 0.1970890658467559, 0.6290401053696923, -0.6193312718716564, 0.4319818453521995, -0.4373242547587233, -0.20412719166280646, -0.868724458613944, -0.9426457085574942, 0.7688331784589177, 0.8429476319014946, -0.6928872166553237, -0.3089062124196522, -0.4951601658025162, -0.20786350848417157, -0.1834098357401246, 0.6258630377921288, -0.25204085881527294, -0.6433661815891194, 0.24194250996512046, 0.7945180851525879, 0.6730215739979015, 0.45995755232419877, 0.27685945410814927, 0.7529674957244883, -0.4439881981193141, 0.38722277085649703, 0.4225851985441007, 0.5151867308566294, 0.8592936274009735, -0.5577167356519221, -0.22541015002223674, 0.7872403040580904, -0.12895843621078895, 0.5887160803674254, -0.6121486933005933, -0.45190497189987, 0.5882515994898736, -0.20915972333667443, 0.6412544240387859, -0.9812292190679823, 0.23598351448404986, -0.01874477123769469, -0.5571884049798792, -0.21717058226127106, -0.8566428604555374, -0.7698283820683764, -0.7788953845967042, -0.9695043602118194, 0.2531642774513472, 0.24476771264255004, 0.799177428779027, 0.15892099361251932, 0.2675472976400166, 0.7977537791258142, 0.5682082238828539, -0.45861936031507833, 0.976812562932188, 0.7074171102968665, 
-0.255345769250928, -0.8903371790301657, 0.7704811965386686, 0.7499406836491052, 0.015867022798163433, 0.023343856172087563, -0.8985882333056163, 0.967943518200411, 0.6738003473613683, 0.500027753964835, -0.25086930359627546, 0.8192342987623937, -0.5553572601867272, -0.5869387659256808, 0.8105241617485164, 0.26722188191476604, -0.3958252448602495, -0.5045071968072412, -0.28738102025143886, 0.9466985876572256, 0.7491954841518662, -0.05398806963889902, 0.5602374066760636, -0.7105267600964871, 0.9183176656578995, -0.7484524873628995, -0.9707740622635459, -0.835248467210193, -0.6698976002755301, -0.9157167347077453, 0.8385470752014215, -0.8484323571440642, 0.1488482374866753, 0.3535389435893035, 0.40201643606217297, -0.39307181109310174, -0.651228451786785, 0.9707155460374848, 0.7578035730666239, -0.916880505891617, 0.7976566483403702, 0.4769359186496589, -0.9056872532891009, 0.5018227509242583, 0.06634988131602104, -0.38876676686204537, -0.20473802582321277, 0.5980365889203325, -0.34935300908506206, 0.5873905336860825, -0.8339160527604776, 0.2903116937984762, -0.9254374424169307, 0.6580958452134436, 0.15246698154103022, -0.6646130474515959, 0.8207084174685697, 0.06879769054023499, 0.6856796611464853, 0.7434402148947985, -0.07417300955086725, -0.37981881059511857, 0.7945700979382095, 0.9465476443316254, 0.7045891367557522, -0.21374560717812052, 0.09707043886320443, 0.40542472035097754, -0.21295063208183063, -0.3638798039778244, 0.27259830494730597, -0.9679565648433712, 0.574009198040323, 0.5453104171463734, 0.4226578254247848, 0.8135241112071945, -0.9913587704531821, -0.5117490950168377, 0.31240764840477486, 0.05726091394767008, -0.44352035546239654, 0.973651830312322, -0.30089019754641044, -0.38110683211990515, 0.12746451891554633, -0.44142668003974683, -0.6085743100333996, 0.6897705314589502, 0.9941017194163115, 0.22931154106427631, -0.38393397164902865, -0.487276417971108, 0.9823011016539693, -0.525188403356583, 0.20472304461076174, -0.549309125745228, 0.8391439613819196, -0.29947371410247614, -0.9587993477785177, 0.49169643064876745, -0.8450431739492874, 0.4992908092405386, 0.8214166011949593, 0.3514461197612715, 0.7052749449063302, -0.456428137096097, -0.21613329759075817, -0.4240696515484821, -0.6072280877366947, -0.19019911975234938, 0.03207563995916485, 0.7832264288656379, -0.9848532944591397, 0.2814057130788894, 0.860398099217986, -0.5757789213121853, -0.6403226820347003, 0.6276892831123779, 0.6966115314942829, -0.5964071917752842, 0.44624318175630373, 0.7747997483259705, -0.5274892594576506, -0.00345488047657061, 0.39694784159551255, -0.32018146543784254, 0.7503113292041483, 0.2279567107684024, -0.6993797573511833, 0.07551046336599065, 0.34912828888955083, 0.4590408940147299, 0.25454507513086266, -0.30882522463970363, -0.4080889783776509, -0.3123706885833979, -0.8906352519220135, -0.8139972234039548, -0.08828963608894047, 0.14503312886836617, -0.3714118896544083, 0.3827783378301277, 0.5438460044018558, 0.5097760438462526, 0.15715247575456592, 0.7656929283612122, 0.2920396353744734, 0.2373440190759446, 0.9526910643357105, 0.1250822784239567, 0.8541819063485603, -0.12747895073713877, 0.5735382473541981, -0.5032516001742902, 0.7413632640531032, -0.7276977107465363, 0.843580565716205, 0.7018464054348241, 0.5586022744519274, 0.8087171435922904, -0.21245941454116735, -0.948838383837346, -0.33122336674310726, -0.6044852681843789, 0.9537863293189539, 0.2536799406315282, -0.6165803849255769, 0.7101896753682724, -0.7295247078012181, -0.7614076971639918, -0.26355996174665797, 
0.2821572530049805, -0.31435759840484767, 0.4606279529588946, -0.6454718015595133, 0.29204230021467015, -0.9773214517280517, 0.9018006022750058, 0.41864735598581615, -0.6362219585524242, 0.6393270283675747, 0.8775458814947836, -0.8151570635893794, 0.3439568607968999, 0.29709851503999474, -0.757078876496533, 0.5012539900859203, 0.9894088580102554, -0.7830638861580885, -0.2991021462567893, 0.106227593453466, 0.475717480159388, -0.8190837445165258, 0.7235860704831878, 0.7463245164230621, -0.5005231847044065, 0.6040314499611552, 0.6735380082955229, -0.5547291176872893, -0.9090102518914822, 0.13079236830880614, 0.30122136258272514, -0.6417236467561747, 0.2630310905704383, -0.37163926901056077, 0.20821525595060142, 0.058213575984825905, -0.7186424501121726, 0.7186917038077467, 0.20368227867764155, 0.7957158871869667, -0.8553769107478018, 0.8475526085456688, -0.929286319233819, -0.4084410910607217, -0.18451194893213185, -0.2629665470348457, 0.36380699955097695, 0.2762298083541519, 0.8264334555626198, -0.022207373606218495, -0.32224911623004626, -0.18947254078026798, 0.33627343422225175, 0.6906306880901341, -0.5248865356053838, -0.8976978225060646, -0.9198989266658277, -0.9045058048590318, -0.43074279628622225, 0.9599523380525761, 0.16694571818827875, 0.08638717900194992, 0.24369341180939874, -0.29293980835779454, 0.13980998987643733, -0.9103052978285509, 0.9109674748745353, -0.6189652187256851, -0.30507868365416413, -0.4232217216255978, 0.34784431052206877, -0.8235167119697908, 0.1565512568825982, -0.11476153735499195, -0.5476852944817927, -0.9695366885614041, 0.31387227761880165, -0.8460727492314095, 0.5313339961520958, 0.5605009436841186, 0.04504755045556719, -0.10937916620725119, -0.40867992424849797, -0.9148814576758182, 0.41260731002228, 0.6535850987782705, -0.3956136730481463, 0.03633719317271722, -0.26520169024611917, -0.39307279913859916, 0.8389708129910836, -0.10965192030153337, -0.8114479506343715, 0.6624055258346568, -0.12364857684372677, -0.3391386034226034, 0.5064344415363975, 0.4222558794792024, -0.8920802019539475, 0.8403881748708741, -0.5144930020007417, -0.3961429483392995, -0.9112376538340263, 0.5369991550001529, 0.4099994212177125, 0.8971702224538953, -0.07250674251100442, -0.4123232887614461, -0.4122138364547645, 0.30115503935936516, 0.9140832812087094, -0.37996517983025035, 0.45766194212423583, 0.8778668278803266, -0.871373882496363, 0.9061603981794313, -0.4815792838295849, -0.3540250825062252, 0.47058280496548677, 0.6353307464139133, -0.9084299203157564, 0.32569503818833767, -0.5917177728092791, 0.017982667746413883, -0.39657854384311597, 0.30240291420731147, -0.8789617636583977, 0.398601970442066, -0.9537566407528597, -0.7326801366509474, 0.6394091009367926, -0.24018952260048332, -0.4410443985541457, -0.715250103875068, -0.9531170489995859, 0.8907413230296786, -0.6270483513933209, -0.1278281545077713, 0.6205668124687644, -0.5880492136441298, 0.8458960227498347, 0.5156432304509859, -0.41522707199863196, -0.9971627462302537, 0.967570980171752, -0.1258013547750596, -0.3920054384667395, -0.7579953976551077, -0.5047276085442098, -0.742917134758996, 0.307776046578512, 0.33240724082891204, -0.12439712701067074, 0.8297068611891512, 0.9092972699438713, -0.5553533790744807, -0.9327632085647035, 0.4797798607215402, -0.6407284323825371, 0.23503537288803233, 0.7356444783186646, 0.550461677629142, -0.8859356421536595, -0.06157466053719496, 0.2628024780598055, -0.14515603184459613, -0.9382781600128365, -0.9076306357777459, -0.5661586668239169, -0.5778188698610502, -0.343591139945177, 
-0.9957519288956789, 3.652203366399931e-05, -0.2850434941249338, 0.9450784913510459, -0.7344049612004591, 0.3966551077940945, 0.9820403785569927, 0.7132254472780228, 0.04475455308790677, 0.7149662286904288, 0.30640286803677386, -0.11825818002978239, 0.9475071024012094, -0.4020573255284672, -0.25210492474829316, -0.9864930649895771, -0.3662338670933165, 0.6528806547589174, 0.23157758222346203, -0.5707934304014186, -0.12462852967839688, 0.1912875382350001, 0.9111205883142817, -0.7227638014501978, -0.36537014763125186, -0.37380198030841805, 0.4707867786085871, -0.5824192322860218, -0.47547092650542666, 0.7836345381645189, 0.7843678847969751, 0.6754328587362883, -0.6670404362153401, 0.7372872996570987, -0.8333262364813818, -0.41971949504499273, -0.7600660277081586, 0.22809249636551576, -0.8923092554006928, -0.28910705230462663, 0.17556387278264474, -0.3120642961908995, -0.08857040909612457, 0.9736924099705169, -0.6425732085916924, 0.5667862783362607, -0.45242262118684295, -0.3366537122702131, -0.21042580668493605, -0.969230642055972, -0.6986186588663355, -0.5420629464988849, 0.8012632695329027, 0.10364503122371205, -0.8288649738571241, -0.7488901002163446, -0.2086447971105505, 0.24528530567671103, -0.1194706644737491, -0.4487125509839567, 0.19757079065420702, 0.9701391397770309, 0.6918580324259651, -0.6609864495230626, -0.5767397650124655, 0.13274852903677803, 0.45790899492650117, 0.6156249211932037, -0.5400854790245104, -0.4871335994554471, -0.37124459518957686, -0.9740961061020355, 0.8132186161153883, 0.5432742278375737, -0.7555629992450097, -0.3626273029276168, 0.3273351801156006, 0.2950481130490956, 0.5899713501222568, 0.1290258276325824, 0.14809153246329188, -0.8527458869128903, -0.45135237009997664, -0.78966354981686, -0.9869505409499153, 0.5440922045096472, -0.5065478252374527, 0.8914118613097968, -0.7009799840752231, -0.37720301784400667, -0.1990418958793818, 0.07895118490326825, 0.43246496862820827, 0.06871630683294172, 0.04584623777009278, -0.34229499350310455, 0.9387219959330184, -0.5381844165951264, 0.4794422861285379, 0.8534951958829573, 0.5734335942167272, -0.85412829706822, -0.7352963908032732, -0.12895000820916747, -0.22552570725823173, -0.5976878733463429, -0.32791035485443487, 0.7202059113861725, 0.39099290295132905, 0.30525825694263764, -0.2266469266742548, -0.03379388729241706, -0.5954645444941691, -0.02422270847921526, 0.2367051711225363, 0.0254309367030352, -0.8571941247598263, 0.6036464885617703, 0.780145197998714, -0.18486284139078912, -0.4861368589284454, -0.2789831003703762, -0.695370188724934, 0.20748300875047643, 0.613995882433769, -0.20040817194169125, 0.8373240273873666, 0.6138944053316708, -0.7863205352137852, -0.7823411702718377, 0.79906295867358, -0.5467331800231525, -0.6344655458958364, -0.9818941753091346, 0.5525644258030062, 0.6262889073747209, 0.9963129049354384, -0.6272737000603017, -0.2716262931036606, 0.2096677033434846, -0.6982262682600213, -0.5674210473085657, 0.24902399542030595, -0.5657568018493333, 0.08618618872017958, 0.5489764282591345, -0.8941510222698827, 0.41351613826944567, -0.5112980841262675, 0.4470615015729351, -0.20725162805621333, -0.08479642143543553, -0.1278591923549064, -0.4999896814124227, 0.9888904679503661, -0.048462424602504495, -0.7019088972627803, 0.24200967459107448, -0.07080934919496995, -0.7205222066189325, 0.8569714457890816, -0.16535406501060956, -0.6995151061411666, -0.002471197183836038, 0.36657456718336245, -0.21418945415378254, 0.8960422717208372, -0.8112144998402944, 0.3367368342692487, -0.1409734233274329, 
0.9270438056838188, 0.6449085435355675, -0.42063510394970094, -0.5514753035609532, -0.7824719546926855, 0.27064161179409774, 0.7610801292513893, 0.041332375564573365, -0.4938906089444197, 0.6565606828711339, -0.8175201877660032, -0.7145428710506601, 0.5266689558422335, -0.36373337569732045, -0.4295940430516798, 0.6614123405581125, -0.5795867768963181, 0.09683447902632913, -0.7233160622088481, -0.035259383881968365, 0.44407987368431834, 0.5080824859277744, -0.025605597564321236, -0.33746311986945, 0.8643101724003239, -0.6590382567793307, 0.11251953056040387, -0.5283365207737802, 0.8881578952123139, -0.9796498715072419, -0.8206325632112821, -0.5431772730915239, -0.09628735573638458, 0.8509192593020449, 0.6468967965920123, -0.5886852895684587, -0.25974684548008664, 0.4474352123365879, -0.2199845691372495, 0.7554317108927318, 0.9809450136647395, -0.9430090133566618, 0.23635288316941683]" - http_proxy='' + result=$(http_proxy='' curl http://${ip_address}:$retriever_port/v1/retrieval \ -X POST \ -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-vectorstore-postgres - docker logs test-comps-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-vectorstore-postgres + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi } function stop_docker() { @@ -51,7 +63,7 @@ function stop_docker() { docker stop $cid_retrievers && docker rm $cid_retrievers && sleep 1s fi - cid_redis=$(docker ps -aq --filter "name=test-vectorstore-postgres") + cid_redis=$(docker ps -aq --filter "name=test-comps-vectorstore-postgres") if [[ ! -z "$cid_redis" ]]; then docker stop $cid_redis && docker rm $cid_redis && sleep 1s fi diff --git a/tests/test_retrievers_langchain_pinecone.sh b/tests/test_retrievers_langchain_pinecone.sh index 3e5215ba7..d370fa92a 100755 --- a/tests/test_retrievers_langchain_pinecone.sh +++ b/tests/test_retrievers_langchain_pinecone.sh @@ -2,13 +2,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-pinecone:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/pinecone/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever-pinecone built fail" + exit 1 + else + echo "opea/retriever-pinecone built successful" + fi } function start_service() { @@ -34,12 +40,18 @@ function start_service() { function validate_microservice() { retriever_port=5009 test_embedding="[0.3212316218862614, 0.05284697028105079, 0.792736615029739, -0.01450667589035648, -0.7358454555705813, -0.5159104761926909, 0.3535153166047822, -0.6465310827905328, -0.3260418169245214, 0.5427377177268364, 0.839674125021304, 0.27459120894125255, -0.9833857616143291, 0.4763752586395751, 0.7048355150785723, 0.4935209825796325, -0.09655411499027178, -0.5739389241976944, 0.34450497876796815, -0.03401327136919208, -0.8247080270670755, -0.9430721851019634, 0.4702688485035773, 0.3872526674852217, -0.13436894777006136, 0.27166203983338266, 0.7724679346611174, 0.49524109590526666, 0.9810730976435518, 0.2143402533230332, 0.35235793217357947, -0.3199320624935764, -0.3535996110405917, 0.1982603781951089, -0.37547349902996063, -0.6148649695355071, 0.388521078627599, 0.7073360849235228, 0.1768845283243352, -0.38289339223361885, 0.36390326284734775, -0.4790146416310761, -0.5412301982310956, 0.33793186533237507, -0.7028178009236765, -0.6850965350085609, -0.519584428926227, 0.07610032557230206, 0.8173990245819258, 0.6620078274633294, 0.9159029345791101, -0.6353085978752564, 0.5816911666251467, -0.03007583916355916, 0.7405029634324471, 0.43720248036100817, -0.8588961125219283, -0.5267610831146254, 0.17242810571201828, -0.5958637989986995, -0.9424146892733949, 0.593549429279222, -0.6516554787902789, -0.5666971591678356, -0.942676397097636, -0.7754876202156127, 0.4981071621118629, 0.3479716647812874, -0.20905562164787628, -0.01239748867059931, -0.39282697259470645, -0.682776727276128, 0.8490471472078613, 0.9407846472878745, 0.38429459825058054, -0.6217288222979798, 0.7017039943902317, 0.2666859825508645, -0.8350624589077213, -0.6844099142855995, 0.7150220289787632, 0.6172753342426756, 0.3411977212235433, -0.6885106120374, -0.9063819220399785, -0.8409372842391187, -0.8297926800281972, -0.7209991962325382, -0.10750064217958677, 0.3293914797165298, -0.7839812511866298, 0.3413595850264284, 0.9251256529601857, -0.7129635996889019, 0.2032168270911272, -0.744174955251268, 0.7691350055313244, -0.20065548721684312, 0.8869269473893813, -0.02043469943990095, 0.6747773545635596, -0.08840723444251264, 0.29835753335664084, -0.06410433319206965, 0.6915278973312651, 0.35470936730145075, -0.8143883316077478, 0.3700125242841532, 0.21752822647915626, -0.8620510146349405, -0.9872766671960136, -0.4418160577207253, -0.22054594310628928, -0.12301077500821433, -0.32532691454130314, -0.13151154223491113, -0.11476973253362455, -0.6347877217496254, -0.7764229239974911, 0.8494414471799672, -0.8096141861298036, -0.126108099532108, -0.3910538453811505, 0.7416491690145808, -0.9147820237179922, -0.09053536925720418, 0.6536341825563443, 0.655602583013402, 0.1757558598054938, -0.2501459855449637, 0.23414048418314914, -0.2944157385030681, 0.9386472406881659, -0.18806566910431344, -0.29109490690006345, -0.06582041104197667, -0.24458043176038613, 0.22893907834264082, -0.6322528508563678, -0.7885667746432836, 0.10383516801892911, 0.25661930212021256, 0.48395546864077654, 0.25074187080653787, 0.7878158493705165, 0.23874513474134984, -0.18963037155323526, 0.6768315857746809, 0.5323731821887652, 0.23324330999046516, -0.738289178845237, 0.8231931441360549, -0.5243106029457096, 0.21804967641989204, 0.3707592922049536, 0.1970890658467559, 0.6290401053696923, -0.6193312718716564, 
0.4319818453521995, -0.4373242547587233, -0.20412719166280646, -0.868724458613944, -0.9426457085574942, 0.7688331784589177, 0.8429476319014946, -0.6928872166553237, -0.3089062124196522, -0.4951601658025162, -0.20786350848417157, -0.1834098357401246, 0.6258630377921288, -0.25204085881527294, -0.6433661815891194, 0.24194250996512046, 0.7945180851525879, 0.6730215739979015, 0.45995755232419877, 0.27685945410814927, 0.7529674957244883, -0.4439881981193141, 0.38722277085649703, 0.4225851985441007, 0.5151867308566294, 0.8592936274009735, -0.5577167356519221, -0.22541015002223674, 0.7872403040580904, -0.12895843621078895, 0.5887160803674254, -0.6121486933005933, -0.45190497189987, 0.5882515994898736, -0.20915972333667443, 0.6412544240387859, -0.9812292190679823, 0.23598351448404986, -0.01874477123769469, -0.5571884049798792, -0.21717058226127106, -0.8566428604555374, -0.7698283820683764, -0.7788953845967042, -0.9695043602118194, 0.2531642774513472, 0.24476771264255004, 0.799177428779027, 0.15892099361251932, 0.2675472976400166, 0.7977537791258142, 0.5682082238828539, -0.45861936031507833, 0.976812562932188, 0.7074171102968665, -0.255345769250928, -0.8903371790301657, 0.7704811965386686, 0.7499406836491052, 0.015867022798163433, 0.023343856172087563, -0.8985882333056163, 0.967943518200411, 0.6738003473613683, 0.500027753964835, -0.25086930359627546, 0.8192342987623937, -0.5553572601867272, -0.5869387659256808, 0.8105241617485164, 0.26722188191476604, -0.3958252448602495, -0.5045071968072412, -0.28738102025143886, 0.9466985876572256, 0.7491954841518662, -0.05398806963889902, 0.5602374066760636, -0.7105267600964871, 0.9183176656578995, -0.7484524873628995, -0.9707740622635459, -0.835248467210193, -0.6698976002755301, -0.9157167347077453, 0.8385470752014215, -0.8484323571440642, 0.1488482374866753, 0.3535389435893035, 0.40201643606217297, -0.39307181109310174, -0.651228451786785, 0.9707155460374848, 0.7578035730666239, -0.916880505891617, 0.7976566483403702, 0.4769359186496589, -0.9056872532891009, 0.5018227509242583, 0.06634988131602104, -0.38876676686204537, -0.20473802582321277, 0.5980365889203325, -0.34935300908506206, 0.5873905336860825, -0.8339160527604776, 0.2903116937984762, -0.9254374424169307, 0.6580958452134436, 0.15246698154103022, -0.6646130474515959, 0.8207084174685697, 0.06879769054023499, 0.6856796611464853, 0.7434402148947985, -0.07417300955086725, -0.37981881059511857, 0.7945700979382095, 0.9465476443316254, 0.7045891367557522, -0.21374560717812052, 0.09707043886320443, 0.40542472035097754, -0.21295063208183063, -0.3638798039778244, 0.27259830494730597, -0.9679565648433712, 0.574009198040323, 0.5453104171463734, 0.4226578254247848, 0.8135241112071945, -0.9913587704531821, -0.5117490950168377, 0.31240764840477486, 0.05726091394767008, -0.44352035546239654, 0.973651830312322, -0.30089019754641044, -0.38110683211990515, 0.12746451891554633, -0.44142668003974683, -0.6085743100333996, 0.6897705314589502, 0.9941017194163115, 0.22931154106427631, -0.38393397164902865, -0.487276417971108, 0.9823011016539693, -0.525188403356583, 0.20472304461076174, -0.549309125745228, 0.8391439613819196, -0.29947371410247614, -0.9587993477785177, 0.49169643064876745, -0.8450431739492874, 0.4992908092405386, 0.8214166011949593, 0.3514461197612715, 0.7052749449063302, -0.456428137096097, -0.21613329759075817, -0.4240696515484821, -0.6072280877366947, -0.19019911975234938, 0.03207563995916485, 0.7832264288656379, -0.9848532944591397, 0.2814057130788894, 0.860398099217986, -0.5757789213121853, 
-0.6403226820347003, 0.6276892831123779, 0.6966115314942829, -0.5964071917752842, 0.44624318175630373, 0.7747997483259705, -0.5274892594576506, -0.00345488047657061, 0.39694784159551255, -0.32018146543784254, 0.7503113292041483, 0.2279567107684024, -0.6993797573511833, 0.07551046336599065, 0.34912828888955083, 0.4590408940147299, 0.25454507513086266, -0.30882522463970363, -0.4080889783776509, -0.3123706885833979, -0.8906352519220135, -0.8139972234039548, -0.08828963608894047, 0.14503312886836617, -0.3714118896544083, 0.3827783378301277, 0.5438460044018558, 0.5097760438462526, 0.15715247575456592, 0.7656929283612122, 0.2920396353744734, 0.2373440190759446, 0.9526910643357105, 0.1250822784239567, 0.8541819063485603, -0.12747895073713877, 0.5735382473541981, -0.5032516001742902, 0.7413632640531032, -0.7276977107465363, 0.843580565716205, 0.7018464054348241, 0.5586022744519274, 0.8087171435922904, -0.21245941454116735, -0.948838383837346, -0.33122336674310726, -0.6044852681843789, 0.9537863293189539, 0.2536799406315282, -0.6165803849255769, 0.7101896753682724, -0.7295247078012181, -0.7614076971639918, -0.26355996174665797, 0.2821572530049805, -0.31435759840484767, 0.4606279529588946, -0.6454718015595133, 0.29204230021467015, -0.9773214517280517, 0.9018006022750058, 0.41864735598581615, -0.6362219585524242, 0.6393270283675747, 0.8775458814947836, -0.8151570635893794, 0.3439568607968999, 0.29709851503999474, -0.757078876496533, 0.5012539900859203, 0.9894088580102554, -0.7830638861580885, -0.2991021462567893, 0.106227593453466, 0.475717480159388, -0.8190837445165258, 0.7235860704831878, 0.7463245164230621, -0.5005231847044065, 0.6040314499611552, 0.6735380082955229, -0.5547291176872893, -0.9090102518914822, 0.13079236830880614, 0.30122136258272514, -0.6417236467561747, 0.2630310905704383, -0.37163926901056077, 0.20821525595060142, 0.058213575984825905, -0.7186424501121726, 0.7186917038077467, 0.20368227867764155, 0.7957158871869667, -0.8553769107478018, 0.8475526085456688, -0.929286319233819, -0.4084410910607217, -0.18451194893213185, -0.2629665470348457, 0.36380699955097695, 0.2762298083541519, 0.8264334555626198, -0.022207373606218495, -0.32224911623004626, -0.18947254078026798, 0.33627343422225175, 0.6906306880901341, -0.5248865356053838, -0.8976978225060646, -0.9198989266658277, -0.9045058048590318, -0.43074279628622225, 0.9599523380525761, 0.16694571818827875, 0.08638717900194992, 0.24369341180939874, -0.29293980835779454, 0.13980998987643733, -0.9103052978285509, 0.9109674748745353, -0.6189652187256851, -0.30507868365416413, -0.4232217216255978, 0.34784431052206877, -0.8235167119697908, 0.1565512568825982, -0.11476153735499195, -0.5476852944817927, -0.9695366885614041, 0.31387227761880165, -0.8460727492314095, 0.5313339961520958, 0.5605009436841186, 0.04504755045556719, -0.10937916620725119, -0.40867992424849797, -0.9148814576758182, 0.41260731002228, 0.6535850987782705, -0.3956136730481463, 0.03633719317271722, -0.26520169024611917, -0.39307279913859916, 0.8389708129910836, -0.10965192030153337, -0.8114479506343715, 0.6624055258346568, -0.12364857684372677, -0.3391386034226034, 0.5064344415363975, 0.4222558794792024, -0.8920802019539475, 0.8403881748708741, -0.5144930020007417, -0.3961429483392995, -0.9112376538340263, 0.5369991550001529, 0.4099994212177125, 0.8971702224538953, -0.07250674251100442, -0.4123232887614461, -0.4122138364547645, 0.30115503935936516, 0.9140832812087094, -0.37996517983025035, 0.45766194212423583, 0.8778668278803266, -0.871373882496363, 0.9061603981794313, 
-0.4815792838295849, -0.3540250825062252, 0.47058280496548677, 0.6353307464139133, -0.9084299203157564, 0.32569503818833767, -0.5917177728092791, 0.017982667746413883, -0.39657854384311597, 0.30240291420731147, -0.8789617636583977, 0.398601970442066, -0.9537566407528597, -0.7326801366509474, 0.6394091009367926, -0.24018952260048332, -0.4410443985541457, -0.715250103875068, -0.9531170489995859, 0.8907413230296786, -0.6270483513933209, -0.1278281545077713, 0.6205668124687644, -0.5880492136441298, 0.8458960227498347, 0.5156432304509859, -0.41522707199863196, -0.9971627462302537, 0.967570980171752, -0.1258013547750596, -0.3920054384667395, -0.7579953976551077, -0.5047276085442098, -0.742917134758996, 0.307776046578512, 0.33240724082891204, -0.12439712701067074, 0.8297068611891512, 0.9092972699438713, -0.5553533790744807, -0.9327632085647035, 0.4797798607215402, -0.6407284323825371, 0.23503537288803233, 0.7356444783186646, 0.550461677629142, -0.8859356421536595, -0.06157466053719496, 0.2628024780598055, -0.14515603184459613, -0.9382781600128365, -0.9076306357777459, -0.5661586668239169, -0.5778188698610502, -0.343591139945177, -0.9957519288956789, 3.652203366399931e-05, -0.2850434941249338, 0.9450784913510459, -0.7344049612004591, 0.3966551077940945, 0.9820403785569927, 0.7132254472780228, 0.04475455308790677, 0.7149662286904288, 0.30640286803677386, -0.11825818002978239, 0.9475071024012094, -0.4020573255284672, -0.25210492474829316, -0.9864930649895771, -0.3662338670933165, 0.6528806547589174, 0.23157758222346203, -0.5707934304014186, -0.12462852967839688, 0.1912875382350001, 0.9111205883142817, -0.7227638014501978, -0.36537014763125186, -0.37380198030841805, 0.4707867786085871, -0.5824192322860218, -0.47547092650542666, 0.7836345381645189, 0.7843678847969751, 0.6754328587362883, -0.6670404362153401, 0.7372872996570987, -0.8333262364813818, -0.41971949504499273, -0.7600660277081586, 0.22809249636551576, -0.8923092554006928, -0.28910705230462663, 0.17556387278264474, -0.3120642961908995, -0.08857040909612457, 0.9736924099705169, -0.6425732085916924, 0.5667862783362607, -0.45242262118684295, -0.3366537122702131, -0.21042580668493605, -0.969230642055972, -0.6986186588663355, -0.5420629464988849, 0.8012632695329027, 0.10364503122371205, -0.8288649738571241, -0.7488901002163446, -0.2086447971105505, 0.24528530567671103, -0.1194706644737491, -0.4487125509839567, 0.19757079065420702, 0.9701391397770309, 0.6918580324259651, -0.6609864495230626, -0.5767397650124655, 0.13274852903677803, 0.45790899492650117, 0.6156249211932037, -0.5400854790245104, -0.4871335994554471, -0.37124459518957686, -0.9740961061020355, 0.8132186161153883, 0.5432742278375737, -0.7555629992450097, -0.3626273029276168, 0.3273351801156006, 0.2950481130490956, 0.5899713501222568, 0.1290258276325824, 0.14809153246329188, -0.8527458869128903, -0.45135237009997664, -0.78966354981686, -0.9869505409499153, 0.5440922045096472, -0.5065478252374527, 0.8914118613097968, -0.7009799840752231, -0.37720301784400667, -0.1990418958793818, 0.07895118490326825, 0.43246496862820827, 0.06871630683294172, 0.04584623777009278, -0.34229499350310455, 0.9387219959330184, -0.5381844165951264, 0.4794422861285379, 0.8534951958829573, 0.5734335942167272, -0.85412829706822, -0.7352963908032732, -0.12895000820916747, -0.22552570725823173, -0.5976878733463429, -0.32791035485443487, 0.7202059113861725, 0.39099290295132905, 0.30525825694263764, -0.2266469266742548, -0.03379388729241706, -0.5954645444941691, -0.02422270847921526, 0.2367051711225363, 
0.0254309367030352, -0.8571941247598263, 0.6036464885617703, 0.780145197998714, -0.18486284139078912, -0.4861368589284454, -0.2789831003703762, -0.695370188724934, 0.20748300875047643, 0.613995882433769, -0.20040817194169125, 0.8373240273873666, 0.6138944053316708, -0.7863205352137852, -0.7823411702718377, 0.79906295867358, -0.5467331800231525, -0.6344655458958364, -0.9818941753091346, 0.5525644258030062, 0.6262889073747209, 0.9963129049354384, -0.6272737000603017, -0.2716262931036606, 0.2096677033434846, -0.6982262682600213, -0.5674210473085657, 0.24902399542030595, -0.5657568018493333, 0.08618618872017958, 0.5489764282591345, -0.8941510222698827, 0.41351613826944567, -0.5112980841262675, 0.4470615015729351, -0.20725162805621333, -0.08479642143543553, -0.1278591923549064, -0.4999896814124227, 0.9888904679503661, -0.048462424602504495, -0.7019088972627803, 0.24200967459107448, -0.07080934919496995, -0.7205222066189325, 0.8569714457890816, -0.16535406501060956, -0.6995151061411666, -0.002471197183836038, 0.36657456718336245, -0.21418945415378254, 0.8960422717208372, -0.8112144998402944, 0.3367368342692487, -0.1409734233274329, 0.9270438056838188, 0.6449085435355675, -0.42063510394970094, -0.5514753035609532, -0.7824719546926855, 0.27064161179409774, 0.7610801292513893, 0.041332375564573365, -0.4938906089444197, 0.6565606828711339, -0.8175201877660032, -0.7145428710506601, 0.5266689558422335, -0.36373337569732045, -0.4295940430516798, 0.6614123405581125, -0.5795867768963181, 0.09683447902632913, -0.7233160622088481, -0.035259383881968365, 0.44407987368431834, 0.5080824859277744, -0.025605597564321236, -0.33746311986945, 0.8643101724003239, -0.6590382567793307, 0.11251953056040387, -0.5283365207737802, 0.8881578952123139, -0.9796498715072419, -0.8206325632112821, -0.5431772730915239, -0.09628735573638458, 0.8509192593020449, 0.6468967965920123, -0.5886852895684587, -0.25974684548008664, 0.4474352123365879, -0.2199845691372495, 0.7554317108927318, 0.9809450136647395, -0.9430090133566618, 0.23635288316941683]" - http_proxy='' curl --noproxy $ip_address http://${ip_address}:$retriever_port/v1/retrieval \ + result=$(http_proxy='' curl --noproxy $ip_address http://${ip_address}:$retriever_port/v1/retrieval \ -X POST \ -d "{\"text\":\"test\",\"embedding\":${test_embedding}}" \ - -H 'Content-Type: application/json' - docker logs test-comps-retriever-pinecone-server - docker logs test-comps-retriever-tei-endpoint + -H 'Content-Type: application/json') + if [[ $result == *"retrieved_docs"* ]]; then + echo "Result correct." + else + echo "Result wrong. Received was $result" + docker logs test-comps-retriever-pinecone-server + docker logs test-comps-retriever-tei-endpoint + exit 1 + fi } function stop_docker() { diff --git a/tests/test_retrievers_langchain_redis.sh b/tests/test_retrievers_langchain_redis.sh index 00ad4c761..9d367ce25 100644 --- a/tests/test_retrievers_langchain_redis.sh +++ b/tests/test_retrievers_langchain_redis.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -xe +set -x WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" @@ -11,6 +11,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/retriever-redis:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/langchain/redis/docker/Dockerfile . + if [ $? 
-ne 0 ]; then + echo "opea/retriever-redis build failed" + exit 1 + else + echo "opea/retriever-redis built successfully" + fi } function start_service() { diff --git a/tests/test_tts.sh b/tests/test_tts.sh index 1982271fa..e4039d956 100644 --- a/tests/test_tts.sh +++ b/tests/test_tts.sh @@ -10,19 +10,31 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build -t opea/speecht5:comps -f comps/tts/speecht5/Dockerfile . - docker build -t opea/tts:comps -f comps/tts/Dockerfile . + docker build --no-cache -t opea/speecht5:comps -f comps/tts/speecht5/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/speecht5 build failed" + exit 1 + else + echo "opea/speecht5 built successfully" + fi + docker build --no-cache -t opea/tts:comps -f comps/tts/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/tts build failed" + exit 1 + else + echo "opea/tts built successfully" + fi } function start_service() { unset http_proxy - docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7055:7055 --ipc=host opea/speecht5:comps - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:7055 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9088:9088 --ipc=host opea/tts:comps + docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5:comps + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps sleep 3m } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') + result=$(http_proxy="" curl http://localhost:5016/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') if [[ $result == *"Ukl"* ]]; then echo "Result correct." else diff --git a/tests/test_vectorstores_langchain_milvus.sh b/tests/test_vectorstores_langchain_milvus.sh new file mode 100644 index 000000000..86124baa5 --- /dev/null +++ b/tests/test_vectorstores_langchain_milvus.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + + +function start_service() { + cd $WORKPATH/comps/vectorstores/langchain/milvus + rm -rf volumes/ + + docker compose up -d + + sleep 60s +} + +function validate_vectorstore() { + PORT="19530" + COLLECTION_NAME="test_col" + + # test create collection + echo "[ test create ] creating collection.." + create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}") + echo $create_response >> ${LOG_PATH}/milvus_create_col.log + if [[ $(echo $create_response | grep '{"code":200') ]]; then + echo "[ test create ] create collection succeed" + else + echo "[ test create ] create collection failed" + docker logs milvus-standalone + exit 1 + fi + + # test insert data + echo "[ test insert ] inserting data.."
+ insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }") + echo $insert_response >> ${LOG_PATH}/milvus_insert_data.log + if [[ $(echo $insert_response | grep '{"code":200,"data":{"insertCount":1') ]]; then + echo "[ test insert ] insert data succeed" + else + echo "[ test insert ] insert data failed" + docker logs milvus-standalone + exit 1 + fi + + # test search data + echo "[ test search ] searching data.." + search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }") + echo $search_response>> ${LOG_PATH}/milvus_search_data.log + if [[ $(echo $search_response | grep '{"code":200,"data":') ]]; then + echo "[ test search ] search data succeed" + else + echo "[ test search ] search data failed" + docker logs milvus-standalone + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=milvus-*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + start_service + + validate_vectorstore + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_vectorstores_pgvector.sh b/tests/test_vectorstores_pgvector.sh deleted file mode 100755 index 1b43a6930..000000000 --- a/tests/test_vectorstores_pgvector.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') -function build_docker_images() { - cd $WORKPATH - - # piull pgvector image - docker pull pgvector/pgvector:0.7.0-pg16 - - # build dataprep image for pgvector - docker build -t opea/dataprep-pgvector:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f $WORKPATH/comps/dataprep/pgvector/langchain/docker/Dockerfile . -} - -function start_service() { - export POSTGRES_USER=testuser - export POSTGRES_PASSWORD=testpwd - export POSTGRES_DB=vectordb - - docker run --name vectorstore-postgres -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5432:5432 -d -v $WORKPATH/comps/vectorstores/langchain/pgvector/init.sql:/docker-entrypoint-initdb.d/init.sql pgvector/pgvector:0.7.0-pg16 - - sleep 10s - - docker run -d --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@$ip_address:5432/${POSTGRES_DB} opea/dataprep-pgvector:latest -} - -function validate_microservice() { - url="http://$ip_address:6007/v1/dataprep" - touch $WORKPATH/tests/test.txt - echo 'The OPEA platform includes: Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines' > $WORKPATH/tests/test.txt - - curl --location --request POST "${url}" \ - --form 'files=@"'${WORKPATH}'/tests/test.txt"' \ - --proxy http://proxy-chain.intel.com:912 -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=vectorstore-postgres*") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - - cid=$(docker ps -aq --filter "name=dataprep-pgvector*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - #stop_docker - #echo y | docker system prune - -} - -main diff --git a/tests/test_web_retrievers_langchain_chroma.sh b/tests/test_web_retrievers_langchain_chroma.sh index 288d4fe6b..c9e20aa08 100644 --- a/tests/test_web_retrievers_langchain_chroma.sh +++ b/tests/test_web_retrievers_langchain_chroma.sh @@ -9,6 +9,12 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH docker build --no-cache -t opea/web-retriever-chroma:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/web_retrievers/langchain/chroma/docker/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/web-retriever-chroma built fail" + exit 1 + else + echo "opea/web-retriever-chroma built successful" + fi } function start_service() {