diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..919a07d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +# Use a Python base image +FROM python:3.10 + +# Set working directory +WORKDIR /app/ + +# Install necessary system packages and clean up +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Install Poetry +RUN pip install poetry + +# Copy only necessary project files into the container +COPY pyproject.toml poetry.lock /app/ + + +# Install project dependencies using Poetry +RUN poetry config virtualenvs.create false \ + && poetry install --no-root --extras "rag huggingface" \ + && pip uninstall -y torch \ + && pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 \ + && rm -rf /root/.cache/ + + +# Copy the project files into the container +COPY example /app/example +COPY pykoi /app/pykoi + +ENV RETRIEVAL_MODEL=databricks/dolly-v2-3b + +# Set entrypoint to run your command +CMD ["python", "-u", "-m", "example.retrieval_qa.retrieval_qa_huggingface_demo"] + diff --git a/docker/README.md b/docker/README.md index 6399397..01c77ee 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,18 +1,3 @@ -# Overview -In this folder, we create the different dockerfiles for using pykoi. - -1. `pykoi-cpu`: The base image for the cpu-based usage. -2. `pykoi-cpu-custom`: When you run this docker image, try to modify the `app.py` and mount it when running the docker container. - -To run a docker container, we can use the following command: -```bash -docker run -dp 5000:5000 -v $(pwd)/app.py:/app/app.py \ - --name alex_test \ - pykoi/pykoi-cpu:app -``` - -Note that we need to keep the exposed port as 5000 (default value) to make the server work. - # Launching Docker Hub `pykoi` Image You can also launch a `pykoi` image that has been released by `CambioML` on [Docker Hub](https://hub.docker.com/u/cambioml). 
@@ -123,16 +108,17 @@ docker pull cambioml/pykoi ### Running the Docker Image To run the Docker image, you can use the following command, with different options depending on which repository you are running. ``` -docker run -d -e [ENV_NAME]=[ENV_VALUE] -p 5000:5000 --name [CUSTOM_CONTAINER_NAME] [DOCKER_REPO_NAME]:[TAG] +docker run -d -e [ENV_VAR_NAME]=[ENV_VAR_VALUE] -p 5000:5000 --gpus [NUM_GPUS] --name [CUSTOM_CONTAINER_NAME] [DOCKER_REPO_NAME]:[TAG] ``` - `-d`: specifies to run the container in the background - `-e`: specifies any environment variables to use - `-p`: specifies the port binding. Default `CambioML` is to use port 5000 +- `--gpus`: specifies the number of GPUs to use. - `--name`: A custom name for your container. If you don't specify, Docker will randomly generate one. It's best to create one so it's easy to remember to use for commands. For example, here is a command to run `cambioml\pykoi` version `0.1_ec2_linux`. ``` -docker run -d -e RETRIEVAL_MODEL=mistralai/Mistral-7B-v0.1 -p 5000:5000 --name pykoi_test cambioml/pykoi:0.1_ec2_linux +docker run -d -e RETRIEVAL_MODEL=mistralai/Mistral-7B-v0.1 -p 5000:5000 --gpus all --name pykoi_test cambioml/pykoi:0.1_ec2_linux ``` If you are running it in the background, with a `-d` tag, you can check the logs using the following command: @@ -159,4 +145,19 @@ docker stop [CONTAINER_NAME] To delete a container, run the following command. ``` docker rm [CONTAINER_NAME] -``` \ No newline at end of file +``` + +# Building Custom Docker Images +In this folder, we create the different dockerfiles for using pykoi. + +1. `pykoi-cpu`: The base image for the cpu-based usage. +2. `pykoi-cpu-custom`: When you run this docker image, try to modify the `app.py` and mount it when running the docker container. 
+ +To run a docker container, we can use the following command: +```bash +docker run -dp 5000:5000 -v $(pwd)/app.py:/app/app.py \ + --name alex_test \ + pykoi/pykoi-cpu:app +``` + +Note that we need to keep the exposed port as 5000 (default value) to make the server work. \ No newline at end of file diff --git a/example/retrieval_qa/retrieval_qa_huggingface_demo.py b/example/retrieval_qa/retrieval_qa_huggingface_demo.py index 9de0e8a..42fad58 100644 --- a/example/retrieval_qa/retrieval_qa_huggingface_demo.py +++ b/example/retrieval_qa/retrieval_qa_huggingface_demo.py @@ -1,4 +1,7 @@ -"""Demo for the retrieval_qa application.""" +"""Demo for the retrieval_qa application. + +python -m example.retrieval_qa.retrieval_qa_huggingface_demo +""" import os import argparse @@ -7,6 +10,13 @@ from pykoi.retrieval import RetrievalFactory from pykoi.retrieval import VectorDbFactory from pykoi.component import Chatbot, Dashboard, RetrievalQA +from dotenv import load_dotenv + +# NOTE: Configure your retrieval model as RETRIEVAL_MODEL in .env file. 
+# Load environment variables from .env file +load_dotenv() + +RETRIEVAL_MODEL = os.getenv("RETRIEVAL_MODEL") def main(**kwargs): @@ -27,10 +37,11 @@ def main(**kwargs): ) # retrieval model with vector database + print("model", RETRIEVAL_MODEL) retrieval_model = RetrievalFactory.create( model_source=MODEL_SOURCE, vector_db=vector_db, - model_name="databricks/dolly-v2-3b", + model_name=RETRIEVAL_MODEL, trust_remote_code=True, max_length=1000 ) @@ -48,6 +59,7 @@ def main(**kwargs): app.add_component(retriever) app.add_component(chatbot) app.add_component(dashboard) + print("RUNNING APP IN DEMO MODE") app.run() diff --git a/pykoi/application.py b/pykoi/application.py index 238b0ac..8da4b03 100644 --- a/pykoi/application.py +++ b/pykoi/application.py @@ -97,7 +97,7 @@ def __init__( debug: bool = False, username: Union[None, str, List] = None, password: Union[None, str, List] = None, - host: str = "127.0.0.1", + host: str = "0.0.0.0", port: int = 5000, enable_telemetry: bool = True, ): diff --git a/pykoi/retrieval/llm/huggingface.py b/pykoi/retrieval/llm/huggingface.py index ea164d2..48dad8b 100644 --- a/pykoi/retrieval/llm/huggingface.py +++ b/pykoi/retrieval/llm/huggingface.py @@ -29,7 +29,8 @@ def __init__(self, vector_db: AbsVectorDb, **kwargs): model_kwargs={ "temperature": 0, "max_length": kwargs.get("max_length", 500), - "trust_remote_code": kwargs.get("trust_remote_code", True), + "load_in_8bit": True, + "trust_remote_code": kwargs.get("trust_remote_code", True) }, ) diff --git a/pyproject.toml b/pyproject.toml index f1f66a1..a32a889 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ pynvml = "11.5.0" pandas = "2.0.3" python-dotenv = "1.0.0" -transformers = { version = "4.31.0", optional = true } +transformers = { version = "4.35.0", optional = true } einops = { version = "0.6.1", optional = true } accelerate = { version = "0.21.0", optional = true } bitsandbytes = { version = "0.40.2", optional = true }