Merge pull request #84 from CambioML/jojo-branch
Add Dockerfile for huggingface demo
jojortz authored Nov 20, 2023
2 parents cac4788 + 544a605 commit ee99d85
Showing 6 changed files with 74 additions and 23 deletions.
37 changes: 37 additions & 0 deletions Dockerfile
@@ -0,0 +1,37 @@
# Use a Python base image
FROM python:3.10

# Set working directory
WORKDIR /app/

# Install necessary system packages and clean up
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry
RUN pip install poetry

# Copy only necessary project files into the container
COPY pyproject.toml poetry.lock /app/


# Install project dependencies using Poetry
RUN poetry config virtualenvs.create false \
    && poetry install --no-root --extras "rag huggingface" \
    && pip uninstall -y torch \
    && pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 \
    && rm -rf /root/.cache/


# Copy the project files into the container
COPY example /app/example
COPY pykoi /app/pykoi

ENV RETRIEVAL_MODEL=databricks/dolly-v2-3b

# Set entrypoint to run your command
CMD ["python", "-u", "-m", "example.retrieval_qa.retrieval_qa_huggingface_demo"]
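For local testing, the image above can be built and started like any other Docker image. This is a sketch only: the `pykoi-hf-demo` tag is an illustrative name, not one used by the project.

```shell
# Build the image from the repository root (tag name is illustrative).
docker build -t pykoi-hf-demo .

# Run it: publish the app's port 5000, set the retrieval model, and
# expose the GPUs that the nightly cu121 torch build expects.
docker run -d -p 5000:5000 \
    -e RETRIEVAL_MODEL=databricks/dolly-v2-3b \
    --gpus all \
    --name pykoi-hf-demo \
    pykoi-hf-demo
```

Omitting `-e RETRIEVAL_MODEL=...` here falls back to the `ENV RETRIEVAL_MODEL` default baked into the image.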

37 changes: 19 additions & 18 deletions docker/README.md
@@ -1,18 +1,3 @@
# Overview
This folder contains the Dockerfiles for using pykoi.

1. `pykoi-cpu`: the base image for CPU-based usage.
2. `pykoi-cpu-custom`: to customize this image, modify `app.py` and mount it into the container at runtime.

To run a docker container, use the following command:
```bash
docker run -dp 5000:5000 -v $(pwd)/app.py:/app/app.py \
--name alex_test \
pykoi/pykoi-cpu:app
```

Note that the exposed container port must stay at 5000 (the default) for the server to work.

# Launching Docker Hub `pykoi` Image
You can also launch a `pykoi` image that has been released by `CambioML` on [Docker Hub](https://hub.docker.com/u/cambioml).

@@ -123,16 +108,17 @@ docker pull cambioml/pykoi
### Running the Docker Image
To run the Docker image, you can use the following command, with different options depending on which repository you are running.
```
-docker run -d -e [ENV_NAME]=[ENV_VALUE] -p 5000:5000 --name [CUSTOM_CONTAINER_NAME] [DOCKER_REPO_NAME]:[TAG]
+docker run -d -e [ENV_VAR_NAME]=[ENV_VAR_VALUE] -p 5000:5000 --gpus [NUM_GPUS] --name [CUSTOM_CONTAINER_NAME] [DOCKER_REPO_NAME]:[TAG]
```
- `-d`: runs the container in the background
- `-e`: sets any environment variables to pass to the container
- `-p`: sets the port binding. `CambioML` images default to port 5000
- `--gpus`: sets the number of GPUs to use
- `--name`: a custom name for your container. If you don't specify one, Docker generates a random name; setting your own makes later commands easier to run

For example, here is a command to run `cambioml/pykoi` version `0.1_ec2_linux`.
```
-docker run -d -e RETRIEVAL_MODEL=mistralai/Mistral-7B-v0.1 -p 5000:5000 --name pykoi_test cambioml/pykoi:0.1_ec2_linux
+docker run -d -e RETRIEVAL_MODEL=mistralai/Mistral-7B-v0.1 -p 5000:5000 --gpus all --name pykoi_test cambioml/pykoi:0.1_ec2_linux
```

If you are running the container in the background with the `-d` flag, you can check the logs using the following command:
@@ -159,4 +145,19 @@ docker stop [CONTAINER_NAME]
To delete a container, run the following command.
```
docker rm [CONTAINER_NAME]
-```
+```

# Building Custom Docker Images
This folder contains the Dockerfiles for using pykoi.

1. `pykoi-cpu`: the base image for CPU-based usage.
2. `pykoi-cpu-custom`: to customize this image, modify `app.py` and mount it into the container at runtime.

To run a docker container, use the following command:
```bash
docker run -dp 5000:5000 -v $(pwd)/app.py:/app/app.py \
--name alex_test \
pykoi/pykoi-cpu:app
```

Note that the exposed container port must stay at 5000 (the default) for the server to work.
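Only the container side of that mapping is fixed: the host side of `-p` can be remapped freely. A sketch (the `my_pykoi` container name is illustrative):

```shell
# Host port 8080 forwards to the app's fixed port 5000 inside the container.
docker run -dp 8080:5000 -v $(pwd)/app.py:/app/app.py \
    --name my_pykoi \
    pykoi/pykoi-cpu:app
```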
16 changes: 14 additions & 2 deletions example/retrieval_qa/retrieval_qa_huggingface_demo.py
@@ -1,4 +1,7 @@
-"""Demo for the retrieval_qa application."""
+"""Demo for the retrieval_qa application.
+python -m example.retrieval_qa.retrieval_qa_huggingface_demo
+"""

import os
import argparse
@@ -7,6 +10,13 @@
from pykoi.retrieval import RetrievalFactory
from pykoi.retrieval import VectorDbFactory
from pykoi.component import Chatbot, Dashboard, RetrievalQA
+from dotenv import load_dotenv
+
+# NOTE: Configure your retrieval model as RETRIEVAL_MODEL in .env file.
+# Load environment variables from .env file
+load_dotenv()
+
+RETRIEVAL_MODEL = os.getenv("RETRIEVAL_MODEL")


def main(**kwargs):
@@ -27,10 +37,11 @@ def main(**kwargs):
)

    # retrieval model with vector database
+   print("model", RETRIEVAL_MODEL)
    retrieval_model = RetrievalFactory.create(
        model_source=MODEL_SOURCE,
        vector_db=vector_db,
-       model_name="databricks/dolly-v2-3b",
+       model_name=RETRIEVAL_MODEL,
        trust_remote_code=True,
        max_length=1000
    )
@@ -48,6 +59,7 @@ def main(**kwargs):
    app.add_component(retriever)
    app.add_component(chatbot)
    app.add_component(dashboard)
+   print("RUNNING APP IN DEMO MODE")
    app.run()


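The environment lookup added in this demo returns `None` when `RETRIEVAL_MODEL` is unset, and that `None` is then passed as `model_name`. A minimal sketch of a fallback-friendly variant (the helper name is hypothetical, not part of pykoi):

```python
import os

# Hypothetical helper mirroring the demo's lookup, with a fallback so the
# demo still starts when no .env file sets RETRIEVAL_MODEL.
def get_retrieval_model(default="databricks/dolly-v2-3b"):
    return os.getenv("RETRIEVAL_MODEL", default)

os.environ.pop("RETRIEVAL_MODEL", None)
print(get_retrieval_model())          # the baked-in default
os.environ["RETRIEVAL_MODEL"] = "mistralai/Mistral-7B-v0.1"
print(get_retrieval_model())          # the environment override
```

The default here matches the `ENV RETRIEVAL_MODEL` value the Dockerfile sets, so container and bare-metal runs would agree.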
2 changes: 1 addition & 1 deletion pykoi/application.py
@@ -97,7 +97,7 @@ def __init__(
        debug: bool = False,
        username: Union[None, str, List] = None,
        password: Union[None, str, List] = None,
-       host: str = "127.0.0.1",
+       host: str = "0.0.0.0",
        port: int = 5000,
        enable_telemetry: bool = True,
    ):
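The switch from `127.0.0.1` to `0.0.0.0` is what makes the server reachable through Docker's published port: a loopback-only bind accepts no connections from outside the container. A minimal sketch of an all-interfaces bind:

```python
import socket

# Bind to all interfaces, as the containerized server must; port 0 lets
# the OS pick a free port so the sketch never collides with a service.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("0.0.0.0", 0))
host, port = s.getsockname()
s.close()
print(host)  # 0.0.0.0
```

With a `127.0.0.1` bind instead, Docker's `-p 5000:5000` forwarding would connect but be refused by the app.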
3 changes: 2 additions & 1 deletion pykoi/retrieval/llm/huggingface.py
@@ -29,7 +29,8 @@ def __init__(self, vector_db: AbsVectorDb, **kwargs):
            model_kwargs={
                "temperature": 0,
                "max_length": kwargs.get("max_length", 500),
-               "trust_remote_code": kwargs.get("trust_remote_code", True),
+               "load_in_8bit": True,
+               "trust_remote_code": kwargs.get("trust_remote_code", True)
            },
        )

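The `model_kwargs` block above follows a caller-overrides-defaults pattern: explicit keyword arguments win, otherwise defaults apply, and `load_in_8bit` is now always on. A standalone sketch (the function name is illustrative, not pykoi API):

```python
# Illustrative sketch of the caller-overrides-defaults pattern used for
# model_kwargs above.
def build_model_kwargs(**kwargs):
    return {
        "temperature": 0,
        "max_length": kwargs.get("max_length", 500),
        "load_in_8bit": True,
        "trust_remote_code": kwargs.get("trust_remote_code", True),
    }

print(build_model_kwargs(max_length=1000)["max_length"])  # 1000
print(build_model_kwargs()["max_length"])                 # 500
```

This is how the demo's `max_length=1000` reaches the model while other callers keep the 500 default.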
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -22,7 +22,7 @@ pynvml = "11.5.0"
pandas = "2.0.3"
python-dotenv = "1.0.0"

-transformers = { version = "4.31.0", optional = true }
+transformers = { version = "4.35.0", optional = true }
einops = { version = "0.6.1", optional = true }
accelerate = { version = "0.21.0", optional = true }
bitsandbytes = { version = "0.40.2", optional = true }
