forked from runpod-workers/worker-vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
69 lines (55 loc) · 2.05 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Base image
# The following docker base image is recommended by VLLM.
# Per its tag: PyTorch 2.0.1, Python 3.10, CUDA 11.8.0, "devel" variant.
FROM runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel
# Use bash with the pipefail option for all following shell-form instructions,
# so a failure in any stage of a pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Set the working directory to the filesystem root; relative destinations in
# later instructions (such as the copy of the src directory) resolve under /.
WORKDIR /
# Declared as ARG rather than ENV, so it exists only during the build and is
# not part of the runtime environment.
# NOTE(review): presumably consumed by package installation inside setup.sh,
# which is not visible here - confirm.
ARG DEBIAN_FRONTEND=noninteractive
# Replace the torch build shipped with the base image by torch 2.0.1, using the
# CUDA 11.8 wheel listing as an additional link source (-f / --find-links).
# Both steps run in one RUN so they produce a single layer, and --no-cache-dir
# keeps pip's download cache out of that layer.
RUN pip uninstall -y torch && \
    pip install --no-cache-dir torch==2.0.1 -f https://download.pytorch.org/whl/cu118
# Run the builder setup script, then delete it so it is absent from the final
# filesystem. COPY --chmod marks the script executable at copy time, which
# removes the need for a separate chmod command (requires BuildKit, which this
# file already depends on for RUN --mount).
COPY --chmod=755 builder/setup.sh /setup.sh
RUN /setup.sh && \
    rm /setup.sh
# Install FastAPI, pinned to 0.99.1.
# The pip cache lives in a BuildKit cache mount: it speeds up rebuilds and is
# never written into an image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install fastapi==0.99.1
# Install Python dependencies (Worker Template).
# --no-cache-dir is intentionally NOT passed here: it would disable the very
# cache that the mount below provides. The requirements file is removed once
# the installation has finished.
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --upgrade pip && \
    pip install --upgrade -r /requirements.txt && \
    rm /requirements.txt
# Add src files (Worker Template).
# COPY is used instead of ADD: this is a plain local directory copy and needs
# neither archive extraction nor URL fetching. The "." destination resolves
# against the current WORKDIR.
COPY src .
# Quick temporary updates: install runpod-python from git at ref "a1".
# --no-cache-dir keeps pip's cache out of the layer; --compile byte-compiles
# the installed sources.
RUN pip install --no-cache-dir git+https://github.com/runpod/runpod-python@a1#egg=runpod --compile
# Build arguments used to prepare the model inside the docker image.
# Defaults: empty token, empty model name, revision "main", base path
# "/runpod-volume/", empty tokenizer, empty streaming flag.
ARG HUGGING_FACE_HUB_TOKEN=
ARG MODEL_NAME=""
ARG MODEL_REVISION="main"
ARG MODEL_BASE_PATH="/runpod-volume/"
ARG TOKENIZER=
ARG STREAMING=
# Re-export every build argument as an environment variable of the same name,
# so the values are visible to later build steps and to the running container.
# NOTE(review): HUGGING_FACE_HUB_TOKEN passes through both ARG and ENV, so a
# token supplied at build time ends up in the image metadata. Consider a
# BuildKit secret mount if the image is ever published.
ENV HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN} \
    MODEL_NAME=${MODEL_NAME} \
    MODEL_REVISION=${MODEL_REVISION} \
    MODEL_BASE_PATH=${MODEL_BASE_PATH} \
    TOKENIZER=${TOKENIZER} \
    STREAMING=${STREAMING}
# Hugging Face cache locations, all under /runpod-volume/huggingface-cache.
ENV HF_DATASETS_CACHE="/runpod-volume/huggingface-cache/datasets" \
    HUGGINGFACE_HUB_CACHE="/runpod-volume/huggingface-cache/hub" \
    TRANSFORMERS_CACHE="/runpod-volume/huggingface-cache/hub"
# Download the models.
# Create /model and run the download script in a single layer.
# MODEL_NAME, MODEL_REVISION, MODEL_BASE_PATH and HUGGING_FACE_HUB_TOKEN are
# already exported by ENV instructions earlier in this file, so they are not
# declared a second time here.
# NOTE(review): download_model.py is presumably the consumer of those
# variables; the script is not visible from this file - confirm.
RUN mkdir -p /model && \
    python -u /download_model.py
# Start the handler.
# Exec (JSON-array) form runs python directly instead of under a wrapping
# shell, so the handler receives stop signals itself. STREAMING, MODEL_NAME,
# MODEL_BASE_PATH and TOKENIZER are already in the container environment via
# the ENV instructions earlier in this file (and can still be overridden with
# `docker run -e`), so they do not need to be re-assigned on the command line.
CMD ["python", "-u", "/handler.py"]