diff --git a/README.md b/README.md
index 2b63352..ea3dd0b 100644
--- a/README.md
+++ b/README.md
@@ -44,13 +44,24 @@ python scripts/example_batch_request.py
 docker build -t llm-inference-server .
 ```
 
+### Run the S3 inference server
+
 ```bash
 docker run -d \
-  -e HUGGING_FACE_TOKEN=${HUGGING_FACE_TOKEN} \
   -e S3_ACCESS_KEY_ID=${S3_ACCESS_KEY_ID} \
   -e S3_SECRET_ACCESS_KEY=${S3_SECRET_ACCESS_KEY} \
   -e S3_ENDPOINT_URL=${S3_ENDPOINT_URL} \
-  -v my_docker_volume:/app/models \
+  -e INFERENCE_SERVER=llm_inference.s3_inference_server \
+  -p 8000:8000 \
+  llm-inference-server
+```
+
+### Run the HuggingFace inference server
+
+```bash
+docker run -d \
+  -e HUGGING_FACE_TOKEN=${HUGGING_FACE_TOKEN} \
+  -e INFERENCE_SERVER=llm_inference.hf_inference_server \
   -p 8000:8000 \
   llm-inference-server
 ```
diff --git a/dockerfile b/dockerfile
index 78d0c2a..fe4afe1 100644
--- a/dockerfile
+++ b/dockerfile
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_HOME="/opt/poetry" \
     POETRY_VENV="/opt/poetry-venv" \
     POETRY_CACHE_DIR="/opt/.cache" \
-    PYTHONPATH="/app:$PYTHONPATH"
+    PYTHONPATH="/app"
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
@@ -34,11 +34,14 @@ RUN poetry config virtualenvs.create false \
 COPY llm_inference /app/llm_inference
 COPY scripts /app/scripts
 
-# Create a volume for the models
-VOLUME /app/models
-
 # Expose port 8000 to the host
 EXPOSE 8000
 
+# Define a build-time argument with a default value
+ARG INFERENCE_SERVER=llm_inference.s3_inference_server
+
+# Set an environment variable using the argument
+ENV INFERENCE_SERVER=${INFERENCE_SERVER}
+
 # Run the application
+# 'exec' replaces the sh process so the Python server becomes PID 1 and
+# receives SIGTERM directly on 'docker stop' (graceful shutdown instead of
+# SIGKILL after the timeout). The expansion is quoted per shell best practice.
-CMD ["poetry", "run", "python", "-m", "llm_inference.s3_inference_server"]
+CMD ["sh", "-c", "exec poetry run python -m \"${INFERENCE_SERVER}\""]