forked from intel/intel-technology-enabling-for-openshift
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vllm gaudi ubi image based on PR HabanaAI/vllm-fork#602 Signed-off-by: vbedida79 <[email protected]>
- Loading branch information
Showing
4 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
# Copyright (c) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# ImageStream that receives the built vLLM Gaudi image (pushed by the
# BuildConfig below as vllm-workload:latest).
apiVersion: image.openshift.io/v1
kind: ImageStream
metadata:
  name: vllm-workload
  namespace: gaudi-validation
spec: {}
---
# BuildConfig with an inline multi-stage Dockerfile that builds vLLM for
# Intel Gaudi (HPU) on top of the Habana PyTorch base image.
# NOTE(review): the Dockerfile uses `RUN --mount=...` (cache/bind mounts),
# which requires a BuildKit-capable builder (buildah on OpenShift supports
# this) — confirm on older cluster versions.
apiVersion: build.openshift.io/v1
kind: BuildConfig
metadata:
  name: vllm-workload
  namespace: gaudi-validation
spec:
  triggers:
    - type: "ConfigChange"
    - type: "ImageChange"
  runPolicy: "Serial"
  source:
    type: Dockerfile
    dockerfile: |
      ARG BASE_IMAGE=vault.habana.ai/gaudi-docker/1.18.0/rhel9.4/habanalabs/pytorch-installer-2.4.0:1.18.0-524
      FROM ${BASE_IMAGE} AS habana-base

      USER root

      ENV VLLM_TARGET_DEVICE="hpu"
      ENV HABANA_SOFTWARE_VERSION="1.18.0-524"

      RUN dnf -y update --best --allowerasing --skip-broken && dnf clean all

      WORKDIR /workspace

      ## Python Installer #################################################################
      FROM habana-base AS python-install

      ARG PYTHON_VERSION=3.11
      ENV VIRTUAL_ENV=/opt/vllm
      ENV PATH="$VIRTUAL_ENV/bin:$PATH"
      # venv uses --system-site-packages so the Habana-provided torch stack
      # from the base image stays visible inside the virtualenv.
      RUN dnf install -y --setopt=install_weak_deps=0 --nodocs \
          python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
          python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && pip install --no-cache -U pip wheel && dnf clean all

      ## Python Habana base #################################################################
      FROM python-install AS python-habana-base

      ENV VIRTUAL_ENV=/opt/vllm
      ENV PATH="$VIRTUAL_ENV/bin:$PATH"

      # install Habana Software and common dependencies
      RUN --mount=type=cache,target=/root/.cache/pip \
          --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
          --mount=type=bind,source=requirements-hpu.txt,target=requirements-hpu.txt \
          pip install \
          -r requirements-hpu.txt

      ## Builder #####################################################################
      FROM python-habana-base AS build

      # copy input files
      COPY csrc csrc
      COPY setup.py setup.py
      COPY cmake cmake
      COPY CMakeLists.txt CMakeLists.txt
      COPY requirements-common.txt requirements-common.txt
      COPY requirements-hpu.txt requirements-hpu.txt
      COPY pyproject.toml pyproject.toml

      # max jobs used by Ninja to build extensions
      ARG max_jobs=2
      ENV MAX_JOBS=${max_jobs}

      # # make sure punica kernels are built (for LoRA)
      # HPU currently doesn't support LoRA
      # ENV VLLM_INSTALL_PUNICA_KERNELS=1

      # Copy the entire directory before building wheel
      COPY vllm vllm

      # .git is bind-mounted (not copied) so setuptools-scm can derive the
      # version without baking the repo history into a layer.
      ENV CCACHE_DIR=/root/.cache/ccache
      RUN --mount=type=cache,target=/root/.cache/ccache \
          --mount=type=cache,target=/root/.cache/pip \
          --mount=type=bind,src=.git,target=/workspace/.git \
          env CFLAGS="-march=haswell" \
          CXXFLAGS="$CFLAGS $CXXFLAGS" \
          CMAKE_BUILD_TYPE=Release \
          python3 setup.py bdist_wheel --dist-dir=dist

      ## Release #####################################################################
      FROM python-install AS vllm-openai

      WORKDIR /workspace

      ENV VIRTUAL_ENV=/opt/vllm
      ENV PATH=$VIRTUAL_ENV/bin/:$PATH

      # Triton needs a CC compiler
      RUN dnf install -y --setopt=install_weak_deps=0 --nodocs gcc \
          && dnf clean all

      # install vllm wheel first, so that torch etc will be installed
      RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
          --mount=type=cache,target=/root/.cache/pip \
          pip install $(echo dist/*.whl)'[tensorizer]' --verbose

      ENV HF_HUB_OFFLINE=1 \
          PORT=8000 \
          HOME=/home/vllm \
          VLLM_USAGE_SOURCE=production-docker-image

      # setup non-root user for OpenShift: gid 0 + group-writable dirs follow
      # the OpenShift arbitrary-UID convention.
      RUN umask 002 \
          && useradd --uid 2000 --gid 0 vllm \
          && chmod g+rwx $HOME /usr/src /workspace

      COPY LICENSE /licenses/vllm.md

      USER 2000
      ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

      ## gRPC adapter stage (default build target, being last) ######################
      FROM vllm-openai AS vllm-grpc-adapter

      USER root

      RUN --mount=type=cache,target=/root/.cache/pip \
          --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
          pip install $(echo dist/*.whl)'[tensorizer]' vllm-tgis-adapter==0.2.3

      # As an optimization, vLLM disables logprobs when using spec decoding by
      # default, but this would be unexpected to users of a hosted model that
      # happens to have spec decoding
      # see: https://github.com/vllm-project/vllm/pull/6485
      ENV GRPC_PORT=8033 \
          PORT=8000 \
          DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

      USER 2000
      ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
  strategy:
    type: Docker
    noCache: true
    dockerStrategy:
      buildArgs:
        - name: "BASE_IMAGE"
          value: "vault.habana.ai/gaudi-docker/1.18.0/rhel9.4/habanalabs/pytorch-installer-2.4.0:1.18.0-524"
  output:
    to:
      kind: ImageStreamTag
      name: vllm-workload:latest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
---
# PVC backing the Hugging Face model cache so model weights survive pod
# restarts (60Gi sized for Llama-3.1-8B-class checkpoints).
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: vllm-workload-pvc
  namespace: gaudi-validation
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 60Gi
  storageClassName: "" # Add your storage class
  volumeMode: Filesystem
---
# Deployment serving the model with the image built by the vllm-workload
# BuildConfig; requests one Gaudi accelerator via the habana.ai/gaudi
# extended resource.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-workload
  namespace: gaudi-validation
  labels:
    app: vllm-workload
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-workload
  template:
    metadata:
      labels:
        app: vllm-workload
    spec:
      containers:
        - name: vllm-container
          image: image-registry.openshift-image-registry.svc:5000/gaudi-validation/vllm-workload:latest
          command: [ "/bin/bash", "-c", "--" ]
          args: ["vllm serve meta-llama/Llama-3.1-8B"] # Add the model
          ports:
            - containerPort: 8000
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            # Token for gated models, supplied by the hf-token Secret.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token
                  key: hf-token
            - name: HF_HOME
              value: /home/vllm/.cache/huggingface
            # Image bakes HF_HUB_OFFLINE=1; re-enable downloads at runtime.
            - name: HF_HUB_OFFLINE
              value: "0"
          imagePullPolicy: Always
          volumeMounts:
            - name: hf-cache
              mountPath: /home/vllm/.cache
            - name: shm
              mountPath: /dev/shm
      volumes:
        - name: hf-cache
          persistentVolumeClaim:
            claimName: vllm-workload-pvc
        # Memory-backed /dev/shm for inter-process tensor sharing.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: "2Gi"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Copyright (c) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Hugging Face access token consumed by the vllm-workload Deployment
# (secretKeyRef name: hf-token, key: hf-token).
apiVersion: v1
kind: Secret
metadata:
  name: hf-token
  namespace: gaudi-validation
type: Opaque
data:
  # Values under `data:` must be base64-encoded, e.g.:
  #   echo -n '<your-hf-token>' | base64
  # (use `stringData:` instead if you want to paste the raw token)
  hf-token: # Add your token