diff --git a/.github/workflows/spdx-checker.yml b/.github/workflows/spdx-checker.yml new file mode 100644 index 0000000..4936c6a --- /dev/null +++ b/.github/workflows/spdx-checker.yml @@ -0,0 +1,49 @@ +name: SPDX Header Checker + +on: + workflow_dispatch: + workflow_call: + pull_request: + branches: + - "main" + types: + - opened + - reopened + - synchronize + - assigned + - review_requested + +jobs: + run-spdx-header-script: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5.0.0 + with: + python-version: "3.8" + + - name: Run SPDX Header Script + run: | + echo "Running SPDX header script on all files in the repository" + python ./scripts/add_spdx_header.py + + - name: Check for changes + run: | + git status + if git diff --quiet; then + echo "No changes detected." + exit 0 + else + echo "Changes detected, committing changes." + fi + + - name: Commit changes + if: success() + uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_user_name: SPDX-Bot + commit_user_email: bot@example.com + commit_message: "🚨✨AUTOMATED COMMIT | Added missing SPDX license headers automatically" + branch: ${{ github.head_ref }} + commit_options: "--verbose" diff --git a/.gitignore b/.gitignore index edbde29..8b4d3df 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ db.sqlite3 # unignore !requirements.txt +!requirements-dev.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f09ba8c..e5c806e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,4 +6,4 @@ repos: # Run the linter. - id: ruff # Run the formatter. - - id: ruff-format \ No newline at end of file + - id: ruff-format diff --git a/LICENSE b/LICENSE index 463f598..69e6692 100644 --- a/LICENSE +++ b/LICENSE @@ -1,17 +1,3 @@ - Copyright (c) 2024 Tenstorrent AI ULC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -189,8 +175,22 @@ END OF TERMS AND CONDITIONS + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ ------------------------------------------------------------------------------- + Copyright (c) 2024 Tenstorrent AI ULC +------------------------------------------------------------------------------- Third-Party Dependencies: The following dependencies are utilized by this project but are not explicitly diff --git a/README.md b/README.md index 4d6fcd7..92cd392 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,18 @@ # TT-Inference-Server -## Model implementations +Tenstorrent Inference Server (`tt-inference-server`) is the repository of model APIs available for deployment on Tenstorrent hardware. + +## Official Repository + +[https://github.com/tenstorrent/tt-inference-server](https://github.com/tenstorrent/tt-inference-server/) + + +## Getting Started +Please follow the setup instructions in each model folder's README.md. + +-------------------------------------------------------------------------------------------------------------- + +## Model Implementations | Model | Hardware | |----------------|-----------------------------| | [LLaMa 3.1 70B](tt-metal-llama3-70b/README.md) | TT-QuietBox & TT-LoudBox | diff --git a/evals/README.md b/evals/README.md index 86a3202..620b769 100644 --- a/evals/README.md +++ b/evals/README.md @@ -1,14 +1,13 @@ # Running LM evals with vLLM -Containerization in: https://github.com/tenstorrent/tt-inference-server/blob/tstesco/vllm-llama3-70b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile +Source code: +- tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal +- lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness +- llama-recipes fork: https://github.com/tstescoTT/llama-recipes -tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal +## Step 1: Pull Docker image -lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness/tree/tstesco/local-api-vllm-streaming - -## Step 1: Build container - -When building, update the commit SHA and get correct SHA from model developers or from vLLM readme (https://github.com/tenstorrent/vllm/tree/dev/tt_metal#vllm-and-tt-metal-branches ). The Dockerfile version updates infrequently but may also be updated.
+Docker images are published to: https://ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm ```bash # build image export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc16 @@ -41,87 +40,90 @@ docker run \ --volume /dev/hugepages-1G:/dev/hugepages-1G:rw \ --volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \ --shm-size 32G \ - ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-aee03c7eadaa bash -``` - -additionally for development you can mount the volumes: -```bash - --volume $PWD/../vllm:/home/user/vllm \ - --volume $PWD/../lm-evaluation-harness:/home/user/lm-evaluation-harness \ + ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911 bash ``` ## Step 3: Inside container setup and run vLLM -The following env vars should be set: +#### Install vLLM - Option 1: use default installation in docker image -- `PYTHON_ENV_DIR="${TT_METAL_HOME}/build/python_env"` -- `VLLM_TARGET_DEVICE="tt"` -- `vllm_dir` +Already built into the Docker image +#### Install vLLM - Option 2: install vLLM from github ```bash -# vllm dir is defined in container -cd /home/user/vllm - -# option 1: use default installation in docker image -# already set up! - # option 2: install from github +cd /home/user/vllm git fetch -# git checkout +git checkout git pull pip install -e . echo "done vllm install." +``` +#### Install vLLM - Option 3: install editable (for development) from mounted volume +```bash # option 3: install edittable (for development) - mount from outside container +cd /home/user/vllm pip install -e . echo "done vllm install." +``` + +#### Run vLLM serving OpenAI-compatible API server +```bash # run vllm serving cd /home/user/vllm -python examples/test_vllm_alpaca_eval.py +python examples/server_example_tt.py +``` + +## Step 4: Inside container set up LM evaluation harness + +Enter a new bash shell in the running container (the command below attaches to the most recently started container): +```bash +docker exec -it $(docker ps -q | head -n1) bash ``` -## Step 4: Inside container setup LM evals +Now inside container: +```bash +# option 1: install from github: https://github.com/tstescoTT/lm-evaluation-harness +pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git#egg=lm-eval[ifeval] +# option 2: install editable (for development) - mounted to container +cd ~/lm-evaluation-harness +pip install -e .[ifeval] +``` + +## Step 5: Inside container set up llama-recipes LM evaluation harness templates + Using Meta’s LM eval reproduce documentation: https://github.com/meta-llama/llama-recipes/tree/main/tools/benchmarks/llm_eval_harness/meta_eval To access Meta Llama 3.1 evals, you must: -Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms. - -Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine. +1. Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms. +2. Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine (see the optional check below).
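+As an optional sanity check (a minimal sketch, assuming the `huggingface_hub` CLI is available in the container's Python environment), you can confirm the machine is authenticated before preparing the eval templates:
+```bash
+# optional: verify Hugging Face authentication
+# prints your username when a valid token is found, "Not logged in" otherwise
+huggingface-cli whoami
+```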
-option 1: HF_TOKEN +#### Hugging Face authentication - option 1: HF_TOKEN (if not already passed into the Docker container) ```bash # set up HF Token, needed for IFEval dataset # echo "hf_" > ${HF_HOME}/token export PYTHONPATH=${PYTHONPATH}:$PWD ``` -option 2: huggingface_hub login + +#### Hugging Face authentication - option 2: huggingface_hub login ```python from huggingface_hub import notebook_login notebook_login() ``` -build llama-recipe lm-evaluation-harness templates: +Finally, build the llama-recipes lm-evaluation-harness templates: ```bash -git clone https://github.com/meta-llama/llama-recipes.git +git clone https://github.com/tstescoTT/llama-recipes.git cd llama-recipes/tools/benchmarks/llm_eval_harness/meta_eval python prepare_meta_eval.py --config_path ./eval_config.yaml cp -rf work_dir/ ~/lm-evaluation-harness/ ``` -## Step 5: Inside container set up LM evals - -```bash -# option 1: install from github -pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git@tstesco/local-api-vllm-streaming#egg=lm-eval[ifeval] -# option 2: install edittable (for development) - mounted to container -cd ~/lm-evaluation-harness -pip install -e .[ifeval] -``` - ## Step 6: Inside container run LM evals `run_evals.sh` can be run from where lm_eval CLI is available: @@ -131,12 +133,14 @@ run_evals.sh ``` For example, running GPQA manually: + +The model args (`Meta-Llama-3.1-70B` below) need only match the model name the running server was started with, not the actual weights being served. ```bash lm_eval \ --model local-completions \ ---model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ ---gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \ ---tasks meta_gpqa \ +--model_args model=meta-llama/Meta-Llama-3.1-70B,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ +--gen_kwargs model=meta-llama/Meta-Llama-3.1-70B,stop="<|eot_id|>",stream=False \ +--tasks meta_ifeval \ --batch_size auto \ --output_path /home/user/cache_root/eval_output \ --include_path ./work_dir \ diff --git a/evals/run_evals.sh b/evals/run_evals.sh index 75cf2f9..7d4bd69 100644 --- a/evals/run_evals.sh +++ b/evals/run_evals.sh @@ -7,7 +7,7 @@ lm_eval \ --model local-completions \ --model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ ---gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \ +--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \ --tasks meta_gpqa \ --batch_size auto \ --output_path /home/user/cache_root/eval_output \ @@ -15,3 +15,14 @@ lm_eval \ --seed 42 \ --log_samples +# IFEval +lm_eval \ +--model local-completions \ +--model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ +--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \ +--tasks meta_ifeval \ +--batch_size auto \ +--output_path /home/user/cache_root/eval_output \ +--include_path ./work_dir \ +--seed 42 \ +--log_samples diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..62cc8d1 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pre-commit==3.5.0 +ruff==0.7.0 diff --git 
a/scripts/add_spdx_header.py b/scripts/add_spdx_header.py index 49d174e..f7fa986 100644 --- a/scripts/add_spdx_header.py +++ b/scripts/add_spdx_header.py @@ -3,22 +3,25 @@ # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC from pathlib import Path +from datetime import datetime + +# get current year +current_year = datetime.now().year # * SPDX header content SPDX_HEADER = """# SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC -""" +# SPDX-FileCopyrightText: © """ +SPDX_DATE = str(current_year) + " Tenstorrent AI ULC\n" def add_spdx_header(file_path): with open(file_path, "r+") as file: content = file.read() if "SPDX-License-Identifier" not in content: file.seek(0, 0) - file.write(SPDX_HEADER + "\n" + content) - + file.write(SPDX_HEADER + SPDX_DATE + "\n" + content) if __name__ == "__main__": # List of directories to process here @@ -31,7 +34,5 @@ def add_spdx_header(file_path): for directory in directories_to_process: for file_path in directory.rglob("*"): # Check if the file is Python, Dockerfile, or Bash - if file_path.suffix in (".py", ".sh") or file_path.name.endswith( - "Dockerfile" - ): + if file_path.suffix in (".py", ".sh") or file_path.name.endswith("Dockerfile"): add_spdx_header(file_path) diff --git a/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile b/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile index edce872..0250da3 100644 --- a/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile +++ b/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile @@ -2,6 +2,7 @@ # # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + ARG TT_METAL_VERSION=v0.51.0-rc29 FROM ghcr.io/tenstorrent/tt-inference-server/tt-metal-mistral-7b-src-base:v0.0.1-tt-metal-${TT_METAL_VERSION} diff --git a/tt-metal-mistral-7b/src/gunicorn.conf.py b/tt-metal-mistral-7b/src/gunicorn.conf.py index c32b980..caf61f2 100644 --- a/tt-metal-mistral-7b/src/gunicorn.conf.py +++ b/tt-metal-mistral-7b/src/gunicorn.conf.py @@ -2,6 +2,8 @@ # # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + + import pathlib from datetime import datetime diff --git a/vllm-tt-metal-llama3-70b/docs/development.md b/vllm-tt-metal-llama3-70b/docs/development.md new file mode 100644 index 0000000..5448239 --- /dev/null +++ b/vllm-tt-metal-llama3-70b/docs/development.md @@ -0,0 +1,100 @@ +# Development vllm-tt-metal-llama3-70B + +Containerization is defined in: https://github.com/tenstorrent/tt-inference-server/blob/tstesco/vllm-llama3-70b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile + +tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal + +lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness + +## Step 1: Build container + +When building, update the commit SHAs, getting the correct SHAs from model developers or from the vLLM README (https://github.com/tenstorrent/vllm/tree/dev/tt_metal#vllm-and-tt-metal-branches). The Dockerfile version changes infrequently but may also need updating.
+```bash +# build image +export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc27 +export TT_METAL_COMMIT_SHA_OR_TAG=685ef1303b5abdfda63183fdd4fd6ed51b496833 +export TT_METAL_COMMIT_DOCKER_TAG=${TT_METAL_COMMIT_SHA_OR_TAG:0:12} +export TT_VLLM_COMMIT_SHA_OR_TAG=582c05ecaa37a7d03224a26f52df5af067d3311f +export TT_VLLM_COMMIT_DOCKER_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG:0:12} +docker build \ + -t ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} \ + --build-arg TT_METAL_DOCKERFILE_VERSION=${TT_METAL_DOCKERFILE_VERSION} \ + --build-arg TT_METAL_COMMIT_SHA_OR_TAG=${TT_METAL_COMMIT_SHA_OR_TAG} \ + --build-arg TT_VLLM_COMMIT_SHA_OR_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG} \ + . -f vllm.llama3.src.base.inference.v0.52.0.Dockerfile + +# push image +docker push ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} +``` + +## Step 2: Run container for LM evals development + +Note: this requires running `setup.sh` to set up the weights for a particular model, in this example `llama-3.1-70b-instruct`. + +```bash +cd tt-inference-server +export PERSISTENT_VOLUME=$PWD/persistent_volume/volume_id_tt-metal-llama-3.1-70b-instructv0.0.1/ +docker run \ + --rm \ + -it \ + --env-file tt-metal-llama3-70b/.env \ + --cap-add ALL \ + --device /dev/tenstorrent:/dev/tenstorrent \ + --volume /dev/hugepages-1G:/dev/hugepages-1G:rw \ + --volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \ + --shm-size 32G \ + ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911 bash +``` + +Additionally, for development you can mount these volumes: +```bash + --volume $PWD/../vllm:/home/user/vllm \ + --volume $PWD/../lm-evaluation-harness:/home/user/lm-evaluation-harness \ +``` + +## Step 3: Inside container setup and run vLLM + +The following env vars are required and should already be set by the Dockerfile: + +- `PYTHON_ENV_DIR="${TT_METAL_HOME}/python_env"`: location where the tt-metal Python environment was installed. This is defined in the Dockerfile. +- `VLLM_TARGET_DEVICE="tt"`: This is defined in the Dockerfile. +- `vllm_dir`: Location of the vLLM installation. This is defined in the Dockerfile. You must update this if you've changed the vLLM install location. + +#### Option 1: use default installation in docker image + +Already built into the Docker image, continue to run vLLM. + +#### Option 2: install vLLM from github + +```bash +# option 2: install from github +cd /home/user/vllm +git fetch +# git checkout +git pull +pip install -e . +echo "done vllm install." +``` +#### Option 3: install editable (for development) - mount from outside container + +```bash +# option 3: install editable (for development) - mount from outside container +cd /home/user/vllm +pip install -e . +echo "done vllm install." +``` + +#### Run vLLM serving OpenAI-compatible API server + +```bash +# run vllm serving +cd /home/user/vllm +python examples/server_example_tt.py +``` + +## Sending requests to vLLM inference server + +If the container exposes a port (e.g. `docker run ... 
--publish 7000:7000`), you can send requests to that port. Otherwise, you can enter an interactive shell within the container via: +```bash +docker exec -it $(docker ps -q | head -n1) bash +``` diff --git a/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile index c8d120f..5d504ec 100644 --- a/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile +++ b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile @@ -102,3 +102,107 @@ RUN cd ${vllm_dir} && cd tt_metal \ && ln -s ${TT_METAL_HOME}/models ./models WORKDIR ${vllm_dir} + +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# default base image, override with --build-arg TT_METAL_DOCKERFILE_VERSION= +ARG TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc16 + +FROM ghcr.io/tenstorrent/tt-metal/tt-metalium/ubuntu-20.04-amd64:$TT_METAL_DOCKERFILE_VERSION-dev + +# Build stage +LABEL maintainer="Tom Stesco " +# connect Github repo with package +LABEL org.opencontainers.image.source https://github.com/tenstorrent/tt-inference-server + +ARG DEBIAN_FRONTEND=noninteractive +# default commit sha, override with --build-arg TT_METAL_COMMIT_SHA_OR_TAG= +ARG TT_METAL_COMMIT_SHA_OR_TAG=ebdffa93d911ebf18e1fd4058a6f65ed0dff09ef +ARG TT_VLLM_COMMIT_SHA_OR_TAG=dev + +# make build commit SHA available in the image for reference and debugging +ENV TT_METAL_COMMIT_SHA_OR_TAG=${TT_METAL_COMMIT_SHA_OR_TAG} +ENV SHELL=/bin/bash +ENV TZ=America/Los_Angeles +# tt-metal build vars +ENV ARCH_NAME=wormhole_b0 +ENV TT_METAL_HOME=/tt-metal +ENV CONFIG=Release +ENV TT_METAL_ENV=dev +ENV LOGURU_LEVEL=INFO +# derived vars +ENV PYTHONPATH=${TT_METAL_HOME} +# note: PYTHON_ENV_DIR is used by create_venv.sh +ENV PYTHON_ENV_DIR=${TT_METAL_HOME}/python_env +ENV LD_LIBRARY_PATH=${TT_METAL_HOME}/build/lib + +# extra system deps +RUN apt-get update && apt-get install -y \ + patchelf \ + libsndfile1 \ + wget \ + nano \ + acl \ + jq \ + vim \ + # user deps + htop \ + screen \ + tmux \ + unzip \ + zip \ + curl \ + iputils-ping \ + rsync \ + # syseng tools + cargo \ + && rm -rf /var/lib/apt/lists/* + +# build tt-metal +RUN git clone https://github.com/tenstorrent-metal/tt-metal.git ${TT_METAL_HOME} \ + && cd ${TT_METAL_HOME} \ + && git checkout ${TT_METAL_COMMIT_SHA_OR_TAG} \ + && git submodule update --init --recursive \ + && git submodule foreach 'git lfs fetch --all && git lfs pull' \ + && bash ./build_metal.sh \ + && bash ./create_venv.sh + +# user setup +ARG HOME_DIR=/home/user +RUN useradd -u 1000 -s /bin/bash -d ${HOME_DIR} user \ + && mkdir -p ${HOME_DIR} \ + && chown -R user:user ${HOME_DIR} \ + && chown -R user:user ${TT_METAL_HOME} + +USER user + +# tt-metal python env default +RUN echo "source ${PYTHON_ENV_DIR}/bin/activate" >> ${HOME_DIR}/.bashrc + +# install tt-smi +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ + && pip3 install --upgrade pip \ + && pip3 install git+https://github.com/tenstorrent/tt-smi" + +# runtime required for tt-metal on WH +ENV WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml + +WORKDIR ${HOME_DIR} +# vllm install, see: https://github.com/tenstorrent/vllm/blob/dev/tt_metal/README.md +ENV vllm_dir=${HOME_DIR}/vllm +ENV VLLM_TARGET_DEVICE="tt" +RUN git clone https://github.com/tenstorrent/vllm.git ${vllm_dir}\ + && cd ${vllm_dir} && git checkout ${TT_VLLM_COMMIT_SHA_OR_TAG} \ + && /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate && pip install -e ." 
+ +# extra vllm dependencies +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate && pip install compressed-tensors" + +# additional tools +USER root +RUN apt-get update && apt-get install -y gdb +USER user + +WORKDIR ${vllm_dir}
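For reference, here is a minimal smoke test for the "Sending requests to vLLM inference server" section of `vllm-tt-metal-llama3-70b/docs/development.md` added above. It is a sketch only, assuming the OpenAI-compatible server started by `examples/server_example_tt.py` is reachable on port 8000 (the port used in the `lm_eval` `base_url` above; adjust the host/port if you published a different one, e.g. 7000) and that the model name matches the one the server was launched with:

```bash
# send a single completion request to the vLLM OpenAI-compatible API
curl -s http://127.0.0.1:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.1-70B-Instruct",
        "prompt": "Hello, my name is",
        "max_tokens": 16,
        "temperature": 0
      }'
```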