diff --git a/.github/workflows/spdx-checker.yml b/.github/workflows/spdx-checker.yml new file mode 100644 index 0000000..4936c6a --- /dev/null +++ b/.github/workflows/spdx-checker.yml @@ -0,0 +1,49 @@ +name: SPDX Header Checker + +on: + workflow_dispatch: + workflow_call: + pull_request: + branches: + - "main" + types: + - opened + - reopened + - synchronize + - assigned + - review_requested + +jobs: + run-spdx-header-script: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5.0.0 + with: + python-version: "3.8" + + - name: Run SPDX Header Script + run: | + echo "Running SPDX header script on all files in the repository" + python ./scripts/add_spdx_header.py + + - name: Check for changes + run: | + git status + if git diff --quiet; then + echo "No changes detected." + exit 0 + else + echo "Changes detected, committing changes." + fi + + - name: Commit changes + if: success() + uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_user_name: SPDX-Bot + commit_user_email: bot@example.com + commit_message: "🚨✨AUTOMATED COMMIT | Added missing SPDX license headers automatically" + branch: ${{ github.head_ref }} + commit_options: "--verbose" diff --git a/.gitignore b/.gitignore index edbde29..8b4d3df 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ db.sqlite3 # unignore !requirements.txt +!requirements-dev.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f09ba8c..e5c806e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,4 +6,4 @@ repos: # Run the linter. - id: ruff # Run the formatter. - - id: ruff-format \ No newline at end of file + - id: ruff-format diff --git a/LICENSE b/LICENSE index 463f598..69e6692 100644 --- a/LICENSE +++ b/LICENSE @@ -1,17 +1,3 @@ - Copyright (c) 2024 Tenstorrent AI ULC - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -189,8 +175,22 @@ END OF TERMS AND CONDITIONS + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ ------------------------------------------------------------------------------- + Copyright (c) 2024 Tenstorrent AI ULC +------------------------------------------------------------------------------- Third-Party Dependencies: The following dependencies are utilized by this project but are not explicitly diff --git a/README.md b/README.md index 4d6fcd7..92cd392 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,18 @@ # TT-Inference-Server -## Model implementations +Tenstorrent Inference Server (`tt-inference-server`) is the repository of model APIs available for deployment on Tenstorrent hardware. + +## Official Repository + +[https://github.com/tenstorrent/tt-inference-server](https://github.com/tenstorrent/tt-inference-server/) + + +## Getting Started +Please follow the setup instructions in each model folder's README.md. + +-------------------------------------------------------------------------------------------------------------- + +## Model Implementations | Model | Hardware | |----------------|-----------------------------| | [LLaMa 3.1 70B](tt-metal-llama3-70b/README.md) | TT-QuietBox & TT-LoudBox | diff --git a/evals/README.md b/evals/README.md index 86a3202..620b769 100644 --- a/evals/README.md +++ b/evals/README.md @@ -1,14 +1,13 @@ # Running LM evals with vLLM -Containerization in: https://github.com/tenstorrent/tt-inference-server/blob/tstesco/vllm-llama3-70b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile +Source code: +- tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal +- lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness +- llama-recipes fork: https://github.com/tstescoTT/llama-recipes -tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal +## Step 1: Pull Docker image -lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness/tree/tstesco/local-api-vllm-streaming - -## Step 1: Build container - -When building, update the commit SHA and get correct SHA from model developers or from vLLM readme (https://github.com/tenstorrent/vllm/tree/dev/tt_metal#vllm-and-tt-metal-branches ). The Dockerfile version updates infrequently but may also be updated.
+Docker images are published to: https://ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm ```bash # build image export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc16 @@ -41,87 +40,90 @@ docker run \ --volume /dev/hugepages-1G:/dev/hugepages-1G:rw \ --volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \ --shm-size 32G \ - ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-aee03c7eadaa bash -``` - -additionally for development you can mount the volumes: -```bash - --volume $PWD/../vllm:/home/user/vllm \ - --volume $PWD/../lm-evaluation-harness:/home/user/lm-evaluation-harness \ + ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911 bash ``` ## Step 3: Inside container setup and run vLLM -The following env vars should be set: +#### Install vLLM - Option 1: use default installation in docker image -- `PYTHON_ENV_DIR="${TT_METAL_HOME}/build/python_env"` -- `VLLM_TARGET_DEVICE="tt"` -- `vllm_dir` +Already built into the Docker image +#### Install vLLM - Option 2: install vLLM from github ```bash -# vllm dir is defined in container -cd /home/user/vllm - -# option 1: use default installation in docker image -# already set up! - # option 2: install from github +cd /home/user/vllm git fetch -# git checkout +git checkout git pull pip install -e . echo "done vllm install." +``` +#### Install vLLM - Option 3: install editable (for development) from mounted volume +```bash # option 3: install edittable (for development) - mount from outside container +cd /home/user/vllm pip install -e . echo "done vllm install." +``` + +#### Run vLLM serving OpenAI-compatible API server +```bash # run vllm serving cd /home/user/vllm -python examples/test_vllm_alpaca_eval.py +python examples/server_example_tt.py +``` + +## Step 4: Inside container set up LM evaluation harness + +Enter a new bash shell in the running container (the command below attaches to the most recently started container): +```bash +docker exec -it $(docker ps -q | head -n1) bash ``` -## Step 4: Inside container setup LM evals +Now inside container: +```bash +# option 1: install from github: https://github.com/tstescoTT/lm-evaluation-harness +pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git#egg=lm-eval[ifeval] +# option 2: install editable (for development) - mounted to container +cd ~/lm-evaluation-harness +pip install -e .[ifeval] +``` + +## Step 5: Inside container set up llama-recipes LM evaluation harness templates + Using Meta’s LM eval reproduce documentation: https://github.com/meta-llama/llama-recipes/tree/main/tools/benchmarks/llm_eval_harness/meta_eval To access Meta Llama 3.1 evals, you must: -Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms. - -Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine. +1. Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms. +2. Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine (see the optional check below).
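+As an optional sanity check (a minimal sketch, assuming the `huggingface_hub` CLI is available in the container's Python environment), you can confirm the machine is authenticated before preparing the eval templates:
+```bash
+# optional: verify Hugging Face authentication
+# prints your username when a valid token is found, "Not logged in" otherwise
+huggingface-cli whoami
+```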
-option 1: HF_TOKEN +#### Hugging Face authentication - option 1: HF_TOKEN (if not already passed into the Docker container) ```bash # set up HF Token, needed for IFEval dataset # echo "hf_" > ${HF_HOME}/token export PYTHONPATH=${PYTHONPATH}:$PWD ``` -option 2: huggingface_hub login + +#### Hugging Face authentication - option 2: huggingface_hub login ```python from huggingface_hub import notebook_login notebook_login() ``` -build llama-recipe lm-evaluation-harness templates: +Finally, build the llama-recipes lm-evaluation-harness templates: ```bash -git clone https://github.com/meta-llama/llama-recipes.git +git clone https://github.com/tstescoTT/llama-recipes.git cd llama-recipes/tools/benchmarks/llm_eval_harness/meta_eval python prepare_meta_eval.py --config_path ./eval_config.yaml cp -rf work_dir/ ~/lm-evaluation-harness/ ``` -## Step 5: Inside container set up LM evals - -```bash -# option 1: install from github -pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git@tstesco/local-api-vllm-streaming#egg=lm-eval[ifeval] -# option 2: install edittable (for development) - mounted to container -cd ~/lm-evaluation-harness -pip install -e .[ifeval] -``` - ## Step 6: Inside container run LM evals `run_evals.sh` can be run from where lm_eval CLI is available: @@ -131,12 +133,14 @@ run_evals.sh ``` For example, running GPQA manually: + +The model args (`Meta-Llama-3.1-70B` below) need only match the model name the running server was started with, not the actual weights being served. ```bash lm_eval \ --model local-completions \ ---model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ ---gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \ ---tasks meta_gpqa \ +--model_args model=meta-llama/Meta-Llama-3.1-70B,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ +--gen_kwargs model=meta-llama/Meta-Llama-3.1-70B,stop="<|eot_id|>",stream=False \ +--tasks meta_ifeval \ --batch_size auto \ --output_path /home/user/cache_root/eval_output \ --include_path ./work_dir \ diff --git a/evals/run_evals.sh b/evals/run_evals.sh index 75cf2f9..7d4bd69 100644 --- a/evals/run_evals.sh +++ b/evals/run_evals.sh @@ -7,7 +7,7 @@ lm_eval \ --model local-completions \ --model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ ---gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \ +--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \ --tasks meta_gpqa \ --batch_size auto \ --output_path /home/user/cache_root/eval_output \ @@ -15,3 +15,14 @@ lm_eval \ --seed 42 \ --log_samples +# IFEval +lm_eval \ +--model local-completions \ +--model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \ +--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \ +--tasks meta_ifeval \ +--batch_size auto \ +--output_path /home/user/cache_root/eval_output \ +--include_path ./work_dir \ +--seed 42 \ +--log_samples diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..62cc8d1 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +pre-commit==3.5.0 +ruff==0.7.0 diff --git 
a/scripts/add_spdx_header.py b/scripts/add_spdx_header.py index 49d174e..f7fa986 100644 --- a/scripts/add_spdx_header.py +++ b/scripts/add_spdx_header.py @@ -3,22 +3,25 @@ # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC from pathlib import Path +from datetime import datetime + +# get current year +current_year = datetime.now().year # * SPDX header content SPDX_HEADER = """# SPDX-License-Identifier: Apache-2.0 # -# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC -""" +# SPDX-FileCopyrightText: © """ +SPDX_DATE = str(current_year) + " Tenstorrent AI ULC\n" def add_spdx_header(file_path): with open(file_path, "r+") as file: content = file.read() if "SPDX-License-Identifier" not in content: file.seek(0, 0) - file.write(SPDX_HEADER + "\n" + content) - + file.write(SPDX_HEADER + SPDX_DATE + "\n" + content) if __name__ == "__main__": # List of directories to process here @@ -31,7 +34,5 @@ def add_spdx_header(file_path): for directory in directories_to_process: for file_path in directory.rglob("*"): # Check if the file is Python, Dockerfile, or Bash - if file_path.suffix in (".py", ".sh") or file_path.name.endswith( - "Dockerfile" - ): + if file_path.suffix in (".py", ".sh") or file_path.name.endswith("Dockerfile"): add_spdx_header(file_path) diff --git a/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile b/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile index edce872..0250da3 100644 --- a/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile +++ b/tt-metal-mistral-7b/mistral7b.src.base.inference.v0.51.0-rc29-cs.Dockerfile @@ -2,6 +2,7 @@ # # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + ARG TT_METAL_VERSION=v0.51.0-rc29 FROM ghcr.io/tenstorrent/tt-inference-server/tt-metal-mistral-7b-src-base:v0.0.1-tt-metal-${TT_METAL_VERSION} diff --git a/tt-metal-mistral-7b/src/gunicorn.conf.py b/tt-metal-mistral-7b/src/gunicorn.conf.py index c32b980..caf61f2 100644 --- a/tt-metal-mistral-7b/src/gunicorn.conf.py +++ b/tt-metal-mistral-7b/src/gunicorn.conf.py @@ -2,6 +2,8 @@ # # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + + import pathlib from datetime import datetime diff --git a/vllm-tt-metal-llama3-70b/docs/development.md b/vllm-tt-metal-llama3-70b/docs/development.md new file mode 100644 index 0000000..5448239 --- /dev/null +++ b/vllm-tt-metal-llama3-70b/docs/development.md @@ -0,0 +1,100 @@ +# Development vllm-tt-metal-llama3-70B + +Containerization is defined in: https://github.com/tenstorrent/tt-inference-server/blob/tstesco/vllm-llama3-70b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile + +tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal + +lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness + +## Step 1: Build container + +When building, update the commit SHAs, getting the correct SHAs from model developers or from the vLLM README (https://github.com/tenstorrent/vllm/tree/dev/tt_metal#vllm-and-tt-metal-branches). The Dockerfile version changes infrequently but may also need updating.
+```bash +# build image +export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc27 +export TT_METAL_COMMIT_SHA_OR_TAG=685ef1303b5abdfda63183fdd4fd6ed51b496833 +export TT_METAL_COMMIT_DOCKER_TAG=${TT_METAL_COMMIT_SHA_OR_TAG:0:12} +export TT_VLLM_COMMIT_SHA_OR_TAG=582c05ecaa37a7d03224a26f52df5af067d3311f +export TT_VLLM_COMMIT_DOCKER_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG:0:12} +docker build \ + -t ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} \ + --build-arg TT_METAL_DOCKERFILE_VERSION=${TT_METAL_DOCKERFILE_VERSION} \ + --build-arg TT_METAL_COMMIT_SHA_OR_TAG=${TT_METAL_COMMIT_SHA_OR_TAG} \ + --build-arg TT_VLLM_COMMIT_SHA_OR_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG} \ + . -f vllm.llama3.src.base.inference.v0.52.0.Dockerfile + +# push image +docker push ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} +``` + +## Step 2: Run container for LM evals development + +Note: this requires running `setup.sh` to set up the weights for a particular model, in this example `llama-3.1-70b-instruct`. + +```bash +cd tt-inference-server +export PERSISTENT_VOLUME=$PWD/persistent_volume/volume_id_tt-metal-llama-3.1-70b-instructv0.0.1/ +docker run \ + --rm \ + -it \ + --env-file tt-metal-llama3-70b/.env \ + --cap-add ALL \ + --device /dev/tenstorrent:/dev/tenstorrent \ + --volume /dev/hugepages-1G:/dev/hugepages-1G:rw \ + --volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \ + --shm-size 32G \ + ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911 bash +``` + +Additionally, for development you can mount these volumes: +```bash + --volume $PWD/../vllm:/home/user/vllm \ + --volume $PWD/../lm-evaluation-harness:/home/user/lm-evaluation-harness \ +``` + +## Step 3: Inside container setup and run vLLM + +The following env vars are required and should already be set by the Dockerfile: + +- `PYTHON_ENV_DIR="${TT_METAL_HOME}/python_env"`: location where the tt-metal Python environment was installed. This is defined in the Dockerfile. +- `VLLM_TARGET_DEVICE="tt"`: This is defined in the Dockerfile. +- `vllm_dir`: Location of the vLLM installation. This is defined in the Dockerfile. You must update this if you've changed the vLLM install location. + +#### Option 1: use default installation in docker image + +Already built into the Docker image, continue to run vLLM. + +#### Option 2: install vLLM from github + +```bash +# option 2: install from github +cd /home/user/vllm +git fetch +# git checkout +git pull +pip install -e . +echo "done vllm install." +``` +#### Option 3: install editable (for development) - mount from outside container + +```bash +# option 3: install editable (for development) - mount from outside container +cd /home/user/vllm +pip install -e . +echo "done vllm install." +``` + +#### Run vLLM serving OpenAI-compatible API server + +```bash +# run vllm serving +cd /home/user/vllm +python examples/server_example_tt.py +``` + +## Sending requests to vLLM inference server + +If the container exposes a port (e.g. `docker run ... 
--publish 7000:7000`), you can send requests to that port. Otherwise, you can enter an interactive shell within the container via: +```bash +docker exec -it $(docker ps -q | head -n1) bash +``` diff --git a/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile index c8d120f..5d504ec 100644 --- a/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile +++ b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile @@ -102,3 +102,107 @@ RUN cd ${vllm_dir} && cd tt_metal \ && ln -s ${TT_METAL_HOME}/models ./models WORKDIR ${vllm_dir} + +# SPDX-License-Identifier: Apache-2.0 +# +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# default base image, override with --build-arg TT_METAL_DOCKERFILE_VERSION= +ARG TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc16 + +FROM ghcr.io/tenstorrent/tt-metal/tt-metalium/ubuntu-20.04-amd64:$TT_METAL_DOCKERFILE_VERSION-dev + +# Build stage +LABEL maintainer="Tom Stesco " +# connect Github repo with package +LABEL org.opencontainers.image.source https://github.com/tenstorrent/tt-inference-server + +ARG DEBIAN_FRONTEND=noninteractive +# default commit sha, override with --build-arg TT_METAL_COMMIT_SHA_OR_TAG= +ARG TT_METAL_COMMIT_SHA_OR_TAG=ebdffa93d911ebf18e1fd4058a6f65ed0dff09ef +ARG TT_VLLM_COMMIT_SHA_OR_TAG=dev + +# make build commit SHA available in the image for reference and debugging +ENV TT_METAL_COMMIT_SHA_OR_TAG=${TT_METAL_COMMIT_SHA_OR_TAG} +ENV SHELL=/bin/bash +ENV TZ=America/Los_Angeles +# tt-metal build vars +ENV ARCH_NAME=wormhole_b0 +ENV TT_METAL_HOME=/tt-metal +ENV CONFIG=Release +ENV TT_METAL_ENV=dev +ENV LOGURU_LEVEL=INFO +# derived vars +ENV PYTHONPATH=${TT_METAL_HOME} +# note: PYTHON_ENV_DIR is used by create_venv.sh +ENV PYTHON_ENV_DIR=${TT_METAL_HOME}/python_env +ENV LD_LIBRARY_PATH=${TT_METAL_HOME}/build/lib + +# extra system deps +RUN apt-get update && apt-get install -y \ + patchelf \ + libsndfile1 \ + wget \ + nano \ + acl \ + jq \ + vim \ + # user deps + htop \ + screen \ + tmux \ + unzip \ + zip \ + curl \ + iputils-ping \ + rsync \ + # syseng tools + cargo \ + && rm -rf /var/lib/apt/lists/* + +# build tt-metal +RUN git clone https://github.com/tenstorrent-metal/tt-metal.git ${TT_METAL_HOME} \ + && cd ${TT_METAL_HOME} \ + && git checkout ${TT_METAL_COMMIT_SHA_OR_TAG} \ + && git submodule update --init --recursive \ + && git submodule foreach 'git lfs fetch --all && git lfs pull' \ + && bash ./build_metal.sh \ + && bash ./create_venv.sh + +# user setup +ARG HOME_DIR=/home/user +RUN useradd -u 1000 -s /bin/bash -d ${HOME_DIR} user \ + && mkdir -p ${HOME_DIR} \ + && chown -R user:user ${HOME_DIR} \ + && chown -R user:user ${TT_METAL_HOME} + +USER user + +# tt-metal python env default +RUN echo "source ${PYTHON_ENV_DIR}/bin/activate" >> ${HOME_DIR}/.bashrc + +# install tt-smi +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate \ + && pip3 install --upgrade pip \ + && pip3 install git+https://github.com/tenstorrent/tt-smi" + +# runtime required for tt-metal on WH +ENV WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml + +WORKDIR ${HOME_DIR} +# vllm install, see: https://github.com/tenstorrent/vllm/blob/dev/tt_metal/README.md +ENV vllm_dir=${HOME_DIR}/vllm +ENV VLLM_TARGET_DEVICE="tt" +RUN git clone https://github.com/tenstorrent/vllm.git ${vllm_dir}\ + && cd ${vllm_dir} && git checkout ${TT_VLLM_COMMIT_SHA_OR_TAG} \ + && /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate && pip install -e ." 
+ +# extra vllm dependencies +RUN /bin/bash -c "source ${PYTHON_ENV_DIR}/bin/activate && pip install compressed-tensors" + +# additional tools +USER root +RUN apt-get update && apt-get install -y gdb +USER user + +WORKDIR ${vllm_dir}
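For reference, here is a minimal smoke test for the "Sending requests to vLLM inference server" section of `vllm-tt-metal-llama3-70b/docs/development.md` added above. It is a sketch only, assuming the OpenAI-compatible server started by `examples/server_example_tt.py` is reachable on port 8000 (the port used in the `lm_eval` `base_url` above; adjust the host/port if you published a different one, e.g. 7000) and that the model name matches the one the server was launched with:

```bash
# send a single completion request to the vLLM OpenAI-compatible API
curl -s http://127.0.0.1:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.1-70B-Instruct",
        "prompt": "Hello, my name is",
        "max_tokens": 16,
        "temperature": 0
      }'
```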