Commit: resolve merge conflict

mvanniasingheTT committed Oct 31, 2024
2 parents c0acf71 + f1298e1 commit 46a935e
Showing 13 changed files with 358 additions and 71 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/spdx-checker.yml
@@ -0,0 +1,49 @@
name: SPDX Header Checker

on:
  workflow_dispatch:
  workflow_call:
  pull_request:
    branches:
      - "main"
    types:
      - opened
      - reopened
      - synchronize
      - assigned
      - review_requested

jobs:
  run-spdx-header-script:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4

      - uses: actions/[email protected]
        with:
          python-version: "3.8"

      - name: Run SPDX Header Script
        run: |
          echo "Running SPDX header script on all files in the repository"
          python ./scripts/add_spdx_header.py
      - name: Check for changes
        run: |
          git status
          if git diff --quiet; then
            echo "No changes detected."
            exit 0
          else
            echo "Changes detected, committing changes."
          fi
      - name: Commit changes
        if: success()
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_user_name: SPDX-Bot
          commit_user_email: [email protected]
          commit_message: "🚨✨AUTOMATED COMMIT | Added missing SPDX license headers automatically"
          branch: ${{ github.head_ref }}
          commit_options: "--verbose"
1 change: 1 addition & 0 deletions .gitignore
@@ -39,3 +39,4 @@ db.sqlite3

# unignore
!requirements.txt
!requirements-dev.txt
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -6,4 +6,4 @@ repos:
# Run the linter.
- id: ruff
# Run the formatter.
- id: ruff-format
- id: ruff-format
28 changes: 14 additions & 14 deletions LICENSE
@@ -1,17 +1,3 @@
Copyright (c) 2024 Tenstorrent AI ULC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
@@ -189,8 +175,22 @@

END OF TERMS AND CONDITIONS

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

-------------------------------------------------------------------------------
Copyright (c) 2024 Tenstorrent AI ULC

-------------------------------------------------------------------------------
Third-Party Dependencies:

The following dependencies are utilized by this project but are not explicitly
14 changes: 13 additions & 1 deletion README.md
@@ -1,6 +1,18 @@
# TT-Inference-Server

## Model implementations
Tenstorrent Inference Server (`tt-inference-server`) is the repository of model APIs available for deployment on Tenstorrent hardware.

## Official Repository

[https://github.com/tenstorrent/tt-inference-server](https://github.com/tenstorrent/tt-inference-server/)


## Getting Started
Please follow the setup instructions in each model folder's README.md.

--------------------------------------------------------------------------------------------------------------

## Model Implementations
| Model | Hardware |
|----------------|-----------------------------|
| [LLaMa 3.1 70B](tt-metal-llama3-70b/README.md) | TT-QuietBox & TT-LoudBox |
98 changes: 51 additions & 47 deletions evals/README.md
@@ -1,14 +1,13 @@
# Running LM evals with vLLM

Containerization in: https://github.com/tenstorrent/tt-inference-server/blob/tstesco/vllm-llama3-70b/vllm-tt-metal-llama3-70b/vllm.llama3.src.base.inference.v0.52.0.Dockerfile
Source code:
- tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal
- lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness
- llama-recipes fork: https://github.com/tstescoTT/llama-recipes

tt-metal and vLLM are under active development in lock-step: https://github.com/tenstorrent/vllm/tree/dev/tt_metal
## Step 1: Pull Docker image

lm-evaluation-harness fork: https://github.com/tstescoTT/lm-evaluation-harness/tree/tstesco/local-api-vllm-streaming

## Step 1: Build container

When building, update the commit SHA, using the correct SHA from model developers or from the vLLM README (https://github.com/tenstorrent/vllm/tree/dev/tt_metal#vllm-and-tt-metal-branches). The Dockerfile version changes infrequently but may also need to be updated.
Docker images are published to: https://ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm
```bash
# build image
export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc16
@@ -41,87 +40,90 @@ docker run \
--volume /dev/hugepages-1G:/dev/hugepages-1G:rw \
--volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \
--shm-size 32G \
ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-aee03c7eadaa bash
```

Additionally, for development you can mount these volumes:
```bash
--volume $PWD/../vllm:/home/user/vllm \
--volume $PWD/../lm-evaluation-harness:/home/user/lm-evaluation-harness \
ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911 bash
```

## Step 3: Inside container set up and run vLLM

The following env vars should be set:
#### Install vLLM - Option 1: use default installation in docker image

- `PYTHON_ENV_DIR="${TT_METAL_HOME}/build/python_env"`
- `VLLM_TARGET_DEVICE="tt"`
- `vllm_dir`
already built into Docker image
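A minimal sketch of exporting the variables listed above (the `vllm_dir` value is an assumption based on the container path used later in this guide):

```bash
# Assumed values; adjust for your container layout.
export PYTHON_ENV_DIR="${TT_METAL_HOME}/build/python_env"
export VLLM_TARGET_DEVICE="tt"
export vllm_dir="/home/user/vllm"  # assumed: the vLLM checkout inside the container
```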

#### Install vLLM - option 2: install vLLM from github

```bash
# vllm dir is defined in container
cd /home/user/vllm

# option 1: use default installation in docker image
# already set up!

# option 2: install from github
cd /home/user/vllm
git fetch
# git checkout <branch>
git checkout <branch>
git pull
pip install -e .
echo "done vllm install."
```
#### Install vLLM - option 3: install editable (for development) from mounted volume

```bash
# option 3: install editable (for development) - mount from outside container
cd /home/user/vllm
pip install -e .
echo "done vllm install."
```

#### Run vLLM serving (OpenAI-compatible API server)

```bash
# run vllm serving
cd /home/user/vllm
python examples/test_vllm_alpaca_eval.py
python examples/server_example_tt.py
```

## Step 4: Inside container set up LM evaluation harness

Enter a new bash shell in the running container (the command below attaches to the most recently started container):
```bash
docker exec -it $(docker ps -q | head -n1) bash
```

## Step 4: Inside container set up LM evals
Now inside container:
```bash
# option 1: install from github: https://github.com/tstescoTT/lm-evaluation-harness
pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git#egg=lm-eval[ifeval]
# option 2: install editable (for development) - mounted to container
cd ~/lm-evaluation-harness
pip install -e .[ifeval]
```

## Step 5: Inside container set up llama-recipes LM evaluation harness templates


Following Meta’s LM eval reproduction documentation: https://github.com/meta-llama/llama-recipes/tree/main/tools/benchmarks/llm_eval_harness/meta_eval

To access Meta Llama 3.1 evals, you must:

Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms.

Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine.
1. Log in to the Hugging Face website (https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f ) and click the 3.1 evals dataset pages and agree to the terms.
2. Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine.

option 1: HF_TOKEN
#### Hugging Face authentication - option 1: HF_TOKEN (if not already passed into Docker container)
```bash
# set up HF Token, needed for IFEval dataset
# echo "hf_<token>" > ${HF_HOME}/token
export PYTHONPATH=${PYTHONPATH}:$PWD
```
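As an assumed alternative (not from the original doc), the token can also be exported directly; `huggingface_hub` reads the `HF_TOKEN` environment variable:

```bash
export HF_TOKEN="hf_<token>"  # placeholder token value
```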
option 2: huggingface_hub login

#### Hugging Face authentication - option 2: huggingface_hub login
```python
from huggingface_hub import notebook_login
notebook_login()
```

build llama-recipe lm-evaluation-harness templates:
Finally, build llama-recipe lm-evaluation-harness templates:
```bash
git clone https://github.com/meta-llama/llama-recipes.git
git clone https://github.com/tstescoTT/llama-recipes.git
cd llama-recipes/tools/benchmarks/llm_eval_harness/meta_eval
python prepare_meta_eval.py --config_path ./eval_config.yaml
cp -rf work_dir/ ~/lm-evaluation-harness/
```

## Step 5: Inside container set up LM evals

```bash
# option 1: install from github
pip install git+https://github.com/tstescoTT/lm-evaluation-harness.git@tstesco/local-api-vllm-streaming#egg=lm-eval[ifeval]
# option 2: install editable (for development) - mounted to container
cd ~/lm-evaluation-harness
pip install -e .[ifeval]
```

## Step 6: Inside container run LM evals

`run_evals.sh` can be run from anywhere the lm_eval CLI is available:
@@ -131,12 +133,14 @@ run_evals.sh
```

For example, running GPQA manually:

The model args (`Meta-Llama-3.1-70B` below) need only match the model name the running server was configured with, not the actual weights.
```bash
lm_eval \
--model local-completions \
--model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \
--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \
--tasks meta_gpqa \
--model_args model=meta-llama/Meta-Llama-3.1-70B,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \
--gen_kwargs model=meta-llama/Meta-Llama-3.1-70B,stop="<|eot_id|>",stream=False \
--tasks meta_ifeval \
--batch_size auto \
--output_path /home/user/cache_root/eval_output \
--include_path ./work_dir \
13 changes: 12 additions & 1 deletion evals/run_evals.sh
@@ -7,11 +7,22 @@
lm_eval \
--model local-completions \
--model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \
--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=True \
--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \
--tasks meta_gpqa \
--batch_size auto \
--output_path /home/user/cache_root/eval_output \
--include_path ./work_dir \
--seed 42 \
--log_samples

# IFEval
lm_eval \
--model local-completions \
--model_args model=meta-llama/Llama-3.1-70B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=32,max_retries=4,tokenized_requests=False,add_bos_token=True \
--gen_kwargs model=meta-llama/Llama-3.1-70B-Instruct,stop="<|eot_id|>",stream=False \
--tasks meta_ifeval \
--batch_size auto \
--output_path /home/user/cache_root/eval_output \
--include_path ./work_dir \
--seed 42 \
--log_samples
2 changes: 2 additions & 0 deletions requirements-dev.txt
@@ -0,0 +1,2 @@
pre-commit==3.5.0
ruff==0.7.0
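As a usage sketch (standard pre-commit workflow, assumed rather than documented in this repo), these dev dependencies pair with the ruff hooks in `.pre-commit-config.yaml`:

```bash
pip install -r requirements-dev.txt  # installs pre-commit and ruff as pinned above
pre-commit install                   # register the git hook in this clone
pre-commit run --all-files           # run the ruff lint and format hooks on all files
```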
15 changes: 8 additions & 7 deletions scripts/add_spdx_header.py
@@ -3,22 +3,25 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC

from pathlib import Path
from datetime import datetime

# get current year
current_year = datetime.now().year


# * SPDX header content
SPDX_HEADER = """# SPDX-License-Identifier: Apache-2.0
#
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
"""
# SPDX-FileCopyrightText: © """

SPDX_DATE = str(current_year) + " Tenstorrent AI ULC\n"

def add_spdx_header(file_path):
    with open(file_path, "r+") as file:
        content = file.read()
        if "SPDX-License-Identifier" not in content:
            file.seek(0, 0)
            file.write(SPDX_HEADER + "\n" + content)

            file.write(SPDX_HEADER + SPDX_DATE + "\n" + content)

if __name__ == "__main__":
    # List of directories to process here
@@ -31,7 +34,5 @@ def add_spdx_header(file_path):
    for directory in directories_to_process:
        for file_path in directory.rglob("*"):
            # Check if the file is Python, Dockerfile, or Bash
            if file_path.suffix in (".py", ".sh") or file_path.name.endswith(
                "Dockerfile"
            ):
            if file_path.suffix in (".py", ".sh") or file_path.name.endswith("Dockerfile"):
                add_spdx_header(file_path)
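For reference, a minimal local run of this script, mirroring the invocation in the SPDX workflow above (the `git diff` review step is an assumption, not part of the workflow):

```bash
# Run from the repository root so the script's directory list resolves correctly
python ./scripts/add_spdx_header.py
git diff  # review any headers the script added before committing
```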
@@ -2,6 +2,7 @@
#
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC


ARG TT_METAL_VERSION=v0.51.0-rc29
FROM ghcr.io/tenstorrent/tt-inference-server/tt-metal-mistral-7b-src-base:v0.0.1-tt-metal-${TT_METAL_VERSION}

2 changes: 2 additions & 0 deletions tt-metal-mistral-7b/src/gunicorn.conf.py
@@ -2,6 +2,8 @@
#
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC



import pathlib
from datetime import datetime
