
Commit 4dc87cd
update Dockerfile version in docs
tstescoTT authored and mvanniasingheTT committed Oct 31, 2024
1 parent b2c9aaa commit 4dc87cd
Showing 2 changed files with 8 additions and 8 deletions.
12 changes: 6 additions & 6 deletions vllm-tt-metal-llama3-70b/README.md
@@ -22,7 +22,7 @@ If first run setup has already been completed, start here. If first run setup ha

### Docker Run - vLLM llama3 inference server

-Container will run `gunicorn --config gunicorn.conf.py` and start the inference server and model backend.
+The container will run uvicorn and start the inference server and model backend.
```bash
cd tt-inference-server
# make sure if you already set up the model weights and cache you use the correct persistent volume
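# illustrative example only: a hypothetical path; point this at the persistent
# volume you created during first-run setup
export PERSISTENT_VOLUME=$PWD/persistent_volume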
@@ -37,11 +37,11 @@ docker run \
--volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \
--shm-size 32G \
--publish 7000:7000 \
-  ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911
+  ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-685ef1303b5a-54b9157d852b
```
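Once the container is up, you can sanity-check that the server is listening on the published port. A minimal sketch; the `/health` route is an assumption and depends on the server implementation:

```bash
# hypothetical route: substitute whatever endpoint the inference server serves
curl -v http://localhost:7000/health
```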

```bash
-# run server
+# run server manually
python examples/offline_inference_tt.py
```

@@ -82,20 +82,20 @@ sudo cpupower frequency-set -g performance

```bash
# pull image from GHCR
-docker pull ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911
+docker pull ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-685ef1303b5a-54b9157d852b
```
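To confirm the image is available locally after the pull, you can list it with the standard Docker CLI:

```bash
docker image ls ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm
```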

### 5. Automated Setup: environment variables and weights files

-The script `tt-metal-llama3-70b/setup.sh` automates:
+The script `vllm-tt-metal-llama3-70b/setup.sh` automates:

1. interactively creating the .env file,
2. downloading the Llama model weights,
3. repacking the weights as required for tt-metal implementation,
4. creating the default persistent storage directory structure and permissions.

```bash
-cd tt-inference-server/tt-metal-llama3-70b
+cd tt-inference-server/vllm-tt-metal-llama3-70b
chmod +x setup.sh
./setup.sh llama-3.1-70b-instruct
```
4 changes: 2 additions & 2 deletions vllm-tt-metal-llama3-70b/docs/development.md
@@ -14,7 +14,7 @@ When building, update the commit SHA and get correct SHA from model developers o
export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc27
export TT_METAL_COMMIT_SHA_OR_TAG=685ef1303b5abdfda63183fdd4fd6ed51b496833
export TT_METAL_COMMIT_DOCKER_TAG=${TT_METAL_COMMIT_SHA_OR_TAG:0:12}
-export TT_VLLM_COMMIT_SHA_OR_TAG=582c05ecaa37a7d03224a26f52df5af067d3311f
+export TT_VLLM_COMMIT_SHA_OR_TAG=54b9157d852b0fa219613c00abbaa5a35f221049
export TT_VLLM_COMMIT_DOCKER_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG:0:12}
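# note: ${VAR:0:12} is bash substring expansion; it keeps the first 12
# characters of the full commit SHA to use as a short Docker tag,
# e.g. 685ef1303b5abdfda63183fdd4fd6ed51b496833 -> 685ef1303b5a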
docker build \
-t ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} \
@@ -70,7 +70,7 @@ Already built into Docker image, continue to run vLLM.
# option 2: install from github
cd /home/user/vllm
git fetch
-# git checkout <branch>
+git checkout <branch>
git pull
pip install -e .
echo "done vllm install."