From 4dc87cdf4368db71c537274bb46d98795a1ff9ea Mon Sep 17 00:00:00 2001
From: Tom Stesco
Date: Thu, 31 Oct 2024 15:36:06 +0000
Subject: [PATCH] update Dockerfile version in docs

---
 vllm-tt-metal-llama3-70b/README.md           | 12 ++++++------
 vllm-tt-metal-llama3-70b/docs/development.md |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/vllm-tt-metal-llama3-70b/README.md b/vllm-tt-metal-llama3-70b/README.md
index fc5c620..19358c0 100644
--- a/vllm-tt-metal-llama3-70b/README.md
+++ b/vllm-tt-metal-llama3-70b/README.md
@@ -22,7 +22,7 @@ If first run setup has already been completed, start here. If first run setup ha
 ### Docker Run - vLLM llama3 inference server
 
-Container will run `gunicorn --config gunicorn.conf.py` and start the inference server and model backend.
+Container will run with uvicorn and start the inference server and model backend.
 
 ```bash
 cd tt-inference-server
 # make sure if you already set up the model weights and cache you use the correct persistent volume
@@ -37,11 +37,11 @@ docker run \
   --volume ${PERSISTENT_VOLUME?ERROR env var PERSISTENT_VOLUME must be set}:/home/user/cache_root:rw \
   --shm-size 32G \
   --publish 7000:7000 \
-  ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911
+  ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-685ef1303b5a-54b9157d852b
 ```
 
 ```bash
-# run server
+# run server manually
 python examples/offline_inference_tt.py
 ```
 
@@ -82,12 +82,12 @@ sudo cpupower frequency-set -g performance
 
 ```bash
 # pull image from GHCR
-docker pull ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-v0.53.0-rc16-ebdffa93d911
+docker pull ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-685ef1303b5a-54b9157d852b
 ```
 
 ### 5. Automated Setup: environment variables and weights files
 
-The script `tt-metal-llama3-70b/setup.sh` automates:
+The script `vllm-tt-metal-llama3-70b/setup.sh` automates:
 
 1. interactively creating the .env file,
 2. downloading the Llama model weights,
@@ -95,7 +95,7 @@
 4. creating the default persistent storage directory structure and permissions.
 
 ```bash
-cd tt-inference-server/tt-metal-llama3-70b
+cd tt-inference-server/vllm-tt-metal-llama3-70b
 chmod +x setup.sh
 ./setup.sh llama-3.1-70b-instruct
 ```
diff --git a/vllm-tt-metal-llama3-70b/docs/development.md b/vllm-tt-metal-llama3-70b/docs/development.md
index 5448239..082379f 100644
--- a/vllm-tt-metal-llama3-70b/docs/development.md
+++ b/vllm-tt-metal-llama3-70b/docs/development.md
@@ -14,7 +14,7 @@ When building, update the commit SHA and get correct SHA from model developers o
 export TT_METAL_DOCKERFILE_VERSION=v0.53.0-rc27
 export TT_METAL_COMMIT_SHA_OR_TAG=685ef1303b5abdfda63183fdd4fd6ed51b496833
 export TT_METAL_COMMIT_DOCKER_TAG=${TT_METAL_COMMIT_SHA_OR_TAG:0:12}
-export TT_VLLM_COMMIT_SHA_OR_TAG=582c05ecaa37a7d03224a26f52df5af067d3311f
+export TT_VLLM_COMMIT_SHA_OR_TAG=54b9157d852b0fa219613c00abbaa5a35f221049
 export TT_VLLM_COMMIT_DOCKER_TAG=${TT_VLLM_COMMIT_SHA_OR_TAG:0:12}
 docker build \
   -t ghcr.io/tenstorrent/tt-inference-server/tt-metal-llama3-70b-src-base-vllm:v0.0.1-tt-metal-${TT_METAL_COMMIT_DOCKER_TAG}-${TT_VLLM_COMMIT_DOCKER_TAG} \
@@ -70,7 +70,7 @@ Already built into Docker image, continue to run vLLM.
 # option 2: install from github
 cd /home/user/vllm
 git fetch
-# git checkout
+git checkout
 git pull
 pip install -e .
 echo "done vllm install."