From ff618a12e44fff2f861ec5e2bbcbec2c7b7012f8 Mon Sep 17 00:00:00 2001 From: Elias Bermudez Date: Fri, 20 Dec 2024 16:13:38 -0800 Subject: [PATCH 1/5] Update readme with new pip instructions and reorganize --- genai-perf/README.md | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/genai-perf/README.md b/genai-perf/README.md index 0f2609aa..094fc42d 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -73,7 +73,18 @@ INSTALLATION ## Installation -The easiest way to install GenAI-Perf is through +The easiest way to install GenAI-Perf is through pip. +### Install Perf Analyzer (Ubuntu 24.04, Python 3.10+) + +```bash +pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf +``` +**NOTE**: you must already have CUDA 12 installed + + +
+ +Alternatively, to install the container: [Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver). Install the latest release using the following command: @@ -86,31 +97,8 @@ docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3 genai-perf --help ``` -
- -Alternatively, to install from source: - -Since GenAI-Perf depends on Perf Analyzer, -you'll need to install the Perf Analyzer binary: - -### Install Perf Analyzer (Ubuntu, Python 3.10+) - -**NOTE**: you must already have CUDA 12 installed -(checkout the [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)). - -```bash -pip install tritonclient - -sudo apt update && sudo apt install -y --no-install-recommends libb64-0d -``` - -You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) as well. - -### Install GenAI-Perf from source -```bash -pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf -``` +You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) to use alongside GenAI-Perf as well.
From af8fe41c2cbdfa179720b7ccd9b8aa3f8fc223c6 Mon Sep 17 00:00:00 2001 From: Elias Bermudez Date: Fri, 20 Dec 2024 16:19:00 -0800 Subject: [PATCH 2/5] Fix spacing and link --- genai-perf/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/genai-perf/README.md b/genai-perf/README.md index 094fc42d..c8b89ead 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -85,7 +85,9 @@ pip install git+https://github.com/triton-inference-server/perf_analyzer.git#sub
Alternatively, to install the container: -[Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver). + +[Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) + Install the latest release using the following command: ```bash @@ -97,7 +99,6 @@ docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3 genai-perf --help ``` - You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) to use alongside GenAI-Perf as well.
From 89b1e8569c929c838f1d5be7a883a2a3c5076679 Mon Sep 17 00:00:00 2001 From: Elias Bermudez Date: Fri, 20 Dec 2024 16:25:55 -0800 Subject: [PATCH 3/5] Migrate to use the pypi.org release --- genai-perf/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai-perf/README.md b/genai-perf/README.md index c8b89ead..3a52ff45 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -77,7 +77,7 @@ The easiest way to install GenAI-Perf is through pip. ### Install Perf Analyzer (Ubuntu 24.04, Python 3.10+) ```bash -pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf +pip install genai-perf ``` **NOTE**: you must already have CUDA 12 installed From a2b791214e6207361a4993a0e26fbf8d8f0e1626 Mon Sep 17 00:00:00 2001 From: Elias Bermudez Date: Fri, 20 Dec 2024 16:52:21 -0800 Subject: [PATCH 4/5] Updated headers and wording around installation --- genai-perf/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/genai-perf/README.md b/genai-perf/README.md index 3a52ff45..d670c16e 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -74,7 +74,7 @@ INSTALLATION ## Installation The easiest way to install GenAI-Perf is through pip. -### Install Perf Analyzer (Ubuntu 24.04, Python 3.10+) +### Install GenAI-Perf (Ubuntu 24.04, Python 3.10+) ```bash pip install genai-perf @@ -88,14 +88,14 @@ pip install genai-perf [Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) -Install the latest release using the following command: +Pull the latest release using the following command: ```bash export RELEASE="24.10" docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk -# Check out genai_perf command inside the container: +# Validate the genai-perf command works inside the container: genai-perf --help ``` From 4484cd2e5fef70d3a928d7c39ea6e3a3730f3ad6 Mon Sep 17 00:00:00 2001 From: Elias Bermudez Date: Mon, 23 Dec 2024 13:23:53 -0800 Subject: [PATCH 5/5] Update templates for docs --- genai-perf/README.md | 4 +- genai-perf/docs/lora.md | 6 +-- .../genai-perf-templates/README_template | 50 +++++++++---------- .../customizable_frontends_template | 1 + .../genai-perf-templates/embeddings_template | 1 + templates/genai-perf-templates/files_template | 31 +----------- .../genai-perf-templates/rankings_template | 1 + .../genai-perf-templates/tutorial_template | 1 + templates/template_vars.yaml | 6 +-- 9 files changed, 38 insertions(+), 63 deletions(-) diff --git a/genai-perf/README.md b/genai-perf/README.md index d670c16e..8fc93ac2 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -91,7 +91,7 @@ pip install genai-perf Pull the latest release using the following command: ```bash -export RELEASE="24.10" +export RELEASE="24.12" docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk @@ -131,7 +131,7 @@ docker run -ti \ --shm-size=1g --ulimit memlock=-1 \ -v /tmp:/tmp \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - nvcr.io/nvidia/tritonserver:24.10-trtllm-python-py3 + nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3 # Install the Triton CLI pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.11 diff --git a/genai-perf/docs/lora.md b/genai-perf/docs/lora.md index e086464a..4ea25d3e 100644 --- a/genai-perf/docs/lora.md +++ b/genai-perf/docs/lora.md @@ -90,7 +90,7 @@ docker run -it --net=host --rm --gpus=all \ Run GenAI-Perf from the Triton Inference Server SDK 
container: ```bash -export RELEASE="24.10" +export RELEASE="24.12" docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk @@ -149,7 +149,7 @@ docker run \ Run GenAI-Perf from the Triton Inference Server SDK container: ```bash -export RELEASE="24.10" +export RELEASE="24.12" docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk @@ -207,7 +207,7 @@ docker run \ Run GenAI-Perf from the Triton Inference Server SDK container: ```bash -export RELEASE="24.10" +export RELEASE="24.12" docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk diff --git a/templates/genai-perf-templates/README_template b/templates/genai-perf-templates/README_template index edbba913..610990c5 100644 --- a/templates/genai-perf-templates/README_template +++ b/templates/genai-perf-templates/README_template @@ -73,43 +73,34 @@ INSTALLATION ## Installation -The easiest way to install GenAI-Perf is through -[Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver). -Install the latest release using the following command: +The easiest way to install GenAI-Perf is through pip. +### Install GenAI-Perf (Ubuntu 24.04, Python 3.10+) ```bash -export RELEASE="{{ release }}" - -docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk - -# Check out genai_perf command inside the container: -genai-perf --help +pip install genai-perf ``` +**NOTE**: you must already have CUDA 12 installed +
-Alternatively, to install from source: +Alternatively, to install the container: -Since GenAI-Perf depends on Perf Analyzer, -you'll need to install the Perf Analyzer binary: +[Triton Server SDK container](https://ngc.nvidia.com/catalog/containers/nvidia:tritonserver) -### Install Perf Analyzer (Ubuntu, Python 3.10+) - -**NOTE**: you must already have CUDA 12 installed -(checkout the [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)). +Pull the latest release using the following command: ```bash -pip install tritonclient -``` - -You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) as well. +export RELEASE="{{ release }}" -### Install GenAI-Perf from source +docker run -it --net=host --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}-py3-sdk -```bash -pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf +# Validate the genai-perf command works inside the container: +genai-perf --help ``` +You can also build Perf Analyzer [from source](../docs/install.md#build-from-source) to use alongside GenAI-Perf as well. +

@@ -182,6 +173,15 @@ See [Tutorial](docs/tutorial.md) for additional examples.

+
+## Analyze
+GenAI-Perf can sweep through Perf Analyzer or GenAI-Perf stimulus, allowing you to profile multiple scenarios with a single command.
+See [Analyze](docs/analyze.md) for details on how to use this subcommand.
+