diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py
index 1c2a4b58d..39ad94f76 100644
--- a/src/c++/perf_analyzer/docs/examples/profile.py
+++ b/src/c++/perf_analyzer/docs/examples/profile.py
@@ -48,7 +48,7 @@ def print_benchmark_summary(results):
             f"Average first-token latency: {avg_first_token_latency:.4f} sec"
         )
         output += (
-            f", Average token-token latency: {avg_token_to_token_latency:.4f} sec"
+            f", Average token-to-token latency: {avg_token_to_token_latency:.4f} sec"
             if avg_token_to_token_latency
             else ""
         )
diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md
index 60fbf9000..509bdd127 100644
--- a/src/c++/perf_analyzer/docs/llm.md
+++ b/src/c++/perf_analyzer/docs/llm.md
@@ -123,9 +123,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 256 --ign
 
 # Sample output
 # [ Benchmark Summary ]
-# Prompt size: 100, Average first-token latency: 0.0388 sec, Average token-token latency: 0.0066 sec
-# Prompt size: 300, Average first-token latency: 0.0431 sec, Average token-token latency: 0.0071 sec
-# Prompt size: 500, Average first-token latency: 0.0400 sec, Average token-token latency: 0.0070 sec
+# Prompt size: 100, Average first-token latency: 0.0388 sec, Average token-to-token latency: 0.0066 sec
+# Prompt size: 300, Average first-token latency: 0.0431 sec, Average token-to-token latency: 0.0071 sec
+# Prompt size: 500, Average first-token latency: 0.0400 sec, Average token-to-token latency: 0.0070 sec
 ```
 
 ## Benchmark 3: Profiling In-Flight Batching
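For context, here is a minimal, self-contained sketch of the summary printer this diff touches. Only the string-building lines come from the hunk above; the loop, the tuple layout of `results`, and the assumption that `avg_token_to_token_latency` is `None` when no streaming stat was collected are assumptions, not the actual profile.py:

```python
# Minimal sketch of print_benchmark_summary (assumptions: the for loop,
# the (prompt_size, first_token, token_to_token) tuple shape of `results`,
# and None marking a missing token-to-token stat; the output += lines
# mirror the hunk in this diff).
def print_benchmark_summary(results):
    output = "[ Benchmark Summary ]"
    for prompt_size, avg_first_token_latency, avg_token_to_token_latency in results:
        output += (
            f"\n  Prompt size: {prompt_size}, "
            f"Average first-token latency: {avg_first_token_latency:.4f} sec"
        )
        # The conditional expression appends the token-to-token stat only
        # when it is present, so single-token runs still print cleanly.
        output += (
            f", Average token-to-token latency: {avg_token_to_token_latency:.4f} sec"
            if avg_token_to_token_latency
            else ""
        )
    print(output)


# Hypothetical values in the same shape as the sample output in llm.md:
print_benchmark_summary([(100, 0.0388, 0.0066), (300, 0.0431, 0.0071), (500, 0.0400, None)])
```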