From bd3127b03e44743e402b015d3c1010e3a7cd24cf Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Thu, 26 Oct 2023 15:50:15 -0700 Subject: [PATCH] Fix sample output --- src/c++/perf_analyzer/docs/examples/profile.py | 5 ++++- src/c++/perf_analyzer/docs/llm.md | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 784a435ae..d746a66d8 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -222,7 +222,10 @@ def summarize_profile_results(args, prompts): print_benchmark_summary(results) if args.periodic_concurrency_range: - print("Saved in-flight benchmark plots @ 'inflight_batching_benchmark-*.png'.") + print( + "Saved in-flight batching benchmark plots " + "@ 'inflight_batching_benchmark-*.png'." + ) def profile(args, export_file): diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md index 1d895b43c..500076fe1 100644 --- a/src/c++/perf_analyzer/docs/llm.md +++ b/src/c++/perf_analyzer/docs/llm.md @@ -123,9 +123,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 256 --ign # Sample output # [ Benchmark Summary ] -# Prompt size: 100, Average first-token latency: 0.0388 sec, Average token-to-token latency: 0.0066 sec -# Prompt size: 300, Average first-token latency: 0.0431 sec, Average token-to-token latency: 0.0071 sec -# Prompt size: 500, Average first-token latency: 0.0400 sec, Average token-to-token latency: 0.0070 sec +# Prompt size: 100, Average first-token latency: 0.0388 sec, Average total token-to-token latency: 0.0066 sec +# Prompt size: 300, Average first-token latency: 0.0431 sec, Average total token-to-token latency: 0.0071 sec +# Prompt size: 500, Average first-token latency: 0.0400 sec, Average total token-to-token latency: 0.0070 sec ``` ## Benchmark 3: Profiling In-Flight Batching @@ -164,9 +164,9 @@ python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-ran # Sample output # [ BENCHMARK SUMMARY ] -# Prompt size: 10, Average first-token latency: 0.0799 sec, Average total token-token latency: 0.0324 sec +# Prompt size: 10, Average first-token latency: 0.0799 sec, Average total token-to-token latency: 0.0324 sec # -# Saved in-flight benchmark plots @ 'inflight_batching_benchmark-*.png'. +# Saved in-flight batching benchmark plots @ 'inflight_batching_benchmark-*.png'. ``` The resulting plot will look like