diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py
index 924dee01f..0e881f0c0 100644
--- a/src/c++/perf_analyzer/docs/examples/profile.py
+++ b/src/c++/perf_analyzer/docs/examples/profile.py
@@ -108,17 +108,15 @@ def save_json_data(data, filename):
         json.dump(data, f)
 
 
-def get_postfix(args, prompt_size=None):
+def get_postfix(args, prompt_size):
     """Generate postfix for profile export filename and plot.
 
     e.g.
-    - trtllm-maxtokens256
     - trtllm-prompt100-maxtokens256
     - trtllm-prompt100-periodic1_100_1-period32-maxtokens1024
     """
     stream_type = "online" if args.stream else "offline"
-    postfix = f"{args.model}-{stream_type}-"
-    postfix += f"prompt{prompt_size}-" if prompt_size else ""
+    postfix = f"{args.model}-{stream_type}-prompt{prompt_size}-"
     if args.periodic_concurrency_range:
         start, end, step = args.periodic_concurrency_range
         postfix += f"periodic{start}_{end}_{step}-period{args.request_period}-"
@@ -139,15 +137,15 @@ def get_plot_filename(args, prompt_size):
 
 
 def save_benchmark_results(args, profile_results):
-    postfix = get_postfix(args)
-    results_csv = f"results-{postfix}.csv"
-    with open(results_csv, "w") as f:
-        fieldnames = [f.name for f in fields(profile_results[0])]
-        writer = csv.DictWriter(f, fieldnames=fieldnames)
-        writer.writeheader()
-        for pr in profile_results:
+    for pr in profile_results:
+        postfix = get_postfix(args, pr.prompt_size)
+        results_csv = f"results-{postfix}.csv"
+        with open(results_csv, "w") as f:
+            fieldnames = [f.name for f in fields(pr)]
+            writer = csv.DictWriter(f, fieldnames=fieldnames)
+            writer.writeheader()
             writer.writerow(asdict(pr))
-    print(f"Saved benchmark results @ '{results_csv}'")
+        print(f"Saved benchmark results @ '{results_csv}'")
 
 
 def print_benchmark_summary(profile_results):
diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md
index 1f9fee93d..e0587aa2b 100644
--- a/src/c++/perf_analyzer/docs/llm.md
+++ b/src/c++/perf_analyzer/docs/llm.md
@@ -77,7 +77,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens
 # * Min first token latency: 11.0879 ms
 # * Avg first token latency: 18.3775 ms
 # ...
-# Saved benchmark results @ 'results-vllm-online-maxtokens1.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens1.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens1.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt500-maxtokens1.csv'
 ```
 
 > **Note**
@@ -131,7 +133,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens
 # * Min first token latency: 11.0127 ms
 # * Avg first token latency: 16.0468 ms
 # ...
-# Saved benchmark results @ 'results-vllm-online-maxtokens256.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens256.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens256.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt500-maxtokens256.csv'
 ```
 
 ## Benchmark 3: Profiling In-Flight Batching
@@ -167,7 +171,7 @@ python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-ran
 # * Min first token latency: 18.4281 ms
 # * Avg first token latency: 61.8372 ms
 # ...
-# Saved benchmark results @ 'results-vllm-online-periodic1_100_1-period32-maxtokens1024.csv'
+# Saved benchmark results @ 'results-vllm-online-prompt10-periodic1_100_1-period32-maxtokens1024.csv'
 # Saved in-flight batching benchmark plots @ 'inflight_batching_benchmark-*.png'.
 ```
 
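# --- Reviewer sketch (not part of the patch) ---------------------------------
# A minimal, self-contained illustration of the behavior introduced above:
# save_benchmark_results() now writes one CSV per profiled prompt size instead
# of a single combined file. ProfileResults and get_postfix() below are
# simplified stand-ins for the definitions in profile.py; only the saving loop
# mirrors the patched code.
import csv
from dataclasses import asdict, dataclass, fields


@dataclass
class ProfileResults:  # stand-in for the dataclass defined in profile.py
    prompt_size: int
    avg_first_token_latency_ms: float


def get_postfix(prompt_size):  # stand-in for get_postfix(args, prompt_size)
    return f"vllm-online-prompt{prompt_size}-maxtokens256"


def save_benchmark_results(profile_results):
    # One output file per prompt size, each named after its own postfix.
    for pr in profile_results:
        results_csv = f"results-{get_postfix(pr.prompt_size)}.csv"
        with open(results_csv, "w") as f:
            writer = csv.DictWriter(f, fieldnames=[field.name for field in fields(pr)])
            writer.writeheader()
            writer.writerow(asdict(pr))
        print(f"Saved benchmark results @ '{results_csv}'")


if __name__ == "__main__":
    # Hypothetical latencies, for illustration only.
    save_benchmark_results(
        [ProfileResults(100, 18.4), ProfileResults(300, 16.0), ProfileResults(500, 15.2)]
    )
    # Expected: three files, results-vllm-online-prompt{100,300,500}-maxtokens256.csv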