Commit

Add postfix to filenames and update doc
nv-hwoo committed Oct 26, 2023
1 parent bd3127b commit 723dbad
Showing 2 changed files with 34 additions and 9 deletions.
35 changes: 29 additions & 6 deletions src/c++/perf_analyzer/docs/examples/profile.py
@@ -60,8 +60,30 @@ def save_json_data(data, filename):
        json.dump(data, f)


def get_export_filename(model, prompt_size):
    filename = f"profile_export-{model}_prompt_{prompt_size}.json"
def get_postfix(args, prompt_size):
    """Generate postfix for profile export filename and plot.
    e.g.
    - trtllm-prompt100-maxtokens256
    - trtllm-prompt100-periodic1_100_1-period32-maxtokens1024
    """
    postfix = f"{args.model}-prompt{prompt_size}-"
    if args.periodic_concurrency_range:
        start, end, step = args.periodic_concurrency_range
        postfix += f"periodic{start}_{end}_{step}-period{args.request_period}-"
    postfix += f"maxtokens{args.max_tokens}"
    return postfix


def get_export_filename(args, prompt_size):
    postfix = get_postfix(args, prompt_size)
    filename = f"profile_export-{postfix}.json"
    return filename


def get_plot_filename(args, prompt_size):
    postfix = get_postfix(args, prompt_size)
    filename = f"inflight_batching_benchmark-{postfix}.png"
    return filename


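As a rough check of the new naming scheme, the snippet below rebuilds the postfix by
hand for a 100-token prompt; the argument values are made up, and the logic simply
mirrors `get_postfix` above so the example runs on its own.

```python
from argparse import Namespace

# Stand-in for the parsed CLI arguments; attribute names follow get_postfix above.
args = Namespace(
    model="trtllm",
    periodic_concurrency_range=[1, 100, 1],
    request_period=32,
    max_tokens=1024,
)
prompt_size = 100

# Inlined mirror of get_postfix(args, prompt_size).
postfix = f"{args.model}-prompt{prompt_size}-"
if args.periodic_concurrency_range:
    start, end, step = args.periodic_concurrency_range
    postfix += f"periodic{start}_{end}_{step}-period{args.request_period}-"
postfix += f"maxtokens{args.max_tokens}"

print(f"profile_export-{postfix}.json")
# profile_export-trtllm-prompt100-periodic1_100_1-period32-maxtokens1024.json
print(f"inflight_batching_benchmark-{postfix}.png")
# inflight_batching_benchmark-trtllm-prompt100-periodic1_100_1-period32-maxtokens1024.png
```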
@@ -197,7 +219,7 @@ def summarize_profile_results(args, prompts):
    results = []
    for prompt in prompts:
        prompt_size = len(prompt.split())
        export_file = get_export_filename(args.model, prompt_size)
        export_file = get_export_filename(args, prompt_size)
        avg_first_token_latency, avg_total_t2t_latency = calculate_avg_latencies(
            filename=export_file
        )
@@ -211,10 +233,9 @@
        if args.periodic_concurrency_range:
            periodic_latencies = calculate_avg_periodic_latencies(args, export_file)
            profile_result.avg_periodic_t2t_latencies = periodic_latencies

            plot_results(
                latencies=periodic_latencies,
                filename=f"inflight_batching_benchmark-{args.model}_prompt_{prompt_size}.png",
                filename=get_plot_filename(args, prompt_size),
            )

        results.append(profile_result)
@@ -253,7 +274,7 @@ def profile(args, export_file):

def prepare_export_file(args, prompt):
    prompt_size = len(prompt.split())
    filename = get_export_filename(args.model, prompt_size)
    filename = get_export_filename(args, prompt_size)

    # If exists, clean up
    export_file = Path(filename)
@@ -299,6 +320,8 @@ def construct_input_data(args):
    # If specified, overwrite max_tokens
    if args.max_tokens:
        sampling_params["max_tokens"] = args.max_tokens
    else:
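        # Assumption: keeping the input data file's default max_tokens on args here
        # lets the export filename postfix above include it even when --max-tokens
        # is not passed on the command line.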
        args.max_tokens = sampling_params["max_tokens"]

    # If specified, overwrite ignore_eos
    if "ignore_eos" not in sampling_params:
8 changes: 5 additions & 3 deletions src/c++/perf_analyzer/docs/llm.md
@@ -80,9 +80,11 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 1

> **Note**
>
> In order to provide a specific prompt (instead of the dummy prompt generated by default),
> the user can provide input data JSON file using `--input-data` option.
> This will however *ignore* any parameters specified through the command line.
> Users can also run a custom prompt by providing an input data JSON file with the
> `--input-data` option, which can additionally specify input tensors or parameters
> to the model. However, when a parameter is defined both in the input data JSON
> file and through a command line option (e.g. `max_tokens`), the command line
> option value overwrites the one in the input data JSON file.
> ```bash
> $ echo '
> {
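A minimal sketch of the precedence rule the updated note describes, mirroring the
`construct_input_data` change above: the dict stands in for parameters loaded via
`--input-data`, and the Args class stands in for the parsed command line (both are
illustrative assumptions, not the real schema shown in llm.md).

```python
# Parameters as they might be loaded from the --input-data JSON file (stand-in values).
sampling_params = {"max_tokens": 32, "ignore_eos": True}


class Args:
    """Stand-in for the parsed command line arguments."""

    max_tokens = 256  # e.g. --max-tokens 256


args = Args()

# The command line option wins when both are given; otherwise the file's value is
# kept, matching the if/else added to construct_input_data in this commit.
if args.max_tokens:
    sampling_params["max_tokens"] = args.max_tokens
else:
    args.max_tokens = sampling_params["max_tokens"]

print(sampling_params["max_tokens"])  # -> 256
```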
