Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Option to Sweep PA batch size #228

Merged
merged 5 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion genai-perf/docs/analyze.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The `analyze` subcommand is used to sweep through PA or GenAI-Perf stimulus allo
The `analyze` subcommand uses the same CLI options as `profile` with the following additional options, which are used to specify the type and ranges of the stimulus you wish to sweep:

#### `--sweep-type` - The type of stimulus you wish to sweep over
The currently support stimulus values are `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries`
The currently supported stimulus values are `batch_size`, `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries`

#### `--sweep-range` - The range over which the stimulus will be swept
This can be represented as `min:max` or `min:max:step`. If a `step` is not specified then we assume the range to be min/max for power-of-2 values. For example, `8:256`, would sweep `8,16,32,64,128,256`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ def _get_artifact_stimulus_type(
parameters["num_dataset_entries"].get_value_based_on_category()
)
stimulus = [f"num_dataset_entries{input_sequence_length}"]
elif "runtime_batch_size" in parameters:
runtime_batch_size = str(
parameters["runtime_batch_size"].get_value_based_on_category()
)
stimulus = [f"batch_size{runtime_batch_size}"]

return stimulus

Expand Down Expand Up @@ -406,7 +411,7 @@ def _create_parameter_args(self) -> List[str]:

def _convert_objective_to_cli_option(self, objective_name: str) -> str:
obj_to_cli_dict = {
"runtime_batch_size": "--batch-size",
"runtime_batch_size": "-b",
"concurrency": "--concurrency-range",
"request_rate": "--request-rate-range",
}
Expand Down
1 change: 1 addition & 0 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ def _add_analyze_args(parser):
type=str,
default=RunConfigDefaults.STIMULUS_TYPE,
choices=[
"batch_size",
"concurrency",
"num_dataset_entries",
"input_sequence_length",
Expand Down
14 changes: 12 additions & 2 deletions genai-perf/genai_perf/subcommand/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,25 @@ def __init__(self, args: Namespace) -> None:

def _setup_config(self, args: Namespace) -> ConfigCommand:
    """Build the analyze ``ConfigCommand`` from parsed CLI arguments.

    The CLI sweep-type name is first translated to its config-level
    name (e.g. ``batch_size`` -> ``runtime_batch_size``).  An explicit
    ``--sweep-list`` takes precedence; otherwise a min/max ``Range`` is
    created from ``--sweep-min`` / ``--sweep-max``.
    """
    sweep_type = self._map_args_to_config_sweep_type(args.sweep_type)

    # Pick the sweep specification first, then attach it to the config.
    if args.sweep_list:
        sweep_values = args.sweep_list
    else:
        sweep_values = Range(min=args.sweep_min, max=args.sweep_max)

    config = ConfigCommand(model_names=args.model)
    config.analyze.sweep_parameters = {sweep_type: sweep_values}
    return config

def _map_args_to_config_sweep_type(self, sweep_type: str) -> str:
# The CLI arg sweep type name doesn't have a 1:1 mapping to
# what was implemented in the config
if sweep_type == "batch_size":
return "runtime_batch_size"
else:
return sweep_type

###########################################################################
# Sweep Methods
###########################################################################
Expand Down Expand Up @@ -369,6 +378,7 @@ def _determine_infer_mode_and_load_level(
elif (
args.sweep_type == "input_sequence_length"
or args.sweep_type == "num_dataset_entries"
or args.sweep_type == "batch_size"
):
if args.concurrency:
infer_mode = "concurrency"
Expand Down
4 changes: 2 additions & 2 deletions genai-perf/tests/test_perf_analyzer_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_default_command_creation(self):
"artifacts/test_model-triton-tensorrtllm-concurrency64/inputs.json",
"--profile-export-file",
"artifacts/test_model-triton-tensorrtllm-concurrency64/profile_export.json",
"--batch-size",
"-b",
"1",
"--concurrency-range",
"64",
Expand Down Expand Up @@ -158,7 +158,7 @@ def test_default_representation(self):
"10000",
"--stability-percentage",
"999",
"--batch-size",
"-b",
"1",
"--concurrency-range",
"64",
Expand Down
Loading