Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Option to Sweep PA batch size #228

Merged
merged 5 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion genai-perf/docs/analyze.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The `analyze` subcommand is used to sweep through PA or GenAI-Perf stimulus allo
The `analyze` subcommand uses the same CLI options as `profile` with the following additional options, which are used to specify the type and ranges of the stimulus you wish to sweep:

#### `--sweep-type` - The type of stimulus you wish to sweep over
The currently support stimulus values are `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries`
The currently supported stimulus values are `batch_size`, `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries`

#### `--sweep-range` - The range over which the stimulus will be swept
This can be represented as `min:max` or `min:max:step`. If a `step` is not specified then we assume the range to be min/max for power-of-2 values. For example, `8:256`, would sweep `8,16,32,64,128,256`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ def _get_artifact_stimulus_type(
parameters["num_dataset_entries"].get_value_based_on_category()
)
stimulus = [f"num_dataset_entries{input_sequence_length}"]
elif "runtime_batch_size" in parameters:
runtime_batch_size = str(
parameters["runtime_batch_size"].get_value_based_on_category()
)
stimulus = [f"batch_size{runtime_batch_size}"]

return stimulus

Expand Down Expand Up @@ -406,7 +411,7 @@ def _create_parameter_args(self) -> List[str]:

def _convert_objective_to_cli_option(self, objective_name: str) -> str:
obj_to_cli_dict = {
"runtime_batch_size": "--batch-size",
"runtime_batch_size": "-b",
"concurrency": "--concurrency-range",
"request_rate": "--request-rate-range",
}
Expand Down
1 change: 1 addition & 0 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ def _add_analyze_args(parser):
type=str,
default=RunConfigDefaults.STIMULUS_TYPE,
choices=[
"batch_size",
"concurrency",
"num_dataset_entries",
"input_sequence_length",
Expand Down
14 changes: 12 additions & 2 deletions genai-perf/genai_perf/subcommand/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,25 @@ def __init__(self, args: Namespace) -> None:

def _setup_config(self, args: Namespace) -> ConfigCommand:
    """Build the analyze ``ConfigCommand`` from parsed CLI arguments.

    The CLI sweep-type name is first translated to its config-level
    name (e.g. ``batch_size`` -> ``runtime_batch_size``).  An explicit
    ``--sweep-list`` takes precedence; otherwise a min/max ``Range`` is
    created from ``--sweep-min`` / ``--sweep-max``.
    """
    sweep_type = self._map_args_to_config_sweep_type(args.sweep_type)

    # Pick the sweep specification first, then attach it to the config.
    if args.sweep_list:
        sweep_values = args.sweep_list
    else:
        sweep_values = Range(min=args.sweep_min, max=args.sweep_max)

    config = ConfigCommand(model_names=args.model)
    config.analyze.sweep_parameters = {sweep_type: sweep_values}
    return config

def _map_args_to_config_sweep_type(self, sweep_type: str) -> str:
# The CLI arg sweep type name doesn't have a 1:1 mapping to
# what was implemented in the config
if sweep_type == "batch_size":
return "runtime_batch_size"
else:
return sweep_type

###########################################################################
# Sweep Methods
###########################################################################
Expand Down Expand Up @@ -369,6 +378,7 @@ def _determine_infer_mode_and_load_level(
elif (
args.sweep_type == "input_sequence_length"
or args.sweep_type == "num_dataset_entries"
or args.sweep_type == "batch_size"
):
if args.concurrency:
infer_mode = "concurrency"
Expand Down
4 changes: 2 additions & 2 deletions genai-perf/tests/test_perf_analyzer_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_default_command_creation(self):
"artifacts/test_model-triton-tensorrtllm-concurrency64/inputs.json",
"--profile-export-file",
"artifacts/test_model-triton-tensorrtllm-concurrency64/profile_export.json",
"--batch-size",
"-b",
"1",
"--concurrency-range",
"64",
Expand Down Expand Up @@ -158,7 +158,7 @@ def test_default_representation(self):
"10000",
"--stability-percentage",
"999",
"--batch-size",
"-b",
"1",
"--concurrency-range",
"64",
Expand Down
Loading