From c6eaab25a6670a6a0edeccb5246f7fb87158444a Mon Sep 17 00:00:00 2001 From: braf Date: Thu, 19 Dec 2024 16:26:04 +0000 Subject: [PATCH 1/4] Adding option to sweep PA batch size --- .../config/generate/perf_analyzer_config.py | 7 +++- genai-perf/genai_perf/parser.py | 41 +++++++++++++++++++ genai-perf/genai_perf/subcommand/analyze.py | 14 ++++++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/genai-perf/genai_perf/config/generate/perf_analyzer_config.py b/genai-perf/genai_perf/config/generate/perf_analyzer_config.py index a9d30896..ff7662c3 100644 --- a/genai-perf/genai_perf/config/generate/perf_analyzer_config.py +++ b/genai-perf/genai_perf/config/generate/perf_analyzer_config.py @@ -242,6 +242,11 @@ def _get_artifact_stimulus_type( parameters["num_dataset_entries"].get_value_based_on_category() ) stimulus = [f"num_dataset_entries{input_sequence_length}"] + elif "runtime_batch_size" in parameters: + runtime_batch_size = str( + parameters["runtime_batch_size"].get_value_based_on_category() + ) + stimulus = [f"batch_size{runtime_batch_size}"] return stimulus @@ -406,7 +411,7 @@ def _create_parameter_args(self) -> List[str]: def _convert_objective_to_cli_option(self, objective_name: str) -> str: obj_to_cli_dict = { - "runtime_batch_size": "--batch-size", + "runtime_batch_size": "-b", "concurrency": "--concurrency-range", "request_rate": "--request-rate-range", } diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index 1f7fe828..ead2de19 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -987,6 +987,47 @@ def _add_tokenizer_args(parser): "tokenizers stored in HuggingFace Hub. 
", ) + other_group.add_argument( + "-v", + "--verbose", + action="store_true", + required=False, + help="An option to enable verbose mode.", + ) + + +def _add_analyze_args(parser): + analyze_group = parser.add_argument_group("Analyze") + + analyze_group.add_argument( + "--sweep-type", + type=str, + default=RunConfigDefaults.STIMULUS_TYPE, + choices=[ + "batch_size", + "concurrency", + "num_dataset_entries", + "input_sequence_length", + "request_rate", + ], + required=False, + help=f"The stimulus type that GAP will sweep.", + ) + analyze_group.add_argument( + "--sweep-range", + type=str, + default=f"{RunConfigDefaults.MIN_CONCURRENCY}:{RunConfigDefaults.MAX_CONCURRENCY}", + required=False, + help=f"The range the stimulus will be swept. Represented as 'min:max' or 'min:max:step'.", + ) + analyze_group.add_argument( + "--sweep-list", + type=str, + default=None, + required=False, + help=f"A comma-separated list of values that stimulus will be swept over.", + ) + def _parse_compare_args(subparsers) -> argparse.ArgumentParser: compare = subparsers.add_parser( diff --git a/genai-perf/genai_perf/subcommand/analyze.py b/genai-perf/genai_perf/subcommand/analyze.py index f20da0c6..e2e70319 100644 --- a/genai-perf/genai_perf/subcommand/analyze.py +++ b/genai-perf/genai_perf/subcommand/analyze.py @@ -141,16 +141,25 @@ def __init__(self, args: Namespace) -> None: def _setup_config(self, args: Namespace) -> ConfigCommand: config = ConfigCommand(model_names=args.model) + sweep_type = self._map_args_to_config_sweep_type(args.sweep_type) if args.sweep_list: - config.analyze.sweep_parameters = {args.sweep_type: args.sweep_list} + config.analyze.sweep_parameters = {sweep_type: args.sweep_list} else: config.analyze.sweep_parameters = { - args.sweep_type: Range(min=args.sweep_min, max=args.sweep_max) + sweep_type: Range(min=args.sweep_min, max=args.sweep_max) } return config + def _map_args_to_config_sweep_type(self, sweep_type: str) -> str: + # The CLI arg sweep type name doesn't have 
a 1:1 mapping to + # what was implemented in the config + if sweep_type == "batch_size": + return "runtime_batch_size" + else: + return sweep_type + ########################################################################### # Sweep Methods ########################################################################### @@ -369,6 +378,7 @@ def _determine_infer_mode_and_load_level( elif ( args.sweep_type == "input_sequence_length" or args.sweep_type == "num_dataset_entries" + or args.sweep_type == "batch_size" ): if args.concurrency: infer_mode = "concurrency" From 7df95752905eb3493e5f7a55308d8045efe6c944 Mon Sep 17 00:00:00 2001 From: braf Date: Thu, 19 Dec 2024 16:45:25 +0000 Subject: [PATCH 2/4] Fixing merge conflict --- genai-perf/genai_perf/parser.py | 34 +-------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index ead2de19..69b8191d 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -527,6 +527,7 @@ def _add_analyze_args(parser): type=str, default=RunConfigDefaults.STIMULUS_TYPE, choices=[ + "batch_size", "concurrency", "num_dataset_entries", "input_sequence_length", @@ -996,39 +997,6 @@ def _add_tokenizer_args(parser): ) -def _add_analyze_args(parser): - analyze_group = parser.add_argument_group("Analyze") - - analyze_group.add_argument( - "--sweep-type", - type=str, - default=RunConfigDefaults.STIMULUS_TYPE, - choices=[ - "batch_size", - "concurrency", - "num_dataset_entries", - "input_sequence_length", - "request_rate", - ], - required=False, - help=f"The stimulus type that GAP will sweep.", - ) - analyze_group.add_argument( - "--sweep-range", - type=str, - default=f"{RunConfigDefaults.MIN_CONCURRENCY}:{RunConfigDefaults.MAX_CONCURRENCY}", - required=False, - help=f"The range the stimulus will be swept. 
Represented as 'min:max' or 'min:max:step'.", - ) - analyze_group.add_argument( - "--sweep-list", - type=str, - default=None, - required=False, - help=f"A comma-separated list of values that stimulus will be swept over.", - ) - - def _parse_compare_args(subparsers) -> argparse.ArgumentParser: compare = subparsers.add_parser( Subcommand.COMPARE.to_lowercase(), From 22280035da2f1a2aa1d1424745b415e1073902c1 Mon Sep 17 00:00:00 2001 From: braf Date: Thu, 19 Dec 2024 16:51:48 +0000 Subject: [PATCH 3/4] Fixing failing unit tests --- genai-perf/genai_perf/parser.py | 8 -------- genai-perf/tests/test_perf_analyzer_config.py | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index 69b8191d..bfe65c11 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -988,14 +988,6 @@ def _add_tokenizer_args(parser): "tokenizers stored in HuggingFace Hub. ", ) - other_group.add_argument( - "-v", - "--verbose", - action="store_true", - required=False, - help="An option to enable verbose mode.", - ) - def _parse_compare_args(subparsers) -> argparse.ArgumentParser: compare = subparsers.add_parser( diff --git a/genai-perf/tests/test_perf_analyzer_config.py b/genai-perf/tests/test_perf_analyzer_config.py index 87cca6cc..773a4fa6 100644 --- a/genai-perf/tests/test_perf_analyzer_config.py +++ b/genai-perf/tests/test_perf_analyzer_config.py @@ -126,7 +126,7 @@ def test_default_command_creation(self): "artifacts/test_model-triton-tensorrtllm-concurrency64/inputs.json", "--profile-export-file", "artifacts/test_model-triton-tensorrtllm-concurrency64/profile_export.json", - "--batch-size", + "-b", "1", "--concurrency-range", "64", @@ -158,7 +158,7 @@ def test_default_representation(self): "10000", "--stability-percentage", "999", - "--batch-size", + "-b", "1", "--concurrency-range", "64", From cf19dcb6761ed93e3c5d513df6e3c37091469b2a Mon Sep 17 00:00:00 2001 From: braf Date: Thu, 19 
Dec 2024 17:46:55 +0000 Subject: [PATCH 4/4] Update documentation --- genai-perf/docs/analyze.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genai-perf/docs/analyze.md b/genai-perf/docs/analyze.md index 71c40159..655b7285 100644 --- a/genai-perf/docs/analyze.md +++ b/genai-perf/docs/analyze.md @@ -34,7 +34,7 @@ The `analyze` subcommand is used to sweep through PA or GenAI-Perf stimulus allo The `analyze` subcommand uses the same CLI options as `profile` with the following additional options, which are used to specify the type and ranges of the stimulus you wish to sweep: #### `--sweep-type` - The type of stimulus you wish the sweep over -The currently support stimulus values are `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries` +The currently supported stimulus values are `batch_size`, `concurrency`, `request_rate`, `input_sequence_length`, and `num_dataset_entries` #### `--sweep-range` - The range over which the stimulus will be swept This can be represented as `min:max` or `min:max:step`. If a `step` is not specified then we assume the range to be min/max for power-of-2 values. For example, `8:256`, would sweep `8,16,32,64,128,256`