From ade066d486498c5e5dd0cb701cca81bcaddb24c6 Mon Sep 17 00:00:00 2001 From: David Yastremsky <58150256+dyastremsky@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:12:10 -0700 Subject: [PATCH] Move GenAI-Perf profiling to its own subcommand (#745) --- src/c++/perf_analyzer/genai-perf/README.md | 4 +- .../genai-perf/docs/embeddings.md | 4 +- src/c++/perf_analyzer/genai-perf/docs/lora.md | 2 +- .../perf_analyzer/genai-perf/docs/rankings.md | 2 +- .../perf_analyzer/genai-perf/docs/tutorial.md | 8 +- .../genai-perf/genai_perf/parser.py | 114 ++++++++++++------ .../genai-perf/genai_perf/test_end_to_end.py | 12 +- .../genai-perf/tests/test_cli.py | 74 +++++++----- .../genai-perf/tests/test_console_exporter.py | 3 + .../genai-perf/tests/test_csv_exporter.py | 3 + .../genai-perf/tests/test_json_exporter.py | 3 +- .../genai-perf/tests/test_wrapper.py | 28 ++++- 12 files changed, 169 insertions(+), 88 deletions(-) diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index 45159cc15..1d03b3dd0 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -162,7 +162,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE} 2. Run GenAI-Perf: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind triton \ --backend tensorrtllm \ @@ -209,7 +209,7 @@ current profile run. This is disabled by default but users can easily enable it by passing the `--generate-plots` option when running the benchmark: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind triton \ --backend tensorrtllm \ diff --git a/src/c++/perf_analyzer/genai-perf/docs/embeddings.md b/src/c++/perf_analyzer/genai-perf/docs/embeddings.md index bc6e2d413..e508f9eff 100644 --- a/src/c++/perf_analyzer/genai-perf/docs/embeddings.md +++ b/src/c++/perf_analyzer/genai-perf/docs/embeddings.md @@ -60,7 +60,7 @@ docker run -it --net=host --rm --gpus=all vllm/vllm-openai:latest --model intflo To profile embeddings models using GenAI-Perf, use the following command: ```bash -genai-perf \ +genai-perf profile \ -m intfloat/e5-mistral-7b-instruct \ --service-kind openai \ --endpoint-type embeddings \ @@ -73,7 +73,7 @@ additional arguments with the `--extra-inputs` [flag](../README.md#input-options For example, you could use this command: ```bash -genai-perf \ +genai-perf profile \ -m intfloat/e5-mistral-7b-instruct \ --service-kind openai \ --endpoint-type embeddings \ diff --git a/src/c++/perf_analyzer/genai-perf/docs/lora.md b/src/c++/perf_analyzer/genai-perf/docs/lora.md index b3ddbe479..d30867eda 100644 --- a/src/c++/perf_analyzer/genai-perf/docs/lora.md +++ b/src/c++/perf_analyzer/genai-perf/docs/lora.md @@ -41,7 +41,7 @@ When profiling with multiple models, you can specify how the models should be assigned to prompts using the `--model-selection-strategy` option: ```bash -genai-perf \ +genai-perf profile \ -m lora_adapter1 lora_adapter2 lora_adapter3 \ --model-selection-strategy round_robin ``` diff --git a/src/c++/perf_analyzer/genai-perf/docs/rankings.md b/src/c++/perf_analyzer/genai-perf/docs/rankings.md index 5cd1a4812..a316ef857 100644 --- a/src/c++/perf_analyzer/genai-perf/docs/rankings.md +++ b/src/c++/perf_analyzer/genai-perf/docs/rankings.md @@ -74,7 +74,7 @@ docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingf To profile ranking models using GenAI-Perf, use the following command: ```bash -genai-perf \ +genai-perf profile \ -m BAAI/bge-reranker-large \ 
--service-kind openai \ --endpoint-type rankings \ diff --git a/src/c++/perf_analyzer/genai-perf/docs/tutorial.md b/src/c++/perf_analyzer/genai-perf/docs/tutorial.md index bc9dec71b..6d6f3e301 100644 --- a/src/c++/perf_analyzer/genai-perf/docs/tutorial.md +++ b/src/c++/perf_analyzer/genai-perf/docs/tutorial.md @@ -82,7 +82,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE} 2. Run GenAI-Perf: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind triton \ --backend tensorrtllm \ @@ -166,7 +166,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE} 2. Run GenAI-Perf: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind triton \ --backend vllm \ @@ -232,7 +232,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE} 2. Run GenAI-Perf: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind openai \ --endpoint v1/chat/completions \ @@ -296,7 +296,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE} 2. Run GenAI-Perf: ```bash -genai-perf \ +genai-perf profile \ -m gpt2 \ --service-kind openai \ --endpoint v1/completions \ diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 64178fd4c..521b30e53 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -61,6 +61,14 @@ def to_lowercase(self): return self.name.lower() +class Subcommand(Enum): + PROFILE = auto() + COMPARE = auto() + + def to_lowercase(self): + return self.name.lower() + + logger = logging.getLogger(__name__) _endpoint_type_map = { @@ -77,7 +85,7 @@ def _check_model_args( """ Check if model name is provided. 
""" - if not args.subcommand and not args.model: + if not args.model: parser.error("The -m/--model option is required and cannot be empty.") args = _convert_str_to_enum_entry( args, "model_selection_strategy", ModelSelectionStrategy @@ -102,9 +110,8 @@ def _check_compare_args( """ Check compare subcommand args """ - if args.subcommand == "compare": - if not args.config and not args.files: - parser.error("Either the --config or --files option must be specified.") + if not args.config and not args.files: + parser.error("Either the --config or --files option must be specified.") return args @@ -573,13 +580,6 @@ def _add_other_args(parser): help="An option to enable verbose mode.", ) - other_group.add_argument( - "--version", - action="version", - version="%(prog)s " + __version__, - help=f"An option to print the version and exit.", - ) - def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict: request_inputs = {} @@ -626,10 +626,10 @@ def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict: def _parse_compare_args(subparsers) -> argparse.ArgumentParser: compare = subparsers.add_parser( - "compare", + Subcommand.COMPARE.to_lowercase(), description="Subcommand to generate plots that compare multiple profile runs.", ) - compare_group = compare.add_argument_group("Compare") + compare_group = compare.add_argument_group("Input") mx_group = compare_group.add_mutually_exclusive_group(required=False) mx_group.add_argument( "--config", @@ -651,6 +651,20 @@ def _parse_compare_args(subparsers) -> argparse.ArgumentParser: return compare +def _parse_profile_args(subparsers) -> argparse.ArgumentParser: + profile = subparsers.add_parser( + Subcommand.PROFILE.to_lowercase(), + description="Subcommand to profile LLMs and Generative AI models.", + ) + _add_endpoint_args(profile) + _add_input_args(profile) + _add_profile_args(profile) + _add_output_args(profile) + _add_other_args(profile) + profile.set_defaults(func=profile_handler) + return profile + + ### Handlers ### @@ -659,12 +673,6 @@ def create_compare_dir() -> None: os.mkdir(DEFAULT_COMPARE_DIR) -def profile_handler(args, extra_args): - from genai_perf.wrapper import Profiler - - Profiler.run(args=args, extra_args=extra_args) - - def compare_handler(args: argparse.Namespace): """Handles `compare` subcommand workflow.""" if args.files: @@ -679,45 +687,75 @@ def compare_handler(args: argparse.Namespace): plot_manager.generate_plots() -### Entrypoint ### +def profile_handler(args, extra_args): + from genai_perf.wrapper import Profiler + Profiler.run(args=args, extra_args=extra_args) -def parse_args(): - argv = sys.argv +### Parser Initialization ### + + +def init_parsers(): parser = argparse.ArgumentParser( prog="genai-perf", description="CLI to profile LLMs and Generative AI models with Perf Analyzer", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser.set_defaults(func=profile_handler) - - # Conceptually group args for easier visualization - _add_endpoint_args(parser) - _add_input_args(parser) - _add_profile_args(parser) - _add_output_args(parser) - _add_other_args(parser) + parser.add_argument( + "--version", + action="version", + version="%(prog)s " + __version__, + help=f"An option to print the version and exit.", + ) # Add subcommands subparsers = parser.add_subparsers( help="List of subparser commands.", dest="subcommand" ) - compare_parser = _parse_compare_args(subparsers) + _ = _parse_compare_args(subparsers) + _ = _parse_profile_args(subparsers) + subparsers.required = True + + return parser - # Check for passthrough 
args + +def get_passthrough_args_index(argv: list) -> int: if "--" in argv: passthrough_index = argv.index("--") logger.info(f"Detected passthrough args: {argv[passthrough_index + 1:]}") else: passthrough_index = len(argv) + return passthrough_index + + +def refine_args( + parser: argparse.ArgumentParser, args: argparse.Namespace +) -> argparse.Namespace: + if args.subcommand == Subcommand.PROFILE.to_lowercase(): + args = _infer_prompt_source(args) + args = _check_model_args(parser, args) + args = _check_conditional_args(parser, args) + args = _check_load_manager_args(args) + args = _set_artifact_paths(args) + elif args.subcommand == Subcommand.COMPARE.to_lowercase(): + args = _check_compare_args(parser, args) + else: + raise ValueError(f"Unknown subcommand: {args.subcommand}") + + return args + + +### Entrypoint ### + + +def parse_args(): + argv = sys.argv + + parser = init_parsers() + passthrough_index = get_passthrough_args_index(argv) args = parser.parse_args(argv[1:passthrough_index]) - args = _infer_prompt_source(args) - args = _check_model_args(parser, args) - args = _check_conditional_args(parser, args) - args = _check_compare_args(compare_parser, args) - args = _check_load_manager_args(args) - args = _set_artifact_paths(args) + args = refine_args(parser, args) return args, argv[passthrough_index + 1 :] diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py b/src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py index 3cc2999f5..a44304348 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py @@ -10,7 +10,7 @@ # For all cases but vllm_openai, it assumes that the server will be on port 9999 # # This script will run a sweep of all combinations of values in the testing matrix -# by appending those options on to the genai-pa base command +# by appending those options on to the genai-perf base command # @@ -20,11 +20,11 @@ ] base_commands = { - "nim_chat": "genai-perf -s 999 -p 20000 -m llama-2-7b-chat -u http://localhost:9999 --service-kind openai --endpoint-type chat", - "nim_completions": "genai-perf -s 999 -p 20000 -m llama-2-7b -u http://localhost:9999 --service-kind openai --endpoint-type completions", - "vllm_openai": "genai-perf -s 999 -p 20000 -m mistralai/Mistral-7B-v0.1 --service-kind openai --endpoint-type chat", - "triton_tensorrtllm": "genai-perf -s 999 -p 20000 -m llama-2-7b -u 0.0.0.0:9999 --service-kind triton --backend tensorrtllm", - "triton_vllm": "genai-perf -s 999 -p 20000 -m gpt2_vllm --service-kind triton --backend vllm", + "nim_chat": "genai-perf profile -s 999 -p 20000 -m llama-2-7b-chat -u http://localhost:9999 --service-kind openai --endpoint-type chat", + "nim_completions": "genai-perf profile -s 999 -p 20000 -m llama-2-7b -u http://localhost:9999 --service-kind openai --endpoint-type completions", + "vllm_openai": "genai-perf profile -s 999 -p 20000 -m mistralai/Mistral-7B-v0.1 --service-kind openai --endpoint-type chat", + "triton_tensorrtllm": "genai-perf profile -s 999 -p 20000 -m llama-2-7b -u 0.0.0.0:9999 --service-kind triton --backend tensorrtllm", + "triton_vllm": "genai-perf profile -s 999 -p 20000 -m gpt2_vllm --service-kind triton --backend vllm", } testname = "" diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index cc005beef..eb891fd02 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ 
-52,10 +52,7 @@ class TestCLIArguments: [ (["-h"], expected_help_output), (["--help"], expected_help_output), - (["-m", "abc", "--help"], expected_help_output), - (["-m", "abc", "-h"], expected_help_output), (["--version"], expected_version_output), - (["-m", "abc", "--version"], expected_version_output), ], ) def test_help_version_arguments_output_and_exit( @@ -226,7 +223,7 @@ def test_help_version_arguments_output_and_exit( ) def test_non_file_flags_parsed(self, monkeypatch, arg, expected_attributes, capsys): logging.init_logging() - combined_args = ["genai-perf", "--model", "test_model"] + arg + combined_args = ["genai-perf", "profile", "--model", "test_model"] + arg monkeypatch.setattr("sys.argv", combined_args) args, _ = parser.parse_args() @@ -267,7 +264,7 @@ def test_multiple_model_args( self, monkeypatch, models, expected_model_list, formatted_name, capsys ): logging.init_logging() - combined_args = ["genai-perf"] + models + combined_args = ["genai-perf", "profile"] + models monkeypatch.setattr("sys.argv", combined_args) args, _ = parser.parse_args() @@ -287,6 +284,7 @@ def test_file_flags_parsed(self, monkeypatch, mocker): _ = mocker.patch("os.path.isfile", return_value=True) combined_args = [ "genai-perf", + "profile", "--model", "test_model", "--input-file", @@ -340,7 +338,7 @@ def test_default_profile_export_filepath( self, monkeypatch, arg, expected_path, capsys ): logging.init_logging() - combined_args = ["genai-perf", "--model", "test_model"] + arg + combined_args = ["genai-perf", "profile", "--model", "test_model"] + arg monkeypatch.setattr("sys.argv", combined_args) args, _ = parser.parse_args() @@ -380,7 +378,7 @@ def test_model_name_artifact_path( self, monkeypatch, arg, expected_path, expected_output, capsys ): logging.init_logging() - combined_args = ["genai-perf"] + arg + combined_args = ["genai-perf", "profile"] + arg monkeypatch.setattr("sys.argv", combined_args) args, _ = parser.parse_args() @@ -390,7 +388,9 @@ def test_model_name_artifact_path( def test_default_load_level(self, monkeypatch, capsys): logging.init_logging() - monkeypatch.setattr("sys.argv", ["genai-perf", "--model", "test_model"]) + monkeypatch.setattr( + "sys.argv", ["genai-perf", "profile", "--model", "test_model"] + ) args, _ = parser.parse_args() assert args.concurrency == 1 captured = capsys.readouterr() @@ -398,7 +398,8 @@ def test_default_load_level(self, monkeypatch, capsys): def test_load_level_mutually_exclusive(self, monkeypatch, capsys): monkeypatch.setattr( - "sys.argv", ["genai-perf", "--concurrency", "3", "--request-rate", "9.0"] + "sys.argv", + ["genai-perf", "profile", "--concurrency", "3", "--request-rate", "9.0"], ) expected_output = ( "argument --request-rate: not allowed with argument --concurrency" @@ -412,7 +413,7 @@ def test_load_level_mutually_exclusive(self, monkeypatch, capsys): assert expected_output in captured.err def test_model_not_provided(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["genai-perf"]) + monkeypatch.setattr("sys.argv", ["genai-perf", "profile"]) expected_output = "The -m/--model option is required and cannot be empty." 
with pytest.raises(SystemExit) as excinfo: @@ -423,7 +424,7 @@ def test_model_not_provided(self, monkeypatch, capsys): assert expected_output in captured.err def test_pass_through_args(self, monkeypatch): - args = ["genai-perf", "-m", "test_model"] + args = ["genai-perf", "profile", "-m", "test_model"] other_args = ["--", "With", "great", "power"] monkeypatch.setattr("sys.argv", args + other_args) _, pass_through_args = parser.parse_args() @@ -435,6 +436,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "sys.argv", [ "genai-perf", + "profile", "-m", "nonexistent_model", "--wrong-arg", @@ -453,12 +455,20 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "args, expected_output", [ ( - ["genai-perf", "-m", "test_model", "--service-kind", "openai"], + [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "openai", + ], "The --endpoint-type option is required when using the 'openai' service-kind.", ), ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -469,12 +479,20 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "The --endpoint-type option is required when using the 'openai' service-kind.", ), ( - ["genai-perf", "-m", "test_model", "--output-tokens-stddev", "5"], + [ + "genai-perf", + "profile", + "-m", + "test_model", + "--output-tokens-stddev", + "5", + ], "The --output-tokens-mean option is required when using --output-tokens-stddev.", ), ( [ "genai-perf", + "profile", "-m", "test_model", "--output-tokens-mean-deterministic", @@ -484,6 +502,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--output-tokens-mean-deterministic", @@ -493,6 +512,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -508,6 +528,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--batch-size", @@ -518,6 +539,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -531,6 +553,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -544,6 +567,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -557,6 +581,7 @@ def test_unrecognized_arg(self, monkeypatch, capsys): ( [ "genai-perf", + "profile", "-m", "test_model", "--service-kind", @@ -613,7 +638,9 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys): ], ) def test_inferred_output_format(self, monkeypatch, args, expected_format): - monkeypatch.setattr("sys.argv", ["genai-perf", "-m", "test_model"] + args) + monkeypatch.setattr( + "sys.argv", ["genai-perf", "profile", "-m", "test_model"] + args + ) parsed_args, _ = parser.parse_args() assert parsed_args.output_format == expected_format @@ -644,7 +671,7 @@ def test_inferred_output_format(self, monkeypatch, args, expected_format): ], ) def test_repeated_extra_arg_warning(self, monkeypatch, args, expected_error): - combined_args = ["genai-perf", "-m", "test_model"] + args + combined_args = ["genai-perf", "profile", "-m", "test_model"] + args monkeypatch.setattr("sys.argv", combined_args) parsed_args, _ = parser.parse_args() @@ -672,7 +699,7 @@ def test_inferred_prompt_source( _ = mocker.patch("builtins.open", mocker.mock_open(read_data="data")) _ = mocker.patch("os.path.isfile", return_value=True) _ = 
mocker.patch("os.path.isdir", return_value=True) - combined_args = ["genai-perf", "--model", "test_model"] + args + combined_args = ["genai-perf", "profile", "--model", "test_model"] + args monkeypatch.setattr("sys.argv", combined_args) args, _ = parser.parse_args() @@ -684,6 +711,7 @@ def test_prompt_source_assertions(self, monkeypatch, mocker, capsys): _ = mocker.patch("os.path.isdir", return_value=True) args = [ "genai-perf", + "profile", "--model", "test_model", "--input-dataset", @@ -758,20 +786,6 @@ def test_compare_not_provided(self, monkeypatch, capsys): captured = capsys.readouterr() assert expected_output in captured.err - @pytest.mark.parametrize( - "args, expected_model", - [ - (["--files", "profile1.json", "profile2.json", "profile3.json"], None), - (["--config", "config.yaml"], None), - ], - ) - def test_compare_model_arg(self, monkeypatch, args, expected_model): - combined_args = ["genai-perf", "compare"] + args - monkeypatch.setattr("sys.argv", combined_args) - args, _ = parser.parse_args() - - assert args.model == expected_model - @pytest.mark.parametrize( "extra_inputs_list, expected_dict", [ diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py index ca11377ed..dda62e04a 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_console_exporter.py @@ -35,6 +35,7 @@ class TestConsoleExporter: def test_streaming_llm_output(self, monkeypatch, capsys) -> None: argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", @@ -86,6 +87,7 @@ def test_streaming_llm_output(self, monkeypatch, capsys) -> None: def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None: argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", @@ -135,6 +137,7 @@ def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None: def test_embedding_output(self, monkeypatch, capsys) -> None: argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py index bd2d3bb81..6a60bc2dc 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_csv_exporter.py @@ -71,6 +71,7 @@ def test_streaming_llm_csv_output( """ argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", @@ -126,6 +127,7 @@ def test_nonstreaming_llm_csv_output( """ argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", @@ -174,6 +176,7 @@ def test_embedding_csv_output( ) -> None: argv = [ "genai-perf", + "profile", "-m", "model_name", "--service-kind", diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py index 998cc8865..e4a29267d 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py @@ -35,6 +35,7 @@ class TestJsonExporter: def test_generate_json(self, monkeypatch) -> None: cli_cmd = [ "genai-perf", + "profile", "-m", "gpt2_vllm", "--backend", @@ -257,7 +258,7 @@ def test_generate_json(self, monkeypatch) -> None: "artifact_dir": "artifacts/gpt2_vllm-triton-vllm-concurrency1", "tokenizer": "hf-internal-testing/llama-tokenizer", "verbose": false, - "subcommand": null, + "subcommand": "profile", "prompt_source": "synthetic", "extra_inputs": 
{ "max_tokens": 256, diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py b/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py index 184a47f11..fd4c34b51 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py @@ -43,7 +43,14 @@ class TestWrapper: ], ) def test_url_exactly_once_triton(self, monkeypatch, arg): - args = ["genai-perf", "-m", "test_model", "--service-kind", "triton"] + arg + args = [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "triton", + ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() cmd = Profiler.build_cmd(args, extra_args) @@ -70,7 +77,14 @@ def test_url_exactly_once_triton(self, monkeypatch, arg): ], ) def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath): - args = ["genai-perf", "-m", "test_model", "--service-kind", "triton"] + arg + args = [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "triton", + ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() cmd = Profiler.build_cmd(args, extra_args) @@ -87,7 +101,14 @@ def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath): ], ) def test_service_triton(self, monkeypatch, arg): - args = ["genai-perf", "-m", "test_model", "--service-kind", "triton"] + arg + args = [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "triton", + ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() cmd = Profiler.build_cmd(args, extra_args) @@ -111,6 +132,7 @@ def test_service_triton(self, monkeypatch, arg): def test_service_openai(self, monkeypatch, arg): args = [ "genai-perf", + "profile", "-m", "test_model", "--service-kind",