From 7a9e0bdb43d8f616bd5a0de48fc599231b87a76e Mon Sep 17 00:00:00 2001 From: nnshah1 Date: Fri, 6 Dec 2024 00:13:37 -0800 Subject: [PATCH] revert tests --- genai-perf/genai_perf/parser.py | 20 +-- genai-perf/tests/test_cli.py | 147 ++++++++++++---------- genai-perf/tests/test_console_exporter.py | 16 +++ genai-perf/tests/test_csv_exporter.py | 14 +++ genai-perf/tests/test_json_exporter.py | 8 +- genai-perf/tests/test_wrapper.py | 14 ++- 6 files changed, 134 insertions(+), 85 deletions(-) diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index 7aba0128..8700892a 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -165,19 +165,26 @@ def _check_conditional_args( Check for conditional args and raise an error if they are not set. """ - if args.endpoint_type not in _endpoint_type_map: - parser.error(f"Invalid endpoint type {args.endpoint_type}") - - endpoint_config = _endpoint_type_map[args.endpoint_type] - args.output_format = endpoint_config.output_format - # Endpoint and output format checks if args.service_kind == "openai": + print(args.endpoint_type) if args.endpoint_type is None: parser.error( "The --endpoint-type option is required when using the 'openai' service-kind." ) + if args.service_kind == "triton" and args.endpoint_type is None: + args.endpoint_type = "kserve" + + if args.service_kind == "tensorrtllm_engine" and args.endpoint_type is None: + args.endpoint_type = "tensorrtllm_engine" + + if args.endpoint_type and args.endpoint_type not in _endpoint_type_map: + parser.error(f"Invalid endpoint type {args.endpoint_type}") + + endpoint_config = _endpoint_type_map[args.endpoint_type] + args.output_format = endpoint_config.output_format + if args.endpoint is not None: args.endpoint = args.endpoint.lstrip(" /") else: @@ -708,7 +715,6 @@ def _add_endpoint_args(parser): "--endpoint-type", type=str, choices=list(_endpoint_type_map.keys()), - default="kserve", required=False, help=f"The endpoint-type to send requests to on the " "server.", ) diff --git a/genai-perf/tests/test_cli.py b/genai-perf/tests/test_cli.py index 9669ac0c..1233996b 100644 --- a/genai-perf/tests/test_cli.py +++ b/genai-perf/tests/test_cli.py @@ -84,6 +84,8 @@ def test_help_version_arguments_output_and_exit( "5", "--endpoint-type", "embeddings", + "--service-kind", + "openai", ], {"batch_size_text": 5}, ), @@ -93,6 +95,8 @@ def test_help_version_arguments_output_and_exit( "5", "--endpoint-type", "image_retrieval", + "--service-kind", + "openai", ], {"batch_size_image": 5}, ), @@ -102,30 +106,34 @@ def test_help_version_arguments_output_and_exit( "5", "--endpoint-type", "embeddings", + "--service-kind", + "openai", ], {"batch_size_text": 5}, ), (["--concurrency", "3"], {"concurrency": 3}), ( - ["--endpoint-type", "completions"], + ["--endpoint-type", "completions", "--service-kind", "openai"], {"endpoint": "v1/completions"}, ), ( - ["--endpoint-type", "chat"], + ["--endpoint-type", "chat", "--service-kind", "openai"], {"endpoint": "v1/chat/completions"}, ), ( - ["--endpoint-type", "rankings"], + ["--endpoint-type", "rankings", "--service-kind", "openai"], {"endpoint": "v1/ranking"}, ), ( - ["--endpoint-type", "image_retrieval"], + ["--endpoint-type", "image_retrieval", "--service-kind", "openai"], {"endpoint": "v1/infer"}, ), ( [ "--endpoint-type", "chat", + "--service-kind", + "openai", "--endpoint", "custom/address", ], @@ -135,6 +143,8 @@ def test_help_version_arguments_output_and_exit( [ "--endpoint-type", "chat", + "--service-kind", + "openai", "--endpoint", " /custom/address", ], @@ -144,6 +154,8 @@ def test_help_version_arguments_output_and_exit( [ "--endpoint-type", "completions", + "--service-kind", + "openai", "--endpoint", "custom/address", ], @@ -210,13 +222,13 @@ def test_help_version_arguments_output_and_exit( (["--warmup-request-count", "100"], {"warmup_request_count": 100}), (["--request-rate", "9.0"], {"request_rate": 9.0}), (["-s", "99.5"], {"stability_percentage": 99.5}), - (["--endpoint-type", "kserve"], {"service_kind": "triton"}), + (["--service-kind", "triton"], {"service_kind": "triton"}), ( - ["--endpoint-type", "tensorrtllm_engine"], + ["--service-kind", "tensorrtllm_engine"], {"service_kind": "tensorrtllm_engine"}, ), ( - ["--endpoint-type", "chat"], + ["--service-kind", "openai", "--endpoint-type", "chat"], {"service_kind": "openai", "endpoint": "v1/chat/completions"}, ), (["--stability-percentage", "99.5"], {"stability_percentage": 99.5}), @@ -342,33 +354,33 @@ def test_file_flags_parsed(self, monkeypatch, mocker): "arg, expected_path", [ ( - ["--endpoint-type", "chat"], + ["--service-kind", "openai", "--endpoint-type", "chat"], "artifacts/test_model-openai-chat-concurrency1", ), ( - ["--endpoint-type", "completions"], + ["--service-kind", "openai", "--endpoint-type", "completions"], "artifacts/test_model-openai-completions-concurrency1", ), ( - ["--endpoint-type", "rankings"], + ["--service-kind", "openai", "--endpoint-type", "rankings"], "artifacts/test_model-openai-rankings-concurrency1", ), ( - ["--endpoint-type", "image_retrieval"], + ["--service-kind", "openai", "--endpoint-type", "image_retrieval"], "artifacts/test_model-openai-image_retrieval-concurrency1", ), ( - ["--endpoint-type", "kserve", "--backend", "tensorrtllm"], + ["--service-kind", "triton", "--backend", "tensorrtllm"], "artifacts/test_model-triton-tensorrtllm-concurrency1", ), ( - ["--endpoint-type", "kserve", "--backend", "vllm"], + ["--service-kind", "triton", "--backend", "vllm"], "artifacts/test_model-triton-vllm-concurrency1", ), ( [ - "--endpoint-type", - "kserve", + "--service-kind", + "triton", "--backend", "vllm", "--concurrency", @@ -403,6 +415,8 @@ def test_default_profile_export_filepath( [ "--model", "hello/world/test_model", + "--service-kind", + "openai", "--endpoint-type", "chat", ], @@ -493,44 +507,29 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "args, expected_output", [ ( - ["genai-perf", - "profile", - "-m", - "test_model", - "--endpoint-type", - "chat", - "--service-kind", - "triton", - ], - "Invalid service-kind triton for endpoint-type chat. service-kind is now inferred from endpoint-type and the argument will be removed in a future release." - ), - - ( - ["genai-perf", - "profile", - "-m", - "test_model", - "--endpoint-type", - "kserve", - "--service-kind", - "openai", - ], - "Invalid service-kind openai for endpoint-type kserve. service-kind is now inferred from endpoint-type and the argument will be removed in a future release." - ), - - ( - ["genai-perf", - "profile", - "-m", - "test_model", - "--endpoint-type", - "tensorrtllm_engine", - "--service-kind", - "openai", - ], - "Invalid service-kind openai for endpoint-type tensorrtllm_engine. service-kind is now inferred from endpoint-type and the argument will be removed in a future release." - ), - + [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "openai", + ], + "The --endpoint-type option is required when using the 'openai' service-kind.", + ), + ( + [ + "genai-perf", + "profile", + "-m", + "test_model", + "--service-kind", + "openai", + "--endpoint", + "custom/address", + ], + "The --endpoint-type option is required when using the 'openai' service-kind.", + ), ( [ "genai-perf", @@ -568,13 +567,15 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "-m", "test_model", + "--service-kind", + "openai", "--endpoint-type", "chat", "--output-tokens-mean", "100", "--output-tokens-mean-deterministic", ], - "The --output-tokens-mean-deterministic option is only supported with the 'kserve' and 'tensorrtllm_engine' endpoint-types", + "The --output-tokens-mean-deterministic option is only supported with the Triton and TensorRT-LLM Engine service-kind", ), ( [ @@ -582,6 +583,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "-m", "test_model", + "--service-kind", + "openai", "--endpoint-type", "embeddings", "--generate-plots", @@ -594,6 +597,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "-m", "test_model", + "--service-kind", + "openai", "--endpoint-type", "rankings", "--generate-plots", @@ -606,6 +611,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "-m", "test_model", + "--service-kind", + "openai", "--endpoint-type", "image_retrieval", "--generate-plots", @@ -618,8 +625,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "--model", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", "--server-metrics-url", "invalid_url", ], @@ -656,12 +663,14 @@ def test_unrecognized_arg(self, monkeypatch, capsys): "profile", "-m", "test_model", + "--service-kind", + "openai", "--endpoint-type", "rankings", "--backend", "vllm", ], - "The --backend option should only be used when using the 'kserve' endpoint-type", + "The --backend option should only be used when using the 'triton' service-kind and 'kserve' endpoint-type.", ), ], ) @@ -679,15 +688,17 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys): "args, expected_format", [ ( - ["--endpoint-type", "chat"], + ["--service-kind", "openai", "--endpoint-type", "chat"], OutputFormat.OPENAI_CHAT_COMPLETIONS, ), ( - ["--endpoint-type", "completions"], + ["--service-kind", "openai", "--endpoint-type", "completions"], OutputFormat.OPENAI_COMPLETIONS, ), ( [ + "--service-kind", + "openai", "--endpoint-type", "completions", "--endpoint", @@ -696,19 +707,19 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys): OutputFormat.OPENAI_COMPLETIONS, ), ( - ["--endpoint-type", "rankings"], + ["--service-kind", "openai", "--endpoint-type", "rankings"], OutputFormat.RANKINGS, ), ( - ["--endpoint-type", "image_retrieval"], + ["--service-kind", "openai", "--endpoint-type", "image_retrieval"], OutputFormat.IMAGE_RETRIEVAL, ), ( - ["--endpoint-type", "kserve", "--backend", "tensorrtllm"], + ["--service-kind", "triton", "--backend", "tensorrtllm"], OutputFormat.TENSORRTLLM, ), - (["--endpoint-type", "kserve", "--backend", "vllm"], OutputFormat.VLLM), - (["--endpoint-type", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE), + (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM), + (["--service-kind", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE), ], ) def test_inferred_output_format(self, monkeypatch, args, expected_format): @@ -909,8 +920,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict): "profile", "--model", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", "--server-metrics-url", test_triton_metrics_url, ], @@ -923,8 +934,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict): "profile", "--model", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", ], None, ), diff --git a/genai-perf/tests/test_console_exporter.py b/genai-perf/tests/test_console_exporter.py index a9f59dd2..1003e601 100644 --- a/genai-perf/tests/test_console_exporter.py +++ b/genai-perf/tests/test_console_exporter.py @@ -49,6 +49,8 @@ def test_streaming_llm_output(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -99,6 +101,8 @@ def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", ] @@ -147,6 +151,8 @@ def test_embedding_output(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "embeddings", ] @@ -186,6 +192,8 @@ def test_valid_goodput(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -239,6 +247,8 @@ def test_invalid_goodput_output(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -313,6 +323,8 @@ def test_console_title( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", endpoint_type, ] @@ -338,6 +350,8 @@ def test_valid_telemetry_verbose(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "triton", "--streaming", "--server-metrics-url", "http://tritonmetrics:8002/metrics", @@ -452,6 +466,8 @@ def test_missing_data(self, monkeypatch, capsys) -> None: "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", ] diff --git a/genai-perf/tests/test_csv_exporter.py b/genai-perf/tests/test_csv_exporter.py index 8dc40716..68493f4e 100644 --- a/genai-perf/tests/test_csv_exporter.py +++ b/genai-perf/tests/test_csv_exporter.py @@ -92,6 +92,8 @@ def test_streaming_llm_csv_output( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -143,6 +145,8 @@ def test_nonstreaming_llm_csv_output( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--profile-export-file", @@ -187,6 +191,8 @@ def test_embedding_csv_output( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "embeddings", ] @@ -226,6 +232,8 @@ def test_valid_goodput_csv_output( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -274,6 +282,8 @@ def test_invalid_goodput_csv_output( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--streaming", @@ -323,6 +333,8 @@ def test_triton_telemetry_output( "profile", "-m", "model_name", + "--service-kind", + "triton", "--streaming", "--server-metrics-url", "http://tritonserver:8002/metrics", @@ -395,6 +407,8 @@ def test_missing_data( "profile", "-m", "model_name", + "--service-kind", + "openai", "--endpoint-type", "chat", "--profile-export-file", diff --git a/genai-perf/tests/test_json_exporter.py b/genai-perf/tests/test_json_exporter.py index 554f444c..8c315e25 100644 --- a/genai-perf/tests/test_json_exporter.py +++ b/genai-perf/tests/test_json_exporter.py @@ -217,8 +217,8 @@ class TestJsonExporter: "backend": "vllm", "batch_size_image": 1, "batch_size_text": 1, - "endpoint": "v2/models/gpt2_vllm/infer", - "endpoint_type": "kserve", + "endpoint": null, + "endpoint_type": null, "service_kind": "triton", "server_metrics_url": null, "streaming": true, @@ -820,8 +820,8 @@ def test_triton_telemetry_output( "profile", "-m", "gpt2_vllm", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", "--streaming", "--server-metrics-url", "http://tritonmetrics:8002/metrics", diff --git a/genai-perf/tests/test_wrapper.py b/genai-perf/tests/test_wrapper.py index 090b1867..0521e589 100644 --- a/genai-perf/tests/test_wrapper.py +++ b/genai-perf/tests/test_wrapper.py @@ -48,8 +48,8 @@ def test_url_exactly_once_triton(self, monkeypatch, arg): "profile", "-m", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() @@ -82,8 +82,8 @@ def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath): "profile", "-m", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() @@ -106,8 +106,8 @@ def test_service_triton(self, monkeypatch, arg): "profile", "-m", "test_model", - "--endpoint-type", - "kserve", + "--service-kind", + "triton", ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args() @@ -135,6 +135,8 @@ def test_service_openai(self, monkeypatch, arg): "profile", "-m", "test_model", + "--service-kind", + "openai", ] + arg monkeypatch.setattr("sys.argv", args) args, extra_args = parser.parse_args()