revert tests

triton-inference-server · Dec 6, 2024 · 7a9e0bd
1 parent 1b2514a
commit 7a9e0bd
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 85 deletions.
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
@@ -165,19 +165,26 @@ def _check_conditional_args(
     Check for conditional args and raise an error if they are not set.
     """
 
-    if args.endpoint_type not in _endpoint_type_map:
-        parser.error(f"Invalid endpoint type {args.endpoint_type}")
-
-    endpoint_config = _endpoint_type_map[args.endpoint_type]
-    args.output_format = endpoint_config.output_format
-
     # Endpoint and output format checks
     if args.service_kind == "openai":
+        print(args.endpoint_type)
         if args.endpoint_type is None:
             parser.error(
                 "The --endpoint-type option is required when using the 'openai' service-kind."
             )
 
+    if args.service_kind == "triton" and args.endpoint_type is None:
+        args.endpoint_type = "kserve"
+
+    if args.service_kind == "tensorrtllm_engine" and args.endpoint_type is None:
+        args.endpoint_type = "tensorrtllm_engine"
+
+    if args.endpoint_type and args.endpoint_type not in _endpoint_type_map:
+        parser.error(f"Invalid endpoint type {args.endpoint_type}")
+
+    endpoint_config = _endpoint_type_map[args.endpoint_type]
+    args.output_format = endpoint_config.output_format
+
     if args.endpoint is not None:
         args.endpoint = args.endpoint.lstrip(" /")
     else:
@@ -708,7 +715,6 @@ def _add_endpoint_args(parser):
         "--endpoint-type",
         type=str,
         choices=list(_endpoint_type_map.keys()),
-        default="kserve",
         required=False,
         help=f"The endpoint-type to send requests to on the " "server.",
     )

diff --git a/genai-perf/tests/test_cli.py b/genai-perf/tests/test_cli.py
@@ -84,6 +84,8 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "embeddings",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_text": 5},
             ),
@@ -93,6 +95,8 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "image_retrieval",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_image": 5},
             ),
@@ -102,30 +106,34 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "embeddings",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_text": 5},
             ),
             (["--concurrency", "3"], {"concurrency": 3}),
             (
-                ["--endpoint-type", "completions"],
+                ["--endpoint-type", "completions", "--service-kind", "openai"],
                 {"endpoint": "v1/completions"},
             ),
             (
-                ["--endpoint-type", "chat"],
+                ["--endpoint-type", "chat", "--service-kind", "openai"],
                 {"endpoint": "v1/chat/completions"},
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--endpoint-type", "rankings", "--service-kind", "openai"],
                 {"endpoint": "v1/ranking"},
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--endpoint-type", "image_retrieval", "--service-kind", "openai"],
                 {"endpoint": "v1/infer"},
             ),
             (
                 [
                     "--endpoint-type",
                     "chat",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -135,6 +143,8 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "chat",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "   /custom/address",
                 ],
@@ -144,6 +154,8 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "completions",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -210,13 +222,13 @@ def test_help_version_arguments_output_and_exit(
             (["--warmup-request-count", "100"], {"warmup_request_count": 100}),
             (["--request-rate", "9.0"], {"request_rate": 9.0}),
             (["-s", "99.5"], {"stability_percentage": 99.5}),
-            (["--endpoint-type", "kserve"], {"service_kind": "triton"}),
+            (["--service-kind", "triton"], {"service_kind": "triton"}),
             (
-                ["--endpoint-type", "tensorrtllm_engine"],
+                ["--service-kind", "tensorrtllm_engine"],
                 {"service_kind": "tensorrtllm_engine"},
             ),
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 {"service_kind": "openai", "endpoint": "v1/chat/completions"},
             ),
             (["--stability-percentage", "99.5"], {"stability_percentage": 99.5}),
@@ -342,33 +354,33 @@ def test_file_flags_parsed(self, monkeypatch, mocker):
         "arg, expected_path",
         [
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 "artifacts/test_model-openai-chat-concurrency1",
             ),
             (
-                ["--endpoint-type", "completions"],
+                ["--service-kind", "openai", "--endpoint-type", "completions"],
                 "artifacts/test_model-openai-completions-concurrency1",
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--service-kind", "openai", "--endpoint-type", "rankings"],
                 "artifacts/test_model-openai-rankings-concurrency1",
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--service-kind", "openai", "--endpoint-type", "image_retrieval"],
                 "artifacts/test_model-openai-image_retrieval-concurrency1",
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
+                ["--service-kind", "triton", "--backend", "tensorrtllm"],
                 "artifacts/test_model-triton-tensorrtllm-concurrency1",
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "vllm"],
+                ["--service-kind", "triton", "--backend", "vllm"],
                 "artifacts/test_model-triton-vllm-concurrency1",
             ),
             (
                 [
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--backend",
                     "vllm",
                     "--concurrency",
@@ -403,6 +415,8 @@ def test_default_profile_export_filepath(
                 [
                     "--model",
                     "hello/world/test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "chat",
                 ],
@@ -493,44 +507,29 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
         "args, expected_output",
         [
             (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "chat",
-                 "--service-kind",
-                 "triton",
-                 ],
-                "Invalid service-kind triton for endpoint-type chat. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-
-            (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "kserve",
-                 "--service-kind",
-                 "openai",
-                 ],
-                "Invalid service-kind openai for endpoint-type kserve. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-
-            (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "tensorrtllm_engine",
-                 "--service-kind",
-                 "openai",
-                 ],
-                "Invalid service-kind openai for endpoint-type tensorrtllm_engine. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-            
+                [
+                    "genai-perf",
+                    "profile",
+                    "-m",
+                    "test_model",
+                    "--service-kind",
+                    "openai",
+                ],
+                "The --endpoint-type option is required when using the 'openai' service-kind.",
+            ),
+            (
+                [
+                    "genai-perf",
+                    "profile",
+                    "-m",
+                    "test_model",
+                    "--service-kind",
+                    "openai",
+                    "--endpoint",
+                    "custom/address",
+                ],
+                "The --endpoint-type option is required when using the 'openai' service-kind.",
+            ),
             (
                 [
                     "genai-perf",
@@ -568,20 +567,24 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "chat",
                     "--output-tokens-mean",
                     "100",
                     "--output-tokens-mean-deterministic",
                 ],
-                "The --output-tokens-mean-deterministic option is only supported with the 'kserve' and 'tensorrtllm_engine' endpoint-types",
+                "The --output-tokens-mean-deterministic option is only supported with the Triton and TensorRT-LLM Engine service-kind",
             ),
             (
                 [
                     "genai-perf",
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "embeddings",
                     "--generate-plots",
@@ -594,6 +597,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "rankings",
                     "--generate-plots",
@@ -606,6 +611,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "image_retrieval",
                     "--generate-plots",
@@ -618,8 +625,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--server-metrics-url",
                     "invalid_url",
                 ],
@@ -656,12 +663,14 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "rankings",
                     "--backend",
                     "vllm",
                 ],
-                "The --backend option should only be used when using the 'kserve' endpoint-type",
+                "The --backend option should only be used when using the 'triton' service-kind and 'kserve' endpoint-type.",
             ),
         ],
     )
@@ -679,15 +688,17 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
         "args, expected_format",
         [
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 OutputFormat.OPENAI_CHAT_COMPLETIONS,
             ),
             (
-                ["--endpoint-type", "completions"],
+                ["--service-kind", "openai", "--endpoint-type", "completions"],
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
                 [
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "completions",
                     "--endpoint",
@@ -696,19 +707,19 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--service-kind", "openai", "--endpoint-type", "rankings"],
                 OutputFormat.RANKINGS,
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--service-kind", "openai", "--endpoint-type", "image_retrieval"],
                 OutputFormat.IMAGE_RETRIEVAL,
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
+                ["--service-kind", "triton", "--backend", "tensorrtllm"],
                 OutputFormat.TENSORRTLLM,
             ),
-            (["--endpoint-type", "kserve", "--backend", "vllm"], OutputFormat.VLLM),
-            (["--endpoint-type", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE),
+            (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM),
+            (["--service-kind", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE),
         ],
     )
     def test_inferred_output_format(self, monkeypatch, args, expected_format):
@@ -909,8 +920,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--server-metrics-url",
                     test_triton_metrics_url,
                 ],
@@ -923,8 +934,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                 ],
                 None,
             ),