From 7a9e0bdb43d8f616bd5a0de48fc599231b87a76e Mon Sep 17 00:00:00 2001
From: nnshah1 <neelays@nvidia.com>
Date: Fri, 6 Dec 2024 00:13:37 -0800
Subject: [PATCH] revert tests

---
 genai-perf/genai_perf/parser.py           |  19 +--
 genai-perf/tests/test_cli.py              | 147 ++++++++++++----------
 genai-perf/tests/test_console_exporter.py |  16 +++
 genai-perf/tests/test_csv_exporter.py     |  14 +++
 genai-perf/tests/test_json_exporter.py    |   8 +-
 genai-perf/tests/test_wrapper.py          |  14 ++-
 6 files changed, 133 insertions(+), 85 deletions(-)

diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index 7aba0128..8700892a 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -165,19 +165,25 @@ def _check_conditional_args(
     Check for conditional args and raise an error if they are not set.
     """
 
-    if args.endpoint_type not in _endpoint_type_map:
-        parser.error(f"Invalid endpoint type {args.endpoint_type}")
-
-    endpoint_config = _endpoint_type_map[args.endpoint_type]
-    args.output_format = endpoint_config.output_format
-
     # Endpoint and output format checks
     if args.service_kind == "openai":
         if args.endpoint_type is None:
             parser.error(
                 "The --endpoint-type option is required when using the 'openai' service-kind."
             )
 
+    if args.service_kind == "triton" and args.endpoint_type is None:
+        args.endpoint_type = "kserve"
+
+    if args.service_kind == "tensorrtllm_engine" and args.endpoint_type is None:
+        args.endpoint_type = "tensorrtllm_engine"
+
+    if args.endpoint_type and args.endpoint_type not in _endpoint_type_map:
+        parser.error(f"Invalid endpoint type {args.endpoint_type}")
+
+    endpoint_config = _endpoint_type_map[args.endpoint_type]
+    args.output_format = endpoint_config.output_format
+
     if args.endpoint is not None:
         args.endpoint = args.endpoint.lstrip(" /")
     else:
@@ -708,7 +714,6 @@ def _add_endpoint_args(parser):
         "--endpoint-type",
         type=str,
         choices=list(_endpoint_type_map.keys()),
-        default="kserve",
         required=False,
         help=f"The endpoint-type to send requests to on the " "server.",
     )
diff --git a/genai-perf/tests/test_cli.py b/genai-perf/tests/test_cli.py
index 9669ac0c..1233996b 100644
--- a/genai-perf/tests/test_cli.py
+++ b/genai-perf/tests/test_cli.py
@@ -84,6 +84,8 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "embeddings",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_text": 5},
             ),
@@ -93,6 +95,8 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "image_retrieval",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_image": 5},
             ),
@@ -102,30 +106,34 @@ def test_help_version_arguments_output_and_exit(
                     "5",
                     "--endpoint-type",
                     "embeddings",
+                    "--service-kind",
+                    "openai",
                 ],
                 {"batch_size_text": 5},
             ),
             (["--concurrency", "3"], {"concurrency": 3}),
             (
-                ["--endpoint-type", "completions"],
+                ["--endpoint-type", "completions", "--service-kind", "openai"],
                 {"endpoint": "v1/completions"},
             ),
             (
-                ["--endpoint-type", "chat"],
+                ["--endpoint-type", "chat", "--service-kind", "openai"],
                 {"endpoint": "v1/chat/completions"},
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--endpoint-type", "rankings", "--service-kind", "openai"],
                 {"endpoint": "v1/ranking"},
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--endpoint-type", "image_retrieval", "--service-kind", "openai"],
                 {"endpoint": "v1/infer"},
             ),
             (
                 [
                     "--endpoint-type",
                     "chat",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -135,6 +143,8 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "chat",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "   /custom/address",
                 ],
@@ -144,6 +154,8 @@ def test_help_version_arguments_output_and_exit(
                 [
                     "--endpoint-type",
                     "completions",
+                    "--service-kind",
+                    "openai",
                     "--endpoint",
                     "custom/address",
                 ],
@@ -210,13 +222,13 @@ def test_help_version_arguments_output_and_exit(
             (["--warmup-request-count", "100"], {"warmup_request_count": 100}),
             (["--request-rate", "9.0"], {"request_rate": 9.0}),
             (["-s", "99.5"], {"stability_percentage": 99.5}),
-            (["--endpoint-type", "kserve"], {"service_kind": "triton"}),
+            (["--service-kind", "triton"], {"service_kind": "triton"}),
             (
-                ["--endpoint-type", "tensorrtllm_engine"],
+                ["--service-kind", "tensorrtllm_engine"],
                 {"service_kind": "tensorrtllm_engine"},
             ),
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 {"service_kind": "openai", "endpoint": "v1/chat/completions"},
             ),
             (["--stability-percentage", "99.5"], {"stability_percentage": 99.5}),
@@ -342,33 +354,33 @@ def test_file_flags_parsed(self, monkeypatch, mocker):
         "arg, expected_path",
         [
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 "artifacts/test_model-openai-chat-concurrency1",
             ),
             (
-                ["--endpoint-type", "completions"],
+                ["--service-kind", "openai", "--endpoint-type", "completions"],
                 "artifacts/test_model-openai-completions-concurrency1",
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--service-kind", "openai", "--endpoint-type", "rankings"],
                 "artifacts/test_model-openai-rankings-concurrency1",
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--service-kind", "openai", "--endpoint-type", "image_retrieval"],
                 "artifacts/test_model-openai-image_retrieval-concurrency1",
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
+                ["--service-kind", "triton", "--backend", "tensorrtllm"],
                 "artifacts/test_model-triton-tensorrtllm-concurrency1",
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "vllm"],
+                ["--service-kind", "triton", "--backend", "vllm"],
                 "artifacts/test_model-triton-vllm-concurrency1",
             ),
             (
                 [
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--backend",
                     "vllm",
                     "--concurrency",
@@ -403,6 +415,8 @@ def test_default_profile_export_filepath(
                 [
                     "--model",
                     "hello/world/test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "chat",
                 ],
@@ -493,44 +507,29 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
         "args, expected_output",
         [
             (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "chat",
-                 "--service-kind",
-                 "triton",
-                 ],
-                "Invalid service-kind triton for endpoint-type chat. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-
-            (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "kserve",
-                 "--service-kind",
-                 "openai",
-                 ],
-                "Invalid service-kind openai for endpoint-type kserve. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-
-            (
-                ["genai-perf",
-                 "profile",
-                 "-m",
-                 "test_model",
-                 "--endpoint-type",
-                 "tensorrtllm_engine",
-                 "--service-kind",
-                 "openai",
-                 ],
-                "Invalid service-kind openai for endpoint-type tensorrtllm_engine. service-kind is now inferred from endpoint-type and the argument will be removed in a future release."
-            ),
-            
+                [
+                    "genai-perf",
+                    "profile",
+                    "-m",
+                    "test_model",
+                    "--service-kind",
+                    "openai",
+                ],
+                "The --endpoint-type option is required when using the 'openai' service-kind.",
+            ),
+            (
+                [
+                    "genai-perf",
+                    "profile",
+                    "-m",
+                    "test_model",
+                    "--service-kind",
+                    "openai",
+                    "--endpoint",
+                    "custom/address",
+                ],
+                "The --endpoint-type option is required when using the 'openai' service-kind.",
+            ),
             (
                 [
                     "genai-perf",
@@ -568,13 +567,15 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "chat",
                     "--output-tokens-mean",
                     "100",
                     "--output-tokens-mean-deterministic",
                 ],
-                "The --output-tokens-mean-deterministic option is only supported with the 'kserve' and 'tensorrtllm_engine' endpoint-types",
+                "The --output-tokens-mean-deterministic option is only supported with the Triton and TensorRT-LLM Engine service-kind",
             ),
             (
                 [
@@ -582,6 +583,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "embeddings",
                     "--generate-plots",
@@ -594,6 +597,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "rankings",
                     "--generate-plots",
@@ -606,6 +611,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "image_retrieval",
                     "--generate-plots",
@@ -618,8 +625,8 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--server-metrics-url",
                     "invalid_url",
                 ],
@@ -656,12 +663,14 @@ def test_unrecognized_arg(self, monkeypatch, capsys):
                     "profile",
                     "-m",
                     "test_model",
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "rankings",
                     "--backend",
                     "vllm",
                 ],
-                "The --backend option should only be used when using the 'kserve' endpoint-type",
+                "The --backend option should only be used when using the 'triton' service-kind and 'kserve' endpoint-type.",
             ),
         ],
     )
@@ -679,15 +688,17 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
         "args, expected_format",
         [
             (
-                ["--endpoint-type", "chat"],
+                ["--service-kind", "openai", "--endpoint-type", "chat"],
                 OutputFormat.OPENAI_CHAT_COMPLETIONS,
             ),
             (
-                ["--endpoint-type", "completions"],
+                ["--service-kind", "openai", "--endpoint-type", "completions"],
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
                 [
+                    "--service-kind",
+                    "openai",
                     "--endpoint-type",
                     "completions",
                     "--endpoint",
@@ -696,19 +707,19 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
                 OutputFormat.OPENAI_COMPLETIONS,
             ),
             (
-                ["--endpoint-type", "rankings"],
+                ["--service-kind", "openai", "--endpoint-type", "rankings"],
                 OutputFormat.RANKINGS,
             ),
             (
-                ["--endpoint-type", "image_retrieval"],
+                ["--service-kind", "openai", "--endpoint-type", "image_retrieval"],
                 OutputFormat.IMAGE_RETRIEVAL,
             ),
             (
-                ["--endpoint-type", "kserve", "--backend", "tensorrtllm"],
+                ["--service-kind", "triton", "--backend", "tensorrtllm"],
                 OutputFormat.TENSORRTLLM,
             ),
-            (["--endpoint-type", "kserve", "--backend", "vllm"], OutputFormat.VLLM),
-            (["--endpoint-type", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE),
+            (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM),
+            (["--service-kind", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE),
         ],
     )
     def test_inferred_output_format(self, monkeypatch, args, expected_format):
@@ -909,8 +920,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                     "--server-metrics-url",
                     test_triton_metrics_url,
                 ],
@@ -923,8 +934,8 @@ def test_get_extra_inputs_as_dict(self, extra_inputs_list, expected_dict):
                     "profile",
                     "--model",
                     "test_model",
-                    "--endpoint-type",
-                    "kserve",
+                    "--service-kind",
+                    "triton",
                 ],
                 None,
             ),
diff --git a/genai-perf/tests/test_console_exporter.py b/genai-perf/tests/test_console_exporter.py
index a9f59dd2..1003e601 100644
--- a/genai-perf/tests/test_console_exporter.py
+++ b/genai-perf/tests/test_console_exporter.py
@@ -49,6 +49,8 @@ def test_streaming_llm_output(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -99,6 +101,8 @@ def test_nonstreaming_llm_output(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
         ]
@@ -147,6 +151,8 @@ def test_embedding_output(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "embeddings",
         ]
@@ -186,6 +192,8 @@ def test_valid_goodput(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -239,6 +247,8 @@ def test_invalid_goodput_output(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -313,6 +323,8 @@ def test_console_title(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             endpoint_type,
         ]
@@ -338,6 +350,8 @@ def test_valid_telemetry_verbose(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "triton",
             "--streaming",
             "--server-metrics-url",
             "http://tritonmetrics:8002/metrics",
@@ -452,6 +466,8 @@ def test_missing_data(self, monkeypatch, capsys) -> None:
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
         ]
diff --git a/genai-perf/tests/test_csv_exporter.py b/genai-perf/tests/test_csv_exporter.py
index 8dc40716..68493f4e 100644
--- a/genai-perf/tests/test_csv_exporter.py
+++ b/genai-perf/tests/test_csv_exporter.py
@@ -92,6 +92,8 @@ def test_streaming_llm_csv_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -143,6 +145,8 @@ def test_nonstreaming_llm_csv_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--profile-export-file",
@@ -187,6 +191,8 @@ def test_embedding_csv_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "embeddings",
         ]
@@ -226,6 +232,8 @@ def test_valid_goodput_csv_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -274,6 +282,8 @@ def test_invalid_goodput_csv_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--streaming",
@@ -323,6 +333,8 @@ def test_triton_telemetry_output(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "triton",
             "--streaming",
             "--server-metrics-url",
             "http://tritonserver:8002/metrics",
@@ -395,6 +407,8 @@ def test_missing_data(
             "profile",
             "-m",
             "model_name",
+            "--service-kind",
+            "openai",
             "--endpoint-type",
             "chat",
             "--profile-export-file",
diff --git a/genai-perf/tests/test_json_exporter.py b/genai-perf/tests/test_json_exporter.py
index 554f444c..8c315e25 100644
--- a/genai-perf/tests/test_json_exporter.py
+++ b/genai-perf/tests/test_json_exporter.py
@@ -217,8 +217,8 @@ class TestJsonExporter:
           "backend": "vllm",
           "batch_size_image": 1,
           "batch_size_text": 1,
-          "endpoint": "v2/models/gpt2_vllm/infer",
-          "endpoint_type": "kserve",
+          "endpoint": null,
+          "endpoint_type": null,
           "service_kind": "triton",
           "server_metrics_url": null,
           "streaming": true,
@@ -820,8 +820,8 @@ def test_triton_telemetry_output(
             "profile",
             "-m",
             "gpt2_vllm",
-            "--endpoint-type",
-            "kserve",
+            "--service-kind",
+            "triton",
             "--streaming",
             "--server-metrics-url",
             "http://tritonmetrics:8002/metrics",
diff --git a/genai-perf/tests/test_wrapper.py b/genai-perf/tests/test_wrapper.py
index 090b1867..0521e589 100644
--- a/genai-perf/tests/test_wrapper.py
+++ b/genai-perf/tests/test_wrapper.py
@@ -48,8 +48,8 @@ def test_url_exactly_once_triton(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
-            "--endpoint-type",
-            "kserve",
+            "--service-kind",
+            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -82,8 +82,8 @@ def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath):
             "profile",
             "-m",
             "test_model",
-            "--endpoint-type",
-            "kserve",
+            "--service-kind",
+            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -106,8 +106,8 @@ def test_service_triton(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
-            "--endpoint-type",
-            "kserve",
+            "--service-kind",
+            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -135,6 +135,8 @@ def test_service_openai(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
+            "--service-kind",
+            "openai",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()