From 7fd0906853987328e2c41d9bf1526026c614c768 Mon Sep 17 00:00:00 2001
From: Neelay Shah
Date: Sat, 7 Sep 2024 22:01:32 +0000
Subject: [PATCH 1/3] adding support for generate endpoint

---
 .../genai_perf/inputs/converters/__init__.py   |   2 +
 .../inputs/converters/generate_converter.py    | 109 +++++++++++++++
 .../converters/triton_generate_converter.py    | 109 +++++++++++++++
 .../genai_perf/inputs/input_constants.py       |   1 +
 .../inputs/output_format_converter_factory.py  |   2 +
 genai-perf/genai_perf/parser.py                | 131 +++++++++---------
 .../llm_profile_data_parser.py                 |  41 ++++--
 .../profile_data_parser.py                     |   4 +
 8 files changed, 328 insertions(+), 71 deletions(-)
 create mode 100644 genai-perf/genai_perf/inputs/converters/generate_converter.py
 create mode 100644 genai-perf/genai_perf/inputs/converters/triton_generate_converter.py

diff --git a/genai-perf/genai_perf/inputs/converters/__init__.py b/genai-perf/genai_perf/inputs/converters/__init__.py
index aefd157d..746654de 100644
--- a/genai-perf/genai_perf/inputs/converters/__init__.py
+++ b/genai-perf/genai_perf/inputs/converters/__init__.py
@@ -31,6 +31,7 @@
 from .tensorrtllm_converter import TensorRTLLMConverter
 from .tensorrtllm_engine_converter import TensorRTLLMEngineConverter
 from .vllm_converter import VLLMConverter
+from .triton_generate_converter import TritonGenerateConverter
 
 __all__ = [
     "OpenAIChatCompletionsConverter",
@@ -40,4 +41,5 @@
     "TensorRTLLMConverter",
     "TensorRTLLMEngineConverter",
     "VLLMConverter",
+    "TritonGenerateConverter",
 ]

diff --git a/genai-perf/genai_perf/inputs/converters/generate_converter.py b/genai-perf/genai_perf/inputs/converters/generate_converter.py
new file mode 100644
index 00000000..b1b2d921
--- /dev/null
+++ b/genai-perf/genai_perf/inputs/converters/generate_converter.py
@@ -0,0 +1,109 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import random
+from copy import deepcopy
+from typing import Dict, List
+
+from genai_perf.inputs.converters.base_converter import BaseConverter
+from genai_perf.inputs.input_constants import (
+    DEFAULT_OUTPUT_TOKENS_MEAN,
+    EMPTY_JSON_IN_OPENAI_PA_FORMAT,
+)
+from genai_perf.inputs.inputs_config import InputsConfig
+
+
+class GenerateConverter(BaseConverter):
+    def convert(
+        self,
+        generic_dataset: Dict,
+        config: InputsConfig,
+    ) -> Dict:
+        (
+            system_role_headers,
+            user_role_headers,
+            text_input_headers,
+        ) = self._determine_json_feature_roles(generic_dataset)
+
+        pa_json = self._create_pa_json(
+            generic_dataset,
+            system_role_headers,
+            user_role_headers,
+            text_input_headers,
+            config,
+        )
+
+        return pa_json
+
+    def _create_pa_json(
+        self,
+        generic_dataset: Dict,
+        system_role_headers: List[str],
+        user_role_headers: List[str],
+        text_input_headers: List[str],
+        config: InputsConfig,
+    ) -> Dict:
+        pa_json: Dict = {"data": [{"payload": [{}]} for _ in generic_dataset["rows"]]}
+
+        for index, entry in enumerate(generic_dataset["rows"]):
+            iter_model_name = self._select_model_name(config, index)
+            text_input = []
+            for header, content in entry.items():
+                new_text_input = self._create_new_text_input(
+                    header,
+                    system_role_headers,
+                    user_role_headers,
+                    text_input_headers,
+                    content,
+                )
+                if new_text_input:
+                    text_input.append(new_text_input)
+
+            text_input = " ".join(text_input)
+            pa_json["data"][index]["payload"][0]["text_input"] = text_input
+
+            self._add_optional_tags(
+                pa_json["data"][index],
+                config,
+            )
+
+        return pa_json
+
+    def _add_optional_tags(
+        self,
+        pa_json: Dict,
+        config: InputsConfig,
+    ) -> None:
+        payload = pa_json["payload"][0]
+        if config.add_stream:
+            payload["stream"] = True
+        if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
+            payload["max_tokens"] = int(
+                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+            )
+        for key, value in config.extra_inputs.items():
+            payload[key] = value
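
For reference, a hedged sketch of the payload shape _create_pa_json produces; the row text is illustrative, and "stream"/"max_tokens" appear only when the corresponding config options are set:

    # Illustrative output for a two-row generic dataset with --streaming on
    # and an output-token mean configured (values are made up).
    pa_json = {
        "data": [
            {"payload": [{"text_input": "What is Triton?", "stream": True, "max_tokens": 128}]},
            {"payload": [{"text_input": "Explain KV caching.", "stream": True, "max_tokens": 131}]},
        ]
    }
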
diff --git a/genai-perf/genai_perf/inputs/converters/triton_generate_converter.py b/genai-perf/genai_perf/inputs/converters/triton_generate_converter.py
new file mode 100644
index 00000000..522a6aaf
--- /dev/null
+++ b/genai-perf/genai_perf/inputs/converters/triton_generate_converter.py
@@ -0,0 +1,109 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import random
+from copy import deepcopy
+from typing import Dict, List
+
+from genai_perf.inputs.converters.base_converter import BaseConverter
+from genai_perf.inputs.input_constants import (
+    DEFAULT_OUTPUT_TOKENS_MEAN,
+    EMPTY_JSON_IN_OPENAI_PA_FORMAT,
+)
+from genai_perf.inputs.inputs_config import InputsConfig
+
+
+class TritonGenerateConverter(BaseConverter):
+    def convert(
+        self,
+        generic_dataset: Dict,
+        config: InputsConfig,
+    ) -> Dict:
+        (
+            system_role_headers,
+            user_role_headers,
+            text_input_headers,
+        ) = self._determine_json_feature_roles(generic_dataset)
+
+        pa_json = self._create_pa_json(
+            generic_dataset,
+            system_role_headers,
+            user_role_headers,
+            text_input_headers,
+            config,
+        )
+
+        return pa_json
+
+    def _create_pa_json(
+        self,
+        generic_dataset: Dict,
+        system_role_headers: List[str],
+        user_role_headers: List[str],
+        text_input_headers: List[str],
+        config: InputsConfig,
+    ) -> Dict:
+        pa_json: Dict = {"data": [{"payload": [{}]} for _ in generic_dataset["rows"]]}
+
+        for index, entry in enumerate(generic_dataset["rows"]):
+            iter_model_name = self._select_model_name(config, index)
+            text_input = []
+            for header, content in entry.items():
+                new_text_input = self._create_new_text_input(
+                    header,
+                    system_role_headers,
+                    user_role_headers,
+                    text_input_headers,
+                    content,
+                )
+                if new_text_input:
+                    text_input.append(new_text_input)
+
+            text_input = " ".join(text_input)
+            pa_json["data"][index]["payload"][0]["text_input"] = text_input
+
+            self._add_optional_tags(
+                pa_json["data"][index],
+                config,
+            )
+
+        return pa_json
+
+    def _add_optional_tags(
+        self,
+        pa_json: Dict,
+        config: InputsConfig,
+    ) -> None:
+        payload = pa_json["payload"][0]
+        if config.add_stream:
+            payload["stream"] = True
+        if config.output_tokens_mean != DEFAULT_OUTPUT_TOKENS_MEAN:
+            payload["max_tokens"] = int(
+                random.gauss(config.output_tokens_mean, config.output_tokens_stddev)
+            )
+        for key, value in config.extra_inputs.items():
+            payload[key] = value

diff --git a/genai-perf/genai_perf/inputs/input_constants.py b/genai-perf/genai_perf/inputs/input_constants.py
index 02b1b99a..f2a450b4 100644
--- a/genai-perf/genai_perf/inputs/input_constants.py
+++ b/genai-perf/genai_perf/inputs/input_constants.py
@@ -49,6 +49,7 @@ class OutputFormat(Enum):
     TENSORRTLLM = auto()
     VLLM = auto()
     TENSORRTLLM_ENGINE = auto()
+    TRITON_GENERATE = auto()
 
     def to_lowercase(self):
         return self.name.lower()

diff --git a/genai-perf/genai_perf/inputs/output_format_converter_factory.py b/genai-perf/genai_perf/inputs/output_format_converter_factory.py
index 694fd49b..f416afc9 100644
--- a/genai-perf/genai_perf/inputs/output_format_converter_factory.py
+++ b/genai-perf/genai_perf/inputs/output_format_converter_factory.py
@@ -47,6 +47,8 @@ def create(output_format: OutputFormat):
         OutputFormat.VLLM: VLLMConverter,
         OutputFormat.TENSORRTLLM: TensorRTLLMConverter,
         OutputFormat.TENSORRTLLM_ENGINE: TensorRTLLMEngineConverter,
+        OutputFormat.TRITON_GENERATE: TritonGenerateConverter,
+
     }
     if output_format not in converters:
         raise GenAIPerfException(f"Output format {output_format} is not supported")
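
A brief sketch of how the new enum member flows through the lookup above; the factory body is shown truncated here, so the return semantics of create() are an assumption:

    from genai_perf.inputs.input_constants import OutputFormat

    fmt = OutputFormat.TRITON_GENERATE
    fmt.to_lowercase()   # "triton_generate", via the to_lowercase() shown above
    # create(fmt) resolves to TritonGenerateConverter through the map above;
    # any unmapped format raises GenAIPerfException.
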
diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py
index 668f56cb..0bf91dcc 100644
--- a/genai-perf/genai_perf/parser.py
+++ b/genai-perf/genai_perf/parser.py
@@ -42,7 +42,7 @@
 from genai_perf.plots.plot_manager import PlotManager
 from genai_perf.telemetry_data import TelemetryDataCollector
 from genai_perf.tokenizer import DEFAULT_TOKENIZER
-
+from dataclasses import dataclass
 from . import __version__
 
@@ -64,13 +64,41 @@ def to_lowercase(self):
 
 logger = logging.getLogger(__name__)
 
+@dataclass
+class EndpointConfig:
+    endpoint: str
+    service_kind: str
+    output_format: ic.OutputFormat
+
+
 _endpoint_type_map = {
-    "chat": "v1/chat/completions",
-    "completions": "v1/completions",
-    "embeddings": "v1/embeddings",
-    "rankings": "v1/ranking",
-    "vision": "v1/chat/completions",
-    "image_retrieval": "v1/infer",
+    "chat": EndpointConfig("v1/chat/completions",
+        "openai",
+        ic.OutputFormat.OPENAI_CHAT_COMPLETIONS),
+    "completions": EndpointConfig("v1/completions",
+        "openai",
+        ic.OutputFormat.OPENAI_COMPLETIONS),
+    "embeddings": EndpointConfig("v1/embeddings",
+        "openai",
+        ic.OutputFormat.OPENAI_EMBEDDINGS),
+    "rankings": EndpointConfig("v1/ranking",
+        "openai",
+        ic.OutputFormat.RANKINGS),
+    "vision": EndpointConfig("v1/chat/completions",
+        "openai",
+        ic.OutputFormat.OPENAI_VISION),
+    "image_retrieval": EndpointConfig("v1/infer",
+        "openai",
+        ic.OutputFormat.IMAGE_RETRIEVAL),
+    "generate": EndpointConfig("v2/models/{MODEL_NAME}/generate",
+        "openai",
+        ic.OutputFormat.TRITON_GENERATE),
+    "kserve": EndpointConfig("v2/models/{MODEL_NAME}/infer",
+        "triton",
+        None),
+    "inproc-tensorrtllm": EndpointConfig("",
+        "tensortllm_engine",
+        ic.OutputFormat.TENSORRTLLM_ENGINE),
 }
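
To make the mapping concrete, a sketch of how the "generate" entry resolves; the model name "gpt2" is illustrative, and this mirrors the _check_conditional_args logic in the next hunk:

    config = _endpoint_type_map["generate"]
    config.service_kind                          # "openai"
    config.output_format                         # ic.OutputFormat.TRITON_GENERATE
    config.endpoint.format(MODEL_NAME="gpt2")    # "v2/models/gpt2/generate"
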
@@ -135,38 +163,25 @@ def _check_conditional_args(
     Check for conditional args and raise an error if they are not set.
     """
 
-    # Endpoint and output format checks
-    if args.service_kind == "openai":
-        if args.endpoint_type is None:
-            parser.error(
-                "The --endpoint-type option is required when using the 'openai' service-kind."
-            )
-        else:
-            if args.endpoint_type == "chat":
-                args.output_format = ic.OutputFormat.OPENAI_CHAT_COMPLETIONS
-            elif args.endpoint_type == "completions":
-                args.output_format = ic.OutputFormat.OPENAI_COMPLETIONS
-            elif args.endpoint_type == "embeddings":
-                args.output_format = ic.OutputFormat.OPENAI_EMBEDDINGS
-            elif args.endpoint_type == "rankings":
-                args.output_format = ic.OutputFormat.RANKINGS
-            elif args.endpoint_type == "image_retrieval":
-                args.output_format = ic.OutputFormat.IMAGE_RETRIEVAL
-
-            # (TMA-1986) deduce vision format from chat completions + image CLI
-            # because there's no openai vision endpoint.
-            elif args.endpoint_type == "vision":
-                args.output_format = ic.OutputFormat.OPENAI_VISION
-
-        if args.endpoint is not None:
-            args.endpoint = args.endpoint.lstrip(" /")
-        else:
-            args.endpoint = _endpoint_type_map[args.endpoint_type]
-    elif args.endpoint_type is not None:
-        parser.error(
-            "The --endpoint-type option should only be used when using the 'openai' service-kind."
-        )
+    if args.endpoint_type not in _endpoint_type_map:
+        parser.error(f"Invalid endpoint type {args.endpoint_type}")
+        return
+
+    endpoint_config = _endpoint_type_map[args.endpoint_type]
+    args.output_format = endpoint_config.output_format
+    args.service_kind = endpoint_config.service_kind
+
+    if args.endpoint is not None:
+        args.endpoint = args.endpoint.lstrip(" /")
+    else:
+        if args.model:
+            model_name = args.model[0]
+        else:
+            model_name = ""
+        args.endpoint = endpoint_config.endpoint.format(
+            MODEL_NAME=model_name
+        )
+
     if args.service_kind == "triton":
         args = _convert_str_to_enum_entry(args, "backend", ic.OutputFormat)
         args.output_format = args.backend
@@ -200,7 +215,6 @@ def _check_conditional_args(
 def _check_conditional_args_embeddings_rankings(
     parser: argparse.ArgumentParser, args: argparse.Namespace
 ):
-
     if args.output_format in [
         ic.OutputFormat.OPENAI_EMBEDDINGS,
         ic.OutputFormat.RANKINGS,
@@ -640,7 +654,7 @@
     endpoint_group.add_argument(
         "--backend",
         type=str,
-        choices=utils.get_enum_names(ic.OutputFormat)[2:],
+        choices=["vllm", "tensorrtllm"],  # was: utils.get_enum_names(ic.OutputFormat)[2:]
         default="tensorrtllm",
         required=False,
         help=f'When using the "triton" service-kind, '
@@ -660,29 +674,22 @@
     endpoint_group.add_argument(
         "--endpoint-type",
         type=str,
-        choices=[
-            "chat",
-            "completions",
-            "embeddings",
-            "rankings",
-            "vision",
-            "image_retrieval",
-        ],
+        choices=list(_endpoint_type_map.keys()),
         required=False,
-        help=f"The endpoint-type to send requests to on the "
-        'server. This is only used with the "openai" service-kind.',
-    )
-
-    endpoint_group.add_argument(
-        "--service-kind",
-        type=str,
-        choices=["triton", "openai", "tensorrtllm_engine"],
-        default="triton",
-        required=False,
-        help="The kind of service perf_analyzer will "
-        'generate load for. In order to use "openai", '
-        "you must specify an api via --endpoint-type.",
-    )
+        default="kserve",
+        help="The endpoint-type for requests. Inputs will be formatted and "
+        "outputs processed according to the endpoint-type.",
+    )
+
+    # endpoint_group.add_argument(
+    #     "--service-kind",
+    #     type=str,
+    #     choices=["triton", "openai", "tensorrtllm_engine"],
+    #     default="triton",
+    #     required=False,
+    #     help="The kind of service perf_analyzer will "
+    #     'generate load for. In order to use "openai", '
+    #     "you must specify an api via --endpoint-type.",
+    # )
 
     endpoint_group.add_argument(
         "--server-metrics-url",
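
With --endpoint-type defaulting to "kserve" and the service kind now derived from the map, --service-kind no longer needs to be passed. A hedged sketch of the resulting parse, following the pattern the updated tests use further down (the model name is illustrative):

    import sys
    import genai_perf.parser as parser

    sys.argv = ["genai-perf", "profile", "-m", "gpt2", "--endpoint-type", "generate"]
    args, extra_args = parser.parse_args()
    # Expected per the endpoint map: args.service_kind == "openai",
    # args.endpoint == "v2/models/gpt2/generate",
    # args.output_format == ic.OutputFormat.TRITON_GENERATE
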
diff --git a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
index 39d05372..c0e98be6 100755
--- a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
+++ b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
@@ -204,14 +204,22 @@ def _preprocess_response(
             responses = response.strip().split("\n\n")
             if len(responses) > 1:
                 merged_response = load_json_str(remove_sse_prefix(responses[0]))
-                if (
+                if (self._response_format != ResponseFormat.TRITON_GENERATE) and (
                     merged_response["choices"][0]["delta"].get("content", None) is None
                 ):
                     merged_response["choices"][0]["delta"]["content"] = ""
+                elif (self._response_format == ResponseFormat.TRITON_GENERATE) and (
+                    "text_output" not in merged_response
+                ):
+                    merged_response["text_output"] = ""
 
                 for r in responses[1:]:
-                    text = self._extract_openai_text_output(r)
-                    merged_response["choices"][0]["delta"]["content"] += text
+                    if self._response_format == ResponseFormat.TRITON_GENERATE:
+                        text = self._extract_generate_text_output(r)
+                        merged_response["text_output"] += text
+                    else:
+                        text = self._extract_openai_text_output(r)
+                        merged_response["choices"][0]["delta"]["content"] += text
 
                 res_outputs[i] = {"response": json.dumps(merged_response)}
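
For context, a hedged sketch of the SSE stream shape this merging logic expects from the generate endpoint; the fragment text is illustrative, and remove_sse_prefix is assumed to strip the "data: " framing as its name suggests:

    raw = 'data: {"text_output": "Hello"}\n\ndata: {"text_output": ", world"}'
    chunks = raw.strip().split("\n\n")   # same split as above
    # Merging concatenates the text_output fields:
    # {"text_output": "Hello, world"}
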
@@ -234,15 +242,17 @@ def _get_input_token_count(self, req_inputs: dict) -> int:
         elif self._service_kind == "triton_c_api":
             return len(req_inputs["input_ids"])  # no tokenizer required
         elif self._service_kind == "openai":
-            input_text = self._get_openai_input_text(req_inputs)
+            input_text = self._get_input_text(req_inputs)
         else:
             raise ValueError(f"Unknown service kind: '{self._service_kind}'.")
 
         return len(self._tokenizer.encode(input_text))
 
-    def _get_openai_input_text(self, req_inputs: dict) -> str:
+    def _get_input_text(self, req_inputs: dict) -> str:
         """Tokenize the OpenAI request input texts."""
         payload = load_json_str(req_inputs["payload"])
+        if self._response_format == ResponseFormat.TRITON_GENERATE:
+            return payload["text_input"]
         if self._response_format == ResponseFormat.OPENAI_CHAT_COMPLETIONS:
             return payload["messages"][0]["content"]
         elif self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
@@ -265,7 +275,7 @@ def _get_output_token_counts(
             # No tokenizer is needed to get the token counts.
             return self._get_tensorrtllm_engine_token_counts(res_outputs)
         elif self._service_kind == "openai":
-            output_texts = self._get_openai_output_tokens(res_outputs)
+            output_texts = self._get_text_output_tokens(res_outputs)
         else:
             raise ValueError(f"Unknown service kind: '{self._service_kind}'.")
 
@@ -290,11 +300,14 @@ def _get_triton_output_tokens(self, res_outputs: List[Dict]) -> List[str]:
         """Return a list of Triton response texts."""
         return [r["text_output"] for r in res_outputs]
 
-    def _get_openai_output_tokens(self, res_outputs: List[Dict]) -> List[str]:
+    def _get_text_output_tokens(self, res_outputs: List[Dict]) -> List[str]:
         """Return a list of OpenAI response texts."""
         output_texts = []
         for output in res_outputs:
-            text = self._extract_openai_text_output(output["response"])
+            if self._response_format == ResponseFormat.TRITON_GENERATE:
+                text = self._extract_generate_text_output(output["response"])
+            else:
+                text = self._extract_openai_text_output(output["response"])
             output_texts.append(text)
         return output_texts
 
@@ -307,6 +320,13 @@ def _get_response_output_tokens(self, output_texts: List[str]) -> List[List[int]]:
         encodings = self._tokenizer(["!" + txt for txt in output_texts])
         return [out[1:] for out in encodings.data["input_ids"]]
 
+    def _extract_generate_text_output(self, response: str) -> str:
+        response = remove_sse_prefix(response)
+        if response == "":
+            return response
+        data = json.loads(response)
+        return data["text_output"]
+
     def _extract_openai_text_output(self, response: str) -> str:
         """Extracts text/content of the OpenAI response object."""
         response = remove_sse_prefix(response)
@@ -336,7 +356,10 @@ def _extract_openai_text_output(self, response: str) -> str:
     def _is_openai_empty_response(self, response: str) -> bool:
         """Returns true if the response is an openai response with no
         content (or empty content)"""
-        text = self._extract_openai_text_output(response)
+        if self._response_format == ResponseFormat.TRITON_GENERATE:
+            text = self._extract_generate_text_output(response)
+        else:
+            text = self._extract_openai_text_output(response)
         if text:
             return False
         return True

diff --git a/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py
index e798452b..69913730 100755
--- a/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py
+++ b/genai-perf/genai_perf/profile_data_parser/profile_data_parser.py
@@ -44,6 +44,7 @@ class ResponseFormat(Enum):
     RANKINGS = auto()
     IMAGE_RETRIEVAL = auto()
     TRITON = auto()
+    TRITON_GENERATE = auto()
 
 
 class ProfileDataParser:
@@ -63,6 +64,7 @@ def __init__(
 
     def _get_profile_metadata(self, data: dict) -> None:
         self._service_kind = data["service_kind"]
+        self._endpoint = data["endpoint"]
         if self._service_kind == "openai":
             if data["endpoint"] == "rerank":
                 self._response_format = ResponseFormat.HUGGINGFACE_RANKINGS
@@ -84,6 +86,8 @@ def _get_profile_metadata(self, data: dict) -> None:
                 self._response_format = ResponseFormat.RANKINGS
             elif data["endpoint"] == "v1/infer":
                 self._response_format = ResponseFormat.IMAGE_RETRIEVAL
+            elif "generate" in data["endpoint"]:
+                self._response_format = ResponseFormat.TRITON_GENERATE
             else:
                 # (TPA-66) add PA metadata to handle this case
                 # When endpoint field is either empty or custom endpoint, fall
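
Tying patch 1 together, a sketch of the new detection path; the endpoint value is illustrative, and note that the match is a substring check, so any recorded endpoint containing "generate" is treated as TRITON_GENERATE:

    data = {"service_kind": "openai", "endpoint": "v2/models/gpt2/generate"}
    # _get_profile_metadata above sets
    #     self._response_format = ResponseFormat.TRITON_GENERATE,
    # after which llm_profile_data_parser.py reads "text_output" fields
    # via _extract_generate_text_output.
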
From fd0610745f55a8e6e3d7e9d59860ef575f487ea7 Mon Sep 17 00:00:00 2001
From: nnshah1
Date: Mon, 16 Sep 2024 17:48:49 -0700
Subject: [PATCH 2/3] fixing tests

---
 genai-perf/tests/test_json_exporter.py | 4 +---
 genai-perf/tests/test_wrapper.py       | 8 --------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/genai-perf/tests/test_json_exporter.py b/genai-perf/tests/test_json_exporter.py
index 1743164e..bb5059b1 100644
--- a/genai-perf/tests/test_json_exporter.py
+++ b/genai-perf/tests/test_json_exporter.py
@@ -217,7 +217,7 @@ class TestJsonExporter:
       "backend": "vllm",
       "batch_size": 1,
       "endpoint": null,
-      "endpoint_type": null,
+      "endpoint_type": "kserve",
       "service_kind": "triton",
       "server_metrics_url": null,
       "streaming": true,
@@ -815,8 +815,6 @@ def test_triton_telemetry_output(
             "profile",
             "-m",
             "gpt2_vllm",
-            "--service-kind",
-            "triton",
             "--streaming",
             "--server-metrics-url",
             "http://tritonmetrics:8002/metrics",

diff --git a/genai-perf/tests/test_wrapper.py b/genai-perf/tests/test_wrapper.py
index 0521e589..27b165d3 100644
--- a/genai-perf/tests/test_wrapper.py
+++ b/genai-perf/tests/test_wrapper.py
@@ -48,8 +48,6 @@ def test_url_exactly_once_triton(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
-            "--service-kind",
-            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -82,8 +80,6 @@ def test_profile_export_filepath(self, monkeypatch, arg, expected_filepath):
             "profile",
             "-m",
             "test_model",
-            "--service-kind",
-            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -106,8 +102,6 @@ def test_service_triton(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
-            "--service-kind",
-            "triton",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
@@ -135,8 +129,6 @@ def test_service_openai(self, monkeypatch, arg):
             "profile",
             "-m",
             "test_model",
-            "--service-kind",
-            "openai",
         ] + arg
         monkeypatch.setattr("sys.argv", args)
         args, extra_args = parser.parse_args()
"triton", "--backend", "tensorrtllm"], + ["--backend", "tensorrtllm"], OutputFormat.TENSORRTLLM, ), - (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM), - (["--service-kind", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE), + (["--backend", "vllm"], OutputFormat.VLLM), + (["--endpoint-type", "inproc-tensorrtllm"], OutputFormat.TENSORRTLLM_ENGINE), ], ) def test_inferred_output_format(self, monkeypatch, args, expected_format):