
Commit

Add tensorrtllm_engine option to service-kind and update testing (#700)
* Add tensorrtllm_engine option to service-kind and update testing

* Add output format check for tensorrtllm_engine
debermudez authored and nv-hwoo committed Jul 23, 2024
1 parent 3e2d54f commit ff0e9b3
Showing 3 changed files with 31 additions and 20 deletions.
@@ -53,6 +53,7 @@ class OutputFormat(Enum):
     RANKINGS = auto()
     TENSORRTLLM = auto()
     VLLM = auto()
+    TENSORRTLLM_ENGINE = auto()
 
     def to_lowercase(self):
         return self.name.lower()
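
For reference, the new member simply extends the existing enum; a minimal self-contained sketch of the class as it stands after this change (members elided by the hunk are omitted here):

from enum import Enum, auto

class OutputFormat(Enum):
    RANKINGS = auto()
    TENSORRTLLM = auto()
    VLLM = auto()
    TENSORRTLLM_ENGINE = auto()

    def to_lowercase(self):
        return self.name.lower()

print(OutputFormat.TENSORRTLLM_ENGINE.to_lowercase())  # -> "tensorrtllm_engine"
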
45 changes: 25 additions & 20 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
@@ -177,6 +177,9 @@ def _check_conditional_args(
         args = _convert_str_to_enum_entry(args, "backend", OutputFormat)
         args.output_format = args.backend
 
+    if args.service_kind == "tensorrtllm_engine":
+        args.output_format = OutputFormat.TENSORRTLLM_ENGINE
+
     # Output token distribution checks
     if args.output_tokens_mean == LlmInputs.DEFAULT_OUTPUT_TOKENS_MEAN:
         if args.output_tokens_stddev != LlmInputs.DEFAULT_OUTPUT_TOKENS_STDDEV:
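
With this branch, --service-kind tensorrtllm_engine fixes the output format by itself; no --backend flag is consulted. A condensed sketch of the inference rule (hypothetical helper; the real _check_conditional_args also validates endpoint and token options, and the triton mapping shown is an assumption about what _convert_str_to_enum_entry does):

def _infer_output_format(args):
    # triton: format follows the --backend choice (tensorrtllm or vllm)
    if args.service_kind == "triton":
        return OutputFormat[args.backend.upper()]
    # tensorrtllm_engine: maps directly to its dedicated format
    if args.service_kind == "tensorrtllm_engine":
        return OutputFormat.TENSORRTLLM_ENGINE
    # openai: format is chosen from --endpoint-type elsewhere in the parser
    return args.output_format
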
@@ -268,6 +271,8 @@ def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
         name += [f"{args.service_kind}-{args.endpoint_type}"]
     elif args.service_kind == "triton":
         name += [f"{args.service_kind}-{args.backend.to_lowercase()}"]
+    elif args.service_kind == "tensorrtllm_engine":
+        name += [f"{args.service_kind}"]
     else:
         raise ValueError(f"Unknown service kind '{args.service_kind}'.")
 
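Note that the new branch appends no suffix: the artifact-directory component is the bare service kind, whereas openai and triton append the endpoint type or backend. A toy version of the three branches (standalone sketch; the real function also folds in the model name and load settings):

def _service_component(service_kind, endpoint_type=None, backend=None):
    if service_kind == "openai":
        return f"{service_kind}-{endpoint_type}"
    elif service_kind == "triton":
        return f"{service_kind}-{backend}"
    elif service_kind == "tensorrtllm_engine":
        return service_kind
    raise ValueError(f"Unknown service kind '{service_kind}'.")

assert _service_component("triton", backend="vllm") == "triton-vllm"
assert _service_component("tensorrtllm_engine") == "tensorrtllm_engine"
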
@@ -528,25 +533,6 @@ def _add_profile_args(parser):
 def _add_endpoint_args(parser):
     endpoint_group = parser.add_argument_group("Endpoint")
 
-    endpoint_group.add_argument(
-        "-m",
-        "--model",
-        nargs="+",
-        default=[],
-        help=f"The name of the model(s) to benchmark.",
-    )
-    endpoint_group.add_argument(
-        "--model-selection-strategy",
-        type=str,
-        choices=utils.get_enum_names(ModelSelectionStrategy),
-        default="round_robin",
-        required=False,
-        help=f"When multiple model are specified, this is how a specific model "
-        "should be assigned to a prompt. round_robin means that ith prompt in the "
-        "list gets assigned to i mod len(models). random means that assignment is "
-        "uniformly random",
-    )
-
     endpoint_group.add_argument(
         "--backend",
         type=str,
@@ -576,10 +562,29 @@ def _add_endpoint_args(parser):
         'server. This is only used with the "openai" service-kind.',
     )
 
+    endpoint_group.add_argument(
+        "-m",
+        "--model",
+        nargs="+",
+        default=[],
+        help=f"The name of the model(s) to benchmark.",
+    )
+    endpoint_group.add_argument(
+        "--model-selection-strategy",
+        type=str,
+        choices=utils.get_enum_names(ModelSelectionStrategy),
+        default="round_robin",
+        required=False,
+        help=f"When multiple model are specified, this is how a specific model "
+        "should be assigned to a prompt. round_robin means that ith prompt in the "
+        "list gets assigned to i mod len(models). random means that assignment is "
+        "uniformly random",
+    )
+
     endpoint_group.add_argument(
         "--service-kind",
         type=str,
-        choices=["triton", "openai"],
+        choices=["triton", "openai", "tensorrtllm_engine"],
         default="triton",
         required=False,
         help="The kind of service perf_analyzer will "
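
The -m/--model and --model-selection-strategy arguments above are unchanged, only relocated within _add_endpoint_args. Since the round_robin help text is terse, here is the assignment rule it describes, spelled out in plain Python:

models = ["modelA", "modelB", "modelC"]

# round_robin: the ith prompt is assigned models[i % len(models)]
for i in range(5):
    print(f"prompt {i} -> {models[i % len(models)]}")
# prompt 0 -> modelA, prompt 1 -> modelB, prompt 2 -> modelC,
# prompt 3 -> modelA, prompt 4 -> modelB
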
5 changes: 5 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_cli.py
@@ -203,6 +203,10 @@ def test_help_version_arguments_output_and_exit(
             (["--request-rate", "9.0"], {"request_rate": 9.0}),
             (["-s", "99.5"], {"stability_percentage": 99.5}),
             (["--service-kind", "triton"], {"service_kind": "triton"}),
+            (
+                ["--service-kind", "tensorrtllm_engine"],
+                {"service_kind": "tensorrtllm_engine"},
+            ),
             (
                 ["--service-kind", "openai", "--endpoint-type", "chat"],
                 {"service_kind": "openai", "endpoint": "v1/chat/completions"},
@@ -654,6 +658,7 @@ def test_conditional_errors(self, args, expected_output, monkeypatch, capsys):
                 OutputFormat.TENSORRTLLM,
             ),
             (["--service-kind", "triton", "--backend", "vllm"], OutputFormat.VLLM),
+            (["--service-kind", "tensorrtllm_engine"], OutputFormat.TENSORRTLLM_ENGINE),
         ],
     )
     def test_inferred_output_format(self, monkeypatch, args, expected_format):
