Skip to content

Commit

Permalink
address feedback
Browse files — browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Aug 8, 2024
1 parent 3ebe717 commit 7fa5313
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions genai-perf/genai_perf/wrapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,9 +46,6 @@ def add_protocol_args(args: Namespace) -> List[str]:
cmd += ["-u", f"{DEFAULT_GRPC_URL}"]
if args.output_format == OutputFormat.TENSORRTLLM:
cmd += ["--shape", "max_tokens:1", "--shape", "text_input:1"]
elif args.service_kind == "tensorrtllm_engine":
args.service_kind = "triton_c_api" # for PA
cmd += ["--streaming"]
elif args.service_kind == "openai":
cmd += ["-i", "http"]
return cmd
Expand Down Expand Up @@ -128,6 +125,10 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
cmd += [f"-{arg}"]
else:
cmd += [f"--{arg}"]
# GAP needs to call PA using triton_c_api service kind when running
# against tensorrtllm engine.
elif arg == "service_kind" and value == "tensorrtllm_engine":
cmd += ["--service-kind", "triton_c_api", "--streaming"]
else:
if len(arg) == 1:
cmd += [f"-{arg}", f"{value}"]
Expand Down

0 comments on commit 7fa5313

Please sign in to comment.