diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py index ec23362e..c7b27a6b 100644 --- a/genai-perf/genai_perf/wrapper.py +++ b/genai-perf/genai_perf/wrapper.py @@ -46,9 +46,6 @@ def add_protocol_args(args: Namespace) -> List[str]: cmd += ["-u", f"{DEFAULT_GRPC_URL}"] if args.output_format == OutputFormat.TENSORRTLLM: cmd += ["--shape", "max_tokens:1", "--shape", "text_input:1"] - elif args.service_kind == "tensorrtllm_engine": - args.service_kind = "triton_c_api" # for PA - cmd += ["--streaming"] elif args.service_kind == "openai": cmd += ["-i", "http"] return cmd @@ -128,6 +125,10 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s cmd += [f"-{arg}"] else: cmd += [f"--{arg}"] + # GAP needs to call PA using triton_c_api service kind when running + # against tensorrtllm engine. + elif arg == "service_kind" and value == "tensorrtllm_engine": + cmd += ["--service-kind", "triton_c_api", "--streaming"] else: if len(arg) == 1: cmd += [f"-{arg}", f"{value}"]