Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: WIP: Tokens to TRTLLM backend #650

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,17 @@
num_of_output_prompts: int,
) -> Dict[str, Any]:
dataset_json: Dict[str, Any] = {}
dataset_json["features"] = [{"name": "text_input"}]
# dataset_json["features"] = [{"name": "text_input"}]
dataset_json["features"] = [{"name": "input_ids"}, {"name": "input_lengths"}]
dataset_json["rows"] = []
for _ in range(num_of_output_prompts):
synthetic_prompt = cls._create_synthetic_prompt(
synthetic_prompt, prompt_tokens = cls._create_synthetic_prompt(
tokenizer,
prompt_tokens_mean,
prompt_tokens_stddev,
)
dataset_json["rows"].append({"row": {"text_input": synthetic_prompt}})
# dataset_json["rows"].append({"row": {"text_input": synthetic_prompt}})
dataset_json["rows"].append({"row": {"input_ids": prompt_tokens, "input_lengths": len(prompt_tokens)}})

return dataset_json

Expand Down Expand Up @@ -733,20 +735,20 @@
)

for index, entry in enumerate(dataset_json["rows"]):
pa_json["data"].append({"text_input": [""]})
pa_json["data"].append({"input_ids": entry['input_ids'], "input_lengths": entry['input_lengths']})

for header, content in entry.items():
new_text_input = cls._create_new_text_input(
header,
system_role_headers,
user_role_headers,
text_input_headers,
content,
)
# for header, content in entry.items():
# new_text_input = cls._create_new_text_input(
# header,
# system_role_headers,
# user_role_headers,
# text_input_headers,
# content,
# )

pa_json = cls._add_new_text_input_to_json(
pa_json, index, new_text_input
)
# pa_json = cls._add_new_text_input_to_json(
# pa_json, index, new_text_input
# )

pa_json = cls._add_required_tags_to_trtllm_json(
pa_json, index, default_max_tokens
Expand Down Expand Up @@ -975,7 +977,8 @@
)
if output_tokens_deterministic:
row["min_length"] = [number_of_tokens]
row["max_tokens"] = [number_of_tokens]
row["input_lengths"] = [2000]
row["request_output_len"] = [number_of_tokens]
for key, value in extra_inputs.items():
row[key] = [value]

Expand All @@ -990,7 +993,7 @@
) -> Dict:
row = pa_json["data"][index]
if default_max_tokens:
row["max_tokens"] = [cls.DEFAULT_TENSORRTLLM_MAX_TOKENS]
row["request_output_len"] = [cls.DEFAULT_TENSORRTLLM_MAX_TOKENS]

return pa_json

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def create_synthetic_prompt(
num_prompt_tokens, farewell_lines, tokenizer
)

return prompt
return prompt, tokenizer.encode(prompt)

@classmethod
def _create_farewell_lines(cls) -> List[str]:
Expand Down
13 changes: 6 additions & 7 deletions src/c++/perf_analyzer/genai-perf/genai_perf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,12 @@
def main():
# Interactive use will catch exceptions and log formatted errors rather than
# tracebacks.
try:
run()
except Exception as e:
traceback.print_exc()
logger = logging.getLogger(__name__)
logger.error(e)
return 1
run()
# except Exception as e:
# traceback.print_exc()
# logger = logging.getLogger(__name__)
# logger.error(e)
# return 1

return 0

Expand Down
3 changes: 2 additions & 1 deletion src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def add_protocol_args(args: Namespace) -> List[str]:
if args.u is None: # url
cmd += ["-u", f"{DEFAULT_GRPC_URL}"]
if args.output_format == OutputFormat.TENSORRTLLM:
cmd += ["--shape", "max_tokens:1", "--shape", "text_input:1"]
cmd += ["--shape", "input_ids:2000", "--shape", "input_lengths:1", "--shape", "request_output_len:1"]
# cmd += ["--shape", "max_tokens:1", "--shape", "text_input:1"]
elif args.service_kind == "openai":
cmd += ["-i", "http"]
return cmd
Expand Down
Loading