Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support input type payload for timing strategy #209

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions genai-perf/genai_perf/inputs/input_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ModelSelectionStrategy(Enum):
class PromptSource(Enum):
SYNTHETIC = auto()
FILE = auto()
PAYLOAD = auto()


class OutputFormat(Enum):
Expand Down
31 changes: 24 additions & 7 deletions genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,17 +351,29 @@ def parse_goodput(values):


def _infer_prompt_source(args: argparse.Namespace) -> argparse.Namespace:

args.synthetic_input_files = None
args.payload_input_file = None

if args.input_file:
if str(args.input_file).startswith("synthetic:"):
input_file_str = str(args.input_file)
if input_file_str.startswith("synthetic:"):
args.prompt_source = ic.PromptSource.SYNTHETIC
synthetic_input_files_str = str(args.input_file).split(":", 1)[1]
synthetic_input_files_str = input_file_str.split(":", 1)[1]
args.synthetic_input_files = synthetic_input_files_str.split(",")
logger.debug(
f"Input source is synthetic data: {args.synthetic_input_files}"
)
elif input_file_str.startswith("payload:"):
args.prompt_source = ic.PromptSource.PAYLOAD
payload_input_file_str = input_file_str.split(":", 1)[1]
if not payload_input_file_str:
raise ValueError(
f"Invalid payload input: '{input_file_str}' is missing the file path"
)
args.payload_input_file = payload_input_file_str.split(",")
logger.debug(
f"Input source is a payload file with timing information in the following path: {args.payload_input_file}"
)
else:
args.prompt_source = ic.PromptSource.FILE
logger.debug(f"Input source is the following path: {args.input_file}")
Expand All @@ -384,7 +396,7 @@ def _convert_str_to_enum_entry(args, option, enum):


def file_or_directory(value: str) -> Path:
if value.startswith("synthetic:"):
if value.startswith("synthetic:") or value.startswith("payload:"):
return Path(value)
else:
path = Path(value)
Expand Down Expand Up @@ -459,12 +471,17 @@ def _add_input_args(parser):
required=False,
help="The input file or directory containing the content to use for "
"profiling. Each line should be a JSON object with a 'text' or "
"'image' field 'in JSONL format. Example: {\"text\":"
' "Your prompt here"}\'. To use synthetic files for a converter that '
"'image' field in JSONL format. Example: {\"text\": "
'"Your prompt here"}. To use synthetic files for a converter that '
"needs multiple files, prefix the path with 'synthetic:', followed "
"by a comma-separated list of filenames. The synthetic filenames "
"should not have extensions. For example, "
"'synthetic:queries,passages'. ",
"'synthetic:queries,passages'. For payload data, prefix the path with 'payload:', "
"followed by a JSON string representing a payload object. The payload should "
lkomali marked this conversation as resolved.
Show resolved Hide resolved
"contain fields such as 'timestamp', 'input_length', 'output_length', "
"and you can optionally add 'text_input', 'session_id', 'hash_ids', and 'priority'. "
'Example: \'payload:{"timestamp": 123.45, "input_length": 10, "output_length": 12, '
'"session_id": 1, "priority": 5, "text_input": "Your prompt here"}\'.',
)

input_group.add_argument(
Expand Down
1 change: 1 addition & 0 deletions genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
"output_tokens_mean",
"output_tokens_mean_deterministic",
"output_tokens_stddev",
"payload_input_file",
"prompt_source",
"random_seed",
"request_rate",
Expand Down
42 changes: 36 additions & 6 deletions genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,25 +785,55 @@ def test_goodput_args_warning(self, monkeypatch, args, expected_error):
assert str(exc_info.value) == expected_error

@pytest.mark.parametrize(
"args, expected_prompt_source",
"args, expected_prompt_source, expected_payload_input_file, expect_error",
[
([], PromptSource.SYNTHETIC),
(["--input-file", "prompt.txt"], PromptSource.FILE),
([], PromptSource.SYNTHETIC, None, False),
(["--input-file", "prompt.txt"], PromptSource.FILE, None, False),
(
["--input-file", "prompt.txt", "--synthetic-input-tokens-mean", "10"],
PromptSource.FILE,
None,
False,
),
(
["--input-file", "payload:test.jsonl"],
PromptSource.PAYLOAD,
["test.jsonl"],
False,
),
(["--input-file", "payload:"], PromptSource.PAYLOAD, [], True),
(
["--input-file", "synthetic:test.jsonl"],
PromptSource.SYNTHETIC,
None,
False,
),
(["--input-file", "invalidinput"], PromptSource.FILE, None, False),
],
)
def test_inferred_prompt_source(
self, monkeypatch, mocker, args, expected_prompt_source
self,
monkeypatch,
mocker,
args,
expected_prompt_source,
expected_payload_input_file,
expect_error,
):
mocker.patch.object(Path, "is_file", return_value=True)
combined_args = ["genai-perf", "profile", "--model", "test_model"] + args
monkeypatch.setattr("sys.argv", combined_args)
args, _ = parser.parse_args()

assert args.prompt_source == expected_prompt_source
if expect_error:
with pytest.raises(ValueError):
parser.parse_args()
else:
args, _ = parser.parse_args()

assert args.prompt_source == expected_prompt_source

if expected_payload_input_file is not None:
assert args.payload_input_file == expected_payload_input_file

@pytest.mark.parametrize(
"args",
Expand Down
1 change: 1 addition & 0 deletions genai-perf/tests/test_json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ class TestJsonExporter:
"output_tokens_mean": -1,
"output_tokens_mean_deterministic": false,
"output_tokens_stddev": 0,
"payload_input_file": null,
"random_seed": 0,
"request_count": 0,
"synthetic_input_files": null,
Expand Down
Loading