diff --git a/genai-perf/README.md b/genai-perf/README.md index af34aae0..5f7d7eee 100644 --- a/genai-perf/README.md +++ b/genai-perf/README.md @@ -293,6 +293,9 @@ options: filepaths to images to use for benchmarking as JSON objects. For any dataset, you can specify the following options: +* `--num-prefix-prompts `: The number of synthetic prefix prompts to + sample from. If this value is >0, synthetic prefix prompts will be prepended + to user prompts. * `--output-tokens-mean `: The mean number of tokens in each output. Ensure the `--tokenizer` value is set correctly, >= 1. * `--output-tokens-stddev `: The standard deviation of the number of tokens @@ -303,6 +306,8 @@ For any dataset, you can specify the following options: Triton service-kind. Note that there is still some variability in the requested number of output tokens, but GenAi-Perf attempts its best effort with your model to get the right number of output tokens. +* `--prefix-prompt-length `: The number of tokens to include in each + prefix prompt. This value is only used if `--num-prefix-prompts` is positive. You can optionally set additional model inputs with the following option: * `--extra-inputs :`: An additional input for use with the @@ -461,6 +466,12 @@ extensions. For example, 'synthetic:queries,passages'. The number of unique payloads to sample from. These will be reused until benchmarking is complete. (default: `100`) +##### `--num-prefix-prompts ` + +The number of prefix prompts to select from. If this value is greater than zero, +a randomly selected prefix prompt is prepended to each input prompt. This is +useful for benchmarking models that use a K-V cache. (default: `0`) + ##### `--output-tokens-mean ` ##### `--osl` @@ -502,6 +513,13 @@ data. (default: `550`) The standard deviation of number of tokens in the generated prompts when using synthetic data. (default: `0`) +##### `--prefix-prompt-length ` + +The number of tokens in each prefix prompt. This value is only used if +`--num-prefix-prompts` is positive.
Note that due to the prefix and user prompts +being concatenated, the number of tokens in the final prompt may be off by one. +(default: `100`) + ##### `--warmup-request-count ` The number of warmup requests to send before benchmarking. (default: `0`) diff --git a/genai-perf/genai_perf/inputs/input_constants.py b/genai-perf/genai_perf/inputs/input_constants.py index 78a4cde8..a1527cc7 100644 --- a/genai-perf/genai_perf/inputs/input_constants.py +++ b/genai-perf/genai_perf/inputs/input_constants.py @@ -85,6 +85,8 @@ def to_lowercase(self): DEFAULT_OUTPUT_TOKENS_MEAN = -1 DEFAULT_OUTPUT_TOKENS_STDDEV = 0 DEFAULT_NUM_DATASET_ENTRIES = 100 +DEFAULT_NUM_PREFIX_PROMPTS = 0 +DEFAULT_PREFIX_PROMPT_LENGTH = 100 ########################### # Default Image Parameters diff --git a/genai-perf/genai_perf/inputs/inputs_config.py b/genai-perf/genai_perf/inputs/inputs_config.py index 2982287c..5b1429f5 100644 --- a/genai-perf/genai_perf/inputs/inputs_config.py +++ b/genai-perf/genai_perf/inputs/inputs_config.py @@ -35,8 +35,10 @@ DEFAULT_IMAGE_WIDTH_STDDEV, DEFAULT_LENGTH, DEFAULT_NUM_DATASET_ENTRIES, + DEFAULT_NUM_PREFIX_PROMPTS, DEFAULT_OUTPUT_TOKENS_MEAN, DEFAULT_OUTPUT_TOKENS_STDDEV, + DEFAULT_PREFIX_PROMPT_LENGTH, DEFAULT_PROMPT_TOKENS_MEAN, DEFAULT_PROMPT_TOKENS_STDDEV, DEFAULT_RANDOM_SEED, @@ -142,3 +144,9 @@ class InputsConfig: # Seed used to generate random values random_seed: int = DEFAULT_RANDOM_SEED + + # The number of prefix prompts to generate and pool from + num_prefix_prompts: int = DEFAULT_NUM_PREFIX_PROMPTS + + # The length of the prefix prompts to generate + prefix_prompt_length: int = DEFAULT_PREFIX_PROMPT_LENGTH diff --git a/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py b/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py index da5f8c2c..a307e123 100644 --- a/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py +++ b/genai-perf/genai_perf/inputs/retrievers/file_input_retriever.py @@ -39,6 +39,9 @@ GenericDataset, ) from 
genai_perf.inputs.retrievers.synthetic_image_generator import ImageFormat +from genai_perf.inputs.retrievers.synthetic_prompt_generator import ( + SyntheticPromptGenerator, +) from genai_perf.utils import load_json_str from PIL import Image @@ -152,6 +155,15 @@ def _get_content_from_input_file( """ prompts = [] images = [] + + use_prefix_prompts = self.config.num_prefix_prompts > 0 + if use_prefix_prompts: + SyntheticPromptGenerator.create_prefix_prompts_pool( + self.config.tokenizer, + self.config.num_prefix_prompts, + self.config.prefix_prompt_length, + ) + with open(filename, mode="r", newline=None) as file: for line in file: if line.strip(): @@ -164,7 +176,13 @@ def _get_content_from_input_file( "Each data entry must have only one of 'text_input' or 'text' key name." ) prompt = prompt if prompt else prompt_alt - prompts.append(prompt.strip() if prompt else prompt) + if use_prefix_prompts: + prefix_prompt = ( + SyntheticPromptGenerator.get_random_prefix_prompt() + ) + prompt = f"{prefix_prompt} {prompt}" + if prompt is not None: + prompts.append(prompt.strip()) image = data.get("image") if image is not None: image = self._encode_image(image.strip()) diff --git a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py index 2de952b2..71979b13 100644 --- a/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py +++ b/genai-perf/genai_perf/inputs/retrievers/synthetic_data_retriever.py @@ -51,17 +51,30 @@ def retrieve_data(self) -> GenericDataset: files = self.config.synthetic_input_filenames or [DEFAULT_SYNTHETIC_FILENAME] synthetic_dataset = GenericDataset(files_data={}) + use_prefix_prompts = self.config.num_prefix_prompts > 0 + if use_prefix_prompts: + SyntheticPromptGenerator.create_prefix_prompts_pool( + self.config.tokenizer, + self.config.num_prefix_prompts, + self.config.prefix_prompt_length, + ) + for file in files: data_rows: List[DataRow] = [] for _ in 
range(self.config.num_dataset_entries): row = DataRow(texts=[], images=[]) - prompt = SyntheticPromptGenerator.create_synthetic_prompt( - self.config.tokenizer, - self.config.prompt_tokens_mean, - self.config.prompt_tokens_stddev, - ) for _ in range(self.config.batch_size_text): + prompt = SyntheticPromptGenerator.create_synthetic_prompt( + self.config.tokenizer, + self.config.prompt_tokens_mean, + self.config.prompt_tokens_stddev, + ) + if use_prefix_prompts: + prefix_prompt = ( + SyntheticPromptGenerator.get_random_prefix_prompt() + ) + prompt = f"{prefix_prompt} {prompt}" row.texts.append(prompt) for _ in range(self.config.batch_size_image): diff --git a/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py b/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py index 30926169..9ca745c4 100644 --- a/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py +++ b/genai-perf/genai_perf/inputs/retrievers/synthetic_prompt_generator.py @@ -24,6 +24,7 @@ class SyntheticPromptGenerator: _tokenized_corpus = None _corpus_length = 0 + _prefix_prompts: List[str] = [] @classmethod def create_synthetic_prompt( @@ -106,3 +107,32 @@ def _generate_prompt(cls, tokenizer: Tokenizer, num_tokens: int) -> str: prompt_tokens += cls._tokenized_corpus[: end_idx - cls._corpus_length] return tokenizer.decode(prompt_tokens) + + @classmethod + def create_prefix_prompts_pool( + cls, tokenizer: Tokenizer, num_prompts: int, prompt_length: int + ) -> None: + """ + Generate a pool of prefix prompts. + + Args: + tokenizer: Tokenizer instance. + num_prompts: Number of prefix prompts to generate. + prompt_length: Number of tokens per prefix prompt. + """ + if cls._tokenized_corpus is None: + cls._initialize_corpus(tokenizer) + + cls._prefix_prompts = [ + cls._generate_prompt(tokenizer, prompt_length) for _ in range(num_prompts) + ] + + @classmethod + def get_random_prefix_prompt(cls) -> str: + """ + Fetch a random prefix prompt from the pool. 
+ + Returns: + A random prefix prompt. + """ + return random.choice(cls._prefix_prompts) diff --git a/genai-perf/genai_perf/main.py b/genai-perf/genai_perf/main.py index 1541886e..a23a49ff 100755 --- a/genai-perf/genai_perf/main.py +++ b/genai-perf/genai_perf/main.py @@ -96,6 +96,8 @@ def create_config_options(args: Namespace) -> InputsConfig: batch_size_image=args.batch_size_image, batch_size_text=args.batch_size_text, output_dir=args.artifact_dir, + num_prefix_prompts=args.num_prefix_prompts, + prefix_prompt_length=args.prefix_prompt_length, ) diff --git a/genai-perf/genai_perf/parser.py b/genai-perf/genai_perf/parser.py index 2890488b..03dd119f 100644 --- a/genai-perf/genai_perf/parser.py +++ b/genai-perf/genai_perf/parser.py @@ -506,6 +506,17 @@ def _add_input_args(parser): "These will be reused until benchmarking is complete.", ) + input_group.add_argument( + "--num-prefix-prompts", + type=int, + default=ic.DEFAULT_NUM_PREFIX_PROMPTS, + required=False, + help=f"The number of prefix prompts to select from. " + "If this value is not zero, these are prompts that are " + "prepended to input prompts. This is useful for " + "benchmarking models that use a K-V cache.", + ) + input_group.add_argument( "--output-tokens-mean", "--osl", @@ -573,6 +584,17 @@ def _add_input_args(parser): help=f"The standard deviation of number of tokens in the generated prompts when using synthetic data.", ) + input_group.add_argument( + "--prefix-prompt-length", + type=int, + default=ic.DEFAULT_PREFIX_PROMPT_LENGTH, + required=False, + help=f"The number of tokens in each prefix prompt. This value is only " + "used if --num-prefix-prompts is positive. 
Note that due to " + "the prefix and user prompts being concatenated, the number of tokens " + "in the final prompt may be off by one.", + ) + input_group.add_argument( "--warmup-request-count", type=int, diff --git a/genai-perf/genai_perf/wrapper.py b/genai-perf/genai_perf/wrapper.py index b718cf5b..1c33816a 100644 --- a/genai-perf/genai_perf/wrapper.py +++ b/genai-perf/genai_perf/wrapper.py @@ -88,6 +88,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s "model", "model_selection_strategy", "num_dataset_entries", + "num_prefix_prompts", "output_format", "output_tokens_mean", "output_tokens_mean_deterministic", @@ -105,6 +106,7 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s "synthetic_input_files", "synthetic_input_tokens_mean", "synthetic_input_tokens_stddev", + "prefix_prompt_length", "tokenizer", "tokenizer_trust_remote_code", "tokenizer_revision", diff --git a/genai-perf/tests/test_cli.py b/genai-perf/tests/test_cli.py index 92e3bede..60b54cd5 100644 --- a/genai-perf/tests/test_cli.py +++ b/genai-perf/tests/test_cli.py @@ -192,6 +192,7 @@ def test_help_version_arguments_output_and_exit( ), (["--num-dataset-entries", "101"], {"num_dataset_entries": 101}), (["--num-prompts", "101"], {"num_dataset_entries": 101}), + (["--num-prefix-prompts", "101"], {"num_prefix_prompts": 101}), ( ["--output-tokens-mean", "6"], {"output_tokens_mean": 6}, @@ -245,6 +246,10 @@ def test_help_version_arguments_output_and_exit( ["--synthetic-input-tokens-stddev", "7"], {"synthetic_input_tokens_stddev": 7}, ), + ( + ["--prefix-prompt-length", "6"], + {"prefix_prompt_length": 6}, + ), ( ["--image-width-mean", "123"], {"image_width_mean": 123}, diff --git a/genai-perf/tests/test_file_input_retriever.py b/genai-perf/tests/test_file_input_retriever.py index 77cca3fc..de8075ae 100644 --- a/genai-perf/tests/test_file_input_retriever.py +++ b/genai-perf/tests/test_file_input_retriever.py @@ -346,3 +346,39 @@ def 
test_get_input_datasets_from_empty_dir( ) with pytest.raises(ValueError, match="No JSONL files found in directory"): _ = file_retriever._get_input_datasets_from_dir() + + @patch("builtins.open", side_effect=open_side_effect) + @patch( + "genai_perf.inputs.retrievers.file_input_retriever.SyntheticPromptGenerator.create_prefix_prompts_pool" + ) + @patch( + "genai_perf.inputs.retrievers.file_input_retriever.SyntheticPromptGenerator.get_random_prefix_prompt", + return_value="prefix prompt", + ) + @patch("pathlib.Path.exists", return_value=True) + def test_get_input_file_multiple_prompts_with_prefix_prompts( + self, + mock_exists, + mock_random_prefix_prompt, + mock_create_prefix_prompts_pool, + mock_file, + ): + file_retriever = FileInputRetriever( + InputsConfig( + tokenizer=get_empty_tokenizer(), + model_name=["test_model_A"], + model_selection_strategy=ModelSelectionStrategy.ROUND_ROBIN, + input_filename=Path("multiple_prompts.jsonl"), + num_prefix_prompts=3, + prefix_prompt_length=15, + ) + ) + file_data = file_retriever._get_input_dataset_from_file( + Path("multiple_prompts.jsonl") + ) + + assert file_data is not None + assert len(file_data.rows) == 3 + mock_create_prefix_prompts_pool.assert_called_once() + for row in file_data.rows: + assert row.texts[0].startswith("prefix prompt ") diff --git a/genai-perf/tests/test_json_exporter.py b/genai-perf/tests/test_json_exporter.py index 99adadc0..a3704de8 100644 --- a/genai-perf/tests/test_json_exporter.py +++ b/genai-perf/tests/test_json_exporter.py @@ -224,6 +224,7 @@ class TestJsonExporter: "streaming": true, "u": null, "num_dataset_entries": 100, + "num_prefix_prompts": 0, "output_tokens_mean": -1, "output_tokens_mean_deterministic": false, "output_tokens_stddev": 0, @@ -232,6 +233,7 @@ class TestJsonExporter: "synthetic_input_files": null, "synthetic_input_tokens_mean": 550, "synthetic_input_tokens_stddev": 0, + "prefix_prompt_length": 100, "warmup_request_count": 0, "image_width_mean": 100, "image_width_stddev": 
0, diff --git a/genai-perf/tests/test_synthetic_data_retriever.py b/genai-perf/tests/test_synthetic_data_retriever.py index 91038b7e..1b9adf8a 100644 --- a/genai-perf/tests/test_synthetic_data_retriever.py +++ b/genai-perf/tests/test_synthetic_data_retriever.py @@ -139,3 +139,55 @@ def test_synthetic_multiple_files(self, mock_prompt, mock_image): assert len(row.images) == 1 assert row.texts[0] == "test prompt" assert row.images[0] == "_base64_encoding" + + @patch( + "genai_perf.inputs.retrievers.synthetic_data_retriever.SyntheticPromptGenerator.create_synthetic_prompt", + return_value="test prompt", + ) + @patch( + "genai_perf.inputs.retrievers.synthetic_data_retriever.SyntheticPromptGenerator.create_prefix_prompts_pool" + ) + @patch( + "genai_perf.inputs.retrievers.synthetic_data_retriever.SyntheticPromptGenerator.get_random_prefix_prompt", + return_value="prompt prefix", + ) + def test_synthetic_with_prefix_prompts( + self, + mock_random_prefix_prompt, + mock_create_prefix_prompts_pool, + mock_create_synthetic_prompt, + ): + config = InputsConfig( + num_dataset_entries=3, + num_prefix_prompts=3, + prefix_prompt_length=20, + batch_size_text=1, + output_format=OutputFormat.OPENAI_COMPLETIONS, + synthetic_input_filenames=[DEFAULT_SYNTHETIC_FILENAME], + tokenizer=get_empty_tokenizer(), + ) + + synthetic_retriever = SyntheticDataRetriever(config) + dataset = synthetic_retriever.retrieve_data() + + # Validate the number of rows in the retrieved dataset matches the expected count + synthetic_input_filenames = cast(list[str], config.synthetic_input_filenames) + expected_row_count = config.num_dataset_entries + actual_row_count = len(dataset.files_data[synthetic_input_filenames[0]].rows) + + assert ( + actual_row_count == expected_row_count + ), f"Expected {expected_row_count} rows, got {actual_row_count}" + + # Ensure the prefix prompts pool was created exactly once + mock_create_prefix_prompts_pool.assert_called_once() + + # Validate that every text in the dataset has
the right prefix + for row_index, row in enumerate( + dataset.files_data[synthetic_input_filenames[0]].rows + ): + expected_prefix = "prompt prefix " + for text_index, text in enumerate(row.texts): + assert text.startswith( + expected_prefix + ), f"Row {row_index}, text {text_index}: text does not start with '{expected_prefix}'. Actual: '{text}'" diff --git a/templates/genai-perf-templates/README_template b/templates/genai-perf-templates/README_template index 2e8af84c..31a1dc3a 100644 --- a/templates/genai-perf-templates/README_template +++ b/templates/genai-perf-templates/README_template @@ -291,6 +291,9 @@ options: filepaths to images to use for benchmarking as JSON objects. For any dataset, you can specify the following options: +* `--num-prefix-prompts `: The number of synthetic prefix prompts to + sample from. If this value is >0, synthetic prefix prompts will be prepended + to user prompts. * `--output-tokens-mean `: The mean number of tokens in each output. Ensure the `--tokenizer` value is set correctly, >= 1. * `--output-tokens-stddev `: The standard deviation of the number of tokens @@ -301,6 +304,8 @@ For any dataset, you can specify the following options: Triton service-kind. Note that there is still some variability in the requested number of output tokens, but GenAi-Perf attempts its best effort with your model to get the right number of output tokens. +* `--prefix-prompt-length `: The number of tokens to include in each + prefix prompt. This value is only used if --num-prefix-prompts is positive. You can optionally set additional model inputs with the following option: * `--extra-inputs :`: An additional input for use with the @@ -459,6 +464,12 @@ extensions. For example, 'synthetic:queries,passages'. The number of unique payloads to sample from. These will be reused until benchmarking is complete. (default: `100`) +##### `--num-prefix-prompts ` + +The number of prefix prompts to select from. 
If this value is greater than zero, +a randomly selected prefix prompt is prepended to each input prompt. This is +useful for benchmarking models that use a K-V cache. (default: `0`) + ##### `--output-tokens-mean ` ##### `--osl` @@ -500,6 +511,13 @@ data. (default: `550`) The standard deviation of number of tokens in the generated prompts when using synthetic data. (default: `0`) +##### `--prefix-prompt-length ` + +The number of tokens in each prefix prompt. This value is only used if +`--num-prefix-prompts` is positive. Note that due to the prefix and user prompts +being concatenated, the number of tokens in the final prompt may be off by one. +(default: `100`) + ##### `--warmup-request-count ` The number of warmup requests to send before benchmarking. (default: `0`)