diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py index 057c33562..fe27539c9 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py @@ -86,6 +86,8 @@ class LlmInputs: DEFAULT_IMAGE_WIDTH_STDDEV = 0 DEFAULT_IMAGE_HEIGHT_MEAN = 100 DEFAULT_IMAGE_HEIGHT_STDDEV = 0 + DEFAULT_IMAGES_COUNT_MIN = 0 + DEFAULT_IMAGES_COUNT_MAX = 1 EMPTY_JSON_IN_VLLM_PA_FORMAT: Dict = {"data": []} EMPTY_JSON_IN_TENSORRTLLM_PA_FORMAT: Dict = {"data": []} @@ -114,6 +116,8 @@ def create_llm_inputs( image_height_mean: int = DEFAULT_IMAGE_HEIGHT_MEAN, image_height_stddev: int = DEFAULT_IMAGE_HEIGHT_STDDEV, image_format: ImageFormat = ImageFormat.PNG, + images_count_min: int = DEFAULT_IMAGES_COUNT_MIN, + images_count_max: int = DEFAULT_IMAGES_COUNT_MAX, random_seed: int = DEFAULT_RANDOM_SEED, num_of_output_prompts: int = DEFAULT_NUM_PROMPTS, add_model_name: bool = False, @@ -166,6 +170,10 @@ def create_llm_inputs( The standard deviation of height of images when generating synthetic image data. image_format: The compression format of the images. + images_count_min: + Minimum number of synthetic images to be added to a prompt. + images_count_max: + Maximum number of synthetic images to be added to a prompt. 
batch_size: The number of inputs per request (currently only used for the embeddings and rankings endpoints) @@ -207,6 +215,8 @@ def create_llm_inputs( image_height_mean, image_height_stddev, image_format, + images_count_min, + images_count_max, batch_size, input_filename, ) @@ -247,6 +257,8 @@ def get_generic_dataset_json( image_height_mean: int, image_height_stddev: int, image_format: ImageFormat, + images_count_min: int, + images_count_max: int, batch_size: int, input_filename: Optional[Path], ) -> Dict: @@ -283,6 +295,10 @@ def get_generic_dataset_json( The standard deviation of height of images when generating synthetic image data. image_format: The compression format of the images. + images_count_min: + Minimum number of synthetic images to be added to a prompt. + images_count_max: + Maximum number of synthetic images to be added to a prompt. batch_size: The number of inputs per request (currently only used for the embeddings and rankings endpoints) input_filename: @@ -350,6 +366,8 @@ def get_generic_dataset_json( image_height_mean, image_height_stddev, image_format, + images_count_min, + images_count_max, output_format, ) generic_dataset_json = ( @@ -480,6 +498,8 @@ def _get_input_dataset_from_synthetic( image_height_mean: int, image_height_stddev: int, image_format: ImageFormat, + images_count_min: int, + images_count_max: int, output_format: OutputFormat, ) -> Dict[str, Any]: dataset_json: Dict[str, Any] = {} @@ -495,14 +515,18 @@ def _get_input_dataset_from_synthetic( row["row"]["text_input"] = synthetic_prompt if output_format == OutputFormat.OPENAI_VISION: - synthetic_image = cls._create_synthetic_image( - image_width_mean=image_width_mean, - image_width_stddev=image_width_stddev, - image_height_mean=image_height_mean, - image_height_stddev=image_height_stddev, - image_format=image_format, - ) - row["row"]["image"] = synthetic_image + N = random.randint(images_count_min, images_count_max) + synthetic_images = [ + cls._create_synthetic_image( + 
image_width_mean=image_width_mean, + image_width_stddev=image_width_stddev, + image_height_mean=image_height_mean, + image_height_stddev=image_height_stddev, + image_format=image_format, + ) + for _ in range(N) + ] + row["row"]["images"] = synthetic_images dataset_json["rows"].append(row) @@ -607,8 +631,9 @@ def _get_input_dataset_from_file(cls, input_filename: Path) -> Dict: dataset_json["features"] = [{"name": "text_input"}] dataset_json["rows"] = [] for prompt, image in zip(prompts, images): - content = {"text_input": prompt} - content.update({"image": image} if image else {}) + # (TMA-2004) support variable images per request through input file + content: Dict[str, Any] = {"text_input": prompt} + content.update({"images": [image]} if image else {}) dataset_json["rows"].append({"row": content}) return dataset_json @@ -652,16 +677,19 @@ def _convert_to_openai_multi_modal_content( Converts to multi-modal content format of OpenAI Chat Completions API. """ for row in generic_dataset_json["rows"]: - if row["image"]: + if row["images"]: row["text_input"] = [ { "type": "text", "text": row["text_input"], }, - { - "type": "image_url", - "image_url": {"url": row["image"]}, - }, + *[ + { + "type": "image_url", + "image_url": {"url": image}, + } + for image in row["images"] + ], ] return generic_dataset_json diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py index 776535d15..1fd1f4ddb 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py @@ -131,6 +131,12 @@ def _check_image_input_args( parser.error( "Both --image-width-stddev and --image-height-stddev values must be non-negative." ) + if args.images_count_min < 0: + parser.error("--images-count-min must be a non-negative integer.") + if args.images_count_max < args.images_count_min: + parser.error( + "--images-count-max must be greater than or equal to --images-count-min." 
+ ) args = _convert_str_to_enum_entry(args, "image_format", ImageFormat) return args @@ -481,6 +487,22 @@ def _add_image_input_args(parser): "If format is not selected, format of generated image is selected at random", ) + input_group.add_argument( + "--images-count-min", + type=int, + default=LlmInputs.DEFAULT_IMAGES_COUNT_MIN, + required=False, + help="Minimum number of synthetic images to be added to a prompt.", + ) + + input_group.add_argument( + "--images-count-max", + type=int, + default=LlmInputs.DEFAULT_IMAGES_COUNT_MAX, + required=False, + help="Maximum number of synthetic images to be added to a prompt.", + ) + def _add_profile_args(parser): profile_group = parser.add_argument_group("Profiling") diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py index 183f21fd2..577ed2986 100755 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py @@ -220,6 +220,10 @@ def _get_openai_input_text(self, req_inputs: dict) -> str: return payload["prompt"] elif self._response_format == ResponseFormat.OPENAI_VISION: content = payload["messages"][0]["content"] + # When no images were included in the request input, the content + # is the same as text-only chat completions format (e.g. string). 
+ if isinstance(content, str): + return content return " ".join(c["text"] for c in content if c["type"] == "text") else: raise ValueError( diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py index 76ef3e321..245e4b832 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py @@ -98,6 +98,8 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s "image_height_mean", "image_height_stddev", "image_format", + "images_count_min", + "images_count_max", ] utils.remove_file(args.profile_export_file) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py index 2ef5d52ba..e376052df 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py @@ -234,6 +234,10 @@ def test_help_version_arguments_output_and_exit( {"image_height_stddev": 456}, ), (["--image-format", "png"], {"image_format": ImageFormat.PNG}), + ( + ["--images-count-min", "123", "--images-count-max", "321"], + {"images_count_min": 123, "images_count_max": 321}, + ), (["-v"], {"verbose": True}), (["--verbose"], {"verbose": True}), (["-u", "test_url"], {"u": "test_url"}), diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py b/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py index f82e59312..f47684989 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py @@ -254,6 +254,8 @@ def test_generate_json(self, monkeypatch) -> None: "image_height_mean": 100, "image_height_stddev": 0, "image_format": null, + "images_count_min": 0, + "images_count_max": 1, "concurrency": 1, "measurement_interval": 10000, "request_rate": null, diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py 
b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py index 028e72849..717766f73 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py @@ -557,8 +557,8 @@ def test_llm_inputs_with_defaults(self, default_configured_url): def test_add_image_inputs_openai_vision(self) -> None: generic_json = { "rows": [ - {"text_input": "test input one", "image": "test_image1"}, - {"text_input": "test input two", "image": "test_image2"}, + {"text_input": "test input one", "images": ["test_image1"]}, + {"text_input": "test input two", "images": ["test_image2"]}, ] } @@ -608,8 +608,12 @@ def test_add_image_inputs_openai_vision(self) -> None: OutputFormat.TENSORRTLLM, ], ) + @pytest.mark.parametrize( + "images_count", + [0, 5], + ) def test_get_input_dataset_from_synthetic( - self, mock_prompt, mock_image, output_format + self, mock_prompt, mock_image, output_format, images_count ) -> None: _placeholder = 123 # dummy value num_prompts = 3 @@ -624,6 +628,8 @@ def test_get_input_dataset_from_synthetic( image_height_mean=_placeholder, image_height_stddev=_placeholder, image_format=ImageFormat.PNG, + images_count_min=images_count, + images_count_max=images_count, output_format=output_format, ) @@ -635,7 +641,7 @@ def test_get_input_dataset_from_synthetic( if output_format == OutputFormat.OPENAI_VISION: assert row == { "text_input": "This is test prompt", - "image": "test_image_base64", + "images": images_count * ["test_image_base64"], } else: assert row == { @@ -805,7 +811,7 @@ def test_get_input_file_with_multi_modal_data( assert len(dataset["rows"]) == len(expected_data) for i, data in enumerate(expected_data): assert dataset["rows"][i]["row"]["text_input"] == data.text_input - assert dataset["rows"][i]["row"]["image"] == data.image + assert dataset["rows"][i]["row"]["images"] == [data.image] @pytest.mark.parametrize( "seed, model_name_list, index,model_selection_strategy,expected_model", diff --git 
a/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py b/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py index d776a6a85..439f13d21 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_llm_profile_data_parser.py @@ -649,7 +649,7 @@ def test_empty_response(self, mock_read_write: pytest.MonkeyPatch) -> None: { "timestamp": 2, "request_inputs": { - "payload": '{"messages":[{"role":"user","content":[{"type":"text","text":"This is test too"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abcdef"}}]}],"model":"llava-1.6","stream":true}', + "payload": '{"messages":[{"role":"user","content":"This is test too"}],"model":"llava-1.6","stream":true}', }, # the first, and the last two responses will be ignored because they have no "content" "response_timestamps": [4, 7, 11, 15, 18, 19],