Skip to content

Commit

Permalink
Variable number of images per prompt (#765)
Browse files Browse the repository at this point in the history
* variable number of images per prompt

* add TODO comment

Co-authored-by: Hyunjae Woo <[email protected]>

* apply code review suggestions

* Update src/c++/perf_analyzer/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py

Co-authored-by: Hyunjae Woo <[email protected]>

---------

Co-authored-by: Hyunjae Woo <[email protected]>
  • Loading branch information
mwawrzos and nv-hwoo authored Jul 30, 2024
1 parent ebafa2d commit ed0312e
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class LlmInputs:
DEFAULT_IMAGE_WIDTH_STDDEV = 0
DEFAULT_IMAGE_HEIGHT_MEAN = 100
DEFAULT_IMAGE_HEIGHT_STDDEV = 0
DEFAULT_IMAGES_COUNT_MIN = 0
DEFAULT_IMAGES_COUNT_MAX = 1

EMPTY_JSON_IN_VLLM_PA_FORMAT: Dict = {"data": []}
EMPTY_JSON_IN_TENSORRTLLM_PA_FORMAT: Dict = {"data": []}
Expand Down Expand Up @@ -114,6 +116,8 @@ def create_llm_inputs(
image_height_mean: int = DEFAULT_IMAGE_HEIGHT_MEAN,
image_height_stddev: int = DEFAULT_IMAGE_HEIGHT_STDDEV,
image_format: ImageFormat = ImageFormat.PNG,
images_count_min: int = DEFAULT_IMAGES_COUNT_MIN,
images_count_max: int = DEFAULT_IMAGES_COUNT_MAX,
random_seed: int = DEFAULT_RANDOM_SEED,
num_of_output_prompts: int = DEFAULT_NUM_PROMPTS,
add_model_name: bool = False,
Expand Down Expand Up @@ -166,6 +170,10 @@ def create_llm_inputs(
The standard deviation of height of images when generating synthetic image data.
image_format:
The compression format of the images.
images_count_min:
Minimum number of synthetic images to be added to a prompt.
images_count_max:
Maximum number of synthetic images to be added to a prompt.
batch_size:
The number of inputs per request (currently only used for the embeddings and rankings endpoints)
Expand Down Expand Up @@ -207,6 +215,8 @@ def create_llm_inputs(
image_height_mean,
image_height_stddev,
image_format,
images_count_min,
images_count_max,
batch_size,
input_filename,
)
Expand Down Expand Up @@ -247,6 +257,8 @@ def get_generic_dataset_json(
image_height_mean: int,
image_height_stddev: int,
image_format: ImageFormat,
images_count_min: int,
images_count_max: int,
batch_size: int,
input_filename: Optional[Path],
) -> Dict:
Expand Down Expand Up @@ -283,6 +295,10 @@ def get_generic_dataset_json(
The standard deviation of height of images when generating synthetic image data.
image_format:
The compression format of the images.
images_count_min:
Minimum number of synthetic images to be added to a prompt.
images_count_max:
Maximum number of synthetic images to be added to a prompt.
batch_size:
The number of inputs per request (currently only used for the embeddings and rankings endpoints)
input_filename:
Expand Down Expand Up @@ -350,6 +366,8 @@ def get_generic_dataset_json(
image_height_mean,
image_height_stddev,
image_format,
images_count_min,
images_count_max,
output_format,
)
generic_dataset_json = (
Expand Down Expand Up @@ -480,6 +498,8 @@ def _get_input_dataset_from_synthetic(
image_height_mean: int,
image_height_stddev: int,
image_format: ImageFormat,
images_count_min: int,
images_count_max: int,
output_format: OutputFormat,
) -> Dict[str, Any]:
dataset_json: Dict[str, Any] = {}
Expand All @@ -495,14 +515,18 @@ def _get_input_dataset_from_synthetic(
row["row"]["text_input"] = synthetic_prompt

if output_format == OutputFormat.OPENAI_VISION:
synthetic_image = cls._create_synthetic_image(
image_width_mean=image_width_mean,
image_width_stddev=image_width_stddev,
image_height_mean=image_height_mean,
image_height_stddev=image_height_stddev,
image_format=image_format,
)
row["row"]["image"] = synthetic_image
N = random.randint(images_count_min, images_count_max)
synthetic_images = [
cls._create_synthetic_image(
image_width_mean=image_width_mean,
image_width_stddev=image_width_stddev,
image_height_mean=image_height_mean,
image_height_stddev=image_height_stddev,
image_format=image_format,
)
for _ in range(N)
]
row["row"]["images"] = synthetic_images

dataset_json["rows"].append(row)

Expand Down Expand Up @@ -607,8 +631,9 @@ def _get_input_dataset_from_file(cls, input_filename: Path) -> Dict:
dataset_json["features"] = [{"name": "text_input"}]
dataset_json["rows"] = []
for prompt, image in zip(prompts, images):
content = {"text_input": prompt}
content.update({"image": image} if image else {})
# TODO (TMA-2004): support a variable number of images per request through the input file
content: Dict[str, Any] = {"text_input": prompt}
content.update({"images": [image]} if image else {})
dataset_json["rows"].append({"row": content})

return dataset_json
Expand Down Expand Up @@ -652,16 +677,19 @@ def _convert_to_openai_multi_modal_content(
Converts to multi-modal content format of OpenAI Chat Completions API.
"""
for row in generic_dataset_json["rows"]:
if row["image"]:
if row["images"]:
row["text_input"] = [
{
"type": "text",
"text": row["text_input"],
},
{
"type": "image_url",
"image_url": {"url": row["image"]},
},
*[
{
"type": "image_url",
"image_url": {"url": image},
}
for image in row["images"]
],
]

return generic_dataset_json
Expand Down
22 changes: 22 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ def _check_image_input_args(
parser.error(
"Both --image-width-stddev and --image-height-stddev values must be non-negative."
)
if args.images_count_min < 0:
parser.error("--images-count-min must be a non-negative integer.")
if args.images_count_max < args.images_count_min:
parser.error(
"--images-count-max must be greater than or equal to --images-count-min."
)

args = _convert_str_to_enum_entry(args, "image_format", ImageFormat)
return args
Expand Down Expand Up @@ -481,6 +487,22 @@ def _add_image_input_args(parser):
"If format is not selected, format of generated image is selected at random",
)

input_group.add_argument(
"--images-count-min",
type=int,
default=LlmInputs.DEFAULT_IMAGES_COUNT_MIN,
required=False,
help=f"Minimum number of synthetic images to be added to a prompt.",
)

input_group.add_argument(
"--images-count-max",
type=int,
default=LlmInputs.DEFAULT_IMAGES_COUNT_MAX,
required=False,
help=f"Maximum number of synthetic images to be added to a prompt.",
)


def _add_profile_args(parser):
profile_group = parser.add_argument_group("Profiling")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ def _get_openai_input_text(self, req_inputs: dict) -> str:
return payload["prompt"]
elif self._response_format == ResponseFormat.OPENAI_VISION:
content = payload["messages"][0]["content"]
# When the request input includes no images, the content has the same
# form as the text-only chat completions format (i.e. a plain string).
if isinstance(content, str):
return content
return " ".join(c["text"] for c in content if c["type"] == "text")
else:
raise ValueError(
Expand Down
2 changes: 2 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
"image_height_mean",
"image_height_stddev",
"image_format",
"images_count_min",
"images_count_max",
]

utils.remove_file(args.profile_export_file)
Expand Down
4 changes: 4 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ def test_help_version_arguments_output_and_exit(
{"image_height_stddev": 456},
),
(["--image-format", "png"], {"image_format": ImageFormat.PNG}),
(
["--images-count-min", "123", "--images-count-max", "321"],
{"images_count_min": 123, "images_count_max": 321},
),
(["-v"], {"verbose": True}),
(["--verbose"], {"verbose": True}),
(["-u", "test_url"], {"u": "test_url"}),
Expand Down
2 changes: 2 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ def test_generate_json(self, monkeypatch) -> None:
"image_height_mean": 100,
"image_height_stddev": 0,
"image_format": null,
"images_count_min": 0,
"images_count_max": 1,
"concurrency": 1,
"measurement_interval": 10000,
"request_rate": null,
Expand Down
16 changes: 11 additions & 5 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,8 @@ def test_llm_inputs_with_defaults(self, default_configured_url):
def test_add_image_inputs_openai_vision(self) -> None:
generic_json = {
"rows": [
{"text_input": "test input one", "image": "test_image1"},
{"text_input": "test input two", "image": "test_image2"},
{"text_input": "test input one", "images": ["test_image1"]},
{"text_input": "test input two", "images": ["test_image2"]},
]
}

Expand Down Expand Up @@ -608,8 +608,12 @@ def test_add_image_inputs_openai_vision(self) -> None:
OutputFormat.TENSORRTLLM,
],
)
@pytest.mark.parametrize(
"images_count",
[0, 5],
)
def test_get_input_dataset_from_synthetic(
self, mock_prompt, mock_image, output_format
self, mock_prompt, mock_image, output_format, images_count
) -> None:
_placeholder = 123 # dummy value
num_prompts = 3
Expand All @@ -624,6 +628,8 @@ def test_get_input_dataset_from_synthetic(
image_height_mean=_placeholder,
image_height_stddev=_placeholder,
image_format=ImageFormat.PNG,
images_count_min=images_count,
images_count_max=images_count,
output_format=output_format,
)

Expand All @@ -635,7 +641,7 @@ def test_get_input_dataset_from_synthetic(
if output_format == OutputFormat.OPENAI_VISION:
assert row == {
"text_input": "This is test prompt",
"image": "test_image_base64",
"images": images_count * ["test_image_base64"],
}
else:
assert row == {
Expand Down Expand Up @@ -805,7 +811,7 @@ def test_get_input_file_with_multi_modal_data(
assert len(dataset["rows"]) == len(expected_data)
for i, data in enumerate(expected_data):
assert dataset["rows"][i]["row"]["text_input"] == data.text_input
assert dataset["rows"][i]["row"]["image"] == data.image
assert dataset["rows"][i]["row"]["images"] == [data.image]

@pytest.mark.parametrize(
"seed, model_name_list, index,model_selection_strategy,expected_model",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def test_empty_response(self, mock_read_write: pytest.MonkeyPatch) -> None:
{
"timestamp": 2,
"request_inputs": {
"payload": '{"messages":[{"role":"user","content":[{"type":"text","text":"This is test too"},{"type":"image_url","image_url":{"url":""}}]}],"model":"llava-1.6","stream":true}',
"payload": '{"messages":[{"role":"user","content":"This is test too"}],"model":"llava-1.6","stream":true}',
},
# the first, and the last two responses will be ignored because they have no "content"
"response_timestamps": [4, 7, 11, 15, 18, 19],
Expand Down

0 comments on commit ed0312e

Please sign in to comment.