Skip to content

Commit

Permalink
Support synthetic image generation in GenAI-Perf (#754)
Browse files Browse the repository at this point in the history
* support synthetic image generation for VLM model

* add test

* integrate synthetic image generator into LlmInputs

* add source images for synthetic image data

* use abs to get positive int
  • Loading branch information
nv-hwoo authored Jul 18, 2024
1 parent 8e5570e commit e7925c8
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 131 deletions.
100 changes: 48 additions & 52 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,55 +23,16 @@
from genai_perf import utils
from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA
from genai_perf.exceptions import GenAIPerfException
from genai_perf.llm_inputs.synthetic_image_generator import (
ImageFormat,
SyntheticImageGenerator,
)
from genai_perf.llm_inputs.synthetic_prompt_generator import SyntheticPromptGenerator
from genai_perf.tokenizer import DEFAULT_TOKENIZER, Tokenizer, get_tokenizer
from PIL import Image, ImageDraw
from PIL import Image
from requests import Response


# (TMA-1984) Remove the dummy image input with random noise image
def make_snowman_image():
    """Draw a fixed 600x800 RGB placeholder picture of a snowman.

    Returns:
        The image created by ``Image.new`` with the snowman drawn onto it.
    """
    # Create a blank canvas; note the background is sky blue, not white.
    img = Image.new("RGB", (600, 800), color="skyblue")
    d = ImageDraw.Draw(img)

    # Draw the snowman's body (three circles). The circles overlap, so the
    # drawing order (bottom, middle, head) is kept exactly as is.
    body_color = "white"
    d.ellipse([200, 500, 400, 700], fill=body_color, outline="black")  # Bottom circle
    d.ellipse([225, 350, 375, 550], fill=body_color, outline="black")  # Middle circle
    d.ellipse([250, 200, 350, 400], fill=body_color, outline="black")  # Head circle

    # Draw the snowman's eyes
    eye_color = "black"
    d.ellipse([275, 250, 285, 260], fill=eye_color)  # Left eye
    d.ellipse([315, 250, 325, 260], fill=eye_color)  # Right eye

    # Draw the snowman's nose (carrot)
    nose_color = "orange"
    d.polygon([(300, 270), (300, 280), (340, 275)], fill=nose_color)  # Nose

    # Draw the snowman's mouth (smile)
    mouth_color = "black"
    d.arc([275, 290, 325, 310], start=0, end=180, fill=mouth_color)  # Smile

    # Draw the snowman's buttons (they reuse the eye color)
    d.ellipse([290, 420, 310, 440], fill=eye_color)  # Top button
    d.ellipse([290, 460, 310, 480], fill=eye_color)  # Middle button
    d.ellipse([290, 500, 310, 520], fill=eye_color)  # Bottom button

    # Draw the snowman's arms
    arm_color = "brown"
    d.line([225, 450, 150, 400], fill=arm_color, width=5)  # Left arm
    d.line([375, 450, 450, 400], fill=arm_color, width=5)  # Right arm

    return img


class ImageFormat(Enum):
    """Enumeration of image formats: PNG and JPEG."""

    # Declaration order determines the values auto() assigns (PNG, then JPEG).
    PNG = auto()
    JPEG = auto()


class ModelSelectionStrategy(Enum):
ROUND_ROBIN = auto()
RANDOM = auto()
Expand Down Expand Up @@ -378,18 +339,17 @@ def get_generic_dataset_json(
dataset
)
elif input_type == PromptSource.SYNTHETIC:
# (TMA-1989) support synthetic image generation for VLM input
if output_format == OutputFormat.OPENAI_VISION:
raise GenAIPerfException(
f"{OutputFormat.OPENAI_VISION.to_lowercase()} currently "
"does not support synthetic input."
)

synthetic_dataset = cls._get_input_dataset_from_synthetic(
tokenizer,
prompt_tokens_mean,
prompt_tokens_stddev,
num_of_output_prompts,
image_width_mean,
image_width_stddev,
image_height_mean,
image_height_stddev,
image_format,
output_format,
)
generic_dataset_json = (
cls._convert_input_synthetic_or_file_dataset_to_generic_json(
Expand Down Expand Up @@ -514,17 +474,36 @@ def _get_input_dataset_from_synthetic(
prompt_tokens_mean: int,
prompt_tokens_stddev: int,
num_of_output_prompts: int,
image_width_mean: int,
image_width_stddev: int,
image_height_mean: int,
image_height_stddev: int,
image_format: ImageFormat,
output_format: OutputFormat,
) -> Dict[str, Any]:
dataset_json: Dict[str, Any] = {}
dataset_json["features"] = [{"name": "text_input"}]
dataset_json["rows"] = []
for _ in range(num_of_output_prompts):
row: Dict["str", Any] = {"row": {}}
synthetic_prompt = cls._create_synthetic_prompt(
tokenizer,
prompt_tokens_mean,
prompt_tokens_stddev,
)
dataset_json["rows"].append({"row": {"text_input": synthetic_prompt}})
row["row"]["text_input"] = synthetic_prompt

if output_format == OutputFormat.OPENAI_VISION:
synthetic_image = cls._create_synthetic_image(
image_width_mean=image_width_mean,
image_width_stddev=image_width_stddev,
image_height_mean=image_height_mean,
image_height_stddev=image_height_stddev,
image_format=image_format,
)
row["row"]["image"] = synthetic_image

dataset_json["rows"].append(row)

return dataset_json

Expand Down Expand Up @@ -1586,3 +1565,20 @@ def _create_synthetic_prompt(
return SyntheticPromptGenerator.create_synthetic_prompt(
tokenizer, prompt_tokens_mean, prompt_tokens_stddev
)

@classmethod
def _create_synthetic_image(
    cls,
    image_width_mean: int,
    image_width_stddev: int,
    image_height_mean: int,
    image_height_stddev: int,
    image_format: ImageFormat,
) -> str:
    """Delegate synthetic image creation to ``SyntheticImageGenerator``.

    Every argument is forwarded unchanged, by keyword, and whatever
    ``SyntheticImageGenerator.create_synthetic_image`` returns is handed
    straight back to the caller.
    """
    generator_kwargs = {
        "image_width_mean": image_width_mean,
        "image_width_stddev": image_width_stddev,
        "image_height_mean": image_height_mean,
        "image_height_stddev": image_height_stddev,
        "image_format": image_format,
    }
    return SyntheticImageGenerator.create_synthetic_image(**generator_kwargs)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,62 +1,79 @@
import base64
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import glob
import random
from enum import Enum, auto
from io import BytesIO
from typing import Optional, cast
from pathlib import Path

import numpy as np
from genai_perf import utils
from PIL import Image


class ImageFormat(Enum):
    """Enumeration of image formats: PNG and JPEG."""

    # Each member is declared exactly once; Enum rejects a reused member name.
    PNG = auto()
    JPEG = auto()


class SyntheticImageGenerator:
def __init__(
self,
"""A simple synthetic image generator that generates multiple synthetic
images from the source images.
"""

@classmethod
def create_synthetic_image(
cls,
image_width_mean: int,
image_height_mean: int,
image_width_stddev: int,
image_height_mean: int,
image_height_stddev: int,
image_format: ImageFormat = ImageFormat.PNG,
rng: Optional[np.random.Generator] = None,
):
self._image_width_mean = image_width_mean
self._image_height_mean = image_height_mean
self._image_width_stddev = image_width_stddev
self._image_height_stddev = image_height_stddev
self.image_format = image_format
self.rng = cast(np.random.Generator, rng or np.random.default_rng())

def __iter__(self):
return self

def _sample_random_positive_integer(self, mean: int, stddev: int) -> int:
while True:
n = int(self.rng.normal(mean, stddev))
if n > 0:
break
return n

def _get_next_image(self):
width = self._sample_random_positive_integer(
self._image_width_mean, self._image_width_stddev
image_format: ImageFormat,
) -> str:
"""Generate base64 encoded synthetic image using the source images."""
width = cls._sample_random_positive_integer(
image_width_mean, image_width_stddev
)
height = self._sample_random_positive_integer(
self._image_height_mean, self._image_height_stddev
height = cls._sample_random_positive_integer(
image_height_mean, image_height_stddev
)
shape = width, height, 3
noise = self.rng.integers(0, 256, shape, dtype=np.uint8)
return Image.fromarray(noise)

def _encode(self, image):
buffered = BytesIO()
image.save(buffered, format=self.image_format.name)
data = base64.b64encode(buffered.getvalue()).decode("utf-8")
return f"data:image/{self.image_format.name.lower()};base64,{data}"

def __next__(self) -> str:
image = self._get_next_image()
base64_string = self._encode(image)
return base64_string

image = cls._sample_source_image()
image = image.resize(size=(width, height))

img_base64 = utils.encode_image(image, image_format.name)
return f"data:image/{image_format.name.lower()};base64,{img_base64}"

@classmethod
def _sample_source_image(cls):
    """Sample one image among the source images.

    Candidates are all entries of the ``source_images`` directory that sits
    next to this module.

    Returns:
        The image returned by ``Image.open`` for the randomly chosen file.

    Raises:
        FileNotFoundError: If the ``source_images`` directory has no entries.
    """
    source_dir = Path(__file__).parent.resolve() / "source_images"
    # glob returns matches in arbitrary order; sorting gives a stable
    # candidate list so a seeded `random` picks the same file on every run.
    filenames = sorted(glob.glob(str(source_dir / "*")))
    if not filenames:
        # Fail with an actionable message instead of the bare IndexError
        # that random.choice raises on an empty sequence.
        raise FileNotFoundError(f"No source images found in '{source_dir}'.")
    return Image.open(random.choice(filenames))

@classmethod
def _sample_random_positive_integer(cls, mean: int, stddev: int) -> int:
n = int(abs(random.gauss(mean, stddev)))
return n if n != 0 else 1 # avoid zero
2 changes: 1 addition & 1 deletion src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@
OPEN_ORCA,
)
from genai_perf.llm_inputs.llm_inputs import (
ImageFormat,
LlmInputs,
ModelSelectionStrategy,
OutputFormat,
PromptSource,
)
from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat
from genai_perf.plots.plot_config_parser import PlotConfigParser
from genai_perf.plots.plot_manager import PlotManager
from genai_perf.tokenizer import DEFAULT_TOKENIZER
Expand Down
5 changes: 5 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def encode_image(img: Image, format: str):
import base64
from io import BytesIO

# JPEG does not support P or RGBA mode (commonly used for PNG) so it needs
# to be converted to RGB before an image can be saved as JPEG format.
if format == "JPEG" and img.mode != "RGB":
img = img.convert("RGB")

buffered = BytesIO()
img.save(buffered, format=format)
return base64.b64encode(buffered.getvalue()).decode("utf-8")
Expand Down
5 changes: 5 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def build_cmd(args: Namespace, extra_args: Optional[List[str]] = None) -> List[s
"synthetic_input_tokens_stddev",
"subcommand",
"tokenizer",
"image_width_mean",
"image_width_stddev",
"image_height_mean",
"image_height_stddev",
"image_format",
]

utils.remove_file(args.profile_export_file)
Expand Down
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
OutputFormat,
PromptSource,
)
from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat
from genai_perf.parser import PathType


Expand Down
57 changes: 56 additions & 1 deletion src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
OutputFormat,
PromptSource,
)
from genai_perf.tokenizer import Tokenizer
from genai_perf.llm_inputs.synthetic_image_generator import ImageFormat
from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer
from PIL import Image

mocked_openorca_data = {
Expand Down Expand Up @@ -587,6 +588,60 @@ def test_add_image_inputs_openai_vision(self) -> None:
},
]

@patch(
    "genai_perf.llm_inputs.llm_inputs.LlmInputs._create_synthetic_prompt",
    return_value="This is test prompt",
)
@patch(
    "genai_perf.llm_inputs.llm_inputs.LlmInputs._create_synthetic_image",
    return_value="test_image_base64",
)
@pytest.mark.parametrize(
    "output_format",
    [
        OutputFormat.OPENAI_CHAT_COMPLETIONS,
        OutputFormat.OPENAI_COMPLETIONS,
        OutputFormat.OPENAI_EMBEDDINGS,
        OutputFormat.RANKINGS,
        OutputFormat.OPENAI_VISION,
        OutputFormat.VLLM,
        OutputFormat.TENSORRTLLM,
    ],
)
def test_get_input_dataset_from_synthetic(
    self, mock_image, mock_prompt, output_format
) -> None:
    """Check the rows built by ``_get_input_dataset_from_synthetic``.

    Prompt and image creation are both patched, so only the row layout is
    under test: every row carries the prompt, and an image is attached only
    for the OPENAI_VISION output format.

    Stacked ``@patch`` decorators inject their mocks bottom-up, so the first
    mock argument belongs to ``_create_synthetic_image`` and the second to
    ``_create_synthetic_prompt``.
    """
    _placeholder = 123  # dummy value
    num_prompts = 3

    dataset_json = LlmInputs._get_input_dataset_from_synthetic(
        tokenizer=get_tokenizer(DEFAULT_TOKENIZER),
        prompt_tokens_mean=_placeholder,
        prompt_tokens_stddev=_placeholder,
        num_of_output_prompts=num_prompts,
        image_width_mean=_placeholder,
        image_width_stddev=_placeholder,
        image_height_mean=_placeholder,
        image_height_stddev=_placeholder,
        image_format=ImageFormat.PNG,
        output_format=output_format,
    )

    assert len(dataset_json["rows"]) == num_prompts

    if output_format == OutputFormat.OPENAI_VISION:
        expected_row = {
            "text_input": "This is test prompt",
            "image": "test_image_base64",
        }
    else:
        expected_row = {
            "text_input": "This is test prompt",
        }

    for entry in dataset_json["rows"]:
        assert entry["row"] == expected_row

    # One prompt per row; images are generated only for the vision format.
    assert mock_prompt.call_count == num_prompts
    if output_format == OutputFormat.OPENAI_VISION:
        assert mock_image.call_count == num_prompts
    else:
        mock_image.assert_not_called()

# def test_trtllm_default_max_tokens(self, default_tokenizer: Tokenizer) -> None:
# input_name = "max_tokens"
# input_value = 256
Expand Down
Loading

0 comments on commit e7925c8

Please sign in to comment.