Skip to content

Commit

Permalink
Add CLI options for synthetic image generation
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Jul 12, 2024
1 parent 916fe91 commit 0127e9b
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ class LlmInputs:
DEFAULT_OUTPUT_TOKENS_STDDEV = 0
DEFAULT_NUM_PROMPTS = 100

# Defaults for the image_width_* / image_height_* parameters of
# create_llm_inputs, which describe the size distribution (mean and standard
# deviation) of synthetic images. A stddev of 0 presumably yields fixed-size
# images -- TODO confirm against the synthetic image generator.
DEFAULT_IMAGE_WIDTH_MEAN = 100
DEFAULT_IMAGE_WIDTH_STDDEV = 0
DEFAULT_IMAGE_HEIGHT_MEAN = 100
DEFAULT_IMAGE_HEIGHT_STDDEV = 0

EMPTY_JSON_IN_VLLM_PA_FORMAT: Dict = {"data": []}
EMPTY_JSON_IN_TENSORRTLLM_PA_FORMAT: Dict = {"data": []}
EMPTY_JSON_IN_OPENAI_PA_FORMAT: Dict = {"data": []}
Expand All @@ -143,6 +148,11 @@ def create_llm_inputs(
output_tokens_deterministic: bool = False,
prompt_tokens_mean: int = DEFAULT_PROMPT_TOKENS_MEAN,
prompt_tokens_stddev: int = DEFAULT_PROMPT_TOKENS_STDDEV,
image_width_mean: int = DEFAULT_IMAGE_WIDTH_MEAN,
image_width_stddev: int = DEFAULT_IMAGE_WIDTH_STDDEV,
image_height_mean: int = DEFAULT_IMAGE_HEIGHT_MEAN,
image_height_stddev: int = DEFAULT_IMAGE_HEIGHT_STDDEV,
image_format: ImageFormat = ImageFormat.PNG,
random_seed: int = DEFAULT_RANDOM_SEED,
num_of_output_prompts: int = DEFAULT_NUM_PROMPTS,
add_model_name: bool = False,
Expand Down Expand Up @@ -185,6 +195,16 @@ def create_llm_inputs(
The standard deviation of the length of the output to generate. This is only used if output_tokens_mean is provided.
output_tokens_deterministic:
If true, the output tokens will set the minimum and maximum tokens to be equivalent.
image_width_mean:
The mean width of images when generating synthetic image data.
image_width_stddev:
The standard deviation of width of images when generating synthetic image data.
image_height_mean:
The mean height of images when generating synthetic image data.
image_height_stddev:
The standard deviation of height of images when generating synthetic image data.
image_format:
The compression format of the images.
batch_size:
The number of inputs per request (currently only used for the embeddings and rankings endpoints)
Expand Down Expand Up @@ -221,6 +241,11 @@ def create_llm_inputs(
prompt_tokens_mean,
prompt_tokens_stddev,
num_of_output_prompts,
image_width_mean,
image_width_stddev,
image_height_mean,
image_height_stddev,
image_format,
batch_size,
input_filename,
)
Expand Down Expand Up @@ -256,6 +281,11 @@ def get_generic_dataset_json(
prompt_tokens_mean: int,
prompt_tokens_stddev: int,
num_of_output_prompts: int,
image_width_mean: int,
image_width_stddev: int,
image_height_mean: int,
image_height_stddev: int,
image_format: ImageFormat,
batch_size: int,
input_filename: Optional[Path],
) -> Dict:
Expand All @@ -282,6 +312,16 @@ def get_generic_dataset_json(
The standard deviation of the length of the prompt to generate
num_of_output_prompts:
The number of synthetic output prompts to generate
image_width_mean:
The mean width of images when generating synthetic image data.
image_width_stddev:
The standard deviation of width of images when generating synthetic image data.
image_height_mean:
The mean height of images when generating synthetic image data.
image_height_stddev:
The standard deviation of height of images when generating synthetic image data.
image_format:
The compression format of the images.
batch_size:
The number of inputs per request (currently only used for the embeddings and rankings endpoints)
input_filename:
Expand Down Expand Up @@ -361,7 +401,7 @@ def get_generic_dataset_json(
input_filename = cast(Path, input_filename)
input_file_dataset = cls._get_input_dataset_from_file(input_filename)
input_file_dataset = cls._encode_images_in_input_dataset(
input_file_dataset
input_file_dataset, image_format
)
generic_dataset_json = (
cls._convert_input_synthetic_or_file_dataset_to_generic_json(
Expand Down Expand Up @@ -648,13 +688,14 @@ def _convert_to_openai_multi_modal_content(
return generic_dataset_json

@classmethod
def _encode_images_in_input_dataset(
    cls, input_file_dataset: Dict, image_format: ImageFormat
) -> Dict:
    """
    Replace image file paths in the dataset with base64 data URIs.

    For every entry in ``input_file_dataset["rows"]`` whose ``row`` dict has a
    truthy ``"image"`` value, the value is treated as a filename, opened,
    encoded with ``cls._encode_image`` using ``image_format``, and overwritten
    with a ``data:image/<format>;base64,<payload>`` string. Rows without an
    image are left untouched.

    Parameters
    ----------
    input_file_dataset:
        The dataset loaded from the input file. It is modified in place.
    image_format:
        The compression format used to encode the images.

    Returns
    -------
    Dict
        The same ``input_file_dataset`` object, with images encoded.
    """
    # The media type in the data URI has to describe the bytes actually
    # produced by _encode_image; a hard-coded "png" would mislabel any other
    # format. Assumes ImageFormat member names are valid image subtypes
    # (e.g. PNG -> "png") -- confirm when adding new formats.
    mime_subtype = image_format.name.lower()

    for row in input_file_dataset["rows"]:
        filename = row["row"].get("image")
        if filename:
            # Close the underlying file as soon as the image has been encoded.
            with Image.open(filename) as img:
                img_base64 = cls._encode_image(img, image_format)
            row["row"]["image"] = f"data:image/{mime_subtype};base64,{img_base64}"

    return input_file_dataset
Expand Down
5 changes: 5 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
output_tokens_mean=args.output_tokens_mean,
output_tokens_stddev=args.output_tokens_stddev,
output_tokens_deterministic=args.output_tokens_mean_deterministic,
image_width_mean=args.image_width_mean,
image_width_stddev=args.image_width_stddev,
image_height_mean=args.image_height_mean,
image_height_stddev=args.image_height_stddev,
image_format=args.image_format,
random_seed=args.random_seed,
num_of_output_prompts=args.num_prompts,
add_model_name=add_model_name,
Expand Down
67 changes: 67 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
OPEN_ORCA,
)
from genai_perf.llm_inputs.llm_inputs import (
ImageFormat,
LlmInputs,
ModelSelectionStrategy,
OutputFormat,
Expand Down Expand Up @@ -116,6 +117,25 @@ def _check_compare_args(
return args


def _check_image_input_args(
parser: argparse.ArgumentParser, args: argparse.Namespace
) -> argparse.Namespace:
"""
Sanity check the image input args
"""
if args.image_width_mean <= 0 or args.image_height_mean <= 0:
parser.error(
"Both --image-width-mean and --image-height-mean values must be positive."
)
if args.image_width_stddev < 0 or args.image_height_stddev < 0:
parser.error(
"Both --image-width-stddev and --image-height-stddev values must be non-negative."
)

args = _convert_str_to_enum_entry(args, "image_format", ImageFormat)
return args


def _check_conditional_args(
parser: argparse.ArgumentParser, args: argparse.Namespace
) -> argparse.Namespace:
Expand Down Expand Up @@ -417,6 +437,51 @@ def _add_input_args(parser):
)


def _add_image_input_args(parser):
    """
    Register the "Image Input" argument group on ``parser``.

    Adds the integer options --image-width-mean, --image-width-stddev,
    --image-height-mean and --image-height-stddev (defaults taken from
    ``LlmInputs``) and the --image-format option, whose choices are the
    names of the ``ImageFormat`` enum.
    """
    input_group = parser.add_argument_group("Image Input")

    # The four size options differ only in flag, default and help text, so
    # they are declared as data and registered in the original order.
    size_options = (
        (
            "--image-width-mean",
            LlmInputs.DEFAULT_IMAGE_WIDTH_MEAN,
            "The mean width of images when generating synthetic image data.",
        ),
        (
            "--image-width-stddev",
            LlmInputs.DEFAULT_IMAGE_WIDTH_STDDEV,
            "The standard deviation of width of images when generating synthetic image data.",
        ),
        (
            "--image-height-mean",
            LlmInputs.DEFAULT_IMAGE_HEIGHT_MEAN,
            "The mean height of images when generating synthetic image data.",
        ),
        (
            "--image-height-stddev",
            LlmInputs.DEFAULT_IMAGE_HEIGHT_STDDEV,
            "The standard deviation of height of images when generating synthetic image data.",
        ),
    )
    for flag, default, help_text in size_options:
        input_group.add_argument(
            flag,
            type=int,
            default=default,
            required=False,
            help=help_text,
        )

    input_group.add_argument(
        "--image-format",
        type=str,
        choices=utils.get_enum_names(ImageFormat),
        default="png",
        required=False,
        help="The compression format of the images.",
    )


def _add_profile_args(parser):
profile_group = parser.add_argument_group("Profiling")
load_management_group = profile_group.add_mutually_exclusive_group(required=False)
Expand Down Expand Up @@ -664,6 +729,7 @@ def _parse_profile_args(subparsers) -> argparse.ArgumentParser:
)
_add_endpoint_args(profile)
_add_input_args(profile)
_add_image_input_args(profile)
_add_profile_args(profile)
_add_output_args(profile)
_add_other_args(profile)
Expand Down Expand Up @@ -743,6 +809,7 @@ def refine_args(
args = _infer_prompt_source(args)
args = _check_model_args(parser, args)
args = _check_conditional_args(parser, args)
args = _check_image_input_args(parser, args)
args = _check_load_manager_args(args)
args = _set_artifact_paths(args)
elif args.subcommand == Subcommand.COMPARE.to_lowercase():
Expand Down
40 changes: 39 additions & 1 deletion src/c++/perf_analyzer/genai-perf/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import pytest
from genai_perf import __version__, parser
from genai_perf.llm_inputs.llm_inputs import (
ImageFormat,
ModelSelectionStrategy,
OutputFormat,
PromptSource,
Expand All @@ -40,7 +41,7 @@

class TestCLIArguments:
# ================================================
# GENAI-PERF COMMAND
# PROFILE COMMAND
# ================================================
expected_help_output = (
"CLI to profile LLMs and Generative AI models with Perf Analyzer"
Expand Down Expand Up @@ -215,6 +216,23 @@ def test_help_version_arguments_output_and_exit(
["--synthetic-input-tokens-stddev", "7"],
{"synthetic_input_tokens_stddev": 7},
),
(
["--image-width-mean", "123"],
{"image_width_mean": 123},
),
(
["--image-width-stddev", "123"],
{"image_width_stddev": 123},
),
(
["--image-height-mean", "456"],
{"image_height_mean": 456},
),
(
["--image-height-stddev", "456"],
{"image_height_stddev": 456},
),
(["--image-format", "png"], {"image_format": ImageFormat.PNG}),
(["-v"], {"verbose": True}),
(["--verbose"], {"verbose": True}),
(["-u", "test_url"], {"u": "test_url"}),
Expand Down Expand Up @@ -732,6 +750,26 @@ def test_prompt_source_assertions(self, monkeypatch, mocker, capsys):
captured = capsys.readouterr()
assert expected_output in captured.err

@pytest.mark.parametrize(
    "args",
    [
        # negative numbers
        ["--image-width-mean", "-123"],
        ["--image-width-stddev", "-34"],
        ["--image-height-mean", "-123"],
        ["--image-height-stddev", "-34"],
        # zeros
        ["--image-width-mean", "0"],
        ["--image-height-mean", "0"],
    ],
)
def test_positive_image_input_args(self, monkeypatch, args):
    """Non-positive image means and negative image stddevs abort parsing."""
    combined_args = ["genai-perf", "profile", "-m", "test_model"] + args
    monkeypatch.setattr("sys.argv", combined_args)

    with pytest.raises(SystemExit) as excinfo:
        parser.parse_args()

    # The exit must signal a failure; a zero status would mean the parser
    # terminated for some reason other than rejecting the argument.
    assert excinfo.value.code != 0

# ================================================
# COMPARE SUBCOMMAND
# ================================================
Expand Down
5 changes: 5 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_json_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ def test_generate_json(self, monkeypatch) -> None:
"random_seed": 0,
"synthetic_input_tokens_mean": 550,
"synthetic_input_tokens_stddev": 0,
"image_width_mean": 100,
"image_width_stddev": 0,
"image_height_mean": 100,
"image_height_stddev": 0,
"image_format": "png",
"concurrency": 1,
"measurement_interval": 10000,
"request_rate": null,
Expand Down

0 comments on commit 0127e9b

Please sign in to comment.