-
Notifications
You must be signed in to change notification settings - Fork 234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Synthetic image generator #751
Changes from 13 commits
a8656f8
a5b6dbc
9630704
7915dd7
6176dc4
9f8a426
5178b27
09e81a8
7f5d573
c4e7c35
5673a51
d64cd27
6d5b4ea
edad485
935da0b
d8712eb
b5d4b64
fb6e982
287edba
af0a93a
e0b43fd
1ef4f71
ae66dc3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import base64 | ||
from enum import Enum, auto | ||
from io import BytesIO | ||
from pathlib import Path | ||
from typing import List, Optional, Tuple, cast | ||
Check notice Code scanning / CodeQL Unused import Note
Import of 'List' is not used.
|
||
|
||
import numpy as np | ||
from genai_perf.exceptions import GenAIPerfException | ||
Check notice Code scanning / CodeQL Unused import Note
Import of 'GenAIPerfException' is not used.
|
||
from PIL import Image | ||
|
||
|
||
class ImageFormat(Enum):
    """Image file formats that synthetic images can be encoded in.

    The member name is what gets passed to the image library as the save
    format and, lower-cased, becomes the MIME subtype of the data URL.
    """

    JPEG = auto()
    PNG = auto()
|
||
|
||
class RandomFormatBase64Encoder:
    """Encode images as base64 data URLs using a randomly chosen format.

    Args:
        image_formats: Formats to pick from on every call. When omitted or
            ``None``, only PNG is used.

    Raises:
        ValueError: If ``image_formats`` is empty.
    """

    def __init__(self, image_formats: Optional[List[ImageFormat]] = None):
        # A ``None`` sentinel replaces the former list default: a list literal
        # in the signature is created once and shared by every instance.
        if image_formats is None:
            image_formats = [ImageFormat.PNG]

        # Keep a private copy so later mutation of the caller's list cannot
        # change which formats this encoder draws from.
        self.image_formats = list(image_formats)
        if not self.image_formats:
            # Fail here with a clear message rather than inside
            # ``np.random.randint(0)`` on the first call.
            raise ValueError("image_formats must contain at least one format")

    def __call__(self, image):
        """Return ``image`` as a ``data:image/<format>;base64,<data>`` string.

        Args:
            image: Object exposing ``save(fp, format=...)`` (a PIL image).

        Returns:
            The data URL as ``str``; ``<format>`` is the lower-cased name of
            the randomly selected format.
        """
        choice = np.random.randint(len(self.image_formats))
        image_format = self.image_formats[choice]

        buffered = BytesIO()
        image.save(buffered, format=image_format.name)
        data = base64.b64encode(buffered.getvalue()).decode("utf-8")

        prefix = f"data:image/{image_format.name.lower()};base64"
        return f"{prefix},{data}"
mwawrzos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def images_from_file_generator(image_path: Path):
    """Yield the image stored at ``image_path`` endlessly.

    The file is opened once and the same image object is yielded on every
    iteration. Because this is a generator function, nothing runs until the
    first ``next()`` call, so a missing file is reported at that point and
    not when the generator is created.

    Args:
        image_path: Location of the image file.

    Yields:
        The image opened from ``image_path``.

    Raises:
        GenAIPerfException: On first iteration, if ``image_path`` does not
            exist.
    """
    if not image_path.exists():
        raise GenAIPerfException(f"File not found: {image_path}")

    image = Image.open(image_path)
    # ``Image.open`` is lazy: it reads the header and keeps the file open.
    # Decode the pixel data now so the handle is not held until some later
    # consumer happens to trigger loading (Pillow documents that loading
    # closes the file for single-frame images).
    image.load()
    while True:
        yield image
mwawrzos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def white_images_generator():
    """Yield a 100x100 white RGB image over and over.

    The image is created once; every iteration yields that same object.
    """
    side = 100
    blank = Image.new(mode="RGB", size=(side, side), color="white")
    while True:
        yield blank
mwawrzos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def build_synthetic_image_generator(
    mean_size: Tuple[int, int],
    dimensions_stddev: Tuple[int, int],
    image_path: Optional[Path] = None,
    formats: Optional[List[ImageFormat]] = None,
):
    """Build an endless generator of base64-encoded synthetic images.

    Args:
        mean_size: Mean size the source image is resized to.
        dimensions_stddev: Standard deviation of each size component.
        image_path: Optional file to use as the source image. When ``None``,
            plain white images are used instead.
        formats: Image formats to choose from when encoding. When omitted or
            ``None``, only PNG is used.

    Returns:
        A generator yielding ``data:image/<format>;base64,<data>`` strings.
    """
    # Resolve the default here rather than in the signature: a list literal
    # default is created once and shared between calls.
    if formats is None:
        formats = [ImageFormat.PNG]

    if image_path is None:
        image_iterator = white_images_generator()
    else:
        image_iterator = images_from_file_generator(image_path)

    image_generator = SyntheticImageGenerator(
        mean_size=mean_size,
        dimensions_stddev=dimensions_stddev,
        image_iterator=image_iterator,
    )
    base64_encode = RandomFormatBase64Encoder(formats)
    return (base64_encode(image) for image in image_generator)
|
||
|
||
class SyntheticImageGenerator:
    """Iterator producing randomly resized copies of source images.

    Each step takes the next image from ``image_iterator`` and resizes it to
    a size drawn from a normal distribution, redrawing until both dimensions
    are positive.

    Args:
        mean_size: Mean of the target size (two components).
        dimensions_stddev: Standard deviation of each size component.
        image_iterator: Iterator supplying the images to resize; each item
            must expose ``resize(size)``.
    """

    def __init__(
        self,
        mean_size,
        dimensions_stddev,
        image_iterator,
    ):
        self.image_iterator = image_iterator
        self.mean_size = mean_size
        self.dimensions_stddev = dimensions_stddev

    def __iter__(self):
        return self

    def _sample_random_positive_pair(
        self, mean: Tuple[int, int], stddev: Tuple[int, int]
    ) -> Tuple[int, int]:
        """Draw an integer pair from a normal distribution, both values > 0.

        Args:
            mean: Mean of each component.
            stddev: Standard deviation of each component.

        Returns:
            A pair of Python ints, each at least 1.

        Raises:
            ValueError: If a component has zero deviation and a mean that
                truncates to a non-positive integer, since no draw could
                ever be accepted.
        """
        # Without this guard the rejection loop below would never terminate
        # for a deterministic (stddev == 0) non-positive component.
        for component_mean, component_stddev in zip(mean, stddev):
            if component_stddev == 0 and int(component_mean) <= 0:
                raise ValueError(
                    "cannot sample a positive size: "
                    f"mean={mean}, stddev={stddev}"
                )

        while True:
            # Use the arguments (not the instance attributes) so the helper
            # honours whatever the caller passes in.
            sample = np.random.normal(mean, stddev)
            # ``int()`` truncates toward zero; convert to built-in ints so
            # the result matches the annotated return type.
            width, height = (int(dim) for dim in sample)
            if width > 0 and height > 0:
                return width, height

    def random_resize(self, image):
        """Return ``image`` resized to a freshly sampled positive size."""
        new_size = self._sample_random_positive_pair(
            self.mean_size, self.dimensions_stddev
        )
        return image.resize(new_size)

    def __next__(self):
        image = next(self.image_iterator)
        image = self.random_resize(image)
        return image
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,106 @@ | ||||||
import base64 | ||||||
from io import BytesIO | ||||||
from pathlib import Path | ||||||
Check notice Code scanning / CodeQL Unused import Note test
Import of 'Path' is not used.
|
||||||
from unittest.mock import patch | ||||||
Check notice Code scanning / CodeQL Unused import Note test
Import of 'patch' is not used.
|
||||||
|
||||||
import numpy as np | ||||||
import pytest | ||||||
from genai_perf.exceptions import GenAIPerfException | ||||||
Check notice Code scanning / CodeQL Unused import Note test
Import of 'GenAIPerfException' is not used.
|
||||||
from genai_perf.llm_inputs.synthetic_image_generator import ( | ||||||
ImageFormat, | ||||||
RandomFormatBase64Encoder, | ||||||
SyntheticImageGenerator, | ||||||
images_from_file_generator, | ||||||
white_images_generator, | ||||||
) | ||||||
from PIL import Image | ||||||
|
||||||
|
||||||
@pytest.mark.parametrize("image_size", [(100, 100), (200, 200)])
def test_different_image_size(image_size):
    """With zero deviation, the produced image has exactly the mean size."""
    generator = SyntheticImageGenerator(
        image_iterator=white_images_generator(),
        mean_size=image_size,
        dimensions_stddev=[0, 0],
    )

    resized = next(generator)

    assert isinstance(
        resized, Image.Image
    ), "generator produces unexpected type of data"
    assert resized.size == image_size, "image not resized to the target size"
|
||||||
|
||||||
def test_negative_size_is_not_selected():
    """A negative mean size must still yield an image with positive size."""
    sut = SyntheticImageGenerator(
        mean_size=(-1, -1),
        dimensions_stddev=[10, 10],
        image_iterator=white_images_generator(),
    )

    # exception is raised, when PIL.Image.resize is called with negative values
    image = next(sut)

    # State the expectation explicitly instead of relying only on the
    # absence of an exception from the resize call.
    assert all(dim > 0 for dim in image.size), "non-positive image size selected"
|
||||||
|
||||||
@patch("pathlib.Path.exists", return_value=False)
def test_images_from_file_raises_when_file_not_found(mock_exists):
    """A missing file is reported on the first iteration of the generator."""
    missing_path = Path("dummy-image.png")
    generator = images_from_file_generator(missing_path)

    # The generator body only runs on the first ``next`` call.
    with pytest.raises(GenAIPerfException):
        next(generator)
|
||||||
|
||||||
# Stand-in returned by the patched ``PIL.Image.open`` in the test below.
DUMMY_IMAGE = Image.new("RGB", (100, 100), color="blue")


@patch("pathlib.Path.exists", return_value=True)
@patch("PIL.Image.open", return_value=DUMMY_IMAGE)
def test_images_from_file_generates_multiple_times(mock_file, mock_exists):
    """The file is opened once and the same image is produced repeatedly."""
    image_path = Path("dummy-image.png")
    generator = images_from_file_generator(image_path)

    first = next(generator)
    mock_exists.assert_called_once()
    mock_file.assert_called_once_with(image_path)
    assert first == DUMMY_IMAGE, "unexpected image produced"

    second = next(generator)
    assert second == DUMMY_IMAGE, "unexpected image produced"
|
||||||
|
||||||
def test_white_images_generator():
    """The white-image generator yields a PIL image whose pixels are all white."""
    produced = next(white_images_generator())

    assert isinstance(
        produced, Image.Image
    ), "generator produces unexpected type of data"

    # A single white RGB pixel, broadcast against the whole image array.
    white_pixel = np.array([[[255, 255, 255]]])
    pixels = np.array(produced)
    assert np.all(pixels == white_pixel), "not all pixels are white"
|
||||||
|
||||||
@pytest.mark.parametrize("image_format", [ImageFormat.PNG, ImageFormat.JPEG])
def test_base64_encoding_with_different_formats(image_format):
    """The encoder emits a data URL whose payload decodes to the chosen format."""
    source = Image.new("RGB", (100, 100))
    encoder = RandomFormatBase64Encoder(image_formats=[image_format])

    encoded = encoder(source)

    expected_prefix = f"data:image/{image_format.name.lower()};base64,"
    assert encoded.startswith(expected_prefix), "unexpected prefix"
    payload = encoded[len(expected_prefix) :]

    # test if generator encodes to base64
    raw_bytes = base64.b64decode(payload)
    # test if an image is encoded
    decoded = Image.open(BytesIO(raw_bytes))

    assert decoded.format == image_format.name
Check notice
Code scanning / CodeQL
Unused import Note