Skip to content

Commit

Permalink
Make image dirs data format streamable (#1576)
Browse files Browse the repository at this point in the history
<!-- Contributing guide:
https://github.com/openvinotoolkit/datumaro/blob/develop/CONTRIBUTING.md
-->

### Summary
Geti requires all data formats to be streamable, i.e., able to generate
dataset items on demand.
This PR enables streaming for the image dir format.
<!--
Resolves #111 and #222.
Depends on #1000 (for series of dependent commits).

This PR introduces this capability to make the project better in this
and that.

- Added this feature
- Removed that feature
- Fixed the problem #1234
-->

### How to test
<!-- Describe the testing procedure for reviewers, if changes are
not fully covered by unit tests or manual testing can be complicated.
-->

### Checklist
<!-- Put an 'x' in all the boxes that apply -->
- [ ] I have added unit tests to cover my changes.
- [ ] I have added integration tests to cover my changes.
- [ ] I have added the description of my changes into
[CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).
- [ ] I have updated the
[documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs)
accordingly.

### License

- [x] I submit _my code changes_ under the same [MIT
License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE)
that covers the project.
  Feel free to contact the maintainers if that's a concern.
- [ ] I have updated the license header for each file (see an example
below).

```python
# Copyright (C) 2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
```
  • Loading branch information
itrushkin authored Aug 5, 2024
2 parents bc1ee4d + 1874a9c commit 30b1add
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 12 deletions.
4 changes: 4 additions & 0 deletions src/datumaro/plugins/data_formats/image_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def __init__(
)
self._ann_types = set()

@property
def is_stream(self) -> bool:
return True


class ImageDirExporter(Exporter):
DEFAULT_IMAGE_EXT = ".jpg"
Expand Down
Binary file added tests/assets/image_dir_dataset/1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/assets/image_dir_dataset/2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
43 changes: 31 additions & 12 deletions tests/unit/test_image_dir_format.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
from unittest import TestCase

import numpy as np
import pytest

from datumaro.components.dataset import Dataset, StreamDataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.media import Image
from datumaro.components.project import Dataset
from datumaro.plugins.data_formats.image_dir import ImageDirExporter

from ..requirements import Requirements, mark_requirement

from tests.utils.test_utils import TestDir, check_save_and_load
from tests.utils.assets import get_test_asset_path
from tests.utils.test_utils import TestDir, check_save_and_load, compare_datasets

DUMMY_DATASET_DIR = get_test_asset_path("image_dir_dataset")
FORMAT_NAME = "image_dir"


class ImageDirFormatTest(TestCase):
class ImageDirFormatTest:
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_load(self):
def test_can_load(self, helper_tc):
dataset = Dataset.from_iterable(
[
DatasetItem(id=1, media=Image.from_numpy(data=np.ones((10, 6, 3)))),
Expand All @@ -24,16 +28,31 @@ def test_can_load(self):

with TestDir() as test_dir:
check_save_and_load(
self,
helper_tc,
dataset,
ImageDirExporter.convert,
test_dir,
importer="image_dir",
importer=FORMAT_NAME,
require_media=True,
)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
@pytest.mark.parametrize("dataset_cls, is_stream", [(Dataset, False), (StreamDataset, True)])
def test_can_import(self, dataset_cls, is_stream, helper_tc):
expected_dataset = Dataset.from_iterable(
[
DatasetItem(id="1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG")),
DatasetItem(id="2", media=Image.from_numpy(data=np.zeros((3, 4, 3)), ext=".bmp")),
]
)

actual_dataset = dataset_cls.import_from(DUMMY_DATASET_DIR, FORMAT_NAME)

assert actual_dataset.is_stream == is_stream
compare_datasets(helper_tc, expected_dataset, actual_dataset)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self, helper_tc):
dataset = Dataset.from_iterable(
[
DatasetItem(
Expand All @@ -44,11 +63,11 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):

with TestDir() as test_dir:
check_save_and_load(
self, dataset, ImageDirExporter.convert, test_dir, importer="image_dir"
helper_tc, dataset, ImageDirExporter.convert, test_dir, importer=FORMAT_NAME
)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_and_load_image_with_arbitrary_extension(self):
def test_can_save_and_load_image_with_arbitrary_extension(self, helper_tc):
dataset = Dataset.from_iterable(
[
DatasetItem(id="1", media=Image.from_numpy(data=np.zeros((4, 3, 3)), ext=".JPEG")),
Expand All @@ -58,10 +77,10 @@ def test_can_save_and_load_image_with_arbitrary_extension(self):

with TestDir() as test_dir:
check_save_and_load(
self,
helper_tc,
dataset,
ImageDirExporter.convert,
test_dir,
importer="image_dir",
importer=FORMAT_NAME,
require_media=True,
)

0 comments on commit 30b1add

Please sign in to comment.