From 6a612ef0f9038d166fc14e50313787860821ef2d Mon Sep 17 00:00:00 2001 From: Vinnam Kim Date: Wed, 19 Jul 2023 16:57:07 +0900 Subject: [PATCH] Enable stream exporters: VOC, YOLO, Datumaro, and COCO data format (#1102) - Ticket no. 114762 Signed-off-by: Kim, Vinnam --- CHANGELOG.md | 2 +- src/datumaro/components/dataset.py | 4 +- src/datumaro/components/dataset_storage.py | 11 +- src/datumaro/components/exporter.py | 12 + .../plugins/data_formats/arrow/exporter.py | 9 +- .../plugins/data_formats/coco/exporter.py | 228 +++++++++++++++--- .../plugins/data_formats/datumaro/exporter.py | 85 ++++++- .../data_formats/datumaro_binary/exporter.py | 5 +- .../plugins/data_formats/voc/exporter.py | 4 + .../plugins/data_formats/yolo/exporter.py | 4 + tests/unit/components/conftest.py | 3 + tests/unit/components/test_dataset_storage.py | 16 +- tests/unit/data_formats/base.py | 8 +- .../datumaro/test_datumaro_format.py | 2 +- .../data_formats/test_yolo_strict_format.py | 26 +- tests/unit/test_coco_format.py | 182 +++++++++----- tests/utils/test_utils.py | 2 +- 17 files changed, 469 insertions(+), 134 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b5de493b6..c0f6439cdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,7 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Migrate DVC v3.0.0 () - Stream dataset import/export - (, , , , , ) + (, , , , , , ) - Support mask annotations for CVAT data format () diff --git a/src/datumaro/components/dataset.py b/src/datumaro/components/dataset.py index cf23073a04..bdb1276888 100644 --- a/src/datumaro/components/dataset.py +++ b/src/datumaro/components/dataset.py @@ -587,6 +587,7 @@ def export( assert "ctx" not in kwargs exporter_kwargs = copy(kwargs) + exporter_kwargs["stream"] = self._stream exporter_kwargs["ctx"] = ExportContext( progress_reporter=progress_reporter, error_policy=error_policy ) @@ -632,7 +633,8 @@ def export( raise e.__cause__ self.bind(save_dir, format, options=copy(kwargs)) - self.flush_changes() + if not self._stream: + self.flush_changes() def save(self, save_dir: Optional[str] = None, **kwargs) -> None: options = dict(self._options) diff --git a/src/datumaro/components/dataset_storage.py b/src/datumaro/components/dataset_storage.py index 9d69108680..de58716da6 100644 --- a/src/datumaro/components/dataset_storage.py +++ b/src/datumaro/components/dataset_storage.py @@ -612,9 +612,9 @@ def __init__( ): if not source.is_stream: raise ValueError("source should be a stream.") - super().__init__(source, infos, categories, media_type) - self._subset_names = None + self._subset_names = list(source.subsets().keys()) self._transform_ids_for_latest_subset_names = [] + super().__init__(source, infos, categories, media_type) def is_cache_initialized(self) -> bool: log.debug("This function has no effect on streaming.") @@ -660,12 +660,9 @@ def get_subset(self, name: str) -> IDataset: @property def subset_names(self): - if self._subset_names is None: - self._subset_names = {item.subset for item in self} - self._transforms_for_latest_subset_names = [id(t) for t in self._transforms] - elif self._transforms_for_latest_subset_names != [id(t) for t in self._transforms]: + if self._transform_ids_for_latest_subset_names != [id(t) for t in self._transforms]: self._subset_names = {item.subset for item in self} - self._transforms_for_latest_subset_names = [id(t) for t in self._transforms] + self._transform_ids_for_latest_subset_names = [id(t) for t in self._transforms] return self._subset_names diff --git 
a/src/datumaro/components/exporter.py b/src/datumaro/components/exporter.py index 7e58eb7b40..c63efb0c9b 100644 --- a/src/datumaro/components/exporter.py +++ b/src/datumaro/components/exporter.py @@ -182,6 +182,7 @@ def __init__( default_image_ext: Optional[str] = None, save_dataset_meta: bool = False, save_hashkey_meta: bool = False, + stream: bool = False, ctx: Optional[ExportContext] = None, ): default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT @@ -222,6 +223,12 @@ def __init__( else: self._patch = None + if stream and not self.can_stream: + raise DatasetExportError( + f"{self.__class__.__name__} cannot export a dataset in a stream manner" + ) + self._stream = stream + self._ctx: ExportContext = ctx or NullExportContext() def _find_image_ext(self, item: Union[DatasetItem, Image]): @@ -299,6 +306,11 @@ def _check_hash_key_existence(self, item): self._save_hashkey_meta = True return + @property + def can_stream(self) -> bool: + """Flag to indicate whether the exporter can export the dataset in a stream manner or not.""" + return False + # TODO: Currently, ExportContextComponent is introduced only for Datumaro and DatumaroBinary format # for multi-processing. We need to propagate this to everywhere in Datumaro 1.2.0 diff --git a/src/datumaro/plugins/data_formats/arrow/exporter.py b/src/datumaro/plugins/data_formats/arrow/exporter.py index 0d811c3892..42723591b0 100644 --- a/src/datumaro/plugins/data_formats/arrow/exporter.py +++ b/src/datumaro/plugins/data_formats/arrow/exporter.py @@ -58,9 +58,13 @@ def __init__( num_shards: int = 1, max_shard_size: Optional[int] = None, ): - super().__init__(context, "", export_context) + super().__init__( + context=context, + subset=subset, + ann_file="", + export_context=export_context, + ) self._schema = deepcopy(DatumaroArrow.SCHEMA) - self._subset = subset self._writers = [] self._fnames = [] self._max_chunk_size = max_chunk_size @@ -370,6 +374,7 @@ def __init__( num_shards: int = 1, max_shard_size: Optional[int] = None, max_chunk_size: int = 1000, + **kwargs, ): super().__init__( extractor=extractor, diff --git a/src/datumaro/plugins/data_formats/coco/exporter.py b/src/datumaro/plugins/data_formats/coco/exporter.py index f8af43587b..fb59824802 100644 --- a/src/datumaro/plugins/data_formats/coco/exporter.py +++ b/src/datumaro/plugins/data_formats/coco/exporter.py @@ -2,14 +2,18 @@ # # SPDX-License-Identifier: MIT +import json import logging as log import os import os.path as osp +from dataclasses import dataclass from enum import Enum, auto +from io import BufferedWriter from itertools import chain, groupby -from typing import List, Union +from typing import Dict, List, Optional, Type, Union import pycocotools.mask as mask_utils +from json_stream.writer import streamable_dict, streamable_list import datumaro.util.annotation_util as anno_tools import datumaro.util.mask_tools as mask_tools @@ -25,7 +29,7 @@ from datumaro.components.errors import MediaTypeError from datumaro.components.exporter import Exporter from datumaro.components.media import Image -from datumaro.util import cast, dump_json_file, find, str_to_bool +from datumaro.util import cast, dump_json, dump_json_file, find, parse_json, str_to_bool from datumaro.util.image import save_image from .format import CocoPath, CocoTask @@ -37,10 +41,119 @@ class SegmentationMode(Enum): mask = auto() +@dataclass +class _Writer: + fp: BufferedWriter + is_empty: bool + + +class TemporaryWriters: + def __init__(self, subset: str, task: CocoTask, ann_dir: str): + self._subset = subset + 
self._task = task + self._ann_dir = ann_dir + self._writers = tuple() + self.reset() + + def close(self): + for writer in self._writers: + if not writer.fp.closed: + writer.fp.close() + + def remove(self): + self.close() + + for writer in self._writers: + fpath = writer.fp.name + if osp.exists(fpath): + os.remove(fpath) + + def reset(self): + self.remove() + + self._writers = tuple( + _Writer( + fp=open( + osp.join(self._ann_dir, f"__{self._task.name}_{self._subset}_{key}.tmp"), "wb" + ), + is_empty=True, + ) + for key in ["imgs", "anns"] + ) + + def __del__(self): + self.remove() + + @property + def imgs(self) -> _Writer: + return self._writers[0] + + @property + def anns(self) -> _Writer: + return self._writers[1] + + def add_item(self, data: Dict) -> None: + self.imgs.is_empty = False + writer = self.imgs.fp + writer.write(dump_json(data, append_newline=True)) + + def add_anns(self, data: Dict) -> None: + self.anns.is_empty = False + writer = self.anns.fp + writer.write(dump_json(data, append_newline=True)) + + def merge(self, path: str, header: Dict, min_ann_id: Optional[int]) -> None: + self.close() + + @streamable_list + def _gen_images(): + with open(self.imgs.fp.name, "rb") as fp: + for line in fp: + yield parse_json(line) + + @streamable_list + def _gen_anns(): + with open(self.anns.fp.name, "rb") as fp: + next_id = min_ann_id + for line in fp: + ann = parse_json(line) + if min_ann_id is not None and not ann["id"]: + ann["id"] = next_id + next_id += 1 + yield ann + + @streamable_dict + def _gen(): + yield "licenses", header["licenses"] + yield "info", header["info"] + yield "categories", header["categories"] + + if not self.imgs.is_empty: + yield "images", _gen_images() + else: + yield "images", [] + + if not self.anns.is_empty: + yield "annotations", _gen_anns() + else: + yield "annotations", [] + + with open(path, "w", encoding="utf-8") as fp: + json.dump(_gen(), fp) + + self.remove() + + class _TaskExporter: - def __init__(self, context): + def __init__( + self, context: "CocoExporter", subset: str, task: CocoTask, ann_dir: str, stream: bool + ): self._min_ann_id = 1 self._context = context + self._subset = subset + self._task = task + self._ann_dir = ann_dir + self._stream = stream data = {"licenses": [], "info": {}, "categories": [], "images": [], "annotations": []} @@ -55,9 +168,18 @@ def __init__(self, context): "year": "", } self._data = data + self._temporary_writers = TemporaryWriters( + subset=subset, + task=task, + ann_dir=ann_dir, + ) def is_empty(self): - return len(self._data["annotations"]) == 0 + return ( + len(self._data["annotations"]) == 0 + if not self._stream + else self._temporary_writers.anns.is_empty + ) def _get_image_id(self, item): return self._context._get_image_id(item) @@ -68,18 +190,21 @@ def save_image_info(self, item, filename): if item.media and item.media.size: h, w = item.media.size - self._data["images"].append( - { - "id": self._get_image_id(item), - "width": int(w), - "height": int(h), - "file_name": cast(filename, str, ""), - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0, - } - ) + item_desc = { + "id": self._get_image_id(item), + "width": int(w), + "height": int(h), + "file_name": cast(filename, str, ""), + "license": 0, + "flickr_url": "", + "coco_url": "", + "date_captured": 0, + } + + if not self._stream: + self._data["images"].append(item_desc) + else: + self._temporary_writers.add_item(item_desc) def save_categories(self, dataset): raise NotImplementedError() @@ -96,6 +221,9 @@ def write(self, path): 
dump_json_file(path, self._data) + if self._stream: + self._temporary_writers.merge(path, self._data, self._min_ann_id) + @property def annotations(self): return self._data["annotations"] @@ -121,7 +249,11 @@ def _convert_attributes(ann): class _ImageInfoExporter(_TaskExporter): def is_empty(self): - return len(self._data["images"]) == 0 + return ( + len(self._data["images"]) == 0 + if not self._stream + else self._temporary_writers.imgs.is_empty + ) def save_categories(self, dataset): pass @@ -158,7 +290,10 @@ def save_annotations(self, item): if attrs: elem["attributes"] = attrs - self.annotations.append(elem) + if not self._stream: + self.annotations.append(elem) + else: + self._temporary_writers.add_anns(elem) class _InstancesExporter(_TaskExporter): @@ -292,10 +427,16 @@ def save_annotations(self, item): for instance in instances: elem = self.convert_instance(instance, item) - if elem: + + if elem is None: + continue + + if not self._stream: self.annotations.append(elem) + else: + self._temporary_writers.add_anns(elem) - def convert_instance(self, instance, item): + def convert_instance(self, instance, item) -> Optional[Dict]: ann, polygons, mask, bbox = instance is_crowd = mask is not None @@ -387,7 +528,10 @@ def save_annotations(self, item): instance = [points, [], None, points.get_bbox()] elem = super().convert_instance(instance, item) elem.update(self.convert_points_object(points)) - self.annotations.append(elem) + if not self._stream: + self.annotations.append(elem) + else: + self._temporary_writers.add_anns(elem) # Create annotations for complete instance + keypoints annotations super().save_annotations(item) @@ -421,7 +565,7 @@ def convert_points_object(ann): "num_keypoints": num_annotated, } - def convert_instance(self, instance, item): + def convert_instance(self, instance, item) -> Optional[Dict]: points_ann = find( item.annotations, lambda x: x.type == AnnotationType.points @@ -474,7 +618,10 @@ def save_annotations(self, item): if attrs: elem["attributes"] = attrs - self.annotations.append(elem) + if not self._stream: + self.annotations.append(elem) + else: + self._temporary_writers.add_anns(elem) class _StuffExporter(_InstancesExporter): @@ -485,6 +632,9 @@ class _PanopticExporter(_TaskExporter): def write(self, path): dump_json_file(path, self._data) + if self._stream: + self._temporary_writers.merge(path, self._data, None) + def save_categories(self, dataset): label_categories = dataset.categories().get(AnnotationType.label) if label_categories is None: @@ -541,7 +691,11 @@ def save_annotations(self, item): "file_name": ann_filename, "segments_info": segments_info, } - self.annotations.append(elem) + + if not self._stream: + self.annotations.append(elem) + else: + self._temporary_writers.add_anns(elem) class CocoExporter(Exporter): @@ -621,7 +775,7 @@ def build_cmdline_parser(cls, **kwargs): DEFAULT_IMAGE_EXT = CocoPath.IMAGE_EXT - _TASK_CONVERTER = { + _TASK_CONVERTER: Dict[CocoTask, Type[_TaskExporter]] = { CocoTask.image_info: _ImageInfoExporter, CocoTask.instances: _InstancesExporter, CocoTask.person_keypoints: _KeypointsExporter, @@ -641,9 +795,10 @@ def __init__( allow_attributes=True, reindex=False, merge_images=False, + stream: bool = False, **kwargs, ): - super().__init__(extractor, save_dir, **kwargs) + super().__init__(extractor, save_dir, stream=stream, **kwargs) assert tasks is None or isinstance(tasks, (CocoTask, list, str)) if isinstance(tasks, CocoTask): @@ -693,14 +848,21 @@ def _make_segmentation_dir(self, subset_name): ) 
os.makedirs(self._segmentation_dir, exist_ok=True) - def _make_task_converter(self, task): + def _make_task_converter(self, task: CocoTask, subset: str) -> _TaskExporter: if task not in self._TASK_CONVERTER: raise NotImplementedError() - return self._TASK_CONVERTER[task](self) + return self._TASK_CONVERTER[task]( + context=self, + subset=subset, + task=task, + ann_dir=self._ann_dir, + stream=self._stream, + ) - def _make_task_converters(self): + def _make_task_converters(self, subset: str): return { - task: self._make_task_converter(task) for task in (self._tasks or self._TASK_CONVERTER) + task: self._make_task_converter(task, subset) + for task in (self._tasks or self._TASK_CONVERTER) } def _get_image_id(self, item): @@ -725,7 +887,7 @@ def _apply_impl(self): subsets = self._extractor.subsets() pbars = self._ctx.progress_reporter.split(len(subsets)) for pbar, (subset_name, subset) in zip(pbars, subsets.items()): - task_converters = self._make_task_converters() + task_converters = self._make_task_converters(subset_name) for task_conv in task_converters.values(): task_conv.save_categories(subset) if CocoTask.panoptic in task_converters: @@ -795,6 +957,10 @@ def patch(cls, dataset, patch, save_dir, **kwargs): if osp.isfile(image_path): os.unlink(image_path) + @property + def can_stream(self) -> bool: + return True + class CocoInstancesExporter(CocoExporter): def __init__(self, *args, **kwargs): diff --git a/src/datumaro/plugins/data_formats/datumaro/exporter.py b/src/datumaro/plugins/data_formats/datumaro/exporter.py index ae14944cec..f45213b00f 100644 --- a/src/datumaro/plugins/data_formats/datumaro/exporter.py +++ b/src/datumaro/plugins/data_formats/datumaro/exporter.py @@ -4,15 +4,17 @@ # pylint: disable=no-self-use +import json import os import os.path as osp import shutil from contextlib import contextmanager from multiprocessing.pool import Pool -from typing import Optional +from typing import Dict, Optional import numpy as np import pycocotools.mask as mask_utils +from json_stream.writer import streamable_dict, streamable_list from datumaro.components.annotation import ( Annotation, @@ -43,8 +45,15 @@ class _SubsetWriter: - def __init__(self, context: Exporter, ann_file: str, export_context: ExportContextComponent): + def __init__( + self, + context: Exporter, + subset: str, + ann_file: str, + export_context: ExportContextComponent, + ): self._context = context + self._subset = subset self._data = { "dm_format_version": DATUMARO_FORMAT_VERSION, @@ -122,7 +131,10 @@ def context_save_media( else: raise NotImplementedError - def add_item(self, item: DatasetItem, *args, **kwargs): + def add_item(self, item: DatasetItem, *args, **kwargs) -> None: + self.items.append(self._gen_item_desc(item)) + + def _gen_item_desc(self, item: DatasetItem, *args, **kwargs) -> Dict: annotations = [] item_desc = { "id": item.id, @@ -155,8 +167,6 @@ def add_item(self, item: DatasetItem, *args, **kwargs): elif isinstance(item.media, MediaElement): item_desc["media"] = {"path": getattr(item.media, "path", None)} - self.items.append(item_desc) - for ann in item.annotations: if isinstance(ann, Label): converted_ann = self._convert_label_object(ann) @@ -182,6 +192,8 @@ def add_item(self, item: DatasetItem, *args, **kwargs): raise NotImplementedError() annotations.append(converted_ann) + return item_desc + def add_infos(self, infos): self._data["infos"].update(infos) @@ -367,6 +379,40 @@ def _convert_points_categories(self, obj): return converted +class _StreamSubsetWriter(_SubsetWriter): + def __init__( + self, + 
context: Exporter, + subset: str, + ann_file: str, + export_context: ExportContextComponent, + ): + super().__init__(context, subset, ann_file, export_context) + + def write(self, *args, **kwargs): + @streamable_list + def _item_list(): + subset = self._context._extractor.get_subset(self._subset) + pbar = self._context._ctx.progress_reporter + for item in pbar.iter(subset, desc=f"Exporting '{self._subset}'"): + yield self._gen_item_desc(item) + + @streamable_dict + def _data(): + yield "dm_format_version", self._data["dm_format_version"] + yield "media_type", self._data["media_type"] + yield "infos", self._data["infos"] + yield "categories", self._data["categories"] + yield "items", _item_list() + + with open(self.ann_file, "w", encoding="utf-8") as fp: + json.dump(_data(), fp) + + def is_empty(self): + # TODO: Force empty to be False, but it should be fixed with refactoring `_SubsetWriter`.` + return False + + class DatumaroExporter(Exporter): DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT PATH_CLS = DatumaroPath @@ -387,10 +433,20 @@ def create_writer( default_image_ext=self._default_image_ext, ) - return _SubsetWriter( - context=self, - ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), - export_context=export_context, + return ( + _SubsetWriter( + context=self, + subset=subset, + ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), + export_context=export_context, + ) + if not self._stream + else _StreamSubsetWriter( + context=self, + subset=subset, + ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), + export_context=export_context, + ) ) def _apply_impl(self, pool: Optional[Pool] = None, *args, **kwargs): @@ -421,10 +477,9 @@ def _apply_impl(self, pool: Optional[Pool] = None, *args, **kwargs): pbar = self._ctx.progress_reporter for subset_name, subset in self._extractor.subsets().items(): - for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"): - writers[subset_name].add_item(item, pool) - - self._check_hash_key_existence(item) + if not self._stream: + for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"): + writers[subset_name].add_item(item, pool) for subset, writer in writers.items(): if self._patch and subset in self._patch.updated_subsets and writer.is_empty(): @@ -470,3 +525,7 @@ def patch(cls, dataset, patch, save_dir, **kwargs): related_images_path = osp.join(save_dir, cls.PATH_CLS.IMAGES_DIR, item.subset, item.id) if osp.isdir(related_images_path): shutil.rmtree(related_images_path) + + @property + def can_stream(self) -> bool: + return True diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py index 9cb4e44965..2b894c2d09 100644 --- a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py +++ b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py @@ -33,13 +33,14 @@ class _SubsetWriter(__SubsetWriter): def __init__( self, context: Exporter, + subset: str, ann_file: str, export_context: ExportContextComponent, secret_key_file: str, no_media_encryption: bool = False, max_blob_size: int = DatumaroBinaryPath.MAX_BLOB_SIZE, ): - super().__init__(context, ann_file, export_context) + super().__init__(context, subset, ann_file, export_context) self._crypter = self.export_context.crypter self.secret_key_file = secret_key_file @@ -248,6 +249,7 @@ def __init__( encryption: bool = False, num_workers: int = 0, max_blob_size: int = DatumaroBinaryPath.MAX_BLOB_SIZE, + **kwargs, ): """ 
Parameters @@ -308,6 +310,7 @@ def create_writer(self, subset: str, images_dir: str, pcd_dir: str) -> _SubsetWr return _SubsetWriter( context=self, + subset=subset, ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), export_context=export_context, secret_key_file=osp.join(self._save_dir, self.PATH_CLS.SECRET_KEY_FILE), diff --git a/src/datumaro/plugins/data_formats/voc/exporter.py b/src/datumaro/plugins/data_formats/voc/exporter.py index 1b03532ba7..f994ddb5f5 100644 --- a/src/datumaro/plugins/data_formats/voc/exporter.py +++ b/src/datumaro/plugins/data_formats/voc/exporter.py @@ -795,6 +795,10 @@ def patch(cls, dataset, patch, save_dir, **kwargs): if osp.isfile(path): os.unlink(path) + @property + def can_stream(self) -> bool: + return True + class VocClassificationExporter(VocExporter): def __init__(self, *args, **kwargs): diff --git a/src/datumaro/plugins/data_formats/yolo/exporter.py b/src/datumaro/plugins/data_formats/yolo/exporter.py index b40d428c3a..3cfbeb3994 100644 --- a/src/datumaro/plugins/data_formats/yolo/exporter.py +++ b/src/datumaro/plugins/data_formats/yolo/exporter.py @@ -189,6 +189,10 @@ def patch(cls, dataset, patch, save_dir, **kwargs): if osp.isfile(ann_path): os.remove(ann_path) + @property + def can_stream(self) -> bool: + return True + class YoloUltralyticsExporter(YoloExporter): allowed_subset_names = {"train", "val", "test"} diff --git a/tests/unit/components/conftest.py b/tests/unit/components/conftest.py index 4767fc4717..4eefd5567c 100644 --- a/tests/unit/components/conftest.py +++ b/tests/unit/components/conftest.py @@ -9,6 +9,7 @@ from datumaro.components.annotation import AnnotationType, Label, LabelCategories from datumaro.components.dataset_base import CategoriesInfo, DatasetInfo, DatasetItem, IDataset from datumaro.components.media import MediaElement +from datumaro.util.definitions import DEFAULT_SUBSET_NAME @pytest.fixture @@ -52,4 +53,6 @@ def _reset_iter(): stream_extractor.media_type.return_value = MediaElement stream_extractor.is_stream = True + stream_extractor.subsets.return_value = {DEFAULT_SUBSET_NAME: stream_extractor} + return stream_extractor diff --git a/tests/unit/components/test_dataset_storage.py b/tests/unit/components/test_dataset_storage.py index 953c0e1442..b3a0a86fbc 100644 --- a/tests/unit/components/test_dataset_storage.py +++ b/tests/unit/components/test_dataset_storage.py @@ -48,9 +48,8 @@ def test_subsets(self, fxt_stream_extractor: MagicMock): n_calls = 3 self._test_loop(fxt_stream_extractor, storage, n_calls) - # Iterator should be called more than 3 times (n_calls = 3), - # since it should run the iterator additionally to obtain the subsets dict from the stream. - assert fxt_stream_extractor.__iter__.call_count > n_calls + # Iterator should be called 3 times (n_calls = 3) + assert fxt_stream_extractor.__iter__.call_count == n_calls def _test_subsets( self, fxt_stream_extractor, storage, expect: Set[str] = {DEFAULT_SUBSET_NAME} @@ -90,20 +89,21 @@ def test_item_transform(self, fxt_stream_extractor: MagicMock): def test_subset_transform(self, fxt_stream_extractor: MagicMock): storage = StreamDatasetStorage(source=fxt_stream_extractor) + # No need to iterate since the source already has subset info. 
self._test_subsets(fxt_stream_extractor, storage) - assert fxt_stream_extractor.__iter__.call_count == 1 + assert fxt_stream_extractor.__iter__.call_count == 0 - # Stack transform 1 level + # Stack transform 1 level, should run 1 iter to get the subset info after transform storage.transform(RandomSplit, splits=[("train", 0.5), ("val", 0.5)], seed=3003) self._test_subsets(fxt_stream_extractor, storage, expect={"train", "val"}) - assert fxt_stream_extractor.__iter__.call_count == 2 + assert fxt_stream_extractor.__iter__.call_count == 1 - # Stack transform 2 level + # Stack transform 2 level, should run 1 iter more to get the subset info after transform storage.transform( MapSubsets, mapping={"train": DEFAULT_SUBSET_NAME, "val": DEFAULT_SUBSET_NAME} ) self._test_subsets(fxt_stream_extractor, storage) - assert fxt_stream_extractor.__iter__.call_count == 3 + assert fxt_stream_extractor.__iter__.call_count == 2 def test_info_transform(self, fxt_stream_extractor: MagicMock, fxt_infos: DatasetInfo): storage = StreamDatasetStorage(source=fxt_stream_extractor) diff --git a/tests/unit/data_formats/base.py b/tests/unit/data_formats/base.py index e7536b668c..147e6b4435 100644 --- a/tests/unit/data_formats/base.py +++ b/tests/unit/data_formats/base.py @@ -75,11 +75,15 @@ def test_can_export_and_import( helper_tc = request.getfixturevalue("helper_tc") + stream = True if dataset_cls == StreamDataset else False exporter.convert( - fxt_expected_dataset, save_dir=test_dir, save_media=True, **fxt_export_kwargs + fxt_expected_dataset, + save_dir=test_dir, + save_media=True, + stream=stream, + **fxt_export_kwargs, ) dataset = dataset_cls.import_from(test_dir, importer.NAME, **fxt_import_kwargs) - stream = True if dataset_cls == StreamDataset else False check_is_stream(dataset, stream) compare_datasets(helper_tc, fxt_expected_dataset, dataset, require_media=True) diff --git a/tests/unit/data_formats/datumaro/test_datumaro_format.py b/tests/unit/data_formats/datumaro/test_datumaro_format.py index 5356745c1f..a3e4e5575e 100644 --- a/tests/unit/data_formats/datumaro/test_datumaro_format.py +++ b/tests/unit/data_formats/datumaro/test_datumaro_format.py @@ -132,7 +132,7 @@ def test_can_save_and_load( self._test_save_and_load( helper_tc, fxt_dataset, - partial(self.exporter.convert, save_media=True, **fxt_export_kwargs), + partial(self.exporter.convert, save_media=True, stream=stream, **fxt_export_kwargs), test_dir, compare=compare, require_media=require_media, diff --git a/tests/unit/data_formats/test_yolo_strict_format.py b/tests/unit/data_formats/test_yolo_strict_format.py index 95987b7b58..94f9327c01 100644 --- a/tests/unit/data_formats/test_yolo_strict_format.py +++ b/tests/unit/data_formats/test_yolo_strict_format.py @@ -71,7 +71,7 @@ def test_can_save_and_load(self, dataset_cls, is_stream, test_dir, helper_tc): categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir, save_media=True) + YoloExporter.convert(source_dataset, test_dir, save_media=True, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -95,7 +95,7 @@ def test_can_save_dataset_with_image_info(self, dataset_cls, is_stream, test_dir categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir) + YoloExporter.convert(source_dataset, test_dir, stream=is_stream) save_image( osp.join(test_dir, "obj_train_data", "1.jpg"), np.ones((10, 15, 3)) @@ -125,7 +125,7 @@ def 
test_can_load_dataset_with_exact_image_info( categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir) + YoloExporter.convert(source_dataset, test_dir, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo", image_info={"1": (10, 15)}) assert parsed_dataset.is_stream == is_stream @@ -152,7 +152,7 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename( categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir, save_media=True) + YoloExporter.convert(source_dataset, test_dir, save_media=True, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -177,7 +177,7 @@ def test_relative_paths(self, dataset_cls, is_stream, save_media, test_dir, help categories=[], ) - YoloExporter.convert(source_dataset, test_dir, save_media=save_media) + YoloExporter.convert(source_dataset, test_dir, save_media=save_media, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -204,7 +204,7 @@ def test_can_save_and_load_image_with_arbitrary_extension( categories=[], ) - YoloExporter.convert(dataset, test_dir, save_media=True) + YoloExporter.convert(dataset, test_dir, save_media=True, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -231,11 +231,11 @@ def test_inplace_save_writes_only_updated_data( ], categories=[], ) - dataset.export(test_dir, "yolo", save_media=True) + dataset.export(test_dir, "yolo", save_media=True, stream=is_stream) dataset.put(DatasetItem(2, subset="train", media=Image.from_numpy(data=np.ones((3, 2, 3))))) dataset.remove(3, "valid") - dataset.save(save_media=True) + dataset.save(save_media=True, stream=is_stream) assert {"1.txt", "2.txt", "1.jpg", "2.jpg"} == set( os.listdir(osp.join(test_dir, "obj_train_data")) @@ -284,7 +284,9 @@ def test_can_save_and_load_with_meta_file(self, dataset_cls, is_stream, test_dir categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir, save_media=True, save_dataset_meta=True) + YoloExporter.convert( + source_dataset, test_dir, save_media=True, save_dataset_meta=True, stream=is_stream + ) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -311,7 +313,7 @@ def test_can_save_and_load_with_custom_subset_name( categories=["label_" + str(i) for i in range(10)], ) - YoloExporter.convert(source_dataset, test_dir, save_media=True) + YoloExporter.convert(source_dataset, test_dir, save_media=True, stream=is_stream) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream @@ -354,7 +356,9 @@ def test_can_save_and_load_without_path_prefix( categories=["a", "b"], ) - YoloExporter.convert(source_dataset, test_dir, save_media=True, add_path_prefix=False) + YoloExporter.convert( + source_dataset, test_dir, save_media=True, add_path_prefix=False, stream=is_stream + ) parsed_dataset = dataset_cls.import_from(test_dir, "yolo") assert parsed_dataset.is_stream == is_stream diff --git a/tests/unit/test_coco_format.py b/tests/unit/test_coco_format.py index 008e7b69e8..c48038c27b 100644 --- a/tests/unit/test_coco_format.py +++ b/tests/unit/test_coco_format.py @@ -1456,23 +1456,35 @@ def test_can_report_invalid_ann_field_type(self): self.assertEqual(capture.exception.__cause__.actual, str(type(value))) -class 
CocoExporterTest(TestCase): +class CocoExporterTest: def _test_save_and_load( - self, source_dataset, converter, test_dir, target_dataset=None, importer_args=None, **kwargs + self, + source_dataset, + converter, + test_dir, + target_dataset=None, + importer_args=None, + stream: bool = False, + **kwargs, ): return check_save_and_load( - self, + TestCase(), source_dataset, converter, test_dir, importer="coco", target_dataset=target_dataset, importer_args=importer_args, + stream=stream, **kwargs, ) + @pytest.fixture(params=[True, False]) + def stream(self, request: pytest.FixtureRequest) -> bool: + return request.param + @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_captions(self): + def test_can_save_and_load_captions(self, stream: bool, test_dir: str): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -1503,11 +1515,15 @@ def test_can_save_and_load_captions(self): ] ) - with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoCaptionsExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, + CocoCaptionsExporter.convert, + test_dir, + stream=stream, + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_instances(self): + def test_can_save_and_load_instances(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -1663,10 +1679,11 @@ def test_can_save_and_load_instances(self): CocoInstancesExporter.convert, test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_panoptic(self): + def test_can_save_and_load_panoptic(self, stream: bool): dataset = Dataset.from_iterable( [ DatasetItem( @@ -1734,10 +1751,11 @@ def test_can_save_and_load_panoptic(self): partial(CocoPanopticExporter.convert, save_media=True), test_dir, require_media=True, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_stuff(self): + def test_can_save_and_load_stuff(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -1842,11 +1860,15 @@ def test_can_save_and_load_stuff(self): with TestDir() as test_dir: self._test_save_and_load( - source_dataset, CocoStuffExporter.convert, test_dir, target_dataset=target_dataset + source_dataset, + CocoStuffExporter.convert, + test_dir, + target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_merge_polygons_on_loading(self): + def test_can_merge_polygons_on_loading(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -1910,10 +1932,11 @@ def test_can_merge_polygons_on_loading(self): test_dir, importer_args={"merge_instance_polygons": True}, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_crop_covered_segments(self): + def test_can_crop_covered_segments(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2004,10 +2027,11 @@ def test_can_crop_covered_segments(self): partial(CocoInstancesExporter.convert, crop_covered=True), test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_convert_polygons_to_mask(self): + def test_can_convert_polygons_to_mask(self, stream: bool): """ Description: Ensure that the dataset polygon annotation can be properly converted into dataset segmentation mask. 
@@ -2089,10 +2113,11 @@ def test_can_convert_polygons_to_mask(self): partial(CocoInstancesExporter.convert, segmentation_mode="mask"), test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_convert_masks_to_polygons(self): + def test_can_convert_masks_to_polygons(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2163,10 +2188,11 @@ def test_can_convert_masks_to_polygons(self): partial(CocoInstancesExporter.convert, segmentation_mode="polygons"), test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_images(self): + def test_can_save_and_load_images(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem(id=1, subset="train", attributes={"id": 1}), @@ -2179,10 +2205,15 @@ def test_can_save_and_load_images(self): ) with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoImageInfoExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, + CocoImageInfoExporter.convert, + test_dir, + stream=stream, + ) @mark_requirement(Requirements.DATUM_231) - def test_can_save_dataset_with_cjk_categories(self): + def test_can_save_dataset_with_cjk_categories(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2217,10 +2248,12 @@ def test_can_save_dataset_with_cjk_categories(self): ) with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoInstancesExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, CocoInstancesExporter.convert, test_dir, stream=stream + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): + def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem(id="кириллица с пробелом", subset="train", attributes={"id": 1}), @@ -2228,10 +2261,15 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): ) with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoImageInfoExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, + CocoImageInfoExporter.convert, + test_dir, + stream=stream, + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_labels(self): + def test_can_save_and_load_labels(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2248,10 +2286,15 @@ def test_can_save_and_load_labels(self): ) with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoLabelsExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, + CocoLabelsExporter.convert, + test_dir, + stream=stream, + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_keypoints(self): + def test_can_save_and_load_keypoints(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2351,10 +2394,11 @@ def test_can_save_and_load_keypoints(self): CocoPersonKeypointsExporter.convert, test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_dataset_with_no_subsets(self): + def test_can_save_dataset_with_no_subsets(self, stream: bool): test_dataset = Dataset.from_iterable( [ DatasetItem(id=1, attributes={"id": 1}), @@ -2363,10 +2407,15 @@ def test_can_save_dataset_with_no_subsets(self): ) with TestDir() as test_dir: 
- self._test_save_and_load(test_dataset, CocoExporter.convert, test_dir) + self._test_save_and_load( + test_dataset, + CocoExporter.convert, + test_dir, + stream=stream, + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_dataset_with_image_info(self): + def test_can_save_dataset_with_image_info(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2376,10 +2425,12 @@ def test_can_save_dataset_with_image_info(self): ) with TestDir() as test_dir: - self._test_save_and_load(expected_dataset, CocoImageInfoExporter.convert, test_dir) + self._test_save_and_load( + expected_dataset, CocoImageInfoExporter.convert, test_dir, stream=stream + ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_relative_paths(self): + def test_relative_paths(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2404,10 +2455,11 @@ def test_relative_paths(self): partial(CocoImageInfoExporter.convert, save_media=True), test_dir, require_media=True, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_image_with_arbitrary_extension(self): + def test_can_save_and_load_image_with_arbitrary_extension(self, stream: bool): expected = Dataset.from_iterable( [ DatasetItem( @@ -2429,10 +2481,11 @@ def test_can_save_and_load_image_with_arbitrary_extension(self): partial(CocoImageInfoExporter.convert, save_media=True), test_dir, require_media=True, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_preserve_coco_ids(self): + def test_preserve_coco_ids(self, stream: bool): expected_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2449,10 +2502,11 @@ def test_preserve_coco_ids(self): partial(CocoImageInfoExporter.convert, save_media=True), test_dir, require_media=True, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_annotation_attributes(self): + def test_annotation_attributes(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2506,11 +2560,15 @@ def test_annotation_attributes(self): with TestDir() as test_dir: self._test_save_and_load( - source_dataset, CocoExporter.convert, test_dir, target_dataset=target_dataset + source_dataset, + CocoExporter.convert, + test_dir, + target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_auto_annotation_ids(self): + def test_auto_annotation_ids(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2557,11 +2615,15 @@ def test_auto_annotation_ids(self): with TestDir() as test_dir: self._test_save_and_load( - source_dataset, CocoExporter.convert, test_dir, target_dataset=target_dataset + source_dataset, + CocoExporter.convert, + test_dir, + target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_subset_can_contain_underscore(self): + def test_subset_can_contain_underscore(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2617,11 +2679,15 @@ def test_subset_can_contain_underscore(self): with TestDir() as test_dir: self._test_save_and_load( - source_dataset, CocoExporter.convert, test_dir, target_dataset=target_dataset + source_dataset, + CocoExporter.convert, + test_dir, + target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_reindex(self): + def test_reindex(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2673,10 +2739,11 
@@ def test_reindex(self): partial(CocoExporter.convert, reindex=True), test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_media_in_single_dir(self): + def test_can_save_media_in_single_dir(self, stream: bool): dataset = Dataset.from_iterable( [ DatasetItem( @@ -2694,11 +2761,12 @@ def test_can_save_media_in_single_dir(self): partial(CocoImageInfoExporter.convert, save_media=True, merge_images=True), test_dir, require_media=True, + stream=stream, ) - self.assertTrue(osp.isfile(osp.join(test_dir, "images", "1.jpg"))) + assert osp.isfile(osp.join(test_dir, "images", "1.jpg")) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_media_in_separate_dirs(self): + def test_can_save_media_in_separate_dirs(self, stream: bool): dataset = Dataset.from_iterable( [ DatasetItem( @@ -2716,11 +2784,12 @@ def test_can_save_media_in_separate_dirs(self): partial(CocoImageInfoExporter.convert, save_media=True, merge_images=False), test_dir, require_media=True, + stream=stream, ) - self.assertTrue(osp.isfile(osp.join(test_dir, "images", "train", "1.jpg"))) + assert osp.isfile(osp.join(test_dir, "images", "train", "1.jpg")) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_inplace_save_writes_only_updated_data(self): + def test_inplace_save_writes_only_updated_data(self, stream: bool): expected = Dataset.from_iterable( [ DatasetItem(1, subset="a"), @@ -2741,16 +2810,15 @@ def test_inplace_save_writes_only_updated_data(self): dataset.put(DatasetItem(2, subset="a", media=Image.from_numpy(data=np.ones((3, 2, 3))))) dataset.remove(3, "c") - dataset.save(save_media=True) + dataset.save(save_media=True, stream=stream) - self.assertEqual( - {"image_info_a.json", "image_info_b.json"}, - set(os.listdir(osp.join(path, "annotations"))), + assert {"image_info_a.json", "image_info_b.json"} == set( + os.listdir(osp.join(path, "annotations")) ) - self.assertTrue(osp.isfile(osp.join(path, "images", "a", "2.jpg"))) - self.assertFalse(osp.isfile(osp.join(path, "images", "c", "3.jpg"))) + assert osp.isfile(osp.join(path, "images", "a", "2.jpg")) + assert osp.isfile(osp.join(path, "images", "c", "3.jpg")) == False compare_datasets( - self, + TestCase(), expected, Dataset.import_from(path, "coco"), require_media=True, @@ -2758,7 +2826,7 @@ def test_inplace_save_writes_only_updated_data(self): ) @mark_requirement(Requirements.DATUM_BUG_425) - def test_can_save_and_load_grouped_masks_and_polygons(self): + def test_can_save_and_load_grouped_masks_and_polygons(self, stream: bool): source_dataset = Dataset.from_iterable( [ DatasetItem( @@ -2832,10 +2900,11 @@ def test_can_save_and_load_grouped_masks_and_polygons(self): partial(CocoInstancesExporter.convert), test_dir, target_dataset=target_dataset, + stream=stream, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_panoptic_with_meta_file(self): + def test_can_save_and_load_panoptic_with_meta_file(self, stream: bool): dataset = Dataset.from_iterable( [ DatasetItem( @@ -2888,11 +2957,12 @@ def test_can_save_and_load_panoptic_with_meta_file(self): partial(CocoPanopticExporter.convert, save_media=True, save_dataset_meta=True), test_dir, require_media=True, + stream=stream, ) - self.assertTrue(osp.isfile(osp.join(test_dir, "dataset_meta.json"))) + assert osp.isfile(osp.join(test_dir, "dataset_meta.json")) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_stuff_with_meta_file(self): + def 
test_can_save_and_load_stuff_with_meta_file(self, stream: bool): dataset = Dataset.from_iterable( [ DatasetItem( @@ -2939,11 +3009,12 @@ def test_can_save_and_load_stuff_with_meta_file(self): partial(CocoPanopticExporter.convert, save_media=True, save_dataset_meta=True), test_dir, require_media=True, + stream=stream, ) - self.assertTrue(osp.isfile(osp.join(test_dir, "dataset_meta.json"))) + assert osp.isfile(osp.join(test_dir, "dataset_meta.json")) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_export_and_import_ellipse(self): + def test_can_export_and_import_ellipse(self, stream: bool): ellipses = [ Ellipse(0, 0, 5, 5, id=1, label=1, group=1), Ellipse(5, 5, 10, 10, id=2, label=2, group=2), @@ -2997,4 +3068,5 @@ def test_can_export_and_import_ellipse(self): test_dir, target_dataset=target_dataset, ignored_attrs={"is_crowd"}, + stream=stream, ) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 5541fdfbf0..4862e39ec9 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -311,7 +311,7 @@ def _change_path_in_items(dataset, source_path, target_path): item.media._extra_images = new_images with TemporaryDirectory(prefix=test_dir) as tmp_dir: - converter(source_dataset, test_dir) + converter(source_dataset, test_dir, stream=stream) if move_save_dir: save_dir = tmp_dir for file in os.listdir(test_dir):
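
Usage note. The patch above threads a `stream` flag through the `Exporter` base class (with a `can_stream` property guarding it) and adds streaming writers for the COCO, Datumaro, VOC, and YOLO formats; COCO buffers per-item image and annotation records in temporary files and merges them into the final JSON with json-stream writers, while the Datumaro writer re-iterates the subset and emits a streamable dict. Below is a minimal sketch of how the new flag is exercised, modeled on the tests in this patch; the import paths, `StreamDataset` entry point, and directory names are illustrative assumptions, not canonical API documentation.

# A sketch only; "path/to/coco_dataset" and "coco_out" are placeholders,
# and import paths are assumed from the tests added in this patch.
from datumaro.components.dataset import StreamDataset
from datumaro.plugins.data_formats.coco.exporter import CocoInstancesExporter

# A StreamDataset iterates its source lazily instead of caching every item.
dataset = StreamDataset.import_from("path/to/coco_dataset", "coco")
assert dataset.is_stream

# Exporters that override `can_stream` to return True (COCO, Datumaro, VOC,
# and YOLO after this patch) accept stream=True and write annotations without
# accumulating them all in memory; exporters whose `can_stream` is still False
# raise DatasetExportError when stream=True is requested.
CocoInstancesExporter.convert(dataset, save_dir="coco_out", save_media=True, stream=True)

Note that `Dataset.export` now forwards the dataset's own streaming mode to the exporter and skips `flush_changes()` for stream datasets, so the same export path works for both `Dataset` and `StreamDataset` instances.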