Skip to content

Commit

Permalink
support video annotation (#1124)
Browse files Browse the repository at this point in the history
<!-- Contributing guide:
https://github.com/openvinotoolkit/datumaro/blob/develop/CONTRIBUTING.md
-->

### Summary

[CVS-116105](https://jira.devtools.intel.com/browse/CVS-116105)

Support the video annotation type for the 'datumaro' and
'datumaro_binary' formats.

<!--
Resolves #111 and #222.
Depends on #1000 (for series of dependent commits).

This PR introduces this capability to make the project better in this
and that.

- Added this feature
- Removed that feature
- Fixed the problem #1234
-->

### How to test
<!-- Describe the testing procedure for reviewers, if changes are
not fully covered by unit tests or manual testing can be complicated.
-->

### Checklist
<!-- Put an 'x' in all the boxes that apply -->
- [x] I have added unit tests to cover my changes.​
- [x] I have added integration tests to cover my changes.​
- [x] I have added the description of my changes into
[CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).​
- [x] I have updated the
[documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs)
accordingly

### License

- [x] I submit _my code changes_ under the same [MIT
License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE)
that covers the project.
  Feel free to contact the maintainers if that's a concern.
- [x] I have updated the license header for each file (see an example
below).

```python
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
```
  • Loading branch information
bonhunko authored Aug 17, 2023
1 parent 0d5311e commit 4f848c9
Show file tree
Hide file tree
Showing 19 changed files with 280 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### New features
- Add tabular data import/export
(<https://github.com/openvinotoolkit/datumaro/pull/1089>)
- Support video annotation import/export
(<https://github.com/openvinotoolkit/datumaro/pull/1124>)

### Enhancements
- Remove xfail marks from the convert integration tests
Expand Down
2 changes: 1 addition & 1 deletion docs/source/docs/data-formats/datumaro_format.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ dataset/
│ | ├── img002.png
│ | └── ...
│ └── ...
├── videos/ # directory to store video files
└── annotations/
├── train.json # annotation file with training data
├── val.json # annotation file with validation data
Expand Down
2 changes: 2 additions & 0 deletions docs/source/docs/data-formats/formats/datumaro.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Supported media types:

- `Image`
- `PointCloud`
- `VideoFrame`

Supported annotation types:

Expand Down Expand Up @@ -56,6 +57,7 @@ A Datumaro dataset directory should have the following structure:
│ ├── <image_name1.ext>
│ ├── <image_name2.ext>
│ └── ...
├── videos/ # directory to store video files
└── annotations/
├── <subset_name_1>.json
├── <subset_name_2>.json
Expand Down
3 changes: 3 additions & 0 deletions docs/source/docs/data-formats/formats/datumaro_binary.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Dataset/
│ └── val/
│ ├── <image_name1.ext>
│ └── ...
├── videos/ # directory to store video files
└── annotations/
├── instances_train2017.json
└── instances_val2017.json
Expand All @@ -58,6 +59,7 @@ Supported media types:

- `Image`
- `PointCloud`
- `VideoFrame`

Supported annotation types:

Expand Down Expand Up @@ -103,6 +105,7 @@ A DatumaroBinary dataset directory should have the following structure:
│ ├── <image_name1.ext>
│ ├── <image_name2.ext>
│ └── ...
├── videos/ # directory to store video files
└── annotations/
├── <subset_name_1>.datum
├── <subset_name_2>.datum
Expand Down
6 changes: 6 additions & 0 deletions src/datumaro/components/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class AnnotationType(IntEnum):
COORDINATE_ROUNDING_DIGITS = 2
CHECK_POLYGON_EQ_EPSILONE = 1e-7
NO_GROUP = 0
NO_OBJECT_ID = -1


@attrs(slots=True, kw_only=True, order=False)
Expand Down Expand Up @@ -83,6 +84,11 @@ class Annotation:
# single object. The value of 0 means there is no group.
group: int = field(default=NO_GROUP, validator=default_if_none(int))

# Object identifier shared across multiple items,
# e.g. in a video, person 'A' may be annotated on multiple frame images;
# the user can assign a value >= 0 as the id of person 'A'.
object_id: int = field(default=NO_OBJECT_ID, validator=default_if_none(int))

_type = AnnotationType.unknown

@property
Expand Down
32 changes: 31 additions & 1 deletion src/datumaro/components/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
DatumaroError,
ItemExportError,
)
from datumaro.components.media import Image, PointCloud
from datumaro.components.media import Image, PointCloud, VideoFrame
from datumaro.components.progress_reporting import NullProgressReporter, ProgressReporter
from datumaro.util.meta_file_util import save_hashkey_file, save_meta_file
from datumaro.util.os_util import rmtree
Expand Down Expand Up @@ -323,6 +323,7 @@ def __init__(
save_media: bool,
images_dir: str,
pcd_dir: str,
video_dir: str,
crypter: Crypter = NULL_CRYPTER,
image_ext: Optional[str] = None,
default_image_ext: Optional[str] = None,
Expand All @@ -332,6 +333,7 @@ def __init__(
self._save_media = save_media
self._images_dir = images_dir
self._pcd_dir = pcd_dir
self._video_dir = video_dir
self._crypter = crypter
self._image_ext = image_ext
self._default_image_ext = default_image_ext
Expand Down Expand Up @@ -363,6 +365,14 @@ def make_pcd_extra_image_filename(self, item, idx, image, *, name=None, subdir=N
item, name=name if name else f"{item.id}/extra_image_{idx}", subdir=subdir
) + self.find_image_ext(image)

def make_video_filename(self, item, *, name=None):
    """Return the base file name of the video that backs ``item``.

    Args:
        item: A ``DatasetItem`` whose ``media`` is a ``VideoFrame``.
        name: Currently unused; the result is always derived from the
            path of the frame's source video.

    Returns:
        The final path component of ``item.media.video.path``.

    Raises:
        TypeError: If ``item`` is not a ``DatasetItem`` or its media is not
            a ``VideoFrame``.
    """
    # The previous `assert "<message>"` was always truthy, so invalid input
    # fell through to an UnboundLocalError; fail explicitly instead (this
    # also keeps working when assertions are stripped with -O).
    if not (isinstance(item, DatasetItem) and isinstance(item.media, VideoFrame)):
        raise TypeError("Video item type should be VideoFrame")

    return osp.basename(item.media.video.path)

def save_image(
self,
item: DatasetItem,
Expand Down Expand Up @@ -412,6 +422,26 @@ def helper(i, image):

item.media.save(path, helper, crypter=NULL_CRYPTER)

def save_video(
    self,
    item: DatasetItem,
    *,
    basedir: Optional[str] = None,
    fname: Optional[str] = None,
):
    """Write the source video of ``item`` into the export directory.

    Items whose media is missing or is not a ``VideoFrame`` are skipped
    with a warning.

    Args:
        item: Dataset item whose ``media.video`` should be saved.
        basedir: Target directory; falls back to the context's video
            directory when ``None``.
        fname: Target file name; falls back to
            ``make_video_filename(item)`` when ``None``.
    """
    media = item.media
    if not media or not isinstance(media, VideoFrame):
        log.warning("Item '%s' has no video", item.id)
        return

    if basedir is None:
        basedir = self._video_dir
    if fname is None:
        fname = self.make_video_filename(item)

    target = osp.abspath(osp.join(basedir, fname))

    # Make sure the destination directory exists before handing off.
    os.makedirs(osp.dirname(target), exist_ok=True)

    media.video.save(target, crypter=NULL_CRYPTER)

@property
def images_dir(self) -> str:
    """Images directory that was stored on this object at construction."""
    configured_dir = self._images_dir
    return configured_dir
Expand Down
18 changes: 18 additions & 0 deletions src/datumaro/components/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def index(self) -> int:
def video(self) -> Video:
return self._video

@property
def path(self) -> str:
    """Path of the video that this frame belongs to."""
    source_video = self._video
    return source_video.path


class _VideoFrameIterator(Iterator[VideoFrame]):
"""
Expand Down Expand Up @@ -808,6 +812,20 @@ def __hash__(self):
# Required for caching
return hash((self._path, self._step, self._start_frame, self._end_frame))

def save(
    self,
    fp: Union[str, io.IOBase],
    crypter: Crypter = NULL_CRYPTER,
):
    """Copy the video file to a filesystem path or into a binary stream.

    Args:
        fp: Destination. A ``str`` is treated as a file path: its parent
            directory is created if needed and the video file is copied
            there, unless it already is the source file. An ``io.IOBase``
            object receives the raw bytes of the video file.
        crypter: Accepted for signature compatibility; the video content
            is written as-is and this argument is not used here.
    """
    # NOTE: `crypter` is intentionally not applied; the file is copied verbatim.
    if isinstance(fp, str):
        parent_dir = osp.dirname(fp)
        # A bare file name has an empty dirname, and os.makedirs("") raises.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        if fp != self.path:
            try:
                shutil.copyfile(self.path, fp)
            except shutil.SameFileError:
                # `fp` is another spelling of the source path (e.g. absolute
                # vs. relative): the file is already in place.
                pass
    elif isinstance(fp, io.IOBase):
        with open(self.path, "rb") as f_video:
            # Stream in chunks instead of loading the whole video in memory.
            shutil.copyfileobj(f_video, fp)

@property
def path(self) -> str:
"""Path to the media file"""
Expand Down
1 change: 1 addition & 0 deletions src/datumaro/plugins/data_formats/arrow/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def create_writer(self, subset: str, ctx: ExportContext) -> _SubsetWriter:
save_media=self._save_media,
images_dir="",
pcd_dir="",
video_dir="",
crypter=NULL_CRYPTER,
image_ext=self._image_ext,
default_image_ext=self._default_image_ext,
Expand Down
45 changes: 42 additions & 3 deletions src/datumaro/plugins/data_formats/datumaro/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from json_stream.base import StreamingJSONObject

from datumaro.components.annotation import (
NO_OBJECT_ID,
AnnotationType,
Bbox,
Caption,
Expand All @@ -28,7 +29,7 @@
from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.errors import DatasetImportError, MediaTypeError
from datumaro.components.importer import ImportContext
from datumaro.components.media import Image, MediaElement, MediaType, PointCloud
from datumaro.components.media import Image, MediaElement, MediaType, PointCloud, Video, VideoFrame
from datumaro.util import parse_json_file, to_dict_from_streaming_json
from datumaro.version import __version__

Expand All @@ -45,12 +46,15 @@ def __init__(
rootpath: str,
images_dir: str,
pcd_dir: str,
video_dir: str,
ctx: ImportContext,
) -> None:
self._subset = subset
self._rootpath = rootpath
self._images_dir = images_dir
self._pcd_dir = pcd_dir
self._video_dir = video_dir
self._videos = {}
self._ctx = ctx

self._reader = self._init_reader(path)
Expand Down Expand Up @@ -174,6 +178,19 @@ def _parse_item(self, item_desc: Dict) -> Optional[DatasetItem]:
if self.media_type == MediaElement:
self.media_type = PointCloud

video_frame_info = item_desc.get("video_frame")
if media and video_frame_info:
raise MediaTypeError("Dataset cannot contain multiple media types")
if video_frame_info:
video_path = osp.join(self._video_dir, video_frame_info.get("video_path"))
if video_path not in self._videos:
self._videos[video_path] = Video(video_path)
video = self._videos[video_path]

frame_index = video_frame_info.get("frame_index")

media = VideoFrame(video, frame_index)

media_desc = item_desc.get("media")
if not media and media_desc and media_desc.get("path"):
media = MediaElement(path=media_desc.get("path"))
Expand Down Expand Up @@ -203,14 +220,21 @@ def _load_annotations(self, item: Dict):
ann_type = AnnotationType[ann["type"]]
attributes = ann.get("attributes")
group = ann.get("group")
object_id = ann.get("object_id", NO_OBJECT_ID)

label_id = ann.get("label_id")
z_order = ann.get("z_order")
points = ann.get("points")

if ann_type == AnnotationType.label:
loaded.append(
Label(label=label_id, id=ann_id, attributes=attributes, group=group)
Label(
label=label_id,
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
)
)

elif ann_type == AnnotationType.mask:
Expand All @@ -223,6 +247,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand All @@ -235,6 +260,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand All @@ -247,6 +273,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand All @@ -263,6 +290,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand All @@ -275,6 +303,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand All @@ -293,6 +322,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
)
)

Expand All @@ -304,6 +334,7 @@ def _load_annotations(self, item: Dict):
id=ann_id,
attributes=attributes,
group=group,
object_id=object_id,
z_order=z_order,
)
)
Expand Down Expand Up @@ -334,9 +365,10 @@ def __init__(
rootpath: str,
images_dir: str,
pcd_dir: str,
video_dir: str,
ctx: ImportContext,
) -> None:
super().__init__(path, subset, rootpath, images_dir, pcd_dir, ctx)
super().__init__(path, subset, rootpath, images_dir, pcd_dir, video_dir, ctx)
self._length = None

def __len__(self):
Expand Down Expand Up @@ -458,6 +490,11 @@ def _init_path(self, path: str):
pcd_dir = osp.join(rootpath, DatumaroPath.PCD_DIR)
self._pcd_dir = pcd_dir

video_dir = ""
if rootpath and osp.isdir(osp.join(rootpath, DatumaroPath.VIDEO_DIR)):
video_dir = osp.join(rootpath, DatumaroPath.VIDEO_DIR)
self._video_dir = video_dir

@property
def is_stream(self) -> bool:
return self._stream
Expand All @@ -480,6 +517,7 @@ def _load_impl(self, path: str) -> None:
self._rootpath,
self._images_dir,
self._pcd_dir,
self._video_dir,
self._ctx,
)
if not self._stream
Expand All @@ -489,6 +527,7 @@ def _load_impl(self, path: str) -> None:
self._rootpath,
self._images_dir,
self._pcd_dir,
self._video_dir,
self._ctx,
)
)
Expand Down
Loading

0 comments on commit 4f848c9

Please sign in to comment.