diff --git a/examples/config_templates/dataset/yaak.yaml b/examples/config_templates/dataset/yaak.yaml index 8e3b953..37ce805 100644 --- a/examples/config_templates/dataset/yaak.yaml +++ b/examples/config_templates/dataset/yaak.yaml @@ -1,13 +1,13 @@ #@yaml/text-templated-strings #@ drives = [ -#@ 'Niro102-HQ/2023-05-08--13-59-22', +#@ 'Niro098-HQ/2024-08-26--06-06-03', #@ ] #@ cameras = [ #@ 'cam_front_left', -#@ 'cam_left_forward', -#@ 'cam_right_forward', +#@ 'cam_left_backward', +#@ 'cam_right_backward', #@ ] --- _target_: rbyte.Dataset @@ -21,14 +21,9 @@ inputs: (@=source_id@): index_column: "ImageMetadata.(@=source_id@).frame_idx" reader: - _target_: rbyte.io.frame.DirectoryFrameReader - path: "${data_dir}/(@=input_id@)/frames/(@=source_id@).defish.mp4/576x324/{:09d}.jpg" - frame_decoder: - _target_: simplejpeg.decode_jpeg - _partial_: true - colorspace: rgb - fastdct: true - fastupsample: true + _target_: rbyte.io.frame.VideoFrameReader + path: "${data_dir}/(@=input_id@)/(@=source_id@).defish.mp4" + resize_shorter_side: 324 #@ end table: @@ -83,7 +78,7 @@ inputs: frame_idx: method: asof - tolerance: 10ms + tolerance: 20ms strategy: nearest #@ end @@ -112,4 +107,4 @@ sample_builder: stride: 1 min_step: 6 filter: | - array_mean(`VehicleMotion.speed`) > 47 + array_lower(`VehicleMotion.speed`) > 80 diff --git a/examples/config_templates/frame_reader/video.yaml b/examples/config_templates/frame_reader/video.yaml new file mode 100644 index 0000000..3cc68ca --- /dev/null +++ b/examples/config_templates/frame_reader/video.yaml @@ -0,0 +1,6 @@ +--- +_target_: rbyte.io.frame.VideoFrameReader +path: ??? 
+threads: !!null +resize_shorter_side: !!null +with_fallback: !!null diff --git a/examples/config_templates/logger/rerun/yaak.yaml b/examples/config_templates/logger/rerun/yaak.yaml index 845d287..889b01d 100644 --- a/examples/config_templates/logger/rerun/yaak.yaml +++ b/examples/config_templates/logger/rerun/yaak.yaml @@ -2,8 +2,8 @@ #@ cameras = [ #@ 'cam_front_left', -#@ 'cam_left_forward', -#@ 'cam_right_forward', +#@ 'cam_left_backward', +#@ 'cam_right_backward', #@ ] --- diff --git a/pyproject.toml b/pyproject.toml index cacc921..602f285 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ maintainers = [{ name = "Evgenii Gorchakov", email = "evgenii@yaak.ai" }] dependencies = [ "tensordict @ git+https://github.com/pytorch/tensordict.git@85b6b81", "torch>=2.4.1", - "polars>=1.8.0", + "polars>=1.8.2", "pydantic>=2.9.2", "more-itertools>=10.5.0", "hydra-core>=1.3.2", @@ -44,6 +44,7 @@ mcap = [ ] yaak = ["protobuf", "ptars>=0.0.2rc2"] jpeg = ["simplejpeg>=1.7.6"] +video = ["video-reader-rs>=0.1.4"] [project.scripts] rbyte-build-table = 'rbyte.scripts.build_table:main' diff --git a/src/rbyte/io/frame/__init__.py b/src/rbyte/io/frame/__init__.py index ef1af94..14cb1ec 100644 --- a/src/rbyte/io/frame/__init__.py +++ b/src/rbyte/io/frame/__init__.py @@ -1,3 +1,17 @@ from .directory import DirectoryFrameReader __all__ = ["DirectoryFrameReader"] + +try: + from .video import VideoFrameReader +except ImportError: + pass +else: + __all__ += ["VideoFrameReader"] + +try: + from .mcap import McapFrameReader +except ImportError: + pass +else: + __all__ += ["McapFrameReader"] diff --git a/src/rbyte/io/frame/directory/reader.py b/src/rbyte/io/frame/directory/reader.py index aabd624..f160383 100644 --- a/src/rbyte/io/frame/directory/reader.py +++ b/src/rbyte/io/frame/directory/reader.py @@ -26,7 +26,7 @@ def __init__( @cached_property def _path_posix(self) -> str: - return self._path.as_posix() + return self._path.resolve().as_posix() def _decode(self, path: str) 
from collections.abc import Callable, Iterable, Sequence
from functools import partial
from os import PathLike
from pathlib import Path
from typing import override

import torch
import video_reader as vr
from jaxtyping import UInt8
from pydantic import NonNegativeInt, validate_call
from torch import Tensor

from rbyte.io.frame.base import FrameReader


class VideoFrameReader(FrameReader):
    """Frame reader that pulls frames directly out of a single video file.

    Decoding is delegated to the ``video_reader`` module: ``get_batch`` fetches
    the requested frames and ``get_shape`` reports how many frames exist.
    """

    @validate_call
    def __init__(
        self,
        path: PathLike[str],
        threads: NonNegativeInt | None = None,
        resize_shorter_side: NonNegativeInt | None = None,
        with_fallback: bool | None = None,  # noqa: FBT001
    ) -> None:
        """Bind the decoder options for the video at ``path``.

        Args:
            path: Location of the video file. It is resolved to an absolute
                POSIX-style string once, here.
            threads: Forwarded verbatim to ``video_reader.get_batch``.
            resize_shorter_side: Forwarded verbatim to
                ``video_reader.get_batch``.
            with_fallback: Forwarded verbatim to ``video_reader.get_batch``.

        NOTE(review): the exact meaning of ``None`` for the three forwarded
        options is decided by ``video_reader`` -- confirm against the
        installed version.
        """
        super().__init__()
        self._path = Path(path).resolve().as_posix()

        # Freeze the decoder options so `read` only has to supply the path
        # and the frame indexes.
        self._get_batch: Callable[[str, Iterable[int]], UInt8[Tensor, "b h w c"]] = (
            partial(
                vr.get_batch,  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
                threads=threads,
                resize_shorter_side=resize_shorter_side,
                with_fallback=with_fallback,
            )
        )

    @override
    def read(self, indexes: Iterable[int]) -> UInt8[Tensor, "b h w c"]:
        """Decode the frames at ``indexes`` and return them as one tensor.

        Args:
            indexes: Frame indexes to decode. Any iterable is accepted; it is
                consumed exactly once.

        Returns:
            The decoded batch wrapped with ``torch.from_numpy``.
        """
        # The signature promises `Iterable[int]`, but the decoder is a native
        # extension; hand it a concrete list so generators and other
        # non-sequence iterables are accepted as well.
        # NOTE(review): presumably the binding requires a real sequence --
        # confirm against the installed video-reader-rs.
        frame_indexes = list(indexes)
        batch = self._get_batch(self._path, frame_indexes)

        return torch.from_numpy(batch)  # pyright: ignore[reportUnknownMemberType]

    @override
    def get_available_indexes(self) -> Sequence[int]:
        """Return ``range(num_frames)`` for the video.

        ``num_frames`` is the first element of ``video_reader.get_shape``;
        the remaining elements are ignored.
        """
        num_frames, *_ = vr.get_shape(self._path)  # pyright: ignore[reportAttributeAccessIssue, reportUnknownVariableType, reportUnknownMemberType]

        return range(num_frames)  # pyright: ignore[reportUnknownArgumentType]