Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pixel Masks Annotations #142

Merged
merged 5 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions luxonis_ml/data/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,9 @@ def check_exists(name: str):

def get_dataset_info(name: str) -> Tuple[int, List[str], List[str]]:
    """Collects basic information about a dataset.

    @type name: str
    @param name: Name of the dataset passed to C{LuxonisDataset}.
    @rtype: Tuple[int, List[str], List[str]]
    @return: A tuple of the dataset size (C{len(dataset)}), the first
        element returned by C{dataset.get_classes()}, and the result of
        C{dataset.get_tasks()}.
    """
    dataset = LuxonisDataset(name)
    size = len(dataset)
    classes, _ = dataset.get_classes()
    # Task names come straight from the dataset; no loader is built and no
    # exception is swallowed just to discover them.
    return size, classes, dataset.get_tasks()


def print_info(name: str) -> None:
Expand Down
128 changes: 95 additions & 33 deletions luxonis_ml/data/datasets/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, TypedDict, Union

import numpy as np
import numpy.typing as npt
import pycocotools.mask as mask_util
from PIL import Image, ImageDraw
from pydantic import Field, model_validator
from pydantic import ConfigDict, Field, field_validator, model_validator
from pydantic.types import FilePath, PositiveInt
from typing_extensions import Annotated, TypeAlias

Expand Down Expand Up @@ -42,6 +43,7 @@ def load_annotation(name: str, js: str, data: Dict[str, Any]) -> "Annotation":
"KeypointAnnotation": KeypointAnnotation,
"RLESegmentationAnnotation": RLESegmentationAnnotation,
"PolylineSegmentationAnnotation": PolylineSegmentationAnnotation,
"MaskSegmentationAnnotation": MaskSegmentationAnnotation,
"ArrayAnnotation": ArrayAnnotation,
"LabelAnnotation": LabelAnnotation,
}[name](**json.loads(js), **data)
Expand Down Expand Up @@ -186,6 +188,27 @@ class SegmentationAnnotation(Annotation):

_label_type = LabelType.SEGMENTATION

@abstractmethod
def to_numpy(
self, class_mapping: Dict[str, int], width: int, height: int
) -> npt.NDArray[np.bool_]:
"""Converts the annotation to a numpy array.

Abstract: every concrete segmentation annotation provides its own
implementation.

@type class_mapping: Dict[str, int]
@param class_mapping: Mapping from class name to an integer index.
@type width: int
@param width: Requested mask width.
@type height: int
@param height: Requested mask height.
@rtype: npt.NDArray[np.bool_]
@return: Boolean mask of the annotation. NOTE(review): presumably of
    shape C{(height, width)}; confirm against each implementation.
"""
pass

@staticmethod
def combine_to_numpy(
annotations: List["SegmentationAnnotation"],
class_mapping: Dict[str, int],
height: int,
width: int,
) -> np.ndarray:
seg = np.zeros((len(class_mapping), height, width), dtype=np.bool_)
for ann in annotations:
class_ = class_mapping.get(ann.class_, 0)
seg[class_, ...] |= ann.to_numpy(class_mapping, width, height)

return seg.astype(np.uint8)


class RLESegmentationAnnotation(SegmentationAnnotation):
"""U{Run-length encoded<https://en.wikipedia.org/wiki/Run-length_encoding>}
Expand Down Expand Up @@ -231,25 +254,76 @@ def get_value(self) -> Dict[str, Any]:
"counts": rle["counts"].decode("utf-8"),
}

def to_numpy(
    self, _: Dict[str, int], width: int, height: int
) -> npt.NDArray[np.bool_]:
    """Decodes the run-length encoding into a boolean mask.

    @type _: Dict[str, int]
    @param _: Class mapping; unused.
    @type width: int
    @param width: Mask width passed to the decoder.
    @type height: int
    @param height: Mask height passed to the decoder.
    @rtype: npt.NDArray[np.bool_]
    @return: Decoded mask cast to boolean.
    """
    assert isinstance(self.counts, bytes)
    # pycocotools expects the size as [height, width].
    return mask_util.decode(
        {"counts": self.counts, "size": [height, width]}
    ).astype(np.bool_)


class MaskSegmentationAnnotation(SegmentationAnnotation):
    """Pixel-wise binary segmentation mask.

    The annotation can be constructed either from a C{mask} array or from
    the C{width}, C{height} and C{counts} fields of a run-length encoding
    (the format produced by L{get_value}).

    @type mask: npt.NDArray[np.bool_]
    @ivar mask: The segmentation mask as a numpy array. The mask must be 2D and must be
        castable to a boolean array.
    """

    type_: Literal["mask"] = Field("mask", alias="type")
    mask: npt.NDArray[np.bool_]

    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)

    @model_validator(mode="before")
    @classmethod
    def _convert_rle(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Builds the C{mask} field from RLE fields when no mask is given.

        @type values: Dict[str, Any]
        @param values: Raw input values of the model.
        @rtype: Dict[str, Any]
        @return: The input unchanged if it already contains C{mask};
            otherwise a copy in which C{width}, C{height} and C{counts}
            are replaced by the decoded C{mask}.
        @raise ValueError: If neither C{mask} nor all of C{width},
            C{height} and C{counts} are present.
        """
        if "mask" in values:
            return values

        if any(key not in values for key in ("width", "height", "counts")):
            raise ValueError(
                "MaskSegmentationAnnotation must have either "
                "'mask' or 'width', 'height', and 'counts'"
            )

        # Work on a copy so the caller's dictionary is not modified.
        values = dict(values)
        width: int = values.pop("width")
        height: int = values.pop("height")
        counts: Union[str, bytes] = values.pop("counts")

        # pycocotools produces ``bytes`` counts; accept them as well as
        # the ``str`` form emitted by ``get_value``.
        if isinstance(counts, str):
            counts = counts.encode("utf-8")

        values["mask"] = mask_util.decode(
            {
                "counts": counts,
                "size": [height, width],
            }
        ).astype(np.bool_)
        return values

    @field_validator("mask", mode="after")
    @staticmethod
    def _validate_shape(mask: np.ndarray) -> np.ndarray:
        """Rejects masks that are not two-dimensional.

        @raise ValueError: If C{mask.ndim} is not 2.
        """
        if mask.ndim != 2:
            raise ValueError("Mask must be a 2D array")
        return mask

    @field_validator("mask", mode="after")
    @staticmethod
    def _cast_mask(mask: np.ndarray) -> npt.NDArray[np.bool_]:
        """Casts the mask to a boolean array."""
        return mask.astype(np.bool_)

    def get_value(self) -> Dict[str, Any]:
        """Serializes the mask as a run-length encoding.

        @rtype: Dict[str, Any]
        @return: Dictionary with C{height}, C{width} and UTF-8 decoded
            C{counts}.
        """
        # pycocotools requires a Fortran-ordered uint8 array.
        mask = np.asfortranarray(self.mask.astype(np.uint8))
        rle = mask_util.encode(mask)

        return {
            "height": rle["size"][0],
            "width": rle["size"][1],
            "counts": rle["counts"].decode("utf-8"),  # type: ignore
        }

    def to_numpy(self, *_) -> npt.NDArray[np.bool_]:
        """Returns the stored mask.

        All arguments are ignored; the mask is returned at its own
        resolution and is not resized.
        """
        return self.mask


class PolylineSegmentationAnnotation(SegmentationAnnotation):
Expand All @@ -264,27 +338,14 @@ class PolylineSegmentationAnnotation(SegmentationAnnotation):

points: List[Tuple[NormalizedFloat, NormalizedFloat]] = Field(min_length=3)

def to_numpy(
    self, _: Dict[str, int], width: int, height: int
) -> npt.NDArray[np.bool_]:
    """Rasterizes the polygon into a boolean mask.

    @type _: Dict[str, int]
    @param _: Class mapping; unused.
    @type width: int
    @param width: Width of the output mask; also scales the normalized
        x coordinates.
    @type height: int
    @param height: Height of the output mask; also scales the normalized
        y coordinates.
    @rtype: npt.NDArray[np.bool_]
    @return: Boolean mask with the filled polygon set to C{True}.
    """
    polyline = [(round(x * width), round(y * height)) for x, y in self.points]
    # PIL image sizes are given as (width, height).
    mask = Image.new("L", (width, height), 0)
    draw = ImageDraw.Draw(mask)
    draw.polygon(polyline, fill=1, outline=1)
    return np.array(mask).astype(np.bool_)


class ArrayAnnotation(Annotation):
Expand Down Expand Up @@ -358,6 +419,7 @@ class DatasetRecord(BaseModelExtraForbid):
KeypointAnnotation,
RLESegmentationAnnotation,
PolylineSegmentationAnnotation,
MaskSegmentationAnnotation,
ArrayAnnotation,
LabelAnnotation,
]
Expand Down
28 changes: 22 additions & 6 deletions luxonis_ml/data/datasets/luxonis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
from ..utils.constants import LDF_VERSION
from ..utils.enums import BucketStorage, BucketType
from ..utils.parquet import ParquetFileManager
from .annotation import Annotation, ArrayAnnotation, DatasetRecord
from .annotation import (
Annotation,
ArrayAnnotation,
DatasetRecord,
)
from .base_dataset import BaseDataset, DatasetIterator
from .source import LuxonisSource

Expand Down Expand Up @@ -628,6 +632,15 @@ def _get_file(
return Path(default) if default is not None else None


def _rescale_mask(
mask: np.ndarray, mask_w: int, mask_h: int, x: float, y: float, w: float, h: float
) -> np.ndarray:
return mask[
int(y * mask_h) : int((y + h) * mask_h),
int(x * mask_w) : int((x + w) * mask_w),
].astype(np.uint8)


def _rescale_rle(rle: dict, x: float, y: float, w: float, h: float) -> dict:
height, width = rle["size"]

Expand All @@ -636,10 +649,7 @@ def _rescale_rle(rle: dict, x: float, y: float, w: float, h: float) -> dict:

decoded_mask = mask_utils.decode(rle) # type: ignore

cropped_mask = decoded_mask[
int(y * height) : int((y + h) * height),
int(x * width) : int((x + w) * width),
]
cropped_mask = _rescale_mask(decoded_mask, width, height, x, y, w, h)

bbox_height = int(h * height)
bbox_width = int(w * width)
Expand All @@ -665,6 +675,7 @@ def rescale_values(
List[Tuple[float, float, int]],
List[Tuple[float, float]],
Dict[str, Union[int, List[int]]],
np.ndarray,
]
]:
"""Rescale annotation values based on the bounding box coordinates."""
Expand All @@ -688,8 +699,13 @@ def rescale_values(
if "rle" in ann:
return _rescale_rle(ann["rle"], x, y, w, h)

if "mask" in ann:
mask = ann["mask"]
width, height = mask.shape
return _rescale_mask(ann["mask"], width, height, x, y, w, h)

raise ValueError(
"Invalid segmentation format. Must be either 'polylines' or 'rle'"
"Invalid segmentation format. Must be either 'polylines', 'rle', or 'mask'"
)

return None
Expand Down
11 changes: 2 additions & 9 deletions luxonis_ml/data/parsers/segmentation_mask_directory_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import cv2
import numpy as np
import polars as pl
import pycocotools.mask as mask_util

from luxonis_ml.data import DatasetIterator

Expand Down Expand Up @@ -116,18 +115,12 @@ def generator() -> DatasetIterator:

curr_seg_mask = np.zeros_like(mask)
curr_seg_mask[mask == id] = 1
curr_seg_mask = np.asfortranarray(
curr_seg_mask
) # pycocotools requirement
curr_rle = mask_util.encode(curr_seg_mask)
yield {
"file": file,
"annotation": {
"type": "rle",
"type": "mask",
"class": class_name,
"width": curr_rle["size"][0],
"height": curr_rle["size"][1],
"counts": curr_rle["counts"],
"mask": curr_seg_mask,
},
}

Expand Down
9 changes: 2 additions & 7 deletions luxonis_ml/data/parsers/solo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import cv2
import numpy as np
import pycocotools.mask as mask_util

from luxonis_ml.data import DatasetIterator

Expand Down Expand Up @@ -215,17 +214,13 @@ def generator() -> DatasetIterator:
curr_mask = np.zeros_like(mask)
curr_mask[np.all(mask == [b, g, r], axis=2)] = 1
curr_mask = np.max(curr_mask, axis=2) # 3D->2D
curr_mask = np.asfortranarray(curr_mask)
curr_rle = mask_util.encode(curr_mask)

yield {
"file": img_path,
"annotation": {
"type": "rle",
"type": "mask",
"class": class_name,
"width": curr_rle["size"][0],
"height": curr_rle["size"][1],
"counts": curr_rle["counts"],
"mask": curr_mask,
},
}

Expand Down
3 changes: 2 additions & 1 deletion luxonis_ml/embeddings/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ KDEpy>=1.1.5
kmedoids>=0.4.3
matplotlib>=3.7.2
numpy>=1.22
onnx>=1.14.0
onnx>=1.14.0,<1.16.2; sys_platform == "win32"
onnx>=1.14.0; sys_platform != "win32"
onnxruntime-gpu>=1.15.1; sys_platform == 'linux'
onnxruntime>=1.15.1; sys_platform != 'linux'
opencv-python>=4.7.0.68
Expand Down
Loading