luxonis · kozlov721 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
@@ -51,19 +51,9 @@ def check_exists(name: str):
 
 def get_dataset_info(name: str) -> Tuple[int, List[str], List[str]]:
+ """Collects basic information about a dataset.
+
+ @type name: str
+ @param name: Name passed to C{LuxonisDataset}.
+ @rtype: Tuple[int, List[str], List[str]]
+ @return: C{len(dataset)}, the first element returned by
+ C{dataset.get_classes()}, and the result of C{dataset.get_tasks()}.
+ """
  dataset = LuxonisDataset(name)
- try:
- size = len(dataset)
- except KeyError:
- size = -1
-
- try:
- loader = LuxonisLoader(dataset, view=SplitType.TRAIN.value)
- _, ann = next(iter(loader))
- except Exception:
- ann = {}
+ size = len(dataset)
  classes, _ = dataset.get_classes()
- tasks = list(ann.keys())
- return size, classes, tasks
+ return size, classes, dataset.get_tasks()
 
 
 def print_info(name: str) -> None:

@@ -4,9 +4,10 @@
 from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, TypedDict, Union
 
 import numpy as np
+import numpy.typing as npt
 import pycocotools.mask as mask_util
 from PIL import Image, ImageDraw
-from pydantic import Field, model_validator
+from pydantic import ConfigDict, Field, field_validator, model_validator
 from pydantic.types import FilePath, PositiveInt
 from typing_extensions import Annotated, TypeAlias
 
@@ -42,6 +43,7 @@ def load_annotation(name: str, js: str, data: Dict[str, Any]) -> "Annotation":
  "KeypointAnnotation": KeypointAnnotation,
  "RLESegmentationAnnotation": RLESegmentationAnnotation,
  "PolylineSegmentationAnnotation": PolylineSegmentationAnnotation,
+ "MaskSegmentationAnnotation": MaskSegmentationAnnotation,
  "ArrayAnnotation": ArrayAnnotation,
  "LabelAnnotation": LabelAnnotation,
  }[name](**json.loads(js), **data)
@@ -186,6 +188,27 @@ class SegmentationAnnotation(Annotation):
 
  _label_type = LabelType.SEGMENTATION
 
+ @abstractmethod
+ def to_numpy(
+     self, class_mapping: Dict[str, int], width: int, height: int
+ ) -> npt.NDArray[np.bool_]:
+     """Renders the annotation as a boolean mask.
+
+     @type class_mapping: Dict[str, int]
+     @param class_mapping: Mapping from class names to class indices.
+     @type width: int
+     @param width: Width of the target mask.
+     @type height: int
+     @param height: Height of the target mask.
+     @rtype: npt.NDArray[np.bool_]
+     @return: The annotation as a boolean numpy array.
+     """
+     ...
+
+ @staticmethod
+ def combine_to_numpy(
+     annotations: List["SegmentationAnnotation"],
+     class_mapping: Dict[str, int],
+     height: int,
+     width: int,
+ ) -> np.ndarray:
+     """Merges segmentation annotations into one binary mask per class.
+
+     @type annotations: List[SegmentationAnnotation]
+     @param annotations: Annotations to merge.
+     @type class_mapping: Dict[str, int]
+     @param class_mapping: Mapping from class names to channel indices.
+     @type height: int
+     @param height: Height of the output masks.
+     @type width: int
+     @param width: Width of the output masks.
+     @rtype: np.ndarray
+     @return: C{uint8} array of shape C{(len(class_mapping), height, width)}
+         holding the union of all masks belonging to each class.
+     """
+     n_classes = len(class_mapping)
+     combined = np.zeros((n_classes, height, width), dtype=np.bool_)
+     for annotation in annotations:
+         # Class names missing from the mapping fall back to channel 0.
+         channel = class_mapping.get(annotation.class_, 0)
+         layer = annotation.to_numpy(class_mapping, width, height)
+         combined[channel, ...] |= layer
+
+     return combined.astype(np.uint8)
+
 
 class RLESegmentationAnnotation(SegmentationAnnotation):
  """U{Run-length encoded<https://en.wikipedia.org/wiki/Run-length_encoding>}
@@ -231,25 +254,76 @@ def get_value(self) -> Dict[str, Any]:
  "counts": rle["counts"].decode("utf-8"),
  }
 
- def to_numpy(self, _: Dict[str, int], width: int, height: int) -> np.ndarray:
+ def to_numpy(
+ self, _: Dict[str, int], width: int, height: int
+ ) -> npt.NDArray[np.bool_]:
+ """Decodes C{self.counts} with pycocotools using size C{[height, width]}
+ and casts the result to a boolean array.
+
+ The first positional argument (the class mapping) is not used.
+ """
+ # The decoder is given the raw byte string, so `counts` must be bytes here.
  assert isinstance(self.counts, bytes)
- return mask_util.decode({"counts": self.counts, "size": [height, width]})
+ return mask_util.decode(
+ {"counts": self.counts, "size": [height, width]}
+ ).astype(np.bool_)
+
+
+class MaskSegmentationAnnotation(SegmentationAnnotation):
+ """Pixel-wise binary segmentation mask.
+
+ @type mask: npt.NDArray[np.bool_]
+ @ivar mask: The segmentation mask as a numpy array. The mask must be 2D and must be
+ castable to a boolean array.
+ """
+
+ type_: Literal["mask"] = Field("mask", alias="type")
+ mask: npt.NDArray[np.bool_]
+
+ model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
+
+ @model_validator(mode="before")
+ @classmethod
+ def _convert_rle(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+     """Converts an RLE-encoded input into a C{mask} array before validation.
+
+     Input that already contains C{mask} is returned untouched. Otherwise
+     C{width}, C{height} and C{counts} are removed from a copy of the input
+     and replaced by the decoded boolean mask.
+
+     @type values: Dict[str, Any]
+     @param values: Raw input of the model.
+     @rtype: Dict[str, Any]
+     @return: Input containing the C{mask} key.
+     @raise ValueError: If neither C{mask} nor all of C{width}, C{height}
+         and C{counts} are present.
+     """
+     if "mask" in values:
+         return values
+
+     if any(key not in values for key in ("width", "height", "counts")):
+         raise ValueError(
+             "MaskSegmentationAnnotation must have either "
+             "'mask' or 'width', 'height', and 'counts'"
+         )
+
+     # Work on a shallow copy so the caller's dictionary is left intact.
+     values = dict(values)
+     width: int = values.pop("width")
+     height: int = values.pop("height")
+     counts: Union[str, bytes] = values.pop("counts")
+
+     # pycocotools emits `counts` as bytes; only text needs to be encoded.
+     if isinstance(counts, str):
+         counts = counts.encode("utf-8")
+
+     values["mask"] = mask_util.decode(
+         {"counts": counts, "size": [height, width]}
+     ).astype(np.bool_)
+     return values
 
+ @field_validator("mask", mode="after")
  @staticmethod
- def combine_to_numpy(
- annotations: List["RLESegmentationAnnotation"],
- class_mapping: Dict[str, int],
- height: int,
- width: int,
- ) -> np.ndarray:
- seg = np.zeros((len(class_mapping), height, width))
- for ann in annotations:
- class_ = class_mapping.get(ann.class_, 0)
- mask = ann.to_numpy(class_mapping, width, height)
- seg[class_, ...] += mask
+ def _validate_shape(mask: np.ndarray) -> np.ndarray:
+ """Ensures the mask is two-dimensional.
+
+ @raise ValueError: If C{mask.ndim} is not 2.
+ """
+ if mask.ndim != 2:
+ raise ValueError("Mask must be a 2D array")
+ return mask
 
- seg = np.clip(seg, 0, 1)
- return seg
+ @field_validator("mask", mode="after")
+ @staticmethod
+ def _cast_mask(mask: np.ndarray) -> npt.NDArray[np.bool_]:
+     """Casts the mask to a boolean array.
+
+     @type mask: np.ndarray
+     @param mask: Mask to convert.
+     @rtype: npt.NDArray[np.bool_]
+     @return: Copy of the mask with dtype C{np.bool_}.
+     """
+     as_bool = mask.astype(np.bool_)
+     return as_bool
+
+ def get_value(self) -> Dict[str, Any]:
+     """Encodes the mask as run-length encoding.
+
+     @rtype: Dict[str, Any]
+     @return: Dictionary with the C{height} and C{width} of the mask and
+         the RLE C{counts} decoded to a UTF-8 string.
+     """
+     # pycocotools requires a Fortran-ordered uint8 array.
+     fortran_mask = np.asfortranarray(self.mask.astype(np.uint8))
+     encoded = mask_util.encode(fortran_mask)
+     height, width = encoded["size"]
+
+     return {
+         "height": height,
+         "width": width,
+         "counts": encoded["counts"].decode("utf-8"),  # type: ignore
+     }
+
+ def to_numpy(self, *_) -> npt.NDArray[np.bool_]:
+     """Returns the stored mask.
+
+     Any positional arguments are accepted and ignored.
+
+     @rtype: npt.NDArray[np.bool_]
+     @return: The C{mask} attribute, not a copy.
+     """
+     stored = self.mask
+     return stored
 
 
 class PolylineSegmentationAnnotation(SegmentationAnnotation):
@@ -264,27 +338,14 @@ class PolylineSegmentationAnnotation(SegmentationAnnotation):
 
  points: List[Tuple[NormalizedFloat, NormalizedFloat]] = Field(min_length=3)
 
- def to_numpy(self, _: Dict[str, int], width: int, height: int) -> np.ndarray:
+ def to_numpy(
+ self, _: Dict[str, int], width: int, height: int
+ ) -> npt.NDArray[np.bool_]:
+ """Rasterizes the polygon given by C{self.points} into a boolean mask.
+
+ The first positional argument (the class mapping) is not used.
+ """
+ # Points are normalized, so scale them by the target width and height.
  polyline = [(round(x * width), round(y * height)) for x, y in self.points]
  mask = Image.new("L", (width, height), 0)
  draw = ImageDraw.Draw(mask)
  draw.polygon(polyline, fill=1, outline=1)
- return np.array(mask)
-
- @staticmethod
- def combine_to_numpy(
- annotations: List["PolylineSegmentationAnnotation"],
- class_mapping: Dict[str, int],
- height: int,
- width: int,
- ) -> np.ndarray:
- seg = np.zeros((len(class_mapping), height, width))
- for ann in annotations:
- class_ = class_mapping.get(ann.class_, 0)
- seg[class_, ...] += ann.to_numpy(class_mapping, width, height)
-
- seg = np.clip(seg, 0, 1)
- return seg
+ return np.array(mask).astype(np.bool_)
 
 
 class ArrayAnnotation(Annotation):
@@ -358,6 +419,7 @@ class DatasetRecord(BaseModelExtraForbid):
  KeypointAnnotation,
  RLESegmentationAnnotation,
  PolylineSegmentationAnnotation,
+ MaskSegmentationAnnotation,
  ArrayAnnotation,
  LabelAnnotation,
  ]

@@ -20,7 +20,11 @@
 from ..utils.constants import LDF_VERSION
 from ..utils.enums import BucketStorage, BucketType
 from ..utils.parquet import ParquetFileManager
-from .annotation import Annotation, ArrayAnnotation, DatasetRecord
+from .annotation import (
+ Annotation,
+ ArrayAnnotation,
+ DatasetRecord,
+)
 from .base_dataset import BaseDataset, DatasetIterator
 from .source import LuxonisSource
 
@@ -628,6 +632,15 @@ def _get_file(
  return Path(default) if default is not None else None
 
 
+def _rescale_mask(
+    mask: np.ndarray, mask_w: int, mask_h: int, x: float, y: float, w: float, h: float
+) -> np.ndarray:
+    """Crops a mask to a bounding box given in normalized coordinates.
+
+    @type mask: np.ndarray
+    @param mask: Mask indexed as C{mask[row, column]}.
+    @type mask_w: int
+    @param mask_w: Width used to scale C{x} and C{w} to column indices.
+    @type mask_h: int
+    @param mask_h: Height used to scale C{y} and C{h} to row indices.
+    @type x: float
+    @param x: Normalized left edge of the box.
+    @type y: float
+    @param y: Normalized top edge of the box.
+    @type w: float
+    @param w: Normalized width of the box.
+    @type h: float
+    @param h: Normalized height of the box.
+    @rtype: np.ndarray
+    @return: The cropped region as a C{uint8} array.
+    """
+    # A negative index would wrap around to the opposite edge when slicing,
+    # so boxes reaching past the top or left border are clamped to index 0.
+    # Indices past the bottom or right border are already truncated by numpy.
+    top = max(int(y * mask_h), 0)
+    bottom = max(int((y + h) * mask_h), 0)
+    left = max(int(x * mask_w), 0)
+    right = max(int((x + w) * mask_w), 0)
+    return mask[top:bottom, left:right].astype(np.uint8)
+
+
 def _rescale_rle(rle: dict, x: float, y: float, w: float, h: float) -> dict:
  height, width = rle["size"]
 
@@ -636,10 +649,7 @@ def _rescale_rle(rle: dict, x: float, y: float, w: float, h: float) -> dict:
 
  decoded_mask = mask_utils.decode(rle) # type: ignore
 
- cropped_mask = decoded_mask[
- int(y * height) : int((y + h) * height),
- int(x * width) : int((x + w) * width),
- ]
+ cropped_mask = _rescale_mask(decoded_mask, width, height, x, y, w, h)
 
  bbox_height = int(h * height)
  bbox_width = int(w * width)
@@ -665,6 +675,7 @@ def rescale_values(
  List[Tuple[float, float, int]],
  List[Tuple[float, float]],
  Dict[str, Union[int, List[int]]],
+ np.ndarray,
  ]
 ]:
  """Rescale annotation values based on the bounding box coordinates."""
@@ -688,8 +699,13 @@ def rescale_values(
  if "rle" in ann:
  return _rescale_rle(ann["rle"], x, y, w, h)
 
+ if "mask" in ann:
+     mask = ann["mask"]
+     # NumPy array shapes are ordered (rows, columns), i.e. (height, width).
+     height, width = mask.shape
+     return _rescale_mask(mask, width, height, x, y, w, h)
+
  raise ValueError(
- "Invalid segmentation format. Must be either 'polylines' or 'rle'"
+ "Invalid segmentation format. Must be either 'polylines', 'rle', or 'mask'"
  )
 
  return None

@@ -4,7 +4,6 @@
 import cv2
 import numpy as np
 import polars as pl
-import pycocotools.mask as mask_util
 
 from luxonis_ml.data import DatasetIterator
 
@@ -116,18 +115,12 @@ def generator() -> DatasetIterator:
 
  curr_seg_mask = np.zeros_like(mask)
  curr_seg_mask[mask == id] = 1
- curr_seg_mask = np.asfortranarray(
- curr_seg_mask
- ) # pycocotools requirement
- curr_rle = mask_util.encode(curr_seg_mask)
  yield {
  "file": file,
  "annotation": {
- "type": "rle",
+ "type": "mask",
  "class": class_name,
- "width": curr_rle["size"][0],
- "height": curr_rle["size"][1],
- "counts": curr_rle["counts"],
+ "mask": curr_seg_mask,
  },
  }
 

@@ -6,7 +6,6 @@
 
 import cv2
 import numpy as np
-import pycocotools.mask as mask_util
 
 from luxonis_ml.data import DatasetIterator
 
@@ -215,17 +214,13 @@ def generator() -> DatasetIterator:
  curr_mask = np.zeros_like(mask)
  curr_mask[np.all(mask == [b, g, r], axis=2)] = 1
  curr_mask = np.max(curr_mask, axis=2) # 3D->2D
- curr_mask = np.asfortranarray(curr_mask)
- curr_rle = mask_util.encode(curr_mask)
 
  yield {
  "file": img_path,
  "annotation": {
- "type": "rle",
+ "type": "mask",
  "class": class_name,
- "width": curr_rle["size"][0],
- "height": curr_rle["size"][1],
- "counts": curr_rle["counts"],
+ "mask": curr_mask,
  },
  }
 

@@ -3,7 +3,8 @@ KDEpy>=1.1.5
 kmedoids>=0.4.3
 matplotlib>=3.7.2
 numpy>=1.22
-onnx>=1.14.0
+onnx>=1.14.0,<1.16.2; sys_platform == "win32"
+onnx>=1.14.0; sys_platform != "win32"
 onnxruntime-gpu>=1.15.1; sys_platform == 'linux'
 onnxruntime>=1.15.1; sys_platform != 'linux'
 opencv-python>=4.7.0.68