Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrating CV Ontologies with the DataStore format #844

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions forte/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
# The index storing the entry type in the internal entry data of DataStore.
ENTRY_TYPE_INDEX = 3

# The index storing the payload ID in the internal entry data of DataStore.
PAYLOAD_INDEX = 0

# The index storing the entry type (specific to Link and Group types). It is
# saved in the `tid_idx_dict` in DataStore.
ENTRY_DICT_TYPE_INDEX = 0
Expand Down
3 changes: 3 additions & 0 deletions forte/data/entry_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
SinglePackEntries,
MultiPackEntries,
)
from forte.data.ontology.top import BoundingBox, Box
from forte.utils import get_class, get_full_module_name

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -205,6 +206,8 @@ def get_entry_object(
# the value can be arbitrary since they will all be routed to DataStore.
if data_store_ref._is_annotation(type_name):
entry = entry_class(pack=pack, begin=0, end=0)
elif any(entry_class == box_class for box_class in [BoundingBox, Box]):
entry = entry_class(pack=pack, height=1, width=1)
elif any(
data_store_ref._is_subclass(type_name, type_class)
for type_class in SinglePackEntries + MultiPackEntries
Expand Down
204 changes: 204 additions & 0 deletions forte/data/ontology/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Defines the basic data structures and interfaces for the Forte data
representation system.
"""
import math
import uuid

from abc import abstractmethod, ABC
Expand All @@ -23,6 +24,7 @@
from typing import (
Iterable,
Optional,
Tuple,
Type,
Hashable,
TypeVar,
Expand Down Expand Up @@ -635,5 +637,207 @@ def index_key(self) -> int:
return self.tid


class Grid:
    """
    Regular grid with a grid configuration dependent on the image size.

    It is a data structure used to retrieve grid-related objects such as grid
    cells from the image. Grid itself doesn't store any image data.

    Based on the image size and the grid shape, we compute the height and the
    width of grid cells. For example, if the image size
    (image_height, image_width) is (640, 480) and the grid shape
    (height, width) is (2, 3), the size of grid cells (self.c_h, self.c_w)
    will be (320, 160).

    When the image size is not divisible by the grid shape, the resulting
    (floating point) cell size is rounded up to an integer, and every pixel
    range is clipped to the image size. As a consequence the last grid cell
    per column and row is smaller than the other cells. For example, if the
    image size is (128, 128) and the grid shape is (13, 13), the first 12
    grid cells per column and row will have a size of (10, 10) since
    128/13=9.85 is rounded up to 10, and the last grid cell per column and
    row will have a size of (8, 8) since 128%10=8.

    Note: because of the rounding up, trailing cells can end up completely
    outside of the image when the grid shape is close to the image size
    (e.g. 8 cells over 10 pixels gives a cell size of 2, so only the first 5
    cells cover any pixel). Such cells are returned as all-zero masks by
    :meth:`get_grid_cell`.

    We require each grid to be bound/initialized with one image size since
    the number of different image shapes is limited per computer vision
    task. For example, we can only have one image size (640, 480) from a CV
    dataset, and we could augment the dataset with a few other image sizes
    (320, 240), (480, 640). Then there are only three image sizes.
    Therefore, it won't be troublesome to have a grid for each image size,
    and we can check the image size during the initialization of the grid.

    By contrast, if the grid is totally "free-form", i.e. we don't
    initialize it with any image size and pass the image size directly into
    the method/operation on the fly, the API would be more complex and the
    image size check would be repeated every time the method is called.

    Args:
        height: the number of grid cells per column, the unit is one grid
            cell.
        width: the number of grid cells per row, the unit is one grid cell.
        image_height: the number of pixels per column in the image.
        image_width: the number of pixels per row in the image.

    Raises:
        ValueError: the image shape or the grid shape is not positive.
        ValueError: the grid shape is not strictly smaller than the image
            shape in both dimensions.
    """

    def __init__(
        self,
        height: int,
        width: int,
        image_height: int,
        image_width: int,
    ):
        if image_height <= 0 or image_width <= 0:
            raise ValueError(
                "both image height and width must be positive "
                f"but the image shape is {(image_height, image_width)}, "
                "please input a valid image shape"
            )
        if height <= 0 or width <= 0:
            raise ValueError(
                f"height({height}) and "
                f"width({width}) both must be larger than 0"
            )
        if height >= image_height or width >= image_width:
            raise ValueError(
                "Grid height and width must be smaller than image height and width"
            )

        self._height = height
        self._width = width

        self._image_height = image_height
        self._image_width = image_width

        # If the resulting size of a grid cell is not an integer, we round it
        # up. The last grid cell per row and column might then reach out of
        # the image, which is why every pixel range is clipped to the image
        # size when it is used. Both ratios are strictly positive after the
        # checks above, so the rounded-up cell sizes are always >= 1.
        self.c_h, self.c_w = (
            math.ceil(image_height / self._height),
            math.ceil(image_width / self._width),
        )

    def _cell_bounds(self, h_idx: int, w_idx: int):
        # Pixel ranges (row_start, row_stop, col_start, col_stop) of a grid
        # cell, with the stop positions clipped to the image size.
        return (
            h_idx * self.c_h,
            min((h_idx + 1) * self.c_h, self._image_height),
            w_idx * self.c_w,
            min((w_idx + 1) * self.c_w, self._image_width),
        )

    def _as_tuple(self):
        # The four values that fully define a grid; shared by ``__repr__``,
        # ``__eq__`` and ``__hash__`` so that they always agree.
        return (
            self._height,
            self._width,
            self._image_height,
            self._image_width,
        )

    def get_grid_cell(self, img_arr: np.ndarray, h_idx: int, w_idx: int):
        """
        Get the array data of a grid cell from the given image array.

        The returned array is a masked version of the original image with
        the shape ``(image_height, image_width)`` the grid was initialized
        with. The array entries that are not within the grid cell are
        zeros. The image array entries that are within the grid cell are
        kept.

        Note: all indices are zero-based and counted from the top left
        corner of the image.

        Args:
            img_arr: image data represented as a numpy array.
            h_idx: the zero-based height(row) index of the grid cell in the
                grid, the unit is one grid cell.
            w_idx: the zero-based width(column) index of the grid cell in
                the grid, the unit is one grid cell.

        Raises:
            ValueError: ``h_idx`` is out of the range specified by
                ``height``.
            ValueError: ``w_idx`` is out of the range specified by
                ``width``.

        Returns:
            numpy array that represents the grid cell.
        """
        if not 0 <= h_idx < self._height:
            raise ValueError(
                f"input parameter h_idx ({h_idx}) is "
                "out of scope of h_idx range"
                f" {(0, self._height)}"
            )
        if not 0 <= w_idx < self._width:
            raise ValueError(
                f"input parameter w_idx ({w_idx}) is "
                "out of scope of w_idx range"
                f" {(0, self._width)}"
            )
        # Start from an all-zero mask and copy over only the pixels that
        # belong to the requested cell; everything else stays zero.
        array = np.zeros((self._image_height, self._image_width))
        top, bottom, left, right = self._cell_bounds(h_idx, w_idx)
        array[top:bottom, left:right] = img_arr[top:bottom, left:right]
        return array

    def get_grid_cell_center(self, h_idx: int, w_idx: int) -> Tuple[int, int]:
        """
        Get the center pixel position of the grid cell at the specific
        height index and width index in the ``Grid``.

        The center position is the midpoint (rounded down) of the cell's
        pixel range in each dimension. Suppose an extreme case that a grid
        cell has a height range of (0, 3) and a width range of (0, 3), the
        grid cell center would be (1, 1). Since the grid cell size is
        usually very large, the offset of the grid cell center is minor.

        Note: all indices are zero-based and counted from the top left
        corner of the grid. The indices are not validated by this method.

        Args:
            h_idx: the height(row) index of the grid cell in the grid,
                the unit is one grid cell.
            w_idx: the width(column) index of the grid cell in the
                grid, the unit is one grid cell.

        Returns:
            A tuple of (y index, x index)
        """
        top, bottom, left, right = self._cell_bounds(h_idx, w_idx)
        return ((top + bottom) // 2, (left + right) // 2)

    @property
    def num_grid_cells(self):
        """Total number of grid cells (``height * width``)."""
        return self._height * self._width

    @property
    def height(self):
        """Number of grid cells per column."""
        return self._height

    @property
    def width(self):
        """Number of grid cells per row."""
        return self._width

    def __repr__(self):
        return str(self._as_tuple())

    def __eq__(self, other):
        # Returning ``NotImplemented`` lets Python fall back to its default
        # handling, so comparing with ``None`` or any non-Grid object
        # evaluates to ``False`` instead of raising ``AttributeError``.
        if not isinstance(other, Grid):
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __hash__(self):
        return hash(self._as_tuple())


# Type variables bound to the base group/link classes: they accept
# `BaseGroup` / `BaseLink` themselves or any of their subclasses.
GroupType = TypeVar("GroupType", bound=BaseGroup)
LinkType = TypeVar("LinkType", bound=BaseLink)
Loading