Skip to content

Commit

Permalink
[YOLOv3] Calculate ignore mask for each example within a batch individually (#7)
Browse files Browse the repository at this point in the history

* calculate ignore mask for each example within a batch individually

* attribution

* multi-gpu support

* vectorize ignore_mask calculation

* typo

* fix iou
  • Loading branch information
ethanyanjiali authored Apr 28, 2020
1 parent 77880ab commit 4365cdf
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 53 deletions.
81 changes: 42 additions & 39 deletions YOLO/tensorflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,50 +28,53 @@ def xywh_to_y1x1y2x2(box):
return y_box


def broadcast_iou(box_a, box_b):
    """
    Calculate the IoU between every box in box_a and every box in box_b
    in a broadcast way (no explicit loops).
    Used this implementation as reference:
    https://github.com/dmlc/gluon-cv/blob/c3dd20d4b1c1ef8b7d381ad2a7d04a68c5fa1221/gluoncv/nn/bbox.py#L206
    inputs:
    box_a: a tensor full of boxes, eg. (B, N, 4), box is in x1y1x2y2
    box_b: another tensor full of boxes, eg. (B, M, 4), box is in x1y1x2y2
    outputs:
    iou: eg. (B, N, M), where iou[b, n, m] is the IoU between
         box_a[b, n] and box_b[b, m]
    """
    # insert singleton axes so that every box in box_a is paired with every
    # box in box_b by the element-wise ops below; there is no need to
    # materialize the full (B, N, M, 4) copies with tf.broadcast_to
    # (B, N, 1, 4)
    box_a = tf.expand_dims(box_a, -2)
    # (B, 1, M, 4)
    box_b = tf.expand_dims(box_b, -3)

    # (B, N, 1, 1) each
    al, at, ar, ab = tf.split(box_a, 4, -1)
    # (B, 1, M, 1) each
    bl, bt, br, bb = tf.split(box_b, 4, -1)

    # corners of the intersection rectangle
    # (B, N, M, 1)
    left = tf.math.maximum(al, bl)
    right = tf.math.minimum(ar, br)
    top = tf.math.maximum(at, bt)
    bot = tf.math.minimum(ab, bb)

    # width / height of the intersection; negative means the boxes do not
    # overlap along that axis, so clamp at 0. There is deliberately no upper
    # bound: capping at 1 would silently under-report the overlap for boxes
    # that are not confined to the normalized [0, 1] range.
    # (B, N, M, 1)
    iw = tf.math.maximum(right - left, 0)
    ih = tf.math.maximum(bot - top, 0)
    i = iw * ih

    # (B, N, 1, 1) and (B, 1, M, 1), broadcast to (B, N, M, 1) in the sum
    area_a = (ar - al) * (ab - at)
    area_b = (br - bl) * (bb - bt)
    union = area_a + area_b - i

    # small epsilon avoids 0 / 0 for pairs of zero-area (eg. padded) boxes
    # (B, N, M)
    iou = tf.squeeze(i / (union + 1e-7), axis=-1)

    return iou


def binary_cross_entropy(logits, labels):
Expand Down
39 changes: 25 additions & 14 deletions YOLO/tensorflow/yolov3.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,27 +434,38 @@ def __call__(self, y_true, y_pred):
obj_loss)

def calc_ignore_mask(self, true_obj, true_box, pred_box, max_true_boxes=100):
    """
    Build the mask that removes predictions from the no-object loss when
    they overlap a ground truth box of the same example too much.

    inputs:
    true_obj: objectness of the ground truth, eg. (None, 13, 13, 3, 1).
              Not used by the calculation; kept so callers do not change.
    true_box: ground truth boxes, eg. (None, 13, 13, 3, 4)
    pred_box: predicted boxes, eg. (None, 13, 13, 3, 4)
    max_true_boxes: maximum number of ground truth boxes per example that
                    are compared against the predictions
    outputs:
    ignore_mask: eg. (None, 13, 13, 3, 1), float32.
                 1 => keep the prediction, 0 => ignore it
    """
    # YOLOv3:
    # "If the bounding box prior is not the best but does overlap a ground
    # truth object by more than some threshold we ignore the prediction,
    # following [17]. We use the threshold of .5."

    # (None, 13, 13, 3, 4)
    pred_box_shape = tf.shape(pred_box)
    batch_size = tf.shape(true_box)[0]
    # (None, 507, 4)
    true_box = tf.reshape(true_box, [batch_size, -1, 4])

    # rank the boxes so that non-zero boxes come first. The ranking key is
    # computed per box and whole rows are gathered: sorting the (None, 507, 4)
    # tensor directly along axis 1 would sort every coordinate channel
    # independently and mix coordinates that belong to different boxes.
    # (None, 507)
    box_key = tf.reduce_max(tf.abs(true_box), axis=-1)
    order = tf.argsort(box_key, axis=1, direction="DESCENDING")
    # only use at most max_true_boxes boxes per example to calculate IoU,
    # otherwise GPU memory consumption would explode for a matrix like
    # (16, 52*52*3, 52*52*3, 4)
    # (None, 100, 4)
    true_box = tf.gather(true_box, order[:, :max_true_boxes], batch_dims=1)

    # (None, 507, 4)
    pred_box = tf.reshape(pred_box, [pred_box_shape[0], -1, 4])

    # https://github.com/dmlc/gluon-cv/blob/06bb7ec2044cdf3f433721be9362ab84b02c5a90/gluoncv/model_zoo/yolo/yolo_target.py#L198
    # IoU of every prediction against every kept ground truth box
    # (None, 507, 100)
    iou = broadcast_iou(pred_box, true_box)
    # best overlap of each prediction with any ground truth box
    # (None, 507)
    best_iou = tf.reduce_max(iou, axis=-1)
    # back to the grid layout of pred_box without the coordinate axis
    # (None, 13, 13, 3)
    best_iou = tf.reshape(best_iou, pred_box_shape[:-1])
    # ignore_mask = 1 => don't ignore
    # ignore_mask = 0 => should ignore
    ignore_mask = tf.cast(best_iou < self.ignore_thresh, tf.float32)
    # (None, 13, 13, 3, 1)
    ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
    return ignore_mask

Expand Down

0 comments on commit 4365cdf

Please sign in to comment.