[YOLOv3] Calculate ignore mask for each example within a batch individually (#7)

* calculate ignore mask for each example within a batch individually
* attribution
* multi-gpu support
* vectorize ignore_mask calculation
* typo
* fix iou
ethanyanjiali · Apr 28, 2020 · 4365cdf · 4365cdf
1 parent 77880ab
commit 4365cdf
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 53 deletions.
diff --git a/YOLO/tensorflow/utils.py b/YOLO/tensorflow/utils.py
@@ -28,50 +28,53 @@ def xywh_to_y1x1y2x2(box):
     return y_box
 
 
-def broadcast_iou(box1, box2):
+def broadcast_iou(box_a, box_b):
     """
-    calculate iou between one box1iction box and multiple box2 box in a broadcast way
+    calculate iou between box_a and multiple box_b in a broadcast way.
+    Used this implementation as reference: 
+    https://github.com/dmlc/gluon-cv/blob/c3dd20d4b1c1ef8b7d381ad2a7d04a68c5fa1221/gluoncv/nn/bbox.py#L206
 
     inputs:
-    box1: a tensor full of boxes, eg. (3, 4)
-    box2: another tensor full of boxes, eg. (3, 4)
+    box_a: a tensor full of boxes, eg. (B, N, 4), box is in x1y1x2y2
+    box_b: another tensor full of boxes, eg. (B, M, 4)
     """
 
-    # assert one dimension in order to mix match box1 and box2
-    # eg:
-    # box1 -> (3, 1, 4)
-    # box2 -> (1, 3, 4)
-    box1 = tf.expand_dims(box1, -2)
-    box2 = tf.expand_dims(box2, 0)
-
-    # derive the union of shape to broadcast
-    # eg. new_shape -> (3, 3, 4)
-    new_shape = tf.broadcast_dynamic_shape(tf.shape(box1), tf.shape(box2))
-
-    # broadcast (duplicate) box1 and box2 so that
-    # each box2 has one box1 matched correspondingly
-    # box1: (3, 3, 4)
-    # box2: (3, 3, 4)
-    box1 = tf.broadcast_to(box1, new_shape)
-    box2 = tf.broadcast_to(box2, new_shape)
-
-    # minimum xmax - maximum xmin is the width of intersection.
-    # but has to be greater or equal to 0
-    interserction_w = tf.maximum(
-        tf.minimum(box1[..., 2], box2[..., 2]) - tf.maximum(
-            box1[..., 0], box2[..., 0]), 0)
-    # minimum ymax - maximum ymin is the height of intersection.
-    # but has to be greater or equal to 0
-    interserction_h = tf.maximum(
-        tf.minimum(box1[..., 3], box2[..., 3]) - tf.maximum(
-            box1[..., 1], box2[..., 1]), 0)
-    intersection_area = interserction_w * interserction_h
-    box1_area = (box1[..., 2] - box1[..., 0]) * \
-        (box1[..., 3] - box1[..., 1])
-    box2_area = (box2[..., 2] - box2[..., 0]) * \
-        (box2[..., 3] - box2[..., 1])
-    # intersection over union
-    return intersection_area / (box1_area + box2_area - intersection_area)
+    # (B, N, 1, 4)
+    box_a = tf.expand_dims(box_a, -2)
+    # (B, 1, M, 4)
+    box_b = tf.expand_dims(box_b, -3)
+    # (B, N, M, 4)
+    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_a), tf.shape(box_b))
+
+    # (B, N, M, 4)
+    # (B, N, M, 4)
+    box_a = tf.broadcast_to(box_a, new_shape)
+    box_b = tf.broadcast_to(box_b, new_shape)
+
+    # (B, N, M, 1)
+    al, at, ar, ab = tf.split(box_a, 4, -1)
+    bl, bt, br, bb = tf.split(box_b, 4, -1)
+
+    # (B, N, M, 1)
+    left = tf.math.maximum(al, bl)
+    right = tf.math.minimum(ar, br)
+    top = tf.math.maximum(at, bt)
+    bot = tf.math.minimum(ab, bb)
+
+    # (B, N, M, 1)
+    iw = tf.clip_by_value(right - left, 0, 1)
+    ih = tf.clip_by_value(bot - top, 0, 1)
+    i = iw * ih
+
+    # (B, N, M, 1)
+    area_a = (ar - al) * (ab - at)
+    area_b = (br - bl) * (bb - bt)
+    union = area_a + area_b - i
+
+    # (B, N, M)
+    iou = tf.squeeze(i / (union + 1e-7), axis=-1)
+
+    return iou
 
 
 def binary_cross_entropy(logits, labels):

diff --git a/YOLO/tensorflow/yolov3.py b/YOLO/tensorflow/yolov3.py
@@ -434,27 +434,38 @@ def __call__(self, y_true, y_pred):
                                                            obj_loss)
 
     def calc_ignore_mask(self, true_obj, true_box, pred_box):
-        # eg. true_obj (1, 13, 13, 3, 1)
-        true_obj = tf.squeeze(true_obj, axis=-1)
-        # eg. true_obj (1, 13, 13, 3)
-        # eg. true_box (1, 13, 13, 3, 4)
-        # eg. pred_box (1, 13, 13, 2, 4)
-        # eg. true_box_filtered (2, 4) it was (3, 4) but one element got filtered out
-        true_box_filtered = tf.boolean_mask(true_box, tf.cast(
-            true_obj, tf.bool))
-
         # YOLOv3:
         # "If the bounding box prior is not the best but does overlap a ground
         # truth object by more than some threshold we ignore the prediction,
         # following [17]. We use the threshold of .5."
         # calculate the iou for each pair of pred bbox and true bbox, then find the best among them
-        # eg. best_iou (1, 1, 1, 2)
-        best_iou = tf.reduce_max(
-            broadcast_iou(pred_box, true_box_filtered), axis=-1)
 
-        # if best iou is higher than threshold, set the box to be ignored for noobj loss
-        # eg. ignore_mask(1, 1, 1, 2)
+        # (None, 13, 13, 3, 4)
+        true_box_shape = tf.shape(true_box)
+        # (None, 13, 13, 3, 4)
+        pred_box_shape = tf.shape(pred_box)
+        # (None, 507, 4)
+        true_box = tf.reshape(true_box, [true_box_shape[0], -1, 4])
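+        # sort true_box to have non-zero boxes rank first (NOTE(review): tf.sort along axis=1 orders each of the 4 coordinate columns independently, so coordinates of different boxes may get mixed -- confirm)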
+        true_box = tf.sort(true_box, axis=1, direction="DESCENDING")
+        # (None, 100, 4)
+        # only use maximum 100 boxes per groundtruth to calculate IOU, otherwise
+        # GPU memory consumption would explode for a matrix like (16, 52*52*3, 52*52*3, 4)
+        true_box = true_box[:, 0:100, :]
+        # (None, 507, 4)
+        pred_box = tf.reshape(pred_box, [pred_box_shape[0], -1, 4])
+
+        # https://github.com/dmlc/gluon-cv/blob/06bb7ec2044cdf3f433721be9362ab84b02c5a90/gluoncv/model_zoo/yolo/yolo_target.py#L198
+        # (None, 507, 100)
+        iou = broadcast_iou(pred_box, true_box)
+        # (None, 507)
+        best_iou = tf.reduce_max(iou, axis=-1)
+        # (None, 13, 13, 3)
+        best_iou = tf.reshape(best_iou, [pred_box_shape[0], pred_box_shape[1], pred_box_shape[2], pred_box_shape[3]])
+        # ignore_mask = 1 => don't ignore
+        # ignore_mask = 0 => should ignore
         ignore_mask = tf.cast(best_iou < self.ignore_thresh, tf.float32)
+        # (None, 13, 13, 3, 1)
         ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
         return ignore_mask