"vscode:/vscode.git/clone" did not exist on "903d3b559305ce856f1081aebb00f6bd7721e2a0"
Commit 875f3761 authored by Vishnu Banna's avatar Vishnu Banna
Browse files

pad scale clip preds eval, AP tiny bump up

parent 8977d435
......@@ -343,20 +343,17 @@ class Parser(parser.Parser):
# Update the labels dictionary.
if not is_training:
output_size = tf.cast([height, width], tf.float32)
boxes = bbox_ops.denormalize_boxes(gt_boxes, output_size)
gt_area = (boxes[..., 2] - boxes[..., 0]) * (boxes[..., 3] - boxes[..., 1])
# Sets up groundtruth data for evaluation.
groundtruths = {
'source_id': labels['source_id'],
'height': height,
'width': width,
'num_detections': tf.shape(gt_boxes)[0],
'height': data["height"],
'width': data["width"],
'num_detections': tf.shape(data["groundtruth_boxes"])[0],
'image_info': info,
'boxes': gt_boxes,
'classes': gt_classes,
'areas': gt_area,
'boxes': bbox_ops.denormalize_boxes(data["groundtruth_boxes"],
tf.cast([data["height"], data["width"]], gt_boxes.dtype)),
'classes': data["groundtruth_classes"],
'areas': data["groundtruth_area"],
'is_crowds':
tf.cast(tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
}
......
......@@ -482,11 +482,15 @@ def resize_and_jitter_image(image,
image_ = tf.pad(
cropped_image, [[pad[0], pad[2]], [pad[1], pad[3]], [0, 0]],
constant_values=PAD_VALUE)
# Pad and scale info
isize = tf.cast(tf.shape(image_)[:2], dtype=tf.float32)
osize = tf.cast((desired_size[0], desired_size[1]), dtype=tf.float32)
pad_info = tf.stack([
tf.cast(tf.shape(cropped_image)[:2], tf.float32),
tf.cast(tf.shape(image_)[:2], dtype=tf.float32),
tf.ones_like(original_dims, dtype=tf.float32),
(-tf.cast(pad[:2], tf.float32))
osize,
osize/isize,
(-tf.cast(pad[:2], tf.float32)*osize/isize)
])
infos.append(pad_info)
......
......@@ -255,16 +255,22 @@ class YoloTask(base_task.Task):
logs.update({m.name: m.result()})
return logs
def _reorg_boxes(self, boxes, num_detections, image):
def _reorg_boxes(self, boxes, info, num_detections):
"""Scale and Clean boxes prior to Evaluation."""
# Build a prediction mask to take only the number of detections
mask = tf.sequence_mask(num_detections, maxlen=tf.shape(boxes)[1])
mask = tf.cast(tf.expand_dims(mask, axis=-1), boxes.dtype)
mask = tf.cast(tf.expand_dims(mask, axis = -1), boxes.dtype)
# Denormalize the boxes by the shape of the image
inshape = tf.cast(preprocessing_ops.get_image_shape(image), boxes.dtype)
inshape = tf.expand_dims(info[:, 1, :], axis = 1)
ogshape = tf.expand_dims(info[:, 0, :], axis = 1)
scale = tf.expand_dims(info[:, 2, :], axis = 1)
offset = tf.expand_dims(info[:, 3, :], axis = 1)
boxes = box_ops.denormalize_boxes(boxes, inshape)
boxes = box_ops.clip_boxes(boxes, inshape)
boxes += tf.tile(offset, [1, 1, 2])
boxes /= tf.tile(scale, [1, 1, 2])
boxes = box_ops.clip_boxes(boxes, ogshape)
# Mask the boxes for usage
boxes *= mask
......@@ -292,10 +298,8 @@ class YoloTask(base_task.Task):
logs = {self.loss: metric_loss}
# Reorganize and rescale the boxes
boxes = self._reorg_boxes(y_pred['bbox'], y_pred['num_detections'], image)
label['groundtruths']['boxes'] = self._reorg_boxes(
label['groundtruths']['boxes'], label['groundtruths']['num_detections'],
image)
info = label['groundtruths']['image_info']
boxes = self._reorg_boxes(y_pred['bbox'], info, y_pred["num_detections"])
# Build the input for the coco evaluation metric
coco_model_outputs = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment