Internal change

PiperOrigin-RevId: 283817737

Internal change
PiperOrigin-RevId: 283817737
5b25005c · Pengchong Jin · A. Unique TensorFlower · f3a61a49 · 5b25005c · 5b25005c
Commit 5b25005c authored Dec 04, 2019 by Pengchong Jin Committed by A. Unique TensorFlower Dec 04, 2019
5 changed files
--- a/official/vision/detection/dataloader/maskrcnn_parser.py
+++ b/official/vision/detection/dataloader/maskrcnn_parser.py
@@ -231,7 +231,7 @@ class Parser(object):
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = input_utils.resize_and_crop_boxes(
-        boxes, image_scale, (image_height, image_width), offset)
+        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
@@ -239,10 +239,10 @@ class Parser(object):
    classes = tf.gather(classes, indices)
    if self._include_mask:
      masks = tf.gather(masks, indices)
-      cropped_boxes = boxes + tf.cast(
-          tf.tile(tf.expand_dims(offset, axis=0), [1, 2]), dtype=tf.float32)
-      cropped_boxes = box_utils.normalize_boxes(
-          cropped_boxes, image_info[1, :])
+      # Transfer boxes to the original image space and do normalization.
+      cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
+      cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
+      cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape)
      num_masks = tf.shape(masks)[0]
      masks = tf.image.crop_and_resize(
          tf.expand_dims(masks, axis=-1),

--- a/official/vision/detection/dataloader/retinanet_parser.py
+++ b/official/vision/detection/dataloader/retinanet_parser.py
@@ -249,7 +249,7 @@ class Parser(object):
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = input_utils.resize_and_crop_boxes(
-        boxes, image_scale, (image_height, image_width), offset)
+        boxes, image_scale, image_info[1, :], offset)
    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
@@ -309,7 +309,7 @@ class Parser(object):
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = input_utils.resize_and_crop_boxes(
-        boxes, image_scale, (image_height, image_width), offset)
+        boxes, image_scale, image_info[1, :], offset)
    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
@@ -412,7 +412,7 @@ class Parser(object):
      image_scale = image_info[2, :]
      offset = image_info[3, :]
      boxes = input_utils.resize_and_crop_boxes(
-          boxes, image_scale, (image_height, image_width), offset)
+          boxes, image_scale, image_info[1, :], offset)
      # Filters out ground truth boxes that are all zeros.
      indices = box_utils.get_non_empty_box_indices(boxes)
      boxes = tf.gather(boxes, indices)

--- a/official/vision/detection/dataloader/shapemask_parser.py
+++ b/official/vision/detection/dataloader/shapemask_parser.py
@@ -265,7 +265,7 @@ class Parser(object):

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(
-        boxes, image_scale, self._output_size, offset)
+        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
@@ -422,7 +422,7 @@ class Parser(object):

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(
-        boxes, image_scale, self._output_size, offset)
+        boxes, image_scale, image_info[1, :], offset)
    masks = input_utils.resize_and_crop_masks(
        tf.expand_dims(masks, axis=-1), image_scale, self._output_size, offset)


--- a/official/vision/detection/utils/input_utils.py
+++ b/official/vision/detection/utils/input_utils.py
@@ -138,9 +138,9 @@ def resize_and_crop_image(image,
      equals to `output_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
-      [[original_height, original_width], [scaled_height, scaled_width],
-       [y_scale, x_scale], [y_offset, x_offset]], where [scaled_height,
-      scaled_width] is the actual scaled image size, and [y_scale, x_scale] is
+      [[original_height, original_width], [desired_height, desired_width],
+       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
+      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
@@ -189,9 +189,11 @@ def resize_and_crop_image(image,
    output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                                padded_size[0], padded_size[1])

-    image_info = tf.stack(
-        [image_size, scaled_size, image_scale,
-         tf.cast(offset, tf.float32)])
+    image_info = tf.stack([
+        image_size,
+        tf.cast(desired_size, dtype=tf.float32),
+        image_scale,
+        tf.cast(offset, tf.float32)])
    return output_image, image_info


@@ -236,9 +238,9 @@ def resize_and_crop_image_v2(image,
      equals to `output_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
-      [[original_height, original_width], [scaled_height, scaled_width],
-       [y_scale, x_scale], [y_offset, x_offset]], where [scaled_height,
-      scaled_width] is the actual scaled image size, and [y_scale, x_scale] is
+      [[original_height, original_width], [desired_height, desired_width],
+       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
+      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
@@ -295,7 +297,7 @@ def resize_and_crop_image_v2(image,

    image_info = tf.stack([
        image_size,
-        scaled_size,
+        tf.cast(desired_size, dtype=tf.float32),
        image_scale,
        tf.cast(offset, tf.float32)])
    return output_image, image_info

--- a/official/vision/detection/utils/mask_utils.py
+++ b/official/vision/detection/utils/mask_utils.py
@@ -89,10 +89,10 @@ def paste_instance_masks(masks,
    mask = cv2.resize(padded_mask, (w, h))
    mask = np.array(mask > 0.5, dtype=np.uint8)

-    x_0 = max(ref_box[0], 0)
-    x_1 = min(ref_box[2] + 1, image_width)
-    y_0 = max(ref_box[1], 0)
-    y_1 = min(ref_box[3] + 1, image_height)
+    x_0 = min(max(ref_box[0], 0), image_width)
+    x_1 = min(max(ref_box[2] + 1, 0), image_width)
+    y_0 = min(max(ref_box[1], 0), image_height)
+    y_1 = min(max(ref_box[3] + 1, 0), image_height)

    im_mask[y_0:y_1, x_0:x_1] = mask[
        (y_0 - ref_box[1]):(y_1 - ref_box[1]),