Commit 61d6173d authored by Pengchong Jin, committed by A. Unique TensorFlower

Clean up the mask utils.

PiperOrigin-RevId: 282825837
parent bd73fdfe
@@ -25,103 +25,13 @@ from absl import logging
 import numpy as np
 from PIL import Image
 from pycocotools import coco
-from pycocotools import mask as mask_utils
+from pycocotools import mask as mask_api
 import six
 import tensorflow.compat.v2 as tf
 from official.vision.detection.dataloader import tf_example_decoder
 from official.vision.detection.utils import box_utils
+from official.vision.detection.utils import mask_utils
 
-import cv2
-
-
-def generate_segmentation_from_masks(masks,
-                                     detected_boxes,
-                                     image_height,
-                                     image_width,
-                                     is_image_mask=False):
-  """Generates segmentation result from instance masks.
-
-  Args:
-    masks: a numpy array of shape [N, mask_height, mask_width] representing the
-      instance masks w.r.t. the `detected_boxes`.
-    detected_boxes: a numpy array of shape [N, 4] representing the reference
-      bounding boxes.
-    image_height: an integer representing the height of the image.
-    image_width: an integer representing the width of the image.
-    is_image_mask: bool. True: input masks are whole-image masks. False: input
-      masks are bounding-box level masks.
-
-  Returns:
-    segms: a numpy array of shape [N, image_height, image_width] representing
-      the instance masks *pasted* on the image canvas.
-  """
-
-  def expand_boxes(boxes, scale):
-    """Expands an array of boxes by a given scale."""
-    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
-    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
-    # whereas `boxes` here is in [x1, y1, w, h] form.
-    w_half = boxes[:, 2] * .5
-    h_half = boxes[:, 3] * .5
-    x_c = boxes[:, 0] + w_half
-    y_c = boxes[:, 1] + h_half
-
-    w_half *= scale
-    h_half *= scale
-
-    boxes_exp = np.zeros(boxes.shape)
-    boxes_exp[:, 0] = x_c - w_half
-    boxes_exp[:, 2] = x_c + w_half
-    boxes_exp[:, 1] = y_c - h_half
-    boxes_exp[:, 3] = y_c + h_half
-
-    return boxes_exp
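Note: the removed `expand_boxes` helper takes boxes in `[x1, y1, w, h]` form but returns them in `[x1, y1, x2, y2]` form, scaled about the unchanged box center. A minimal worked sketch of that computation (the numbers are made up for illustration):

```python
import numpy as np

# One hypothetical box in [x1, y1, w, h] form; its center is (12, 24).
boxes = np.array([[10.0, 20.0, 4.0, 8.0]])
scale = 1.5

w_half = boxes[:, 2] * 0.5 * scale     # 3.0
h_half = boxes[:, 3] * 0.5 * scale     # 6.0
x_c = boxes[:, 0] + boxes[:, 2] * 0.5  # 12.0
y_c = boxes[:, 1] + boxes[:, 3] * 0.5  # 24.0

# The result is in [x1, y1, x2, y2] form, expanded about the same center.
expanded = np.stack(
    [x_c - w_half, y_c - h_half, x_c + w_half, y_c + h_half], axis=1)
print(expanded)  # [[ 9. 18. 15. 30.]]
```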
-
-  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812  # pylint: disable=line-too-long
-  # To work around an issue with cv2.resize (it seems to automatically pad
-  # with repeated border values), we manually zero-pad the masks by 1 pixel
-  # prior to resizing back to the original image resolution. This prevents
-  # "top hat" artifacts. We therefore need to expand the reference boxes by an
-  # appropriate factor.
-  _, mask_height, mask_width = masks.shape
-  scale = max((mask_width + 2.0) / mask_width,
-              (mask_height + 2.0) / mask_height)
-
-  ref_boxes = expand_boxes(detected_boxes, scale)
-  ref_boxes = ref_boxes.astype(np.int32)
-  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
-  segms = []
-  for mask_ind, mask in enumerate(masks):
-    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
-    if is_image_mask:
-      # Process whole-image masks.
-      im_mask[:, :] = mask[:, :]
-    else:
-      # Process mask inside bounding boxes.
-      padded_mask[1:-1, 1:-1] = mask[:, :]
-      ref_box = ref_boxes[mask_ind, :]
-      w = ref_box[2] - ref_box[0] + 1
-      h = ref_box[3] - ref_box[1] + 1
-      w = np.maximum(w, 1)
-      h = np.maximum(h, 1)
-      mask = cv2.resize(padded_mask, (w, h))
-      mask = np.array(mask > 0.5, dtype=np.uint8)
-
-      x_0 = max(ref_box[0], 0)
-      x_1 = min(ref_box[2] + 1, image_width)
-      y_0 = max(ref_box[1], 0)
-      y_1 = min(ref_box[3] + 1, image_height)
-      im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
-                                       (x_0 - ref_box[0]):(x_1 - ref_box[0])]
-    segms.append(im_mask)
-
-  segms = np.array(segms)
-  assert masks.shape[0] == segms.shape[0]
-  return segms
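Two details of this (now relocated) loop are easy to miss: the 1-pixel zero padding before `cv2.resize`, which prevents the border bleeding that causes the "top hat" artifacts, and the symmetric index offsets that clip the resized mask to the image canvas. A self-contained sketch of the clipping arithmetic, using hypothetical sizes and an all-ones stand-in for the resized mask:

```python
import numpy as np

image_height, image_width = 100, 100
ref_box = np.array([-5, 90, 20, 120])  # [x1, y1, x2, y2], partly off-canvas

w = max(ref_box[2] - ref_box[0] + 1, 1)  # 26
h = max(ref_box[3] - ref_box[1] + 1, 1)  # 31
mask = np.ones((h, w), dtype=np.uint8)   # stand-in for the resized mask

im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
x_0, x_1 = max(ref_box[0], 0), min(ref_box[2] + 1, image_width)   # 0, 21
y_0, y_1 = max(ref_box[1], 0), min(ref_box[3] + 1, image_height)  # 90, 100

# The same offsets index both sides, so the pasted region stays aligned
# even when the box extends past the canvas.
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                 (x_0 - ref_box[0]):(x_1 - ref_box[0])]
assert im_mask.sum() == (y_1 - y_0) * (x_1 - x_0)  # 10 * 21 visible pixels
```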
@@ -189,7 +99,7 @@ class COCOWrapper(coco.COCO):
          ann['segmentation'] = [
              [x1, y1, x1, y2, x2, y2, x2, y1]]
        elif self._eval_type == 'mask':
-         ann['area'] = mask_utils.area(ann['segmentation'])
+         ann['area'] = mask_api.area(ann['segmentation'])
     res.dataset['annotations'] = copy.deepcopy(predictions)
     res.createIndex()
@@ -237,17 +147,15 @@ def convert_predictions_to_coco_annotations(predictions):
     for j in range(batch_size):
       if 'detection_masks' in predictions:
-        image_masks = generate_segmentation_from_masks(
+        image_masks = mask_utils.paste_instance_masks(
             predictions['detection_masks'][i][j],
             mask_boxes[i][j],
             int(predictions['image_info'][i][j, 0, 0]),
-            int(predictions['image_info'][i][j, 0, 1]),
-            is_image_mask=False)
+            int(predictions['image_info'][i][j, 0, 1]))
         binary_masks = (image_masks > 0.0).astype(np.uint8)
         encoded_masks = [
-            mask_utils.encode(np.asfortranarray(binary_mask))
-            for binary_mask in list(binary_masks)
-        ]
+            mask_api.encode(np.asfortranarray(binary_mask))
+            for binary_mask in list(binary_masks)]
       for k in range(max_num_detections):
         ann = {}
         ann['image_id'] = predictions['source_id'][i][j]
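Here `mask_api` is `pycocotools.mask`: `encode` requires a Fortran-ordered (column-major) uint8 array and returns a COCO run-length encoding, and `area` counts the foreground pixels of that RLE. A quick sketch on a toy mask:

```python
import numpy as np
from pycocotools import mask as mask_api

binary_mask = np.zeros((4, 6), dtype=np.uint8)
binary_mask[1:3, 2:5] = 1  # a 2x3 foreground patch

# encode() insists on column-major memory layout, hence np.asfortranarray.
rle = mask_api.encode(np.asfortranarray(binary_mask))
print(rle['size'])         # [4, 6]
print(mask_api.area(rle))  # 6, the number of foreground pixels

# The encoding round-trips back to the original mask.
assert (mask_api.decode(rle) == binary_mask).all()
```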
@@ -334,10 +242,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
           np_mask = (
               np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
           np_mask[np_mask > 0] = 255
-          encoded_mask = mask_utils.encode(np.asfortranarray(np_mask))
+          encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
           ann['segmentation'] = encoded_mask
           if 'areas' not in groundtruths:
-            ann['area'] = mask_utils.area(encoded_mask)
+            ann['area'] = mask_api.area(encoded_mask)
         gt_annotations.append(ann)
   for i, ann in enumerate(gt_annotations):
...
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,8 +21,24 @@ import numpy as np
 import cv2
 
 
-def segm_results(masks, detections, image_height, image_width):
-  """Generates segmentation results."""
+def paste_instance_masks(masks,
+                         detected_boxes,
+                         image_height,
+                         image_width):
+  """Paste instance masks to generate the image segmentation results.
+
+  Args:
+    masks: a numpy array of shape [N, mask_height, mask_width] representing the
+      instance masks w.r.t. the `detected_boxes`.
+    detected_boxes: a numpy array of shape [N, 4] representing the reference
+      bounding boxes.
+    image_height: an integer representing the height of the image.
+    image_width: an integer representing the width of the image.
+
+  Returns:
+    segms: a numpy array of shape [N, image_height, image_width] representing
+      the instance masks *pasted* on the image canvas.
+  """
 
   def expand_boxes(boxes, scale):
     """Expands an array of boxes by a given scale."""
@@ -51,14 +67,17 @@ def segm_results(masks, detections, image_height, image_width):
   # prior to resizing back to the original image resolution. This prevents
   # "top hat" artifacts. We therefore need to expand the reference boxes by an
   # appropriate factor.
-  mask_size = masks.shape[2]
-  scale = (mask_size + 2.0) / mask_size
+  _, mask_height, mask_width = masks.shape
+  scale = max((mask_width + 2.0) / mask_width,
+              (mask_height + 2.0) / mask_height)
 
-  ref_boxes = expand_boxes(detections[:, 1:5], scale)
+  ref_boxes = expand_boxes(detected_boxes, scale)
   ref_boxes = ref_boxes.astype(np.int32)
-  padded_mask = np.zeros((mask_size + 2, mask_size + 2), dtype=np.float32)
+  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
 
   segms = []
   for mask_ind, mask in enumerate(masks):
+    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
+
+    # Process mask inside bounding boxes.
     padded_mask[1:-1, 1:-1] = mask[:, :]
 
     ref_box = ref_boxes[mask_ind, :]
@@ -69,7 +88,6 @@ def segm_results(masks, detections, image_height, image_width):
     mask = cv2.resize(padded_mask, (w, h))
     mask = np.array(mask > 0.5, dtype=np.uint8)
 
-    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
     x_0 = max(ref_box[0], 0)
     x_1 = min(ref_box[2] + 1, image_width)
...
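With the helper moved into `mask_utils`, pasting reduces to a single call at each call site. A hedged usage sketch; the shapes and box values below are made up, assuming soft `[0, 1]` box-level masks and boxes in `[x1, y1, w, h]` form on the image canvas:

```python
import numpy as np
from official.vision.detection.utils import mask_utils

num_detections, mask_height, mask_width = 3, 28, 28
image_height, image_width = 640, 480

# Hypothetical network outputs.
masks = np.random.uniform(
    size=(num_detections, mask_height, mask_width)).astype(np.float32)
detected_boxes = np.array([[50., 60., 100., 80.],
                           [200., 300., 64., 64.],
                           [400., 500., 120., 90.]])  # third box gets clipped

segms = mask_utils.paste_instance_masks(
    masks, detected_boxes, image_height, image_width)
assert segms.shape == (num_detections, image_height, image_width)
assert segms.dtype == np.uint8  # binary instance masks on the full canvas
```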