Internal change

PiperOrigin-RevId: 276344518

Internal change
PiperOrigin-RevId: 276344518
6b0146f5 · Yeqing Li · A. Unique TensorFlower · 7d1cfc1e · 6b0146f5 · 6b0146f5
Commit 6b0146f5, authored Oct 23, 2019 by Yeqing Li; committed by A. Unique TensorFlower on Oct 23, 2019
2 changed files
--- a/official/vision/detection/evaluation/coco_evaluator.py
+++ b/official/vision/detection/evaluation/coco_evaluator.py
@@ -33,17 +33,19 @@ from __future__ import print_function
 import atexit
 import tempfile
 import numpy as np
+from absl import logging
 from pycocotools import cocoeval
 import six
 import tensorflow.compat.v2 as tf
 from official.vision.detection.evaluation import coco_utils
+from official.vision.detection.utils import class_utils
 class COCOEvaluator(object):
  """COCO evaluation metric class."""
-  def __init__(self, annotation_file, include_mask):
+  def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True):
    """Constructs COCO evaluation class.
    The class provides the interface to metrics_fn in TPUEstimator. The
@@ -57,6 +59,8 @@ class COCOEvaluator(object):
        from the dataloader.
      include_mask: a boolean to indicate whether or not to include the mask
        eval.
+      need_rescale_bboxes: If true, bboxes in `predictions` will be rescaled
+        back to absolute values (`image_info` is needed in this case).
    """
    if annotation_file:
      if annotation_file.startswith('gs://'):
@@ -72,13 +76,20 @@ class COCOEvaluator(object):
          annotation_file=local_val_json)
    self._annotation_file = annotation_file
    self._include_mask = include_mask
-    self._metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
+    self._metric_names = [
-                          'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
+        'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10',
+        'ARmax100', 'ARs', 'ARm', 'ARl'
+    ]
    self._required_prediction_fields = [
-        'source_id', 'image_info', 'num_detections', 'detection_classes',
+        'source_id', 'num_detections', 'detection_classes', 'detection_scores',
-        'detection_scores', 'detection_boxes']
+        'detection_boxes'
+    ]
+    self._need_rescale_bboxes = need_rescale_bboxes
+    if self._need_rescale_bboxes:
+      self._required_prediction_fields.append('image_info')
    self._required_groundtruth_fields = [
-        'source_id', 'height', 'width', 'classes', 'boxes']
+        'source_id', 'height', 'width', 'classes', 'boxes'
+    ]
    if self._include_mask:
      mask_metric_names = ['mask_' + x for x in self._metric_names]
      self._metric_names.extend(mask_metric_names)
@@ -101,12 +112,14 @@ class COCOEvaluator(object):
        coco-style evaluation metrics (box and mask).
    """
    if not self._annotation_file:
+      logging.info('Thre is no annotation_file in COCOEvaluator.')
      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
          self._groundtruths)
      coco_gt = coco_utils.COCOWrapper(
          eval_type=('mask' if self._include_mask else 'box'),
          gt_dataset=gt_dataset)
    else:
+      logging.info('Using annotation file: %s', self._annotation_file)
      coco_gt = self._coco_gt
    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
        self._predictions)
@@ -144,7 +157,12 @@ class COCOEvaluator(object):
  def _process_predictions(self, predictions):
    image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
    predictions['detection_boxes'] = (
-        predictions['detection_boxes'] / image_scale)
+        predictions['detection_boxes'].astype(np.float32))
+    predictions['detection_boxes'] /= image_scale
+    if 'detection_outer_boxes' in predictions:
+      predictions['detection_outer_boxes'] = (
+          predictions['detection_outer_boxes'].astype(np.float32))
+      predictions['detection_outer_boxes'] /= image_scale
  def update(self, predictions, groundtruths=None):
    """Update and aggregate detection results and groundtruth data.
@@ -154,8 +172,10 @@ class COCOEvaluator(object):
        See different parsers under `../dataloader` for more details.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
-          - image_info: a numpy array of float of shape [batch_size, 4, 2].
+          - image_info [if `need_rescale_bboxes` is True]: a numpy array of
-          - num_detections: a numpy array of int of shape [batch_size].
+            float of shape [batch_size, 4, 2].
+          - num_detections: a numpy array of
+            int of shape [batch_size].
          - detection_boxes: a numpy array of float of shape [batch_size, K, 4].
          - detection_classes: a numpy array of int of shape [batch_size, K].
          - detection_scores: a numpy array of float of shape [batch_size, K].
@@ -186,8 +206,9 @@ class COCOEvaluator(object):
    """
    for k in self._required_prediction_fields:
      if k not in predictions:
-        raise ValueError('Missing the required key `{}` in predictions!'
+        raise ValueError(
-                         .format(k))
+            'Missing the required key `{}` in predictions!'.format(k))
+    if self._need_rescale_bboxes:
      self._process_predictions(predictions)
    for k, v in six.iteritems(predictions):
      if k not in self._predictions:
@@ -199,10 +220,95 @@ class COCOEvaluator(object):
      assert groundtruths
      for k in self._required_groundtruth_fields:
        if k not in groundtruths:
-          raise ValueError('Missing the required key `{}` in groundtruths!'
+          raise ValueError(
-                           .format(k))
+              'Missing the required key `{}` in groundtruths!'.format(k))
      for k, v in six.iteritems(groundtruths):
        if k not in self._groundtruths:
          self._groundtruths[k] = [v]
        else:
          self._groundtruths[k].append(v)
+class ShapeMaskCOCOEvaluator(COCOEvaluator):
+  """COCO evaluation metric class for ShapeMask."""
+  def __init__(self, mask_eval_class, **kwargs):
+    """Constructs COCO evaluation class.
+    The class provides the interface to metrics_fn in TPUEstimator. The
+    _update_op() takes detections from each image and pushes them to
+    self.detections. The _evaluate() loads a JSON file in COCO annotation format
+    as the groundtruths and runs COCO evaluation.
+    Args:
+      mask_eval_class: the set of classes for mask evaluation.
+      **kwargs: other keyword arguments passed to the parent class initializer.
+    """
+    super(ShapeMaskCOCOEvaluator, self).__init__(**kwargs)
+    self._mask_eval_class = mask_eval_class
+    self._eval_categories = class_utils.coco_split_class_ids(mask_eval_class)
+    if mask_eval_class != 'all':
+      self._metric_names = [
+          x.replace('mask', 'novel_mask') for x in self._metric_names
+      ]
+  def evaluate(self):
+    """Evaluates with detections from all images with COCO API.
+    Returns:
+      metrics_dict: a dict mapping each metric name to its float32 value for
+        the coco-style evaluation metrics (box, plus mask if `include_mask`).
+    """
+    if not self._annotation_file:
+      gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
+          self._groundtruths)
+      coco_gt = coco_utils.COCOWrapper(
+          eval_type=('mask' if self._include_mask else 'box'),
+          gt_dataset=gt_dataset)
+    else:
+      coco_gt = self._coco_gt
+    coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
+        self._predictions)
+    coco_dt = coco_gt.loadRes(predictions=coco_predictions)
+    image_ids = [ann['image_id'] for ann in coco_predictions]
+    coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
+    coco_eval.params.imgIds = image_ids
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    coco_metrics = coco_eval.stats
+    if self._include_mask:
+      mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
+      mcoco_eval.params.imgIds = image_ids
+      mcoco_eval.evaluate()
+      mcoco_eval.accumulate()
+      mcoco_eval.summarize()
+      if self._mask_eval_class == 'all':
+        metrics = np.hstack((coco_metrics, mcoco_eval.stats))
+      else:
+        mask_coco_metrics = mcoco_eval.category_stats
+        val_catg_idx = np.isin(mcoco_eval.params.catIds,
+                               self._eval_categories)
+        # Gather the valid evaluation of the eval categories.
+        if np.any(val_catg_idx):
+          mean_val_metrics = []
+          for mid in range(len(self._metric_names) // 2):
+            mean_val_metrics.append(
+                np.nanmean(mask_coco_metrics[mid][val_catg_idx]))
+          mean_val_metrics = np.array(mean_val_metrics)
+        else:
+          mean_val_metrics = np.zeros(len(self._metric_names) // 2)
+        metrics = np.hstack((coco_metrics, mean_val_metrics))
+    else:
+      metrics = coco_metrics
+    # Cleans up the internal variables in order for a fresh eval next time.
+    self.reset()
+    metrics_dict = {}
+    for i, name in enumerate(self._metric_names):
+      metrics_dict[name] = metrics[i].astype(np.float32)
+    return metrics_dict
--- a/official/vision/detection/evaluation/coco_utils.py
+++ b/official/vision/detection/evaluation/coco_utils.py
@@ -32,6 +32,97 @@ import tensorflow.compat.v2 as tf
 from official.vision.detection.dataloader import tf_example_decoder
 from official.vision.detection.utils import box_utils
+import cv2
+def generate_segmentation_from_masks(masks,
+                                     detected_boxes,
+                                     image_height,
+                                     image_width,
+                                     is_image_mask=False):
+  """Generates segmentation result from instance masks.
+  Args:
+    masks: a numpy array of shape [N, mask_height, mask_width] representing the
+      instance masks w.r.t. the `detected_boxes`.
+    detected_boxes: a numpy array of shape [N, 4] representing the reference
+      bounding boxes.
+    image_height: an integer representing the height of the image.
+    image_width: an integer representing the width of the image.
+    is_image_mask: bool. True: input masks are whole-image masks. False: input
+      masks are bounding-box level masks.
+  Returns:
+    segms: a numpy array of shape [N, image_height, image_width] representing
+      the instance masks *pasted* on the image canvas.
+  """
+  def expand_boxes(boxes, scale):
+    """Expands an array of boxes by a given scale."""
+    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
+    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
+    # whereas `boxes` here is in [x1, y1, w, h] form
+    w_half = boxes[:, 2] * .5
+    h_half = boxes[:, 3] * .5
+    x_c = boxes[:, 0] + w_half
+    y_c = boxes[:, 1] + h_half
+    w_half *= scale
+    h_half *= scale
+    boxes_exp = np.zeros(boxes.shape)
+    boxes_exp[:, 0] = x_c - w_half
+    boxes_exp[:, 2] = x_c + w_half
+    boxes_exp[:, 1] = y_c - h_half
+    boxes_exp[:, 3] = y_c + h_half
+    return boxes_exp
+  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812  # pylint: disable=line-too-long
+  # To work around an issue with cv2.resize (it seems to automatically pad
+  # with repeated border values), we manually zero-pad the masks by 1 pixel
+  # prior to resizing back to the original image resolution. This prevents
+  # "top hat" artifacts. We therefore need to expand the reference boxes by an
+  # appropriate factor.
+  _, mask_height, mask_width = masks.shape
+  scale = max((mask_width + 2.0) / mask_width,
+              (mask_height + 2.0) / mask_height)
+  ref_boxes = expand_boxes(detected_boxes, scale)
+  ref_boxes = ref_boxes.astype(np.int32)
+  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
+  segms = []
+  for mask_ind, mask in enumerate(masks):
+    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
+    if is_image_mask:
+      # Process whole-image masks.
+      im_mask[:, :] = mask[:, :]
+    else:
+      # Process mask inside bounding boxes.
+      padded_mask[1:-1, 1:-1] = mask[:, :]
+      ref_box = ref_boxes[mask_ind, :]
+      w = ref_box[2] - ref_box[0] + 1
+      h = ref_box[3] - ref_box[1] + 1
+      w = np.maximum(w, 1)
+      h = np.maximum(h, 1)
+      mask = cv2.resize(padded_mask, (w, h))
+      mask = np.array(mask > 0.5, dtype=np.uint8)
+      x_0 = max(ref_box[0], 0)
+      x_1 = min(ref_box[2] + 1, image_width)
+      y_0 = max(ref_box[1], 0)
+      y_1 = min(ref_box[3] + 1, image_height)
+      im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
+                                       (x_0 - ref_box[0]):(x_1 - ref_box[0])]
+    segms.append(im_mask)
+  segms = np.array(segms)
+  assert masks.shape[0] == segms.shape[0]
+  return segms
 class COCOWrapper(coco.COCO):
  """COCO wrapper class.
@@ -98,7 +189,6 @@ class COCOWrapper(coco.COCO):
        ann['segmentation'] = [
            [x1, y1, x1, y2, x2, y2, x2, y1]]
      elif self._eval_type == 'mask':
-        ann['bbox'] = mask_utils.toBbox(ann['segmentation'])
        ann['area'] = mask_utils.area(ann['segmentation'])
    res.dataset['annotations'] = copy.deepcopy(predictions)
@@ -134,24 +224,38 @@ def convert_predictions_to_coco_annotations(predictions):
  num_batches = len(predictions['source_id'])
  batch_size = predictions['source_id'][0].shape[0]
  max_num_detections = predictions['detection_classes'][0].shape[1]
+  use_outer_box = 'detection_outer_boxes' in predictions
  for i in range(num_batches):
+    predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
+        predictions['detection_boxes'][i])
+    if use_outer_box:
+      predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
+          predictions['detection_outer_boxes'][i])
+      mask_boxes = predictions['detection_outer_boxes']
+    else:
+      mask_boxes = predictions['detection_boxes']
    for j in range(batch_size):
+      if 'detection_masks' in predictions:
+        image_masks = generate_segmentation_from_masks(
+            predictions['detection_masks'][i][j],
+            mask_boxes[i][j],
+            int(predictions['image_info'][i][j, 0, 0]),
+            int(predictions['image_info'][i][j, 0, 1]),
+            is_image_mask=False)
+        binary_masks = (image_masks > 0.0).astype(np.uint8)
+        encoded_masks = [
+            mask_utils.encode(np.asfortranarray(binary_mask))
+            for binary_mask in list(binary_masks)
+        ]
      for k in range(max_num_detections):
        ann = {}
        ann['image_id'] = predictions['source_id'][i][j]
        ann['category_id'] = predictions['detection_classes'][i][j, k]
-        boxes = predictions['detection_boxes'][i]
+        ann['bbox'] = predictions['detection_boxes'][i][j, k]
-        ann['bbox'] = [
-            boxes[j, k, 1],
-            boxes[j, k, 0],
-            boxes[j, k, 3] - boxes[j, k, 1],
-            boxes[j, k, 2] - boxes[j, k, 0]]
        ann['score'] = predictions['detection_scores'][i][j, k]
        if 'detection_masks' in predictions:
-          encoded_mask = mask_utils.encode(
+          ann['segmentation'] = encoded_masks[k]
-              np.asfortranarray(
-                  predictions['detection_masks'][i][j, k].astype(np.uint8)))
-          ann['segmentation'] = encoded_mask
        coco_predictions.append(ann)
  for i, ann in enumerate(coco_predictions):