Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxMaskList classes and functions."""
import numpy as np
from object_detection.utils import np_box_list
class BoxMaskList(np_box_list.BoxList):
  """Convenience wrapper for BoxList with masks.

  Extends np_box_list.BoxList so that each box also carries a full-image
  instance mask; the constructor receives boxes and masks together.
  """

  def __init__(self, box_data, mask_data):
    """Constructs a box-and-mask collection.

    Args:
      box_data: numpy array of shape [N, 4] holding box coordinates.
      mask_data: uint8 numpy array of shape [N, height, width] with values in
        {0, 1}; the masks cover the full image, so height and width equal the
        image height and width.

    Raises:
      ValueError: if bbox data is not a numpy array
      ValueError: if invalid dimensions for bbox data
      ValueError: if mask data is not a numpy array
      ValueError: if invalid dimension for mask data
    """
    # Box validation is delegated to the BoxList base class.
    super(BoxMaskList, self).__init__(box_data)
    if not isinstance(mask_data, np.ndarray):
      raise ValueError('Mask data must be a numpy array.')
    if mask_data.ndim != 3:
      raise ValueError('Invalid dimensions for mask data.')
    if mask_data.dtype != np.uint8:
      raise ValueError('Invalid data type for mask data: uint8 is required.')
    if mask_data.shape[0] != box_data.shape[0]:
      raise ValueError('There should be the same number of boxes and masks.')
    self.data['masks'] = mask_data

  def get_masks(self):
    """Returns the [N, height, width] numpy array of masks."""
    return self.get_field('masks')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for np_box_mask_list.BoxMaskList.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list_ops
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_mask_ops
def box_list_to_box_mask_list(boxlist):
  """Converts a BoxList carrying a 'masks' field into a BoxMaskList.

  All extra fields of the input (other than 'masks', which the BoxMaskList
  constructor consumes) are copied over.

  Args:
    boxlist: An np_box_list.BoxList object.

  Returns:
    An np_box_mask_list.BoxMaskList object.

  Raises:
    ValueError: If boxlist does not contain `masks` as a field.
  """
  if not boxlist.has_field('masks'):
    raise ValueError('boxlist does not contain mask field.')
  converted = np_box_mask_list.BoxMaskList(
      box_data=boxlist.get(), mask_data=boxlist.get_field('masks'))
  for field_name in boxlist.get_extra_fields():
    if field_name == 'masks':
      continue  # already stored by the constructor above
    converted.data[field_name] = boxlist.get_field(field_name)
  return converted
def area(box_mask_list):
  """Computes the pixel area of each mask in the collection.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes and masks

  Returns:
    a numpy array of shape [N] with per-mask areas
  """
  masks = box_mask_list.get_masks()
  return np_mask_ops.area(masks)
def intersection(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection areas between masks.

  Args:
    box_mask_list1: BoxMaskList holding N boxes and masks
    box_mask_list2: BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise mask intersection areas
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.intersection(masks_a, masks_b)
def iou(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection-over-union between mask collections.

  Args:
    box_mask_list1: BoxMaskList holding N boxes and masks
    box_mask_list2: BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise iou scores
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.iou(masks_a, masks_b)
def ioa(box_mask_list1, box_mask_list2):
  """Computes pairwise intersection-over-area between mask collections.

  Intersection-over-area (ioa) between masks mask1 and mask2 is their
  intersection area divided by mask2's area. Note it is not symmetric:
  IOA(mask1, mask2) != IOA(mask2, mask1).

  Args:
    box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks
    box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks

  Returns:
    a numpy array of shape [N, M] with pairwise ioa scores
  """
  masks_a = box_mask_list1.get_masks()
  masks_b = box_mask_list2.get_masks()
  return np_mask_ops.ioa(masks_a, masks_b)
def gather(box_mask_list, indices, fields=None):
  """Gathers boxes from np_box_mask_list.BoxMaskList according to indices.

  By default, gather returns boxes corresponding to the input index list, as
  well as all additional fields stored in the box_mask_list (indexing into the
  first dimension). However one can optionally only gather from a
  subset of fields.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes
    indices: a 1-d numpy array of type int_
    fields: (optional) list of fields to also gather from. If None (default),
      all fields are gathered from. Pass an empty fields list to only gather
      the box coordinates.

  Returns:
    subbox_mask_list: a np_box_mask_list.BoxMaskList corresponding to the
      subset of the input box_mask_list specified by indices

  Raises:
    ValueError: if specified field is not contained in box_mask_list or if the
      indices are not of type int_
  """
  if fields is not None and 'masks' not in fields:
    # Build a new list rather than appending in place: the previous
    # implementation mutated the caller's `fields` argument.
    fields = list(fields) + ['masks']
  return box_list_to_box_mask_list(
      np_box_list_ops.gather(
          boxlist=box_mask_list, indices=indices, fields=fields))
def sort_by_field(box_mask_list, field,
                  order=np_box_list_ops.SortOrder.DESCEND):
  """Sorts boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    box_mask_list: BoxMaskList holding N boxes.
    field: A BoxMaskList field for sorting and reordering the BoxMaskList.
    order: (Optional) 'descend' or 'ascend'. Default is descend.

  Returns:
    sorted_box_mask_list: A sorted BoxMaskList with the field in the specified
      order.
  """
  sorted_boxlist = np_box_list_ops.sort_by_field(
      boxlist=box_mask_list, field=field, order=order)
  return box_list_to_box_mask_list(sorted_boxlist)
def non_max_suppression(box_mask_list,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
  """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes whose masks have high IOU (intersection over union) overlap
  (> thresh) with already selected boxes. In each iteration, the detected
  bounding box with highest score in the available pool is selected.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain
      a 'scores' field representing detection scores. All scores belong to the
      same class.
    max_output_size: maximum number of retained boxes
    iou_threshold: intersection over union threshold.
    score_threshold: minimum score threshold. Remove the boxes with scores
      less than this value. Default value is set to -10. A very
      low threshold to pass pretty much all the boxes, unless
      the user sets a different score threshold.

  Returns:
    an np_box_mask_list.BoxMaskList holding M boxes where M <= max_output_size

  Raises:
    ValueError: if 'scores' field does not exist
    ValueError: if threshold is not in [0, 1]
    ValueError: if max_output_size < 0
  """
  if not box_mask_list.has_field('scores'):
    raise ValueError('Field scores does not exist')
  if iou_threshold < 0. or iou_threshold > 1.0:
    raise ValueError('IOU threshold must be in [0, 1]')
  if max_output_size < 0:
    raise ValueError('max_output_size must be bigger than 0.')
  box_mask_list = filter_scores_greater_than(box_mask_list, score_threshold)
  if box_mask_list.num_boxes() == 0:
    return box_mask_list
  box_mask_list = sort_by_field(box_mask_list, 'scores')
  # With iou_threshold == 1.0 NMS never suppresses anything: just keep the
  # top max_output_size entries (the list is already score-sorted).
  if iou_threshold == 1.0:
    if box_mask_list.num_boxes() > max_output_size:
      selected_indices = np.arange(max_output_size)
      return gather(box_mask_list, selected_indices)
    else:
      return box_mask_list
  masks = box_mask_list.get_masks()
  num_masks = box_mask_list.num_boxes()
  # is_index_valid is True only for all remaining valid boxes,
  is_index_valid = np.full(num_masks, 1, dtype=bool)
  selected_indices = []
  num_output = 0
  # Fix: `xrange` is Python 2-only and raises NameError on Python 3;
  # `range` is the compatible equivalent.
  for i in range(num_masks):
    if num_output < max_output_size:
      if is_index_valid[i]:
        num_output += 1
        selected_indices.append(i)
        is_index_valid[i] = False
        valid_indices = np.where(is_index_valid)[0]
        if valid_indices.size == 0:
          break
        # Invalidate every remaining mask that overlaps the selected one
        # by more than the threshold.
        intersect_over_union = np_mask_ops.iou(
            np.expand_dims(masks[i], axis=0), masks[valid_indices])
        intersect_over_union = np.squeeze(intersect_over_union, axis=0)
        is_index_valid[valid_indices] = np.logical_and(
            is_index_valid[valid_indices],
            intersect_over_union <= iou_threshold)
  return gather(box_mask_list, np.array(selected_indices))
def multi_class_non_max_suppression(box_mask_list, score_thresh, iou_thresh,
                                    max_output_size):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Args:
    box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain a
      'scores' field representing detection scores. This scores field is a
      tensor that can be 1 dimensional (in the case of a single class) or
      2-dimensional, in which case we assume that it takes the
      shape [num_boxes, num_classes]. We further assume that this rank is known
      statically and that scores.shape[1] is also known (i.e., the number of
      classes is fixed and known at graph construction time).
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
      with previously selected boxes are removed).
    max_output_size: maximum number of retained boxes per class.

  Returns:
    a box_mask_list holding M boxes with a rank-1 scores field representing
    corresponding scores for each box with scores sorted in decreasing order
    and a rank-1 classes field representing a class label for each box.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input box_mask_list does
      not have a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('thresh must be between 0 and 1')
  if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
    raise ValueError('box_mask_list must be a box_mask_list')
  if not box_mask_list.has_field('scores'):
    raise ValueError('input box_mask_list must have \'scores\' field')
  scores = box_mask_list.get_field('scores')
  if len(scores.shape) == 1:
    # Single-class scores: normalize to shape [num_boxes, 1].
    scores = np.reshape(scores, [-1, 1])
  elif len(scores.shape) == 2:
    if scores.shape[1] is None:
      # NOTE(review): numpy shapes are always concrete ints, so this branch
      # looks unreachable; presumably carried over from a TF-tensor variant.
      raise ValueError('scores field must have statically defined second '
                       'dimension')
  else:
    raise ValueError('scores field must be of rank 1 or 2')
  num_boxes = box_mask_list.num_boxes()
  num_scores = scores.shape[0]
  num_classes = scores.shape[1]
  if num_boxes != num_scores:
    raise ValueError('Incorrect scores field length: actual vs expected.')
  selected_boxes_list = []
  for class_idx in range(num_classes):
    # Fresh per-class BoxMaskList so this class's score column can be
    # attached as the 'scores' field.
    box_mask_list_and_class_scores = np_box_mask_list.BoxMaskList(
        box_data=box_mask_list.get(),
        mask_data=box_mask_list.get_masks())
    class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
    box_mask_list_and_class_scores.add_field('scores', class_scores)
    # Pre-filter by score, then run single-class NMS for this class.
    box_mask_list_filt = filter_scores_greater_than(
        box_mask_list_and_class_scores, score_thresh)
    nms_result = non_max_suppression(
        box_mask_list_filt,
        max_output_size=max_output_size,
        iou_threshold=iou_thresh,
        score_threshold=score_thresh)
    # Tag every surviving box with its class label.
    nms_result.add_field(
        'classes',
        np.zeros_like(nms_result.get_field('scores')) + class_idx)
    selected_boxes_list.append(nms_result)
  # Merge the per-class results and sort globally by score.
  selected_boxes = np_box_list_ops.concatenate(selected_boxes_list)
  sorted_boxes = np_box_list_ops.sort_by_field(selected_boxes, 'scores')
  return box_list_to_box_mask_list(boxlist=sorted_boxes)
def prune_non_overlapping_masks(box_mask_list1, box_mask_list2, minoverlap=0.0):
  """Prunes the boxes in list1 that overlap less than thresh with list2.

  For each mask in box_mask_list1, we want its IOA to be more than minoverlap
  with at least one of the masks in box_mask_list2. If it does not, we remove
  it. If the masks are not full size image, we do the pruning based on boxes.

  Args:
    box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks.
    box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks.
    minoverlap: Minimum required overlap between boxes, to count them as
      overlapping.

  Returns:
    A pruned box_mask_list with size [N', 4].
  """
  ioa_matrix = ioa(box_mask_list2, box_mask_list1)  # [M, N] tensor
  best_overlap = np.amax(ioa_matrix, axis=0)  # [N] tensor
  keep_indices = np.nonzero(best_overlap >= minoverlap)[0]
  return gather(box_mask_list1, keep_indices)
def concatenate(box_mask_lists, fields=None):
  """Concatenates a list of box_mask_lists.

  This op concatenates a list of input box_mask_lists into a larger
  box_mask_list. It also handles concatenation of box_mask_list fields as
  long as the field tensor shapes are equal except for the first dimension.

  Args:
    box_mask_lists: list of np_box_mask_list.BoxMaskList objects
    fields: optional list of fields to also concatenate. By default, all
      fields from the first BoxMaskList in the list are included in the
      concatenation.

  Returns:
    a box_mask_list with number of boxes equal to
      sum([box_mask_list.num_boxes() for box_mask_list in box_mask_list])

  Raises:
    ValueError: if box_mask_lists is invalid (i.e., is not a list, is empty, or
      contains non box_mask_list objects), or if requested fields are not
      contained in all box_mask_lists
  """
  if fields is not None and 'masks' not in fields:
    # Build a new list rather than appending in place: the previous
    # implementation mutated the caller's `fields` argument.
    fields = list(fields) + ['masks']
  return box_list_to_box_mask_list(
      np_box_list_ops.concatenate(boxlists=box_mask_lists, fields=fields))
def filter_scores_greater_than(box_mask_list, thresh):
  """Filters to keep only boxes and masks with score exceeding a threshold.

  This op keeps the collection of boxes and masks whose corresponding scores
  are strictly greater than the input threshold.

  Args:
    box_mask_list: BoxMaskList holding N boxes and masks. Must contain a
      'scores' field representing detection scores.
    thresh: scalar threshold

  Returns:
    a BoxMaskList holding M boxes and masks where M <= N

  Raises:
    ValueError: if box_mask_list not a np_box_mask_list.BoxMaskList object or
      if it does not have a scores field
  """
  if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
    raise ValueError('box_mask_list must be a BoxMaskList')
  if not box_mask_list.has_field('scores'):
    raise ValueError('input box_mask_list must have \'scores\' field')
  scores = box_mask_list.get_field('scores')
  if len(scores.shape) > 2:
    raise ValueError('Scores should have rank 1 or 2')
  if len(scores.shape) == 2 and scores.shape[1] != 1:
    raise ValueError('Scores should have rank 1 or have shape '
                     'consistent with [None, 1]')
  # Flatten so [N, 1] scores behave like [N]. The previous implementation
  # reshaped the whole np.where() tuple, which for rank-2 scores appended the
  # (all-zero) column indices after the row indices and produced bogus
  # gather indices.
  scores = np.reshape(scores, [-1])
  high_score_indices = np.where(
      np.greater(scores, thresh))[0].astype(np.int32)
  return gather(box_mask_list, high_score_indices)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_mask_list_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_box_mask_list_ops
class AreaRelatedTest(tf.test.TestCase):
  """Tests area/intersection/iou/ioa ops over a fixed pair of BoxMaskLists."""

  def setUp(self):
    # Collection 1: two boxes with 5x8 binary masks of area 8 and 10.
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1 = np.stack([masks1_0, masks1_1])
    # Collection 2: three boxes with 5x8 binary masks of area 8, 15 and 25.
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0]],
                        dtype=np.uint8)
    masks2 = np.stack([masks2_0, masks2_1, masks2_2])
    self.box_mask_list1 = np_box_mask_list.BoxMaskList(
        box_data=boxes1, mask_data=masks1)
    self.box_mask_list2 = np_box_mask_list.BoxMaskList(
        box_data=boxes2, mask_data=masks2)

  def test_area(self):
    # Areas are mask pixel counts, not box areas.
    areas = np_box_mask_list_ops.area(self.box_mask_list1)
    expected_areas = np.array([8.0, 10.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def test_intersection(self):
    # Pairwise overlapping-pixel counts between masks1 and masks2.
    intersection = np_box_mask_list_ops.intersection(self.box_mask_list1,
                                                     self.box_mask_list2)
    expected_intersection = np.array([[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def test_iou(self):
    # intersection / union, e.g. masks1_0 vs masks2_0 are identical -> 1.0.
    iou = np_box_mask_list_ops.iou(self.box_mask_list1, self.box_mask_list2)
    expected_iou = np.array(
        [[1.0, 0.0, 8.0 / 25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=float)
    self.assertAllClose(iou, expected_iou)

  def test_ioa(self):
    # ioa normalizes by the second argument's mask areas (8, 15, 25).
    ioa21 = np_box_mask_list_ops.ioa(self.box_mask_list1, self.box_mask_list2)
    expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0],
                               [0.0, 9.0/15.0, 7.0/25.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
class NonMaximumSuppressionTest(tf.test.TestCase):
  """Tests for single-class and multi-class NMS over BoxMaskLists."""

  def setUp(self):
    # Two small collections of boxes with matching 3x3 binary masks.
    boxes1 = np.array(
        [[4.0, 3.0, 7.0, 6.0], [5.0, 6.0, 10.0, 10.0]], dtype=float)
    boxes2 = np.array(
        [[3.0, 4.0, 6.0, 8.0], [5.0, 6.0, 10.0, 10.0], [1.0, 1.0, 10.0, 10.0]],
        dtype=float)
    masks1 = np.array(
        [[[0, 1, 0], [1, 1, 0], [0, 0, 0]], [[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
        dtype=np.uint8)
    masks2 = np.array(
        [[[0, 1, 0], [1, 1, 1], [0, 0, 0]], [[0, 1, 0], [0, 0, 1], [0, 1, 1]],
         [[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
        dtype=np.uint8)
    self.boxes1 = boxes1
    self.boxes2 = boxes2
    self.masks1 = masks1
    self.masks2 = masks2

  def test_with_no_scores_field(self):
    # non_max_suppression requires a 'scores' field and must raise without it.
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=self.boxes1, mask_data=self.masks1)
    max_output_size = 3
    iou_threshold = 0.5
    with self.assertRaises(ValueError):
      np_box_mask_list_ops.non_max_suppression(
          box_mask_list, max_output_size, iou_threshold)

  def test_nms_disabled_max_output_size_equals_one(self):
    # iou_threshold == 1.0 disables suppression: only truncation to the
    # single top-scoring entry (score .9, first box/mask) remains.
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=self.boxes2, mask_data=self.masks2)
    box_mask_list.add_field('scores',
                            np.array([.9, .75, .6], dtype=float))
    max_output_size = 1
    iou_threshold = 1.  # No NMS
    expected_boxes = np.array([[3.0, 4.0, 6.0, 8.0]], dtype=float)
    expected_masks = np.array(
        [[[0, 1, 0], [1, 1, 1], [0, 0, 0]]], dtype=np.uint8)
    nms_box_mask_list = np_box_mask_list_ops.non_max_suppression(
        box_mask_list, max_output_size, iou_threshold)
    self.assertAllClose(nms_box_mask_list.get(), expected_boxes)
    self.assertAllClose(nms_box_mask_list.get_masks(), expected_masks)

  def test_multiclass_nms(self):
    # Three boxes with [3, 5] scores (3 boxes x 5 classes); per-class NMS
    # should keep the four entries above score_thresh that survive IOU
    # pruning, sorted by descending score.
    boxes = np.array(
        [[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
        dtype=np.float32)
    mask0 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 1, 1, 0],
                      [0, 0, 0, 0, 0]],
                     dtype=np.uint8)
    mask1 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 1, 1, 0],
                      [0, 1, 1, 1, 0],
                      [0, 0, 0, 0, 0]],
                     dtype=np.uint8)
    mask2 = np.array([[0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [1, 1, 1, 1, 1],
                      [1, 1, 1, 1, 1]],
                     dtype=np.uint8)
    masks = np.stack([mask0, mask1, mask2])
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
                       [0.7, -0.7, 0.6, 0.2, -0.9],
                       [0.4, 0.34, -0.9, 0.2, 0.31]],
                      dtype=np.float32)
    box_mask_list.add_field('scores', scores)
    box_mask_list_clean = np_box_mask_list_ops.multi_class_non_max_suppression(
        box_mask_list, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
    scores_clean = box_mask_list_clean.get_field('scores')
    classes_clean = box_mask_list_clean.get_field('classes')
    boxes = box_mask_list_clean.get()
    # NOTE(review): the resulting masks are fetched but never asserted below.
    masks = box_mask_list_clean.get_masks()
    expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
    expected_classes = np.array([0, 2, 1, 4])
    expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
                               [0.4, 0.2, 0.8, 0.8],
                               [0.6, 0.0, 1.0, 1.0],
                               [0.6, 0.0, 1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(scores_clean, expected_scores)
    self.assertAllClose(classes_clean, expected_classes)
    self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
  tf.test.main()  # Runs all tf.test.TestCase classes defined in this module.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_mask_list_test."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_mask_list
class BoxMaskListTest(tf.test.TestCase):
  """Unit tests for the BoxMaskList container's constructor and accessors."""

  def test_invalid_box_mask_data(self):
    # box_data must be a numpy array.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=[0, 0, 1, 1],
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Integer box data is rejected -- validation happens in the BoxList base
    # class (not shown here); presumably it requires float boxes.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 0, 1, 1]], dtype=int),
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Rank-1 box data is not a valid [N, 4] array.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([0, 1, 1, 3, 4], dtype=float),
          mask_data=np.zeros([1, 3, 3], dtype=np.uint8))
    # Second box has min > max coordinates -- presumably rejected by the
    # BoxList base class; TODO confirm.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 3, 3], dtype=np.uint8))
    # Number of masks (3) differs from number of boxes (2).
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([3, 5, 5], dtype=np.uint8))
    # Mask data must be rank 3; this is rank 2.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5], dtype=np.uint8))
    # Mask data must be rank 3; this is rank 4.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5, 5, 5], dtype=np.uint8))
    # Mask dtype must be np.uint8.
    with self.assertRaises(ValueError):
      np_box_mask_list.BoxMaskList(
          box_data=np.array([[0, 1, 1, 3], [1, 1, 1, 5]], dtype=float),
          mask_data=np.zeros([2, 5, 5], dtype=np.int32))

  def test_has_field_with_existed_field(self):
    # The constructor registers both 'boxes' and 'masks' fields.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=np.zeros([3, 5, 5], dtype=np.uint8))
    self.assertTrue(box_mask_list.has_field('boxes'))
    self.assertTrue(box_mask_list.has_field('masks'))

  def test_has_field_with_nonexisted_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=np.zeros([3, 3, 3], dtype=np.uint8))
    self.assertFalse(box_mask_list.has_field('scores'))

  def test_get_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    self.assertTrue(np.allclose(box_mask_list.get_field('boxes'), boxes))
    self.assertTrue(np.allclose(box_mask_list.get_field('masks'), masks))

  def test_get_field_with_nonexited_field(self):
    # Accessing a field that was never added must raise.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    with self.assertRaises(ValueError):
      box_mask_list.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
  """Tests add_field, get_extra_fields, get_coordinates and num_boxes."""

  def setUp(self):
    # A three-box list with all-zero 3x3 masks, shared by every test below.
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    masks = np.zeros([3, 3, 3], dtype=np.uint8)
    self.box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)

  def test_add_already_existed_field_bbox(self):
    # 'boxes' is created by the constructor and may not be overwritten.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('boxes',
                                   np.array([[0, 0, 0, 1, 0]], dtype=float))

  def test_add_already_existed_field_mask(self):
    # 'masks' is created by the constructor and may not be overwritten.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('masks',
                                   np.zeros([3, 3, 3], dtype=np.uint8))

  def test_add_invalid_field_data(self):
    # Field length must match the number of boxes (3 here); 2 and 4 fail.
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('scores', np.array([0.5, 0.7], dtype=float))
    with self.assertRaises(ValueError):
      self.box_mask_list.add_field('scores',
                                   np.array([0.5, 0.7, 0.9, 0.1], dtype=float))

  def test_add_single_dimensional_field_data(self):
    box_mask_list = self.box_mask_list
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    box_mask_list.add_field('scores', scores)
    self.assertTrue(np.allclose(scores, self.box_mask_list.get_field('scores')))

  def test_add_multi_dimensional_field_data(self):
    box_mask_list = self.box_mask_list
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    box_mask_list.add_field('labels', labels)
    self.assertTrue(np.allclose(labels, self.box_mask_list.get_field('labels')))

  def test_get_extra_fields(self):
    box_mask_list = self.box_mask_list
    # NOTE(review): assertItemsEqual is Python 2 unittest API; tf.test.TestCase
    # is assumed to provide a Python 3 alias -- confirm before upgrading.
    self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks'])
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    box_mask_list.add_field('scores', scores)
    self.assertItemsEqual(box_mask_list.get_extra_fields(), ['masks', 'scores'])
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    box_mask_list.add_field('labels', labels)
    self.assertItemsEqual(box_mask_list.get_extra_fields(),
                          ['masks', 'scores', 'labels'])

  def test_get_coordinates(self):
    y_min, x_min, y_max, x_max = self.box_mask_list.get_coordinates()
    expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
    expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
    expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
    expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)
    self.assertTrue(np.allclose(y_min, expected_y_min))
    self.assertTrue(np.allclose(x_min, expected_x_min))
    self.assertTrue(np.allclose(y_max, expected_y_max))
    self.assertTrue(np.allclose(x_max, expected_x_max))

  def test_num_boxes(self):
    boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
    masks = np.zeros([2, 5, 5], dtype=np.uint8)
    box_mask_list = np_box_mask_list.BoxMaskList(
        box_data=boxes, mask_data=masks)
    expected_num_boxes = 2
    # Fix: assertEquals is a deprecated alias (removed in Python 3.12);
    # assertEqual is the canonical spelling.
    self.assertEqual(box_mask_list.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
  tf.test.main()  # Runs all tf.test.TestCase classes defined in this module.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, height, width] numpy arrays representing masks.
Example mask operations that are supported:
* Areas: compute mask areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
EPSILON = 1e-7
def area(masks):
  """Computes the area (foreground pixel count) of each mask.

  Args:
    masks: Numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a float32 numpy array of shape [N] with per-mask areas.

  Raises:
    ValueError: If masks.dtype is not np.uint8
  """
  if masks.dtype != np.uint8:
    raise ValueError('Masks type should be np.uint8')
  # Summing over the two spatial axes counts the 1-pixels of each mask.
  return masks.sum(axis=(1, 2), dtype=np.float32)
def intersection(masks1, masks2):
  """Computes pairwise intersection areas between masks.

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a float32 numpy array with shape [N, M] of pairwise intersection areas.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  # Vectorized replacement for the previous O(N*M) Python double loop:
  # broadcast to [N, M, height, width], take the elementwise minimum (the
  # intersection for {0,1}-valued masks), and sum over the spatial axes.
  # Note this materializes an [N, M, height, width] intermediate.
  return np.sum(
      np.minimum(masks1[:, np.newaxis], masks2[np.newaxis, :]),
      axis=(2, 3),
      dtype=np.float32)
def iou(masks1, masks2):
  """Computes pairwise intersection-over-union between mask collections.

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  intersect = intersection(masks1, masks2)
  areas1 = area(masks1)[:, np.newaxis]  # [N, 1]
  areas2 = area(masks2)[np.newaxis, :]  # [1, M]
  # Union = |A| + |B| - |A ∩ B|; EPSILON guards against division by zero
  # when both masks are empty.
  union = areas1 + areas2 - intersect
  return intersect / np.maximum(union, EPSILON)
def ioa(masks1, masks2):
  """Computes pairwise intersection-over-area between mask collections.

  Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as
  their intersection area over mask2's area. Note that ioa is not symmetric,
  that is, IOA(mask1, mask2) != IOA(mask2, mask1).

  Args:
    masks1: a numpy array with shape [N, height, width] holding N masks. Masks
      values are of type np.uint8 and values are in {0,1}.
    masks2: a numpy array with shape [M, height, width] holding M masks. Masks
      values are of type np.uint8 and values are in {0,1}.

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.

  Raises:
    ValueError: If masks1 and masks2 are not of type np.uint8.
  """
  if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
    raise ValueError('masks1 and masks2 should be of type np.uint8')
  intersect = intersection(masks1, masks2)
  # Normalize each column j by the area of masks2[j]; EPSILON guards against
  # division by zero for empty masks.
  areas = np.expand_dims(area(masks2), axis=0)
  return intersect / (areas + EPSILON)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_mask_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_mask_ops
class MaskOpsTests(tf.test.TestCase):
  """Tests for np_mask_ops.area/intersection/iou/ioa on small fixtures."""

  def setUp(self):
    # First collection: two 5x8 binary masks (N=2).
    masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks1 = np.stack([masks1_0, masks1_1])
    # Second collection: three 5x8 binary masks (M=3). masks2_0 is identical
    # to masks1_0, so the (0, 0) iou below is exactly 1.
    masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0],
                         [1, 1, 1, 1, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        dtype=np.uint8)
    masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0],
                         [1, 1, 1, 1, 1, 0, 0, 0]],
                        dtype=np.uint8)
    masks2 = np.stack([masks2_0, masks2_1, masks2_2])
    self.masks1 = masks1
    self.masks2 = masks2

  def testArea(self):
    # Area is the count of 1-pixels per mask: 8 for masks1_0, 10 for masks1_1.
    areas = np_mask_ops.area(self.masks1)
    expected_areas = np.array([8.0, 10.0], dtype=np.float32)
    self.assertAllClose(expected_areas, areas)

  def testIntersection(self):
    # Pairwise overlap counts between the N=2 and M=3 collections.
    intersection = np_mask_ops.intersection(self.masks1, self.masks2)
    expected_intersection = np.array(
        [[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]], dtype=np.float32)
    self.assertAllClose(intersection, expected_intersection)

  def testIOU(self):
    # iou = intersection / union; (0, 0) is 1 since the masks are identical.
    iou = np_mask_ops.iou(self.masks1, self.masks2)
    expected_iou = np.array(
        [[1.0, 0.0, 8.0/25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=np.float32)
    self.assertAllClose(iou, expected_iou)

  def testIOA(self):
    # ioa normalizes the intersection by the area of the *second* mask only.
    ioa21 = np_mask_ops.ioa(self.masks1, self.masks2)
    expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0],
                               [0.0, 9.0/15.0, 7.0/25.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -109,7 +109,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
matching_iou_threshold=0.5,
evaluate_corlocs=False,
metric_prefix=None,
use_weighted_mean_ap=False):
use_weighted_mean_ap=False,
evaluate_masks=False):
"""Constructor.
Args:
......@@ -125,20 +126,28 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
use_weighted_mean_ap: (optional) boolean which determines if the mean
average precision is computed directly from the scores and tp_fp_labels
of all classes.
evaluate_masks: If False, evaluation will be performed based on boxes.
If True, mask evaluation will be performed instead.
Raises:
ValueError: If the category ids are not 1-indexed.
"""
super(ObjectDetectionEvaluator, self).__init__(categories)
self._num_classes = max([cat['id'] for cat in categories])
if min(cat['id'] for cat in categories) < 1:
raise ValueError('Classes should be 1-indexed.')
self._matching_iou_threshold = matching_iou_threshold
self._use_weighted_mean_ap = use_weighted_mean_ap
self._label_id_offset = 1
self._evaluate_masks = evaluate_masks
self._evaluation = ObjectDetectionEvaluation(
self._num_classes,
num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset)
self._image_ids = set([])
self._evaluate_corlocs = evaluate_corlocs
self._metric_prefix = (metric_prefix + '/') if metric_prefix else ''
self._metric_prefix = (metric_prefix + '_') if metric_prefix else ''
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
......@@ -156,16 +165,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
M numpy boolean array denoting whether a ground truth box is a
difficult instance or not. This field is optional to support the case
that no boxes are difficult.
standard_fields.InputDataFields.groundtruth_instance_masks: Optional
numpy array of shape [num_boxes, height, width] with values in {0, 1}.
Raises:
ValueError: On adding groundtruth for an image more than once.
ValueError: On adding groundtruth for an image more than once. Will also
raise error if instance masks are not in groundtruth dictionary.
"""
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
groundtruth_classes = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_classes]
groundtruth_classes -= self._label_id_offset
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
self._label_id_offset)
# If the key is not present in the groundtruth_dict or the array is empty
# (unless there are no annotations for the groundtruth on this image)
# use values from the dictionary or insert None otherwise.
......@@ -181,11 +193,20 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
logging.warn(
'image %s does not have groundtruth difficult flag specified',
image_id)
groundtruth_masks = None
if self._evaluate_masks:
if (standard_fields.InputDataFields.groundtruth_instance_masks not in
groundtruth_dict):
raise ValueError('Instance masks not in groundtruth dictionary.')
groundtruth_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
self._evaluation.add_single_ground_truth_image_info(
image_id,
groundtruth_dict[standard_fields.InputDataFields.groundtruth_boxes],
groundtruth_classes,
groundtruth_is_difficult_list=groundtruth_difficult)
image_key=image_id,
groundtruth_boxes=groundtruth_dict[
standard_fields.InputDataFields.groundtruth_boxes],
groundtruth_class_labels=groundtruth_classes,
groundtruth_is_difficult_list=groundtruth_difficult,
groundtruth_masks=groundtruth_masks)
self._image_ids.update([image_id])
def add_single_detected_image_info(self, image_id, detections_dict):
......@@ -202,15 +223,31 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
standard_fields.DetectionResultFields.detection_classes: integer numpy
array of shape [num_boxes] containing 1-indexed detection classes for
the boxes.
standard_fields.DetectionResultFields.detection_masks: uint8 numpy
array of shape [num_boxes, height, width] containing `num_boxes` masks
of values ranging between 0 and 1.
Raises:
ValueError: If detection masks are not in detections dictionary.
"""
detection_classes = detections_dict[
standard_fields.DetectionResultFields.detection_classes]
detection_classes -= self._label_id_offset
detection_classes = (
detections_dict[standard_fields.DetectionResultFields.detection_classes]
- self._label_id_offset)
detection_masks = None
if self._evaluate_masks:
if (standard_fields.DetectionResultFields.detection_masks not in
detections_dict):
raise ValueError('Detection masks not in detections dictionary.')
detection_masks = detections_dict[
standard_fields.DetectionResultFields.detection_masks]
self._evaluation.add_single_detected_image_info(
image_id,
detections_dict[standard_fields.DetectionResultFields.detection_boxes],
detections_dict[standard_fields.DetectionResultFields.detection_scores],
detection_classes)
image_key=image_id,
detected_boxes=detections_dict[
standard_fields.DetectionResultFields.detection_boxes],
detected_scores=detections_dict[
standard_fields.DetectionResultFields.detection_scores],
detected_class_labels=detection_classes,
detected_masks=detection_masks)
def evaluate(self):
"""Compute evaluation result.
......@@ -257,7 +294,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
self._evaluation = ObjectDetectionEvaluation(
self._num_classes,
num_groundtruth_classes=self._num_classes,
matching_iou_threshold=self._matching_iou_threshold,
use_weighted_mean_ap=self._use_weighted_mean_ap,
label_id_offset=self._label_id_offset)
......@@ -272,7 +309,7 @@ class PascalDetectionEvaluator(ObjectDetectionEvaluator):
categories,
matching_iou_threshold=matching_iou_threshold,
evaluate_corlocs=False,
metric_prefix='PASCAL',
metric_prefix='PascalBoxes',
use_weighted_mean_ap=False)
......@@ -295,10 +332,47 @@ class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
categories,
matching_iou_threshold=matching_iou_threshold,
evaluate_corlocs=False,
metric_prefix='WeightedPASCAL',
metric_prefix='WeightedPascalBoxes',
use_weighted_mean_ap=True)
class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
  """A class to evaluate instance masks using PASCAL metrics."""

  def __init__(self, categories, matching_iou_threshold=0.5):
    """Constructor.

    Args:
      categories: A list of category dicts; each must contain an integer 'id'
        key (1-indexed). Other keys (e.g. 'name') follow the parent class's
        category convention.
      matching_iou_threshold: IOU threshold above which a detected mask is
        matched to a groundtruth mask (default 0.5).
    """
    # Metric names are prefixed 'PascalMasks_'; evaluate_masks=True switches
    # the parent evaluator from box-based to mask-based matching.
    super(PascalInstanceSegmentationEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
        evaluate_corlocs=False,
        metric_prefix='PascalMasks',
        use_weighted_mean_ap=False,
        evaluate_masks=True)
class WeightedPascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
  """A class to evaluate instance masks using weighted PASCAL metrics.

  Weighted PASCAL metrics computes the mean average precision as the average
  precision given the scores and tp_fp_labels of all classes. In comparison,
  PASCAL metrics computes the mean average precision as the mean of the
  per-class average precisions.

  This definition is very similar to the mean of the per-class average
  precisions weighted by class frequency. However, they are typically not the
  same as the average precision is not a linear function of the scores and
  tp_fp_labels.
  """

  def __init__(self, categories, matching_iou_threshold=0.5):
    """Constructor.

    Args:
      categories: A list of category dicts; each must contain an integer 'id'
        key (1-indexed). Other keys (e.g. 'name') follow the parent class's
        category convention.
      matching_iou_threshold: IOU threshold above which a detected mask is
        matched to a groundtruth mask (default 0.5).
    """
    # use_weighted_mean_ap=True pools scores/tp_fp_labels across classes;
    # evaluate_masks=True switches matching from boxes to instance masks.
    super(WeightedPascalInstanceSegmentationEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
        evaluate_corlocs=False,
        metric_prefix='WeightedPascalMasks',
        use_weighted_mean_ap=True,
        evaluate_masks=True)
class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
"""A class to evaluate detections using Open Images V2 metrics.
......@@ -348,9 +422,9 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator):
if image_id in self._image_ids:
raise ValueError('Image with id {} already added.'.format(image_id))
groundtruth_classes = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_classes]
groundtruth_classes -= self._label_id_offset
groundtruth_classes = (
groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] -
self._label_id_offset)
# If the key is not present in the groundtruth_dict or the array is empty
# (unless there are no annotations for the groundtruth on this image)
# use values from the dictionary or insert None otherwise.
......@@ -392,19 +466,29 @@ class ObjectDetectionEvaluation(object):
nms_max_output_boxes=10000,
use_weighted_mean_ap=False,
label_id_offset=0):
if num_groundtruth_classes < 1:
raise ValueError('Need at least 1 groundtruth class for evaluation.')
self.per_image_eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
num_groundtruth_classes=num_groundtruth_classes,
matching_iou_threshold=matching_iou_threshold,
nms_iou_threshold=nms_iou_threshold,
nms_max_output_boxes=nms_max_output_boxes)
self.num_class = num_groundtruth_classes
self.use_weighted_mean_ap = use_weighted_mean_ap
self.label_id_offset = label_id_offset
self.groundtruth_boxes = {}
self.groundtruth_class_labels = {}
self.groundtruth_masks = {}
self.groundtruth_is_difficult_list = {}
self.groundtruth_is_group_of_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self._initialize_detections()
def _initialize_detections(self):
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
......@@ -415,24 +499,16 @@ class ObjectDetectionEvaluation(object):
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
self.use_weighted_mean_ap = use_weighted_mean_ap
def clear_detections(self):
self.detection_keys = {}
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.zeros(self.num_class, dtype=float)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
self._initialize_detections()
def add_single_ground_truth_image_info(self,
image_key,
groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_list=None,
groundtruth_is_group_of_list=None):
groundtruth_is_group_of_list=None,
groundtruth_masks=None):
"""Adds groundtruth for a single image to be used for evaluation.
Args:
......@@ -448,6 +524,9 @@ class ObjectDetectionEvaluation(object):
groundtruth_is_group_of_list: A length M numpy boolean array denoting
whether a ground truth box is a group-of box or not. To support
the case that no boxes are groups-of, it is by default set as None.
groundtruth_masks: uint8 numpy array of shape
[num_boxes, height, width] containing `num_boxes` groundtruth masks.
The mask values range from 0 to 1.
"""
if image_key in self.groundtruth_boxes:
logging.warn(
......@@ -457,6 +536,7 @@ class ObjectDetectionEvaluation(object):
self.groundtruth_boxes[image_key] = groundtruth_boxes
self.groundtruth_class_labels[image_key] = groundtruth_class_labels
self.groundtruth_masks[image_key] = groundtruth_masks
if groundtruth_is_difficult_list is None:
num_boxes = groundtruth_boxes.shape[0]
groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool)
......@@ -474,7 +554,8 @@ class ObjectDetectionEvaluation(object):
groundtruth_is_group_of_list.astype(dtype=bool))
def add_single_detected_image_info(self, image_key, detected_boxes,
detected_scores, detected_class_labels):
detected_scores, detected_class_labels,
detected_masks=None):
"""Adds detections for a single image to be used for evaluation.
Args:
......@@ -486,6 +567,9 @@ class ObjectDetectionEvaluation(object):
detection scores for the boxes.
detected_class_labels: integer numpy array of shape [num_boxes] containing
0-indexed detection classes for the boxes.
detected_masks: np.uint8 numpy array of shape [num_boxes, height, width]
containing `num_boxes` detection masks with values ranging
between 0 and 1.
Raises:
ValueError: if the number of boxes, scores and class labels differ in
......@@ -508,6 +592,10 @@ class ObjectDetectionEvaluation(object):
if image_key in self.groundtruth_boxes:
groundtruth_boxes = self.groundtruth_boxes[image_key]
groundtruth_class_labels = self.groundtruth_class_labels[image_key]
# Masks are popped instead of look up. The reason is that we do not want
# to keep all masks in memory which can cause memory overflow.
groundtruth_masks = self.groundtruth_masks.pop(
image_key)
groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[
image_key]
groundtruth_is_group_of_list = self.groundtruth_is_group_of_list[
......@@ -515,13 +603,23 @@ class ObjectDetectionEvaluation(object):
else:
groundtruth_boxes = np.empty(shape=[0, 4], dtype=float)
groundtruth_class_labels = np.array([], dtype=int)
if detected_masks is None:
groundtruth_masks = None
else:
groundtruth_masks = np.empty(shape=[0, 1, 1], dtype=float)
groundtruth_is_difficult_list = np.array([], dtype=bool)
groundtruth_is_group_of_list = np.array([], dtype=bool)
scores, tp_fp_labels, is_class_correctly_detected_in_image = (
self.per_image_eval.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list, groundtruth_is_group_of_list))
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
groundtruth_is_difficult_list=groundtruth_is_difficult_list,
groundtruth_is_group_of_list=groundtruth_is_group_of_list,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks))
for i in range(self.num_class):
if scores[i].shape[0] > 0:
......
......@@ -89,12 +89,12 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
})
metrics = oiv2_evaluator.evaluate()
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['OpenImagesV2/Precision/mAP@0.5IOU'],
metrics['OpenImagesV2_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['OpenImagesV2_Precision/mAP@0.5IOU'],
0.05555555)
oiv2_evaluator.clear()
self.assertFalse(oiv2_evaluator._image_ids)
......@@ -102,7 +102,7 @@ class OpenImagesV2EvaluationTest(tf.test.TestCase):
class PascalEvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values(self):
def test_returns_correct_metric_values_on_boxes(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
......@@ -158,12 +158,138 @@ class PascalEvaluationTest(tf.test.TestCase):
metrics = pascal_evaluator.evaluate()
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['PascalBoxes_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PascalBoxes_Precision/mAP@0.5IOU'],
0.05555555)
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
def test_returns_correct_metric_values_on_masks(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = (
object_detection_evaluation.PascalInstanceSegmentationEvaluator(
categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
groundtruth_masks_1_0 = np.array([[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1_1 = np.array([[0, 0, 1, 0],
[0, 0, 1, 0],
[0, 0, 1, 0]], dtype=np.uint8)
groundtruth_masks_1_2 = np.array([[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 0, 0]], dtype=np.uint8)
groundtruth_masks1 = np.stack(
[groundtruth_masks_1_0, groundtruth_masks_1_1, groundtruth_masks_1_2],
axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1,
standard_fields.InputDataFields.groundtruth_difficult:
np.array([], dtype=bool)
})
image_key2 = 'img2'
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
groundtruth_masks_2_0 = np.array([[1, 1, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_2_1 = np.array([[0, 0, 0, 0],
[1, 1, 1, 1],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_2_2 = np.array([[0, 0, 0, 0],
[0, 0, 0, 0],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks2 = np.stack(
[groundtruth_masks_2_0, groundtruth_masks_2_1, groundtruth_masks_2_2],
axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key2, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_difficult:
groundtruth_is_difficult_list2
})
image_key3 = 'img3'
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([2], dtype=int)
groundtruth_masks_3_0 = np.array([[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks3 = np.stack([groundtruth_masks_3_0], axis=0)
pascal_evaluator.add_single_ground_truth_image_info(
image_key3, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes3,
standard_fields.InputDataFields.groundtruth_instance_masks:
groundtruth_masks3,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels3
})
# Add detections
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([1, 1, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
detected_masks_0 = np.array([[1, 1, 1, 1],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 1, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
pascal_evaluator.add_single_detected_image_info(
image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_masks:
detected_masks,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
metrics = pascal_evaluator.evaluate()
self.assertAlmostEqual(
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PASCAL/Precision/mAP@0.5IOU'], 0.05555555)
metrics['PascalMasks_PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PascalMasks_Precision/mAP@0.5IOU'],
0.05555555)
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
......@@ -363,6 +489,11 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
self.od_eval.add_single_detected_image_info(
image_key, detected_boxes, detected_scores, detected_class_labels)
def test_value_error_on_zero_classes(self):
with self.assertRaises(ValueError):
object_detection_evaluation.ObjectDetectionEvaluation(
num_groundtruth_classes=0)
def test_add_single_ground_truth_image_info(self):
expected_num_gt_instances_per_class = np.array([3, 1, 1], dtype=int)
expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
......
......@@ -23,6 +23,7 @@ import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
from object_detection.utils import static_shape
......@@ -67,7 +68,7 @@ def normalized_to_image_coordinates(normalized_boxes, image_shape,
box_list.BoxList(normalized_boxes),
image_shape[1], image_shape[2], check_range=False).get()
absolute_boxes = tf.map_fn(
absolute_boxes = shape_utils.static_or_dynamic_map_fn(
_to_absolute_coordinates,
elems=(normalized_boxes),
dtype=tf.float32,
......@@ -115,6 +116,28 @@ def meshgrid(x, y):
return xgrid, ygrid
def fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
      Should be a positive integer.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  # Effective kernel extent once atrous holes are accounted for.
  effective_size = kernel_size + (kernel_size - 1) * (rate - 1)
  total = effective_size - 1
  pad_front = total // 2
  pad_back = total - pad_front
  return tf.pad(inputs, [[0, 0], [pad_front, pad_back],
                         [pad_front, pad_back], [0, 0]])
def pad_to_multiple(tensor, multiple):
"""Returns the tensor zero padded to the specified multiple.
......@@ -209,8 +232,10 @@ def padded_one_hot_encoding(indices, depth, left_pad):
raise ValueError('`left_pad` must be a non-negative integer.')
if depth == 0:
return None
if len(indices.get_shape().as_list()) != 1:
raise ValueError('`indices` must have rank 1')
rank = len(indices.get_shape().as_list())
if rank != 1:
raise ValueError('`indices` must have rank 1, but has rank=%s' % rank)
def one_hot_and_pad():
one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
......@@ -284,6 +309,11 @@ def indices_to_dense_vector(indices,
[zeros, values])
def reduce_sum_trailing_dimensions(tensor, ndims):
  """Computes sum across all dimensions following first `ndims` dimensions."""
  trailing_axes = tuple(range(ndims, tensor.shape.ndims))
  return tf.reduce_sum(tensor, axis=trailing_axes)
def retain_groundtruth(tensor_dict, valid_indices):
"""Retains groundtruth by valid indices.
......@@ -627,7 +657,7 @@ def position_sensitive_crop_regions(image,
position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
# Then average over spatial positions within the bins.
position_sensitive_features = tf.reduce_mean(
position_sensitive_features, [1, 2], keep_dims=True)
position_sensitive_features, [1, 2], keepdims=True)
else:
# Reorder height/width to depth channel.
block_size = bin_crop_size[0]
......@@ -739,3 +769,53 @@ def merge_boxes_with_multiple_labels(boxes, classes, num_classes):
class_encodings = tf.reshape(class_encodings, [-1, num_classes])
merged_box_indices = tf.reshape(merged_box_indices, [-1])
return merged_boxes, class_encodings, merged_box_indices
def nearest_neighbor_upsampling(input_tensor, scale):
  """Nearest neighbor upsampling implementation.

  Maps an input tensor of shape [batch_size, height, width, channels] to
  [batch_size, height * scale, width * scale, channels]. Only reshape and
  tile ops are used, to keep the op compatible with certain hardware.

  Args:
    input_tensor: A float32 tensor of size [batch, height_in, width_in,
      channels].
    scale: An integer multiple to scale resolution of input data.

  Returns:
    data_up: A float32 tensor of size
      [batch, height_in*scale, width_in*scale, channels].
  """
  (batch, height, width, channels) = (
      shape_utils.combined_static_and_dynamic_shape(input_tensor))
  # Insert a singleton axis after each spatial dimension, tile those axes by
  # `scale`, then collapse back to 4-D — equivalent to nearest-neighbor resize.
  expanded = tf.reshape(input_tensor, [batch, height, 1, width, 1, channels])
  tiled = tf.tile(expanded, [1, 1, scale, 1, scale, 1])
  return tf.reshape(tiled, [batch, height * scale, width * scale, channels])
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  with tf.name_scope(scope, 'MatMulGather'):
    num_rows = params.shape[0]
    # Flatten trailing dimensions so every params[i] becomes one matrix row.
    flat_params = tf.reshape(params, [num_rows, -1])
    # A one-hot selector row per index turns the gather into a single matmul.
    selector = tf.one_hot(indices, num_rows)
    flat_gathered = tf.matmul(selector, flat_params)
    return tf.reshape(flat_gathered,
                      indices.shape.concatenate(params.shape[1:]))
......@@ -19,6 +19,7 @@ import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
from object_detection.utils import test_case
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
......@@ -42,6 +43,18 @@ class NormalizedToImageCoordinatesTest(tf.test.TestCase):
self.assertAllEqual(absolute_boxes, expected_boxes)
class ReduceSumTrailingDimensions(tf.test.TestCase):
def test_reduce_sum_trailing_dimensions(self):
input_tensor = tf.placeholder(tf.float32, shape=[None, None, None])
reduced_tensor = ops.reduce_sum_trailing_dimensions(input_tensor, ndims=2)
with self.test_session() as sess:
reduced_np = sess.run(reduced_tensor,
feed_dict={input_tensor: np.ones((2, 2, 2),
np.float32)})
self.assertAllClose(reduced_np, 2 * np.ones((2, 2), np.float32))
class MeshgridTest(tf.test.TestCase):
def test_meshgrid_numpy_comparison(self):
......@@ -83,6 +96,30 @@ class MeshgridTest(tf.test.TestCase):
self.assertEqual(ygrid_output[yind + xind], y[yind])
class OpsTestFixedPadding(tf.test.TestCase):
def test_3x3_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 3)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)
def test_5x5_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 5)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)
def test_3x3_atrous_kernel(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.fixed_padding(tensor, 3, 2)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 6, 6, 1), padded_tensor_out.shape)
class OpsTestPadToMultiple(tf.test.TestCase):
def test_zero_padding(self):
......@@ -1128,5 +1165,66 @@ class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
self.assertAllEqual(np_merged_box_indices.shape, [0])
class NearestNeighborUpsamplingTest(test_case.TestCase):
def test_upsampling(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
tf_op_output = tf.image.resize_images(
inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
return (custom_op_output, tf_op_output)
inputs = np.reshape(np.arange(2**4), [2, 2, 2, 2])
(custom_op_output, tf_op_output) = self.execute(graph_fn, [inputs])
self.assertAllClose(custom_op_output, tf_op_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
  """Tests for ops.matmul_gather_on_zeroth_axis."""

  def _gather(self, params, indices):
    """Runs the matmul-based gather and returns its numpy output."""
    def graph_fn(params, indices):
      return ops.matmul_gather_on_zeroth_axis(params, indices)
    return self.execute(graph_fn, [params, indices])

  def test_gather_2d(self):
    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    indices = np.array([2, 2, 1])
    # Row 2 is gathered twice, then row 1.
    expected = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
    self.assertAllClose(self._gather(params, indices), expected)

  def test_gather_3d(self):
    params = np.array([[[1, 2], [3, 4]],
                       [[5, 6], [7, 8]],
                       [[9, 10], [11, 12]],
                       [[0, 1], [0, 0]]], dtype=np.float32)
    indices = np.array([0, 3, 1])
    expected = np.array([[[1, 2], [3, 4]],
                         [[0, 1], [0, 0]],
                         [[5, 6], [7, 8]]])
    self.assertAllClose(self._gather(params, indices), expected)

  def test_gather_with_many_indices(self):
    params = np.array([[1, 2, 3, 4],
                       [5, 6, 7, 8],
                       [9, 10, 11, 12],
                       [0, 1, 0, 0]], dtype=np.float32)
    # Gathering index 0 six times replicates row 0 six times.
    indices = np.array([0, 0, 0, 0, 0, 0])
    expected = np.array(6 * [[1, 2, 3, 4]])
    self.assertAllClose(self._gather(params, indices), expected)
if __name__ == '__main__':
tf.test.main()
......@@ -17,11 +17,15 @@
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Suppression is used by default. Multi class
detection is supported by default.
Based on the settings, per image evaluation is either performed on boxes or
on object masks.
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_box_mask_list_ops
class PerImageEvaluation(object):
......@@ -49,7 +53,8 @@ class PerImageEvaluation(object):
def compute_object_detection_metrics(
self, detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists, groundtruth_is_group_of_list):
groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Evaluates detections as being tp, fp or ignored from a single image.
The evaluation is done in two stages:
......@@ -70,10 +75,15 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the metrics will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
scores: A list of C float numpy arrays. Each numpy array is of
......@@ -86,22 +96,35 @@ class PerImageEvaluation(object):
shape [C, 1], indicating whether the corresponding class has at least
one instance being correctly detected in the image
"""
detected_boxes, detected_scores, detected_class_labels = (
detected_boxes, detected_scores, detected_class_labels, detected_masks = (
self._remove_invalid_boxes(detected_boxes, detected_scores,
detected_class_labels))
detected_class_labels, detected_masks))
scores, tp_fp_labels = self._compute_tp_fp(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists, groundtruth_is_group_of_list)
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
groundtruth_is_difficult_list=groundtruth_is_difficult_list,
groundtruth_is_group_of_list=groundtruth_is_group_of_list,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks)
is_class_correctly_detected_in_image = self._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_class_labels=detected_class_labels,
groundtruth_boxes=groundtruth_boxes,
groundtruth_class_labels=groundtruth_class_labels,
detected_masks=detected_masks,
groundtruth_masks=groundtruth_masks)
return scores, tp_fp_labels, is_class_correctly_detected_in_image
def _compute_cor_loc(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels):
groundtruth_class_labels, detected_masks=None,
groundtruth_masks=None):
"""Compute CorLoc score for object detection result.
Args:
......@@ -116,28 +139,51 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
is_class_correctly_detected_in_image: a numpy integer array of
shape [C, 1], indicating whether the corresponding class has at least
one instance being correctly detected in the image
Raises:
ValueError: If detected masks is not None but groundtruth masks are None,
or the other way around.
"""
if (detected_masks is not None and
groundtruth_masks is None) or (detected_masks is None and
groundtruth_masks is not None):
raise ValueError(
'If `detected_masks` is provided, then `groundtruth_masks` should '
'also be provided.'
)
is_class_correctly_detected_in_image = np.zeros(
self.num_groundtruth_classes, dtype=int)
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[groundtruth_class_labels ==
i, :]
detected_boxes_at_ith_class = detected_boxes[detected_class_labels ==
i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
(gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class) = self._get_ith_class_arrays(
detected_boxes, detected_scores, detected_masks,
detected_class_labels, groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, i)
is_class_correctly_detected_in_image[i] = (
self._compute_is_aclass_correctly_detected_in_image(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class))
self._compute_is_class_correctly_detected_in_image(
detected_boxes=detected_boxes_at_ith_class,
detected_scores=detected_scores_at_ith_class,
groundtruth_boxes=gt_boxes_at_ith_class,
detected_masks=detected_masks_at_ith_class,
groundtruth_masks=gt_masks_at_ith_class))
return is_class_correctly_detected_in_image
def _compute_is_aclass_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes):
def _compute_is_class_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes,
detected_masks=None, groundtruth_masks=None):
"""Compute CorLoc score for a single class.
Args:
......@@ -147,6 +193,11 @@ class PerImageEvaluation(object):
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
detected_masks: (optional) A np.uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A np.uint8 numpy array of shape
[M, height, width].
Returns:
is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
......@@ -155,18 +206,30 @@ class PerImageEvaluation(object):
if detected_boxes.size > 0:
if groundtruth_boxes.size > 0:
max_score_id = np.argmax(detected_scores)
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
mask_mode = False
if detected_masks is not None and groundtruth_masks is not None:
mask_mode = True
if mask_mode:
detected_boxlist = np_box_mask_list.BoxMaskList(
box_data=np.expand_dims(detected_boxes[max_score_id], axis=0),
mask_data=np.expand_dims(detected_masks[max_score_id], axis=0))
gt_boxlist = np_box_mask_list.BoxMaskList(
box_data=groundtruth_boxes, mask_data=groundtruth_masks)
iou = np_box_mask_list_ops.iou(detected_boxlist, gt_boxlist)
else:
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
if np.max(iou) >= self.matching_iou_threshold:
return 1
return 0
def _compute_tp_fp(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels, groundtruth_is_difficult_lists,
groundtruth_is_group_of_list):
groundtruth_class_labels, groundtruth_is_difficult_list,
groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Labels true/false positives of detections of an image across all classes.
Args:
......@@ -181,10 +244,15 @@ class PerImageEvaluation(object):
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag
detected_masks: (optional) A np.uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A np.uint8 numpy array of shape
[M, height, width].
Returns:
result_scores: A list of float numpy arrays. Each numpy array is of
......@@ -193,37 +261,134 @@ class PerImageEvaluation(object):
result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
shape [K, 1], representing K True/False positive label of object
instances detected with class label c
Raises:
ValueError: If detected masks is not None but groundtruth masks are None,
or the other way around.
"""
if detected_masks is not None and groundtruth_masks is None:
raise ValueError(
'Detected masks is available but groundtruth masks is not.')
if detected_masks is None and groundtruth_masks is not None:
raise ValueError(
'Groundtruth masks is available but detected masks is not.')
result_scores = []
result_tp_fp_labels = []
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[(groundtruth_class_labels == i
), :]
groundtruth_is_difficult_list_at_ith_class = (
groundtruth_is_difficult_lists[groundtruth_class_labels == i])
groundtruth_is_difficult_list[groundtruth_class_labels == i])
groundtruth_is_group_of_list_at_ith_class = (
groundtruth_is_group_of_list[groundtruth_class_labels == i])
detected_boxes_at_ith_class = detected_boxes[(detected_class_labels == i
), :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
(gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class) = self._get_ith_class_arrays(
detected_boxes, detected_scores, detected_masks,
detected_class_labels, groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, i)
scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class,
groundtruth_is_group_of_list_at_ith_class)
detected_boxes=detected_boxes_at_ith_class,
detected_scores=detected_scores_at_ith_class,
groundtruth_boxes=gt_boxes_at_ith_class,
groundtruth_is_difficult_list=
groundtruth_is_difficult_list_at_ith_class,
groundtruth_is_group_of_list=
groundtruth_is_group_of_list_at_ith_class,
detected_masks=detected_masks_at_ith_class,
groundtruth_masks=gt_masks_at_ith_class)
result_scores.append(scores)
result_tp_fp_labels.append(tp_fp_labels)
return result_scores, result_tp_fp_labels
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels):
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
detected_class_labels[valid_indices])
def _get_overlaps_and_scores_mask_mode(
    self, detected_boxes, detected_scores, detected_masks, groundtruth_boxes,
    groundtruth_masks, groundtruth_is_group_of_list):
  """Computes overlaps and scores between detected and groundtruth masks.

  Args:
    detected_boxes: A numpy array of shape [N, 4] representing detected box
      coordinates
    detected_scores: A 1-d numpy array of length N representing classification
      score
    detected_masks: A uint8 numpy array of shape [N, height, width]. If not
      None, the scores will be computed based on masks.
    groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
      box coordinates
    groundtruth_masks: A uint8 numpy array of shape [M, height, width].
    groundtruth_is_group_of_list: A boolean numpy array of length M denoting
      whether a ground truth box has group-of tag. If a groundtruth box
      is group-of box, every detection matching this box is ignored.

  Returns:
    iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
    ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_group_of_boxlist.num_boxes() == 0 it will be None.
    scores: The score of the detected boxlist.
    num_boxes: Number of non-maximum suppressed detected boxes.
  """
  # Run NMS on the detections (boxes + masks) before computing overlaps.
  nms_boxlist = np_box_mask_list.BoxMaskList(
      box_data=detected_boxes, mask_data=detected_masks)
  nms_boxlist.add_field('scores', detected_scores)
  nms_boxlist = np_box_mask_list_ops.non_max_suppression(
      nms_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
  # Split groundtruth into group-of and non-group-of collections.
  group_of = groundtruth_is_group_of_list
  gt_non_group_of_boxlist = np_box_mask_list.BoxMaskList(
      box_data=groundtruth_boxes[~group_of],
      mask_data=groundtruth_masks[~group_of])
  gt_group_of_boxlist = np_box_mask_list.BoxMaskList(
      box_data=groundtruth_boxes[group_of],
      mask_data=groundtruth_masks[group_of])
  # IOU against non-group-of groundtruth; IOA against group-of groundtruth.
  iou = np_box_mask_list_ops.iou(nms_boxlist, gt_non_group_of_boxlist)
  ioa = np_box_mask_list_ops.ioa(gt_group_of_boxlist, nms_boxlist)
  return (iou, ioa, nms_boxlist.get_field('scores'),
          nms_boxlist.num_boxes())
def _get_overlaps_and_scores_box_mode(
    self,
    detected_boxes,
    detected_scores,
    groundtruth_boxes,
    groundtruth_is_group_of_list):
  """Computes overlaps and scores between detected and groundtruth boxes.

  Args:
    detected_boxes: A numpy array of shape [N, 4] representing detected box
      coordinates
    detected_scores: A 1-d numpy array of length N representing classification
      score
    groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
      box coordinates
    groundtruth_is_group_of_list: A boolean numpy array of length M denoting
      whether a ground truth box has group-of tag. If a groundtruth box
      is group-of box, every detection matching this box is ignored.

  Returns:
    iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
    ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
      gt_group_of_boxlist.num_boxes() == 0 it will be None.
    scores: The score of the detected boxlist.
    num_boxes: Number of non-maximum suppressed detected boxes.
  """
  # Run NMS on the detections before computing overlaps.
  nms_boxlist = np_box_list.BoxList(detected_boxes)
  nms_boxlist.add_field('scores', detected_scores)
  nms_boxlist = np_box_list_ops.non_max_suppression(
      nms_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
  # Split groundtruth into group-of and non-group-of collections.
  group_of = groundtruth_is_group_of_list
  gt_non_group_of_boxlist = np_box_list.BoxList(groundtruth_boxes[~group_of])
  gt_group_of_boxlist = np_box_list.BoxList(groundtruth_boxes[group_of])
  # IOU against non-group-of groundtruth; IOA against group-of groundtruth.
  iou = np_box_list_ops.iou(nms_boxlist, gt_non_group_of_boxlist)
  ioa = np_box_list_ops.ioa(gt_group_of_boxlist, nms_boxlist)
  return (iou, ioa, nms_boxlist.get_field('scores'),
          nms_boxlist.num_boxes())
def _compute_tp_fp_for_single_class(
self, detected_boxes, detected_scores, groundtruth_boxes,
groundtruth_is_difficult_list, groundtruth_is_group_of_list):
groundtruth_is_difficult_list, groundtruth_is_group_of_list,
detected_masks=None, groundtruth_masks=None):
"""Labels boxes detected with the same class from the same image as tp/fp.
Args:
......@@ -240,6 +405,11 @@ class PerImageEvaluation(object):
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag. If a groundtruth box
is group-of box, every detection matching this box is ignored.
detected_masks: (optional) A uint8 numpy array of shape
[N, height, width]. If not None, the scores will be computed based
on masks.
groundtruth_masks: (optional) A uint8 numpy array of shape
[M, height, width].
Returns:
Two arrays of the same size, containing all boxes that were evaluated as
......@@ -249,25 +419,37 @@ class PerImageEvaluation(object):
scores: A numpy array representing the detection scores.
tp_fp_labels: a boolean numpy array indicating whether a detection is a
true positive.
"""
if detected_boxes.size == 0:
return np.array([], dtype=float), np.array([], dtype=bool)
detected_boxlist = np_box_list.BoxList(detected_boxes)
detected_boxlist.add_field('scores', detected_scores)
detected_boxlist = np_box_list_ops.non_max_suppression(
detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
scores = detected_boxlist.get_field('scores')
mask_mode = False
if detected_masks is not None and groundtruth_masks is not None:
mask_mode = True
if mask_mode:
(iou, ioa, scores,
num_detected_boxes) = self._get_overlaps_and_scores_mask_mode(
detected_boxes=detected_boxes,
detected_scores=detected_scores,
detected_masks=detected_masks,
groundtruth_boxes=groundtruth_boxes,
groundtruth_masks=groundtruth_masks,
groundtruth_is_group_of_list=groundtruth_is_group_of_list)
else:
(iou, ioa, scores,
num_detected_boxes) = self._get_overlaps_and_scores_box_mode(
detected_boxes=detected_boxes,
detected_scores=detected_scores,
groundtruth_boxes=groundtruth_boxes,
groundtruth_is_group_of_list=groundtruth_is_group_of_list)
if groundtruth_boxes.size == 0:
return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)
return scores, np.zeros(num_detected_boxes, dtype=bool)
tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_difficult_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_group_of_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool)
is_matched_to_difficult_box = np.zeros(num_detected_boxes, dtype=bool)
is_matched_to_group_of_box = np.zeros(num_detected_boxes, dtype=bool)
# The evaluation is done in two stages:
# 1. All detections are matched to non group-of boxes; true positives are
......@@ -276,16 +458,12 @@ class PerImageEvaluation(object):
# group-of boxes and ignored if matched.
# Tp-fp evaluation for non-group of boxes (if any).
gt_non_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[~groundtruth_is_group_of_list, :])
if gt_non_group_of_boxlist.num_boxes() > 0:
if iou.shape[1] > 0:
groundtruth_nongroup_of_is_difficult_list = groundtruth_is_difficult_list[
~groundtruth_is_group_of_list]
iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
max_overlap_gt_ids = np.argmax(iou, axis=1)
is_gt_box_detected = np.zeros(
gt_non_group_of_boxlist.num_boxes(), dtype=bool)
for i in range(detected_boxlist.num_boxes()):
is_gt_box_detected = np.zeros(iou.shape[1], dtype=bool)
for i in range(num_detected_boxes):
gt_id = max_overlap_gt_ids[i]
if iou[i, gt_id] >= self.matching_iou_threshold:
if not groundtruth_nongroup_of_is_difficult_list[gt_id]:
......@@ -296,12 +474,9 @@ class PerImageEvaluation(object):
is_matched_to_difficult_box[i] = True
# Tp-fp evaluation for group of boxes.
gt_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[groundtruth_is_group_of_list, :])
if gt_group_of_boxlist.num_boxes() > 0:
ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
if ioa.shape[0] > 0:
max_overlap_group_of_gt = np.max(ioa, axis=0)
for i in range(detected_boxlist.num_boxes()):
for i in range(num_detected_boxes):
if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
is_matched_to_group_of_box[i] = True
......@@ -310,3 +485,83 @@ class PerImageEvaluation(object):
& ~is_matched_to_group_of_box], tp_fp_labels[
~is_matched_to_difficult_box
& ~is_matched_to_group_of_box]
def _get_ith_class_arrays(self, detected_boxes, detected_scores,
detected_masks, detected_class_labels,
groundtruth_boxes, groundtruth_masks,
groundtruth_class_labels, class_index):
"""Returns numpy arrays belonging to class with index `class_index`.
Args:
detected_boxes: A numpy array containing detected boxes.
detected_scores: A numpy array containing detected scores.
detected_masks: A numpy array containing detected masks.
detected_class_labels: A numpy array containing detected class labels.
groundtruth_boxes: A numpy array containing groundtruth boxes.
groundtruth_masks: A numpy array containing groundtruth masks.
groundtruth_class_labels: A numpy array containing groundtruth class
labels.
class_index: An integer index.
Returns:
gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled
as ith class.
gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled
as ith class.
detected_boxes_at_ith_class: A numpy array containing detected boxes
corresponding to the ith class.
detected_scores_at_ith_class: A numpy array containing detected scores
corresponding to the ith class.
detected_masks_at_ith_class: A numpy array containing detected masks
corresponding to the ith class.
"""
selected_groundtruth = (groundtruth_class_labels == class_index)
gt_boxes_at_ith_class = groundtruth_boxes[selected_groundtruth]
if groundtruth_masks is not None:
gt_masks_at_ith_class = groundtruth_masks[selected_groundtruth]
else:
gt_masks_at_ith_class = None
selected_detections = (detected_class_labels == class_index)
detected_boxes_at_ith_class = detected_boxes[selected_detections]
detected_scores_at_ith_class = detected_scores[selected_detections]
if detected_masks is not None:
detected_masks_at_ith_class = detected_masks[selected_detections]
else:
detected_masks_at_ith_class = None
return (gt_boxes_at_ith_class, gt_masks_at_ith_class,
detected_boxes_at_ith_class, detected_scores_at_ith_class,
detected_masks_at_ith_class)
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels, detected_masks=None):
"""Removes entries with invalid boxes.
A box is invalid if either its xmax is smaller than its xmin, or its ymax
is smaller than its ymin.
Args:
detected_boxes: A float numpy array of size [num_boxes, 4] containing box
coordinates in [ymin, xmin, ymax, xmax] format.
detected_scores: A float numpy array of size [num_boxes].
detected_class_labels: A int32 numpy array of size [num_boxes].
detected_masks: A uint8 numpy array of size [num_boxes, height, width].
Returns:
valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4]
containing box coordinates in [ymin, xmin, ymax, xmax] format.
valid_detected_scores: A float numpy array of size [num_valid_boxes].
valid_detected_class_labels: A int32 numpy array of size
[num_valid_boxes].
valid_detected_masks: A uint8 numpy array of size
[num_valid_boxes, height, width].
"""
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
detected_boxes = detected_boxes[valid_indices]
detected_scores = detected_scores[valid_indices]
detected_class_labels = detected_class_labels[valid_indices]
if detected_masks is not None:
detected_masks = detected_masks[valid_indices]
return [
detected_boxes, detected_scores, detected_class_labels, detected_masks
]
......@@ -35,10 +35,29 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
dtype=float)
def test_match_to_not_difficult_box(self):
groundtruth_masks_0 = np.array([[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1 = np.array([[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 1]], dtype=np.uint8)
self.groundtruth_masks = np.stack(
[groundtruth_masks_0, groundtruth_masks_1], axis=0)
def test_match_to_gt_box_0(self):
groundtruth_groundtruth_is_difficult_list = np.array([False, True],
dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
......@@ -52,7 +71,25 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_match_to_difficult_box(self):
def test_mask_match_to_gt_mask_0(self):
  """In mask mode the top-scoring detection is a TP; the rest are FPs."""
  is_difficult = np.array([False, True], dtype=bool)
  is_group_of = np.array([False, False], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.8, 0.6, 0.5], dtype=float)))
  self.assertTrue(
      np.allclose(tp_fp_labels, np.array([True, False, False], dtype=bool)))
def test_match_to_gt_box_1(self):
groundtruth_groundtruth_is_difficult_list = np.array([True, False],
dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
......@@ -66,6 +103,24 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_to_gt_mask_1(self):
  """With groundtruth 0 marked difficult, no detection counts as a TP."""
  is_difficult = np.array([True, False], dtype=bool)
  is_group_of = np.array([False, False], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.6, 0.5], dtype=float)))
  self.assertTrue(
      np.allclose(tp_fp_labels, np.array([False, False], dtype=bool)))
class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
......@@ -81,8 +136,31 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.detected_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
self.groundtruth_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
groundtruth_masks_0 = np.array([[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks_1 = np.array([[0, 0, 1, 0],
[0, 0, 1, 0],
[0, 0, 1, 0]], dtype=np.uint8)
groundtruth_masks_2 = np.array([[0, 1, 0, 0],
[0, 1, 0, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.groundtruth_masks = np.stack(
[groundtruth_masks_0, groundtruth_masks_1, groundtruth_masks_2], axis=0)
def test_match_to_non_group_of_and_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
......@@ -98,6 +176,24 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_to_non_group_of_and_group_of_box(self):
  """One detection survives group-of filtering and matches in mask mode."""
  is_difficult = np.array([False, False, False], dtype=bool)
  is_group_of = np.array([False, True, True], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.6], dtype=float)))
  self.assertTrue(np.allclose(tp_fp_labels, np.array([True], dtype=bool)))
def test_match_two_to_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
[False, False, False], dtype=bool)
......@@ -112,32 +208,61 @@ class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_match_two_to_group_of_box(self):
  """Only the non-group-of match remains after mask-mode evaluation."""
  is_difficult = np.array([False, False, False], dtype=bool)
  is_group_of = np.array([True, False, True], dtype=bool)
  scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
      self.detected_boxes,
      self.detected_scores,
      self.groundtruth_boxes,
      is_difficult,
      is_group_of,
      detected_masks=self.detected_masks,
      groundtruth_masks=self.groundtruth_masks)
  self.assertTrue(np.allclose(scores, np.array([0.8], dtype=float)))
  self.assertTrue(np.allclose(tp_fp_labels, np.array([True], dtype=bool)))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold1 = 0.5
matching_iou_threshold2 = 0.1
matching_iou_threshold_high_iou = 0.5
matching_iou_threshold_low_iou = 0.1
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval1 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
nms_max_output_boxes)
self.eval_high_iou = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold_high_iou,
nms_iou_threshold, nms_max_output_boxes)
self.eval2 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
nms_max_output_boxes)
self.eval_low_iou = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold_low_iou,
nms_iou_threshold, nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
detected_masks_0 = np.array([[0, 1, 1, 0],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_1 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
detected_masks_2 = np.array([[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 0, 0]], dtype=np.uint8)
self.detected_masks = np.stack(
[detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
def test_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -146,11 +271,32 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_masks_0 = np.array([[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]], dtype=np.uint8)
groundtruth_masks = np.stack([groundtruth_masks_0], axis=0)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes,
self.detected_scores,
groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list,
detected_masks=self.detected_masks,
groundtruth_masks=groundtruth_masks)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_one_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -159,11 +305,32 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_mask_one_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_masks_0 = np.array([[1, 0, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0]], dtype=np.uint8)
groundtruth_masks = np.stack([groundtruth_masks_0], axis=0)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes,
self.detected_scores,
groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list,
detected_masks=self.detected_masks,
groundtruth_masks=groundtruth_masks)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_one_true_positives_with_very_small_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_low_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......@@ -177,7 +344,7 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
[False, False], dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
scores, tp_fp_labels = self.eval_high_iou._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
......
......@@ -17,6 +17,8 @@
import tensorflow as tf
from object_detection.utils import static_shape
def _is_tensor(t):
"""Returns a boolean indicating whether the input is a tensor.
......@@ -125,12 +127,183 @@ def combined_static_and_dynamic_shape(tensor):
Returns:
A list of size tensor.shape.ndims containing integers or a scalar tensor.
"""
static_shape = tensor.shape.as_list()
dynamic_shape = tf.shape(tensor)
static_tensor_shape = tensor.shape.as_list()
dynamic_tensor_shape = tf.shape(tensor)
combined_shape = []
for index, dim in enumerate(static_shape):
for index, dim in enumerate(static_tensor_shape):
if dim is not None:
combined_shape.append(dim)
else:
combined_shape.append(dynamic_shape[index])
combined_shape.append(dynamic_tensor_shape[index])
return combined_shape
def static_or_dynamic_map_fn(fn, elems, dtype=None,
                             parallel_iterations=32, back_prop=True):
  """Runs map_fn as a (static) for loop when possible.

  This function rewrites the map_fn as an explicit unstack input -> for loop
  over function calls -> stack result combination. This allows our graphs to
  be acyclic when the batch size is static.
  For comparison, see https://www.tensorflow.org/api_docs/python/tf/map_fn.

  Note that `static_or_dynamic_map_fn` currently is not *fully* interchangeable
  with the default tf.map_fn function as it does not accept nested inputs (only
  Tensors or lists of Tensors).  Likewise, the output of `fn` can only be a
  Tensor or list of Tensors.

  TODO: make this function fully interchangeable with tf.map_fn.

  Args:
    fn: The callable to be performed. It accepts one argument, which will have
      the same structure as elems. Its output must have the
      same structure as elems.
    elems: A tensor or list of tensors, each of which will
      be unpacked along their first dimension. The sequence of the
      resulting slices will be applied to fn.
    dtype: (optional) The output type(s) of fn. If fn returns a structure of
      Tensors differing from the structure of elems, then dtype is not optional
      and must have the same structure as the output of fn.
    parallel_iterations: (optional) number of batch items to process in
      parallel. This flag is only used if the native tf.map_fn is used
      and defaults to 32 instead of 10 (unlike the standard tf.map_fn default).
    back_prop: (optional) True enables support for back propagation.
      This flag is only used if the native tf.map_fn is used.

  Returns:
    A tensor or sequence of tensors. Each tensor packs the
    results of applying fn to tensors unpacked from elems along the first
    dimension, from first to last.

  Raises:
    ValueError: if `elems` is not a Tensor or a list of Tensors.
    ValueError: if `fn` does not return a Tensor or list of Tensors.
  """
  if isinstance(elems, list):
    for elem in elems:
      if not isinstance(elem, tf.Tensor):
        raise ValueError('`elems` must be a Tensor or list of Tensors.')
    elem_shapes = [elem.shape.as_list() for elem in elems]
    # Fall back on tf.map_fn if shapes of each entry of `elems` are None or fail
    # to all be the same size along the batch dimension.
    for elem_shape in elem_shapes:
      if (not elem_shape or not elem_shape[0]
          or elem_shape[0] != elem_shapes[0][0]):
        return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    # Static batch size: unroll into an explicit per-slice loop of fn calls.
    arg_tuples = zip(*[tf.unstack(elem) for elem in elems])
    outputs = [fn(arg_tuple) for arg_tuple in arg_tuples]
  else:
    if not isinstance(elems, tf.Tensor):
      raise ValueError('`elems` must be a Tensor or list of Tensors.')
    elems_shape = elems.shape.as_list()
    if not elems_shape or not elems_shape[0]:
      return tf.map_fn(fn, elems, dtype, parallel_iterations, back_prop)
    outputs = [fn(arg) for arg in tf.unstack(elems)]
  # Stack `outputs`, which is a list of Tensors or list of lists of Tensors
  if all([isinstance(output, tf.Tensor) for output in outputs]):
    return tf.stack(outputs)
  else:
    if all([isinstance(output, list) for output in outputs]):
      if all([all(
          [isinstance(entry, tf.Tensor) for entry in output_list])
              for output_list in outputs]):
        # `fn` returned lists; transpose and stack element-wise.
        return [tf.stack(output_tuple) for output_tuple in zip(*outputs)]
  raise ValueError('`fn` should return a Tensor or a list of Tensors.')
def check_min_image_dim(min_dim, image_tensor):
  """Checks that the image width/height are greater than some number.

  This function is used to check that the width and height of an image are
  above a certain value. If the image shape is static, this function will
  perform the check at graph construction time. Otherwise, if the image shape
  varies, an Assertion control dependency will be added to the graph.

  Args:
    min_dim: The minimum number of pixels along the width and height of the
      image.
    image_tensor: The image tensor to check size for.

  Returns:
    If `image_tensor` has dynamic size, return `image_tensor` with a Assert
    control dependency. Otherwise returns image_tensor.

  Raises:
    ValueError: if `image_tensor`'s' width or height is smaller than `min_dim`.
  """
  static_image_shape = image_tensor.get_shape()
  height = static_shape.get_height(static_image_shape)
  width = static_shape.get_width(static_image_shape)

  if height is None or width is None:
    # Shape only known at run time: defer the check to a graph assertion.
    dynamic_image_shape = tf.shape(image_tensor)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(dynamic_image_shape[1], min_dim),
            tf.greater_equal(dynamic_image_shape[2], min_dim)),
        ['image size must be >= {} in both height and width.'.format(min_dim)])
    with tf.control_dependencies([shape_assert]):
      return tf.identity(image_tensor)

  # Fully static shape: validate immediately at graph construction time.
  if height < min_dim or width < min_dim:
    raise ValueError(
        'image size must be >= %d in both height and width; image dim = %d,%d' %
        (min_dim, height, width))
  return image_tensor
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  a_fully_static = all(isinstance(dim, int) for dim in shape_a)
  b_fully_static = all(isinstance(dim, int) for dim in shape_b)
  if a_fully_static and b_fully_static:
    # Every dimension is known at graph construction time; compare directly.
    if shape_a != shape_b:
      raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
    return tf.no_op()
  # At least one dimension is a tensor; defer the comparison to run time.
  return tf.assert_equal(shape_a, shape_b)
def assert_shape_equal_along_first_dimension(shape_a, shape_b):
  """Asserts that shape_a and shape_b are the same along the 0th-dimension.

  If the shapes are static, raises a ValueError when the shapes
  mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Args:
    shape_a: a list containing shape of the first tensor.
    shape_b: a list containing shape of the second tensor.

  Returns:
    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
    when the shapes are dynamic.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  first_dim_a = shape_a[0]
  first_dim_b = shape_b[0]
  if isinstance(first_dim_a, int) and isinstance(first_dim_b, int):
    # Both leading dimensions known statically; compare at construction time.
    if first_dim_a != first_dim_b:
      raise ValueError('Unequal first dimension {}, {}'.format(
          first_dim_a, first_dim_b))
    return tf.no_op()
  # At least one leading dimension is dynamic; emit a run-time assertion.
  return tf.assert_equal(first_dim_a, first_dim_b)
......@@ -15,6 +15,7 @@
"""Tests for object_detection.utils.shape_utils."""
import numpy as np
import tensorflow as tf
from object_detection.utils import shape_utils
......@@ -123,5 +124,198 @@ class UtilTest(tf.test.TestCase):
self.assertListEqual(combined_shape[1:], [2, 3])
class StaticOrDynamicMapFnTest(tf.test.TestCase):
  """Tests for shape_utils.static_or_dynamic_map_fn.

  The tests check both the computed values and, via op names in the default
  graph, whether the native tf.map_fn fallback was used ('map*' ops present)
  or the call was unrolled statically (no 'map*' ops).
  """

  def test_with_dynamic_shape(self):
    # Unknown batch dimension -> must fall back to native tf.map_fn.
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.placeholder(tf.float32, shape=(None, 2))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2], [3, 1], [0, 4]]})
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1], [0, 9]]})
      self.assertAllEqual(result1, [3, 4, 4])
      self.assertAllEqual(result2, [0, 9])

  def test_with_static_shape(self):
    # Static batch dimension -> unrolled; no native map_fn ops expected.
    def fn(input_tensor):
      return tf.reduce_sum(input_tensor)
    input_tensor = tf.constant([[1, 2], [3, 1], [0, 4]], dtype=tf.float32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(fn, input_tensor)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [3, 4, 4])

  def test_with_multiple_dynamic_shapes(self):
    # List input with dynamic batch dims -> native map_fn fallback.
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])
    input_tensor = tf.placeholder(tf.float32, shape=(None, 3))
    scalar_index_tensor = tf.placeholder(tf.int32, shape=(None, 1))
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(any(['map' == op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result1 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[1, 2, 3], [4, 5, -1], [0, 6, 9]],
              scalar_index_tensor: [[0], [2], [1]],
          })
      result2 = sess.run(
          map_fn_output, feed_dict={
              input_tensor: [[-1, 1, 0], [3, 9, 30]],
              scalar_index_tensor: [[1], [0]]
          })
      self.assertAllEqual(result1, [1, -1, 6])
      self.assertAllEqual(result2, [1, 3])

  def test_with_multiple_static_shapes(self):
    # List input with matching static batch dims -> unrolled.
    def fn(elems):
      input_tensor, scalar_index_tensor = elems
      return tf.reshape(tf.slice(input_tensor, scalar_index_tensor, [1]), [])
    input_tensor = tf.constant([[1, 2, 3], [4, 5, -1], [0, 6, 9]],
                               dtype=tf.float32)
    scalar_index_tensor = tf.constant([[0], [2], [1]], dtype=tf.int32)
    map_fn_output = shape_utils.static_or_dynamic_map_fn(
        fn, [input_tensor, scalar_index_tensor], dtype=tf.float32)

    op_names = [op.name for op in tf.get_default_graph().get_operations()]
    self.assertTrue(all(['map' != op_name[:3] for op_name in op_names]))

    with self.test_session() as sess:
      result = sess.run(map_fn_output)
      self.assertAllEqual(result, [1, -1, 6])

  def test_fails_with_nested_input(self):
    # Nested (list-of-list) inputs are documented as unsupported.
    def fn(input_tensor):
      return input_tensor
    input_tensor1 = tf.constant([1])
    input_tensor2 = tf.constant([2])
    with self.assertRaisesRegexp(
        ValueError, '`elems` must be a Tensor or list of Tensors.'):
      shape_utils.static_or_dynamic_map_fn(
          fn, [input_tensor1, [input_tensor2]], dtype=tf.float32)
class CheckMinImageShapeTest(tf.test.TestCase):
  """Tests for shape_utils.check_min_image_dim."""

  def test_check_min_image_dim_static_shape(self):
    # Static shape: the check happens at graph construction time and a
    # too-small image raises a ValueError immediately.
    input_tensor = tf.constant(np.zeros([1, 42, 42, 3]))
    _ = shape_utils.check_min_image_dim(33, input_tensor)

    with self.assertRaisesRegexp(
        ValueError, 'image size must be >= 64 in both height and width.'):
      _ = shape_utils.check_min_image_dim(64, input_tensor)

  def test_check_min_image_dim_dynamic_shape(self):
    # Dynamic shape: the check is deferred to a run-time tf.Assert.
    input_placeholder = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    image_tensor = shape_utils.check_min_image_dim(33, input_placeholder)

    with self.test_session() as sess:
      sess.run(image_tensor,
               feed_dict={input_placeholder: np.zeros([1, 42, 42, 3])})
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(image_tensor,
                 feed_dict={input_placeholder: np.zeros([1, 32, 32, 3])})
class AssertShapeEqualTest(tf.test.TestCase):
  """Tests for shape_utils.assert_shape_equal and its first-dimension variant.

  Static-shape mismatches are expected to raise ValueError at graph
  construction time; dynamic-shape mismatches are expected to raise
  tf.errors.InvalidArgumentError when the assert op is run.
  """

  def test_unequal_static_shape_raises_exception(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 2, 3, 1]))
    with self.assertRaisesRegexp(
        ValueError, 'Unequal shapes'):
      shape_utils.assert_shape_equal(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))

  def test_equal_static_shape_succeeds(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 2, 2, 1]))
    with self.test_session() as sess:
      op = shape_utils.assert_shape_equal(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))
      sess.run(op)

  def test_unequal_dynamic_shape_raises_tf_assert(self):
    tensor_a = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    op = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                                tensor_b: np.zeros([1, 4, 4, 3])})

  def test_equal_dynamic_shape_succeeds(self):
    tensor_a = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    op = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                              tensor_b: np.zeros([1, 2, 2, 3])})

  def test_unequal_static_shape_along_first_dim_raises_exception(self):
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([6, 2, 3, 1]))
    with self.assertRaisesRegexp(
        ValueError, 'Unequal first dimension'):
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))

  def test_equal_static_shape_along_first_dim_succeeds(self):
    # Only the leading dimension must match; trailing dims deliberately differ.
    shape_a = tf.constant(np.zeros([4, 2, 2, 1]))
    shape_b = tf.constant(np.zeros([4, 7, 2]))
    with self.test_session() as sess:
      op = shape_utils.assert_shape_equal_along_first_dimension(
          shape_utils.combined_static_and_dynamic_shape(shape_a),
          shape_utils.combined_static_and_dynamic_shape(shape_b))
      sess.run(op)

  def test_unequal_dynamic_shape_along_first_dim_raises_tf_assert(self):
    tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[None, None, 3])
    op = shape_utils.assert_shape_equal_along_first_dimension(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      with self.assertRaises(tf.errors.InvalidArgumentError):
        sess.run(op, feed_dict={tensor_a: np.zeros([1, 2, 2, 3]),
                                tensor_b: np.zeros([2, 4, 3])})

  def test_equal_dynamic_shape_along_first_dim_succeeds(self):
    tensor_a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    tensor_b = tf.placeholder(tf.float32, shape=[None])
    op = shape_utils.assert_shape_equal_along_first_dimension(
        shape_utils.combined_static_and_dynamic_shape(tensor_a),
        shape_utils.combined_static_and_dynamic_shape(tensor_b))
    with self.test_session() as sess:
      sess.run(op, feed_dict={tensor_a: np.zeros([5, 2, 2, 3]),
                              tensor_b: np.zeros([5])})
if __name__ == '__main__':
tf.test.main()
"""A convenience wrapper around tf.test.TestCase to enable TPU tests."""
import tensorflow as tf
from tensorflow.contrib import tpu
flags = tf.app.flags
flags.DEFINE_bool('tpu_test', False, 'Whether to configure test for TPU.')
FLAGS = flags.FLAGS
class TestCase(tf.test.TestCase):
  """Extends tf.test.TestCase to optionally allow running tests on TPU.

  Tests call `execute(graph_fn, inputs)`; the `--tpu_test` flag selects
  between the TPU and CPU execution paths at run time.
  """

  def execute_tpu(self, graph_fn, inputs):
    """Constructs the graph, executes it on TPU and returns the result.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    with self.test_session(graph=tf.Graph()) as sess:
      # placeholder_with_default lets the same graph run without a feed_dict.
      placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs]
      tpu_computation = tpu.rewrite(graph_fn, placeholders)
      # TPU system must be initialized before, and shut down after, running
      # the rewritten computation.
      sess.run(tpu.initialize_system())
      sess.run([tf.global_variables_initializer(), tf.tables_initializer(),
                tf.local_variables_initializer()])
      materialized_results = sess.run(tpu_computation,
                                      feed_dict=dict(zip(placeholders, inputs)))
      sess.run(tpu.shutdown_system())
      # Unwrap single-output computations to a scalar/array for convenience.
      # NOTE(review): assumes sess.run returned a sequence (len() works);
      # confirm graph_fn never returns a bare scalar tensor.
      if len(materialized_results) == 1:
        materialized_results = materialized_results[0]
      return materialized_results

  def execute_cpu(self, graph_fn, inputs):
    """Constructs the graph, executes it on CPU and returns the result.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    with self.test_session(graph=tf.Graph()) as sess:
      placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs]
      results = graph_fn(*placeholders)
      sess.run([tf.global_variables_initializer(), tf.tables_initializer(),
                tf.local_variables_initializer()])
      materialized_results = sess.run(results, feed_dict=dict(zip(placeholders,
                                                                  inputs)))
      # Unwrap single-output computations, mirroring execute_tpu.
      if len(materialized_results) == 1:
        materialized_results = materialized_results[0]
      return materialized_results

  def execute(self, graph_fn, inputs):
    """Constructs the graph, creates a test session and returns the results.

    The graph is executed either on TPU or CPU based on the `tpu_test` flag.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    if FLAGS.tpu_test:
      return self.execute_tpu(graph_fn, inputs)
    else:
      return self.execute_cpu(graph_fn, inputs)
......@@ -46,12 +46,13 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
def _predict(self, image_features, num_predictions_per_location):
image_feature = image_features[0]
combined_feature_shape = shape_utils.combined_static_and_dynamic_shape(
image_features)
image_feature)
batch_size = combined_feature_shape[0]
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_features)
zero = tf.reduce_sum(0 * image_feature)
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
......
......@@ -96,7 +96,9 @@ def freeze_gradients_matching_regex(grads_and_vars, regex_list):
return kept_grads_and_vars
def get_variables_available_in_checkpoint(variables, checkpoint_path):
def get_variables_available_in_checkpoint(variables,
checkpoint_path,
include_global_step=True):
"""Returns the subset of variables available in the checkpoint.
Inspects given checkpoint and returns the subset of variables that are
......@@ -107,6 +109,8 @@ def get_variables_available_in_checkpoint(variables, checkpoint_path):
Args:
variables: a list or dictionary of variables to find in checkpoint.
checkpoint_path: path to the checkpoint to restore variables from.
include_global_step: whether to include `global_step` variable, if it
exists. Default True.
Returns:
A list or dictionary of variables.
......@@ -120,13 +124,20 @@ def get_variables_available_in_checkpoint(variables, checkpoint_path):
else:
raise ValueError('`variables` is expected to be a list or dict.')
ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys()
ckpt_vars_to_shape_map = ckpt_reader.get_variable_to_shape_map()
if not include_global_step:
ckpt_vars_to_shape_map.pop(tf.GraphKeys.GLOBAL_STEP, None)
vars_in_ckpt = {}
for variable_name, variable in sorted(variable_names_map.items()):
if variable_name in ckpt_vars:
vars_in_ckpt[variable_name] = variable
if variable_name in ckpt_vars_to_shape_map:
if ckpt_vars_to_shape_map[variable_name] == variable.shape.as_list():
vars_in_ckpt[variable_name] = variable
else:
logging.warning('Variable [%s] is available in checkpoint, but has an '
'incompatible shape with model variable.',
variable_name)
else:
logging.warning('Variable [%s] not available in checkpoint',
logging.warning('Variable [%s] is not available in checkpoint',
variable_name)
if isinstance(variables, list):
return vars_in_ckpt.values()
......
......@@ -145,8 +145,11 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
def test_return_variables_available_in_checkpoint(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
weight_variable = tf.Variable(1.0, name='weights')
global_step = tf.train.get_or_create_global_step()
graph1_variables = [
tf.Variable(1.0, name='weights'),
weight_variable,
global_step
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
......@@ -156,8 +159,8 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')]
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables, checkpoint_path)
self.assertItemsEqual(out_variables, graph1_variables)
graph2_variables, checkpoint_path, include_global_step=False)
self.assertItemsEqual(out_variables, [weight_variable])
def test_return_variables_available_an_checkpoint_with_dict_inputs(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
......@@ -180,6 +183,31 @@ class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
self.assertItemsEqual(out_variables.keys(), ['ckpt_weights'])
self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights')
  def test_return_variables_with_correct_sizes(self):
    """Variables whose shape differs from the checkpointed shape are dropped."""
    checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
    bias_variable = tf.Variable(3.0, name='biases')
    global_step = tf.train.get_or_create_global_step()
    # Save 'weights' with a 2x2 shape.
    graph1_variables = [
        tf.Variable([[1.0, 2.0], [3.0, 4.0]], name='weights'),
        bias_variable,
        global_step
    ]
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver(graph1_variables)
    with self.test_session() as sess:
      sess.run(init_op)
      saver.save(sess, checkpoint_path)

    # Request restore with an incompatible 'weights' shape; only the
    # shape-compatible variables should be returned.
    graph2_variables = [
        tf.Variable([1.0, 2.0], name='weights'),  # Note the new variable shape.
        bias_variable,
        global_step
    ]
    out_variables = variables_helper.get_variables_available_in_checkpoint(
        graph2_variables, checkpoint_path, include_global_step=True)
    self.assertItemsEqual(out_variables, [bias_variable, global_step])
if __name__ == '__main__':
tf.test.main()
......@@ -21,7 +21,9 @@ The functions do not return a value, instead they modify the image itself.
"""
import collections
import functools
import matplotlib.pyplot as plt
# Set headless-friendly backend.
import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements
import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
......@@ -30,6 +32,8 @@ import PIL.ImageFont as ImageFont
import six
import tensorflow as tf
from object_detection.core import standard_fields as fields
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
......@@ -100,9 +104,12 @@ def draw_bounding_box_on_image_array(image,
use_normalized_coordinates=True):
"""Adds a bounding box to an image (numpy array).
Bounding box coordinates can be specified in either absolute (pixel) or
normalized coordinates by setting the use_normalized_coordinates argument.
Args:
image: a numpy array with shape [height, width, 3].
ymin: ymin of bounding box in normalized coordinates (same below).
ymin: ymin of bounding box.
xmin: xmin of bounding box.
ymax: ymax of bounding box.
xmax: xmax of bounding box.
......@@ -132,6 +139,9 @@ def draw_bounding_box_on_image(image,
use_normalized_coordinates=True):
"""Adds a bounding box to an image.
Bounding box coordinates can be specified in either absolute (pixel) or
normalized coordinates by setting the use_normalized_coordinates argument.
Each string in display_str_list is displayed on a separate line above the
bounding box in black text on a rectangle filled with the input 'color'.
If the top of the bounding box extends to the edge of the image, the strings
......@@ -255,14 +265,58 @@ def draw_bounding_boxes_on_image(image,
boxes[i, 3], color, thickness, display_str_list)
def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs):
  """Draws boxes and labels on `image`; per-image helper for tf.py_func."""
  return visualize_boxes_and_labels_on_image_array(
      image, boxes, classes, scores, category_index=category_index, **kwargs)
def _visualize_boxes_and_masks(image, boxes, classes, scores, masks,
                               category_index, **kwargs):
  """Draws boxes, labels and instance masks on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      **kwargs)
def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints,
                                   category_index, **kwargs):
  """Draws boxes, labels and keypoints on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      keypoints=keypoints,
      **kwargs)
def _visualize_boxes_and_masks_and_keypoints(
    image, boxes, classes, scores, masks, keypoints, category_index, **kwargs):
  """Draws boxes, labels, masks and keypoints on `image` (tf.py_func helper)."""
  return visualize_boxes_and_labels_on_image_array(
      image,
      boxes,
      classes,
      scores,
      category_index=category_index,
      instance_masks=masks,
      keypoints=keypoints,
      **kwargs)
def draw_bounding_boxes_on_image_tensors(images,
boxes,
classes,
scores,
category_index,
instance_masks=None,
keypoints=None,
max_boxes_to_draw=20,
min_score_thresh=0.2):
"""Draws bounding boxes on batch of image tensors.
"""Draws bounding boxes, masks, and keypoints on batch of image tensors.
Args:
images: A 4D uint8 image tensor of shape [N, H, W, C].
......@@ -272,37 +326,123 @@ def draw_bounding_boxes_on_image_tensors(images,
scores: [N, max_detections] float32 tensor of detection scores.
category_index: a dict that maps integer ids to category dicts. e.g.
{1: {1: 'dog'}, 2: {2: 'cat'}, ...}
instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
instance masks.
keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
with keypoints.
max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
min_score_thresh: Minimum score threshold for visualization. Default 0.2.
Returns:
4D image tensor of type uint8, with boxes drawn on top.
"""
visualize_boxes_fn = functools.partial(
visualize_boxes_and_labels_on_image_array,
category_index=category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=True,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
agnostic_mode=False,
line_thickness=4)
visualization_keyword_args = {
'use_normalized_coordinates': True,
'max_boxes_to_draw': max_boxes_to_draw,
'min_score_thresh': min_score_thresh,
'agnostic_mode': False,
'line_thickness': 4
}
if instance_masks is not None and keypoints is None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks]
elif instance_masks is None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, keypoints]
elif instance_masks is not None and keypoints is not None:
visualize_boxes_fn = functools.partial(
_visualize_boxes_and_masks_and_keypoints,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores, instance_masks, keypoints]
else:
visualize_boxes_fn = functools.partial(
_visualize_boxes,
category_index=category_index,
**visualization_keyword_args)
elems = [images, boxes, classes, scores]
def draw_boxes(image_boxes_classes_scores):
def draw_boxes(image_and_detections):
"""Draws boxes on image."""
(image, boxes, classes, scores) = image_boxes_classes_scores
image_with_boxes = tf.py_func(visualize_boxes_fn,
[image, boxes, classes, scores], tf.uint8)
image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections,
tf.uint8)
return image_with_boxes
images = tf.map_fn(
draw_boxes, (images, boxes, classes, scores),
dtype=tf.uint8,
back_prop=False)
images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
return images
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2):
  """Renders detections and groundtruth side by side for a single example.

  Bounding boxes (and instance masks, when present in `eval_dict`) are
  visualized on both halves of the returned image tensor.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor: the left half shows detections, the
    right half shows groundtruth.
  """
  det_fields = fields.DetectionResultFields()
  gt_fields = fields.InputDataFields()

  def _batched(tensor):
    # Prepend a batch dimension of size 1.
    return tf.expand_dims(tensor, axis=0)

  detection_masks = None
  if det_fields.detection_masks in eval_dict:
    detection_masks = tf.cast(
        _batched(eval_dict[det_fields.detection_masks]), tf.uint8)
  detection_keypoints = None
  if det_fields.detection_keypoints in eval_dict:
    detection_keypoints = _batched(eval_dict[det_fields.detection_keypoints])
  groundtruth_masks = None
  if gt_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_masks = tf.cast(
        _batched(eval_dict[gt_fields.groundtruth_instance_masks]), tf.uint8)

  detections_image = draw_bounding_boxes_on_image_tensors(
      eval_dict[gt_fields.original_image],
      _batched(eval_dict[det_fields.detection_boxes]),
      _batched(eval_dict[det_fields.detection_classes]),
      _batched(eval_dict[det_fields.detection_scores]),
      category_index,
      instance_masks=detection_masks,
      keypoints=detection_keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh)
  # Groundtruth boxes carry no scores; use all-ones so every box is drawn.
  groundtruth_image = draw_bounding_boxes_on_image_tensors(
      eval_dict[gt_fields.original_image],
      _batched(eval_dict[gt_fields.groundtruth_boxes]),
      _batched(eval_dict[gt_fields.groundtruth_classes]),
      _batched(
          tf.ones_like(
              eval_dict[gt_fields.groundtruth_classes], dtype=tf.float32)),
      category_index,
      instance_masks=groundtruth_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0)
  return tf.concat([detections_image, groundtruth_image], axis=2)
def draw_keypoints_on_image_array(image,
keypoints,
color='red',
......@@ -352,7 +492,7 @@ def draw_keypoints_on_image(image,
outline=color, fill=color)
def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
"""Draws mask on an image.
Args:
......@@ -360,7 +500,7 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
mask: a uint8 numpy array of shape (img_height, img_height) with
values between either 0 or 1.
color: color to draw the keypoints with. Default is red.
alpha: transparency value between 0 and 1. (default: 0.7)
alpha: transparency value between 0 and 1. (default: 0.4)
Raises:
ValueError: On incorrect data type for image or masks.
......@@ -371,6 +511,9 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
raise ValueError('`mask` not of type np.uint8')
if np.any(np.logical_and(mask != 1, mask != 0)):
raise ValueError('`mask` elements should be in [0, 1]')
if image.shape[:2] != mask.shape:
raise ValueError('The image has spatial dimensions %s but the mask has '
'dimensions %s' % (image.shape[:2], mask.shape))
rgb = ImageColor.getrgb(color)
pil_image = Image.fromarray(image)
......@@ -382,18 +525,23 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
np.copyto(image, np.array(pil_image.convert('RGB')))
def visualize_boxes_and_labels_on_image_array(image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4):
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
instance_masks=None,
instance_boundaries=None,
keypoints=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
groundtruth_box_visualization_color='black',
skip_scores=False,
skip_labels=False):
"""Overlay labeled boxes on an image with formatted scores and label names.
This function groups boxes that correspond to the same location
......@@ -411,8 +559,10 @@ def visualize_boxes_and_labels_on_image_array(image,
boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding
category index `id` and category name `name`) keyed by category indices.
instance_masks: a numpy array of shape [N, image_height, image_width], can
be None
instance_masks: a numpy array of shape [N, image_height, image_width] with
values ranging between 0 and 1, can be None.
instance_boundaries: a numpy array of shape [N, image_height, image_width]
with values ranging between 0 and 1, can be None.
keypoints: a numpy array of shape [N, num_keypoints, 2], can
be None
use_normalized_coordinates: whether boxes is to be interpreted as
......@@ -424,6 +574,10 @@ def visualize_boxes_and_labels_on_image_array(image,
class-agnostic mode or not. This mode will display scores but ignore
classes.
line_thickness: integer (default: 4) controlling line width of the boxes.
groundtruth_box_visualization_color: box color for visualizing groundtruth
boxes
skip_scores: whether to skip score when drawing a single detection
skip_labels: whether to skip label when drawing a single detection
Returns:
uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
......@@ -433,6 +587,7 @@ def visualize_boxes_and_labels_on_image_array(image,
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
......@@ -441,21 +596,26 @@ def visualize_boxes_and_labels_on_image_array(image,
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if scores is None:
box_to_color_map[box] = 'black'
box_to_color_map[box] = groundtruth_box_visualization_color
else:
if not agnostic_mode:
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in category_index.keys():
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
if not skip_scores:
if not display_str:
display_str = '{}%'.format(int(100*scores[i]))
else:
class_name = 'N/A'
display_str = '{}: {}%'.format(
class_name,
int(100*scores[i]))
else:
display_str = 'score: {}%'.format(int(100 * scores[i]))
display_str = '{}: {}%'.format(display_str, int(100*scores[i]))
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
......@@ -472,6 +632,13 @@ def visualize_boxes_and_labels_on_image_array(image,
box_to_instance_masks_map[box],
color=color
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
......@@ -518,7 +685,7 @@ def add_cdf_image_summary(values, name):
fig.canvas.draw()
width, height = fig.get_size_inches() * fig.get_dpi()
image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
1, height, width, 3)
1, int(height), int(width), 3)
return image
cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
tf.summary.image(name, cdf_plot)
......@@ -145,7 +145,7 @@ class VisualizationUtilsTest(tf.test.TestCase):
for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name)
print('Writing output image %d to %s' % (i, output_file))
print 'Writing output image %d to %s' % (i, output_file)
image_pil = Image.fromarray(images_with_boxes_np[i, ...])
image_pil.save(output_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment