Commit a4944a57 authored by derekjchow, committed by Sergio Guadarrama

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxList classes and functions."""
import numpy as np
class BoxList(object):
"""Box collection.
BoxList represents a list of bounding boxes as a numpy array, where each
bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
given list correspond to a single image.
Optionally, users can add additional related fields (such as
objectness/classification scores).
"""
def __init__(self, data):
"""Constructs box collection.
Args:
data: a numpy array of shape [N, 4] representing box coordinates
Raises:
ValueError: if bbox data is not a numpy array
ValueError: if invalid dimensions for bbox data
"""
if not isinstance(data, np.ndarray):
raise ValueError('data must be a numpy array.')
if len(data.shape) != 2 or data.shape[1] != 4:
raise ValueError('Invalid dimensions for box data.')
if data.dtype != np.float32 and data.dtype != np.float64:
raise ValueError('Invalid data type for box data: float is required.')
if not self._is_valid_boxes(data):
raise ValueError('Invalid box data. data must be a numpy array of '
'N*[y_min, x_min, y_max, x_max]')
self.data = {'boxes': data}
def num_boxes(self):
"""Return number of boxes held in collections."""
return self.data['boxes'].shape[0]
def get_extra_fields(self):
"""Return all non-box fields."""
return [k for k in self.data.keys() if k != 'boxes']
def has_field(self, field):
return field in self.data
def add_field(self, field, field_data):
"""Add data to a specified field.
Args:
field: a string parameter used to specify a related field to be accessed.
field_data: a numpy array of [N, ...] representing the data associated
with the field.
Raises:
ValueError: if the field already exists or the dimension of the field
data does not match the number of boxes.
"""
if self.has_field(field):
raise ValueError('Field ' + field + ' already exists')
if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
raise ValueError('Invalid dimensions for field data')
self.data[field] = field_data
def get(self):
"""Convenience function for accesssing box coordinates.
Returns:
a numpy array of shape [N, 4] representing box corners
"""
return self.get_field('boxes')
def get_field(self, field):
"""Accesses data associated with the specified field in the box collection.
Args:
field: a string parameter used to specify a related field to be accessed.
Returns:
a numpy array of shape [N, ...] representing the data of the associated field
Raises:
ValueError: if invalid field
"""
if not self.has_field(field):
raise ValueError('field {} does not exist'.format(field))
return self.data[field]
def get_coordinates(self):
"""Get corner coordinates of boxes.
Returns:
a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
"""
box_coordinates = self.get()
y_min = box_coordinates[:, 0]
x_min = box_coordinates[:, 1]
y_max = box_coordinates[:, 2]
x_max = box_coordinates[:, 3]
return [y_min, x_min, y_max, x_max]
def _is_valid_boxes(self, data):
"""Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].
Args:
data: a numpy array of shape [N, 4] representing box coordinates
Returns:
a boolean indicating whether all ymax of boxes are equal to or greater
than ymin, and all xmax of boxes are equal to or greater than xmin.
"""
if data.shape[0] > 0:
for i in range(data.shape[0]):
if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
return False
return True
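# Example (an illustrative sketch, not part of the original file): building a
# BoxList and attaching a per-box field. The coordinate and score values are
# made up for demonstration.
example_boxes = np.array([[0.1, 0.1, 0.5, 0.5],
                          [0.2, 0.3, 0.9, 1.0]], dtype=np.float32)
example_boxlist = BoxList(example_boxes)
example_boxlist.add_field('scores', np.array([0.9, 0.4], dtype=np.float32))
print(example_boxlist.num_boxes())         # 2
print(example_boxlist.get_extra_fields())  # ['scores']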
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations for Numpy BoxLists.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_ops
class SortOrder(object):
"""Enum class for sort order.
Attributes:
ASCEND: ascending order.
DESCEND: descending order.
"""
ASCEND = 1
DESCEND = 2
def area(boxlist):
"""Computes area of boxes.
Args:
boxlist: BoxList holding N boxes
Returns:
a numpy array with shape [N] representing box areas
"""
y_min, x_min, y_max, x_max = boxlist.get_coordinates()
return (y_max - y_min) * (x_max - x_min)
def intersection(boxlist1, boxlist2):
"""Compute pairwise intersection areas between boxes.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection areas
"""
return np_box_ops.intersection(boxlist1.get(), boxlist2.get())
def iou(boxlist1, boxlist2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
return np_box_ops.iou(boxlist1.get(), boxlist2.get())
def ioa(boxlist1, boxlist2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
return np_box_ops.ioa(boxlist1.get(), boxlist2.get())
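# Example (an illustrative sketch, not part of the original file) of the IOA
# asymmetry: a unit box and its lower half intersect with area 0.5, so the
# score depends on whose area is the denominator.
example_a = np_box_list.BoxList(np.array([[0., 0., 1., 1.]], dtype=np.float32))
example_b = np_box_list.BoxList(np.array([[0., 0., 0.5, 1.]], dtype=np.float32))
print(ioa(example_a, example_b))  # [[1.0]]: intersection 0.5 / area(b) 0.5
print(ioa(example_b, example_a))  # [[0.5]]: intersection 0.5 / area(a) 1.0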
def gather(boxlist, indices, fields=None):
"""Gather boxes from BoxList according to indices and return new BoxList.
By default, Gather returns boxes corresponding to the input index list, as
well as all additional fields stored in the boxlist (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
Args:
boxlist: BoxList holding N boxes
indices: a 1-d numpy array of type int_
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
Returns:
subboxlist: a BoxList corresponding to the subset of the input BoxList
specified by indices
Raises:
ValueError: if specified field is not contained in boxlist or if the
indices are not of type int_
"""
if indices.size:
if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0:
raise ValueError('indices are out of valid range.')
subboxlist = np_box_list.BoxList(boxlist.get()[indices, :])
if fields is None:
fields = boxlist.get_extra_fields()
for field in fields:
extra_field_data = boxlist.get_field(field)
subboxlist.add_field(field, extra_field_data[indices, ...])
return subboxlist
def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
"""Sort boxes and associated fields according to a scalar field.
A common use case is reordering the boxes according to descending scores.
Args:
boxlist: BoxList holding N boxes.
field: A BoxList field for sorting and reordering the BoxList.
order: (Optional) SortOrder.DESCEND or SortOrder.ASCEND. Default is
SortOrder.DESCEND.
Returns:
sorted_boxlist: A sorted BoxList with the field in the specified order.
Raises:
ValueError: if specified field does not exist or is not of single dimension.
ValueError: if the order is not either descend or ascend.
"""
if not boxlist.has_field(field):
raise ValueError('Field ' + field + ' does not exist')
if len(boxlist.get_field(field).shape) != 1:
raise ValueError('Field ' + field + ' should be one-dimensional.')
if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
raise ValueError('Invalid sort order')
field_to_sort = boxlist.get_field(field)
sorted_indices = np.argsort(field_to_sort)
if order == SortOrder.DESCEND:
sorted_indices = sorted_indices[::-1]
return gather(boxlist, sorted_indices)
def non_max_suppression(boxlist,
max_output_size=10000,
iou_threshold=1.0,
score_threshold=-10.0):
"""Non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. In each iteration, the detected bounding box
with the highest score in the available pool is selected.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores. All scores belong to the same class.
max_output_size: maximum number of retained boxes
iou_threshold: intersection over union threshold.
score_threshold: minimum score threshold. Boxes with scores below this
value are removed. Defaults to -10, a threshold low enough to pass
virtually all boxes unless the user sets a different value.
Returns:
a BoxList holding M boxes where M <= max_output_size
Raises:
ValueError: if 'scores' field does not exist
ValueError: if threshold is not in [0, 1]
ValueError: if max_output_size < 0
"""
if not boxlist.has_field('scores'):
raise ValueError('Field scores does not exist')
if iou_threshold < 0. or iou_threshold > 1.0:
raise ValueError('IOU threshold must be in [0, 1]')
if max_output_size < 0:
raise ValueError('max_output_size must be non-negative.')
boxlist = filter_scores_greater_than(boxlist, score_threshold)
if boxlist.num_boxes() == 0:
return boxlist
boxlist = sort_by_field(boxlist, 'scores')
# Prevent further computation if NMS is disabled.
if iou_threshold == 1.0:
if boxlist.num_boxes() > max_output_size:
selected_indices = np.arange(max_output_size)
return gather(boxlist, selected_indices)
else:
return boxlist
boxes = boxlist.get()
num_boxes = boxlist.num_boxes()
# is_index_valid is True only for boxes that remain valid NMS candidates.
is_index_valid = np.full(num_boxes, 1, dtype=bool)
selected_indices = []
num_output = 0
for i in range(num_boxes):
if num_output < max_output_size:
if is_index_valid[i]:
num_output += 1
selected_indices.append(i)
is_index_valid[i] = False
valid_indices = np.where(is_index_valid)[0]
if valid_indices.size == 0:
break
intersect_over_union = np_box_ops.iou(
np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
intersect_over_union = np.squeeze(intersect_over_union, axis=0)
is_index_valid[valid_indices] = np.logical_and(
is_index_valid[valid_indices],
intersect_over_union <= iou_threshold)
return gather(boxlist, np.array(selected_indices))
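# Example (an illustrative sketch, not part of the original file): two heavily
# overlapping boxes and one distant box; with iou_threshold=0.5 the
# lower-scoring member of the overlapping pair is suppressed.
example_nms_boxlist = np_box_list.BoxList(
    np.array([[0., 0., 1., 1.],
              [0., 0.1, 1., 1.1],
              [0., 10., 1., 11.]], dtype=np.float32))
example_nms_boxlist.add_field(
    'scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))
example_kept = non_max_suppression(
    example_nms_boxlist, max_output_size=10, iou_threshold=0.5)
print(example_kept.get())  # the 0.9 and 0.7 boxes; the 0.8 box is suppressed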
def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
max_output_size):
"""Multi-class version of non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores. This scores field is a tensor that can
be 1-dimensional (in the case of a single class) or 2-dimensional, in which
case we assume that it takes the shape [num_boxes, num_classes].
We further assume that this rank is known statically and that
scores.shape[1] is also known (i.e., the number of classes is fixed
and known at graph construction time).
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (boxes that have high IOU overlap
with previously selected boxes are removed).
max_output_size: maximum number of retained boxes per class.
Returns:
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
a valid scores field.
"""
if not 0 <= iou_thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape) == 1:
scores = np.reshape(scores, [-1, 1])
elif len(scores.shape) == 2:
if scores.shape[1] is None:
raise ValueError('scores field must have statically defined second '
'dimension')
else:
raise ValueError('scores field must be of rank 1 or 2')
num_boxes = boxlist.num_boxes()
num_scores = scores.shape[0]
num_classes = scores.shape[1]
if num_boxes != num_scores:
raise ValueError('Incorrect scores field length: actual (%d) vs '
                 'expected (%d).' % (num_scores, num_boxes))
selected_boxes_list = []
for class_idx in range(num_classes):
boxlist_and_class_scores = np_box_list.BoxList(boxlist.get())
class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
boxlist_and_class_scores.add_field('scores', class_scores)
boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores,
score_thresh)
nms_result = non_max_suppression(boxlist_filt,
max_output_size=max_output_size,
iou_threshold=iou_thresh,
score_threshold=score_thresh)
nms_result.add_field(
'classes', np.zeros_like(nms_result.get_field('scores')) + class_idx)
selected_boxes_list.append(nms_result)
selected_boxes = concatenate(selected_boxes_list)
sorted_boxes = sort_by_field(selected_boxes, 'scores')
return sorted_boxes
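# Example (an illustrative sketch, not part of the original file): a
# [num_boxes, num_classes] scores matrix; each class column is thresholded
# and suppressed independently, then the survivors are merged and re-sorted.
example_mc_boxlist = np_box_list.BoxList(
    np.array([[0., 0., 1., 1.],
              [0., 0.1, 1., 1.1]], dtype=np.float32))
example_mc_boxlist.add_field('scores',
                             np.array([[0.9, 0.1],
                                       [0.2, 0.8]], dtype=np.float32))
example_mc_result = multi_class_non_max_suppression(
    example_mc_boxlist, score_thresh=0.5, iou_thresh=0.5, max_output_size=5)
print(example_mc_result.get_field('classes'))  # [0. 1.]: one box per class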
def scale(boxlist, y_scale, x_scale):
"""Scale box coordinates in x and y dimensions.
Args:
boxlist: BoxList holding N boxes
y_scale: float
x_scale: float
Returns:
boxlist: BoxList holding N boxes
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
y_min = y_scale * y_min
y_max = y_scale * y_max
x_min = x_scale * x_min
x_max = x_scale * x_max
scaled_boxlist = np_box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max]))
fields = boxlist.get_extra_fields()
for field in fields:
extra_field_data = boxlist.get_field(field)
scaled_boxlist.add_field(field, extra_field_data)
return scaled_boxlist
def clip_to_window(boxlist, window):
"""Clip bounding boxes to a window.
This op clips input bounding boxes (represented by bounding box
corners) to a window, optionally filtering out boxes that do not
overlap at all with the window.
Args:
boxlist: BoxList holding M_in boxes
window: a numpy array of shape [4] representing the
[y_min, x_min, y_max, x_max] window to which the op
should clip boxes.
Returns:
a BoxList holding M_out boxes where M_out <= M_in
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
win_y_min = window[0]
win_x_min = window[1]
win_y_max = window[2]
win_x_max = window[3]
y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min)
y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min)
x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min)
x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min)
clipped = np_box_list.BoxList(
np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped]))
clipped = _copy_extra_fields(clipped, boxlist)
areas = area(clipped)
nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)),
[-1]).astype(np.int32)
return gather(clipped, nonzero_area_indices)
def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
For each box in boxlist1, we want its IOA to be more than minoverlap with
at least one of the boxes in boxlist2. If it does not, we remove it.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
minoverlap: Minimum required overlap between boxes, to count them as
overlapping.
Returns:
A pruned boxlist with size [N', 4].
"""
intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor
intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
keep_inds = np.nonzero(keep_bool)[0]
new_boxlist1 = gather(boxlist1, keep_inds)
return new_boxlist1
def prune_outside_window(boxlist, window):
"""Prunes bounding boxes that fall outside a given window.
This function prunes bounding boxes that even partially fall outside the given
window. See also ClipToWindow which only prunes bounding boxes that fall
completely outside the window, and clips any bounding boxes that partially
overflow.
Args:
boxlist: a BoxList holding M_in boxes.
window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax]
of the window.
Returns:
pruned_boxlist: a BoxList holding M_out boxes where M_out <= M_in.
valid_indices: a numpy array with shape [M_out] indexing the valid bounding
boxes in the input BoxList.
"""
y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
win_y_min = window[0]
win_x_min = window[1]
win_y_max = window[2]
win_x_max = window[3]
coordinate_violations = np.hstack([np.less(y_min, win_y_min),
np.less(x_min, win_x_min),
np.greater(y_max, win_y_max),
np.greater(x_max, win_x_max)])
valid_indices = np.reshape(
np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1])
return gather(boxlist, valid_indices), valid_indices
def concatenate(boxlists, fields=None):
"""Concatenate list of BoxLists.
This op concatenates a list of input BoxLists into a larger BoxList. It also
handles concatenation of BoxList fields as long as the field tensor shapes
are equal except for the first dimension.
Args:
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxList in the list are included in the
concatenation.
Returns:
a BoxList with number of boxes equal to
sum([boxlist.num_boxes() for boxlist in boxlists])
Raises:
ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
contains non BoxList objects), or if requested fields are not contained in
all boxlists
"""
if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list')
if not boxlists:
raise ValueError('boxlists should have nonzero length')
for boxlist in boxlists:
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('all elements of boxlists should be BoxList objects')
concatenated = np_box_list.BoxList(
np.vstack([boxlist.get() for boxlist in boxlists]))
if fields is None:
fields = boxlists[0].get_extra_fields()
for field in fields:
first_field_shape = boxlists[0].get_field(field).shape
first_field_shape = first_field_shape[1:]
for boxlist in boxlists:
if not boxlist.has_field(field):
raise ValueError('boxlist must contain all requested fields')
field_shape = boxlist.get_field(field).shape
field_shape = field_shape[1:]
if field_shape != first_field_shape:
raise ValueError('field %s must have same shape for all boxlists '
'except for the 0th dimension.' % field)
concatenated_field = np.concatenate(
[boxlist.get_field(field) for boxlist in boxlists], axis=0)
concatenated.add_field(field, concatenated_field)
return concatenated
def filter_scores_greater_than(boxlist, thresh):
"""Filter to keep only boxes with score exceeding a given threshold.
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
representing detection scores.
thresh: scalar threshold
Returns:
a BoxList holding M boxes where M <= N
Raises:
ValueError: if boxlist not a BoxList object or if it does not
have a scores field
"""
if not isinstance(boxlist, np_box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
scores = boxlist.get_field('scores')
if len(scores.shape) > 2:
raise ValueError('Scores should have rank 1 or 2')
if len(scores.shape) == 2 and scores.shape[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = np.reshape(np.where(np.greater(scores, thresh)),
[-1]).astype(np.int32)
return gather(boxlist, high_score_indices)
def change_coordinate_frame(boxlist, window):
"""Change coordinate frame of the boxlist to be relative to window's frame.
Given a window of the form [ymin, xmin, ymax, xmax],
changes bounding box coordinates from boxlist to be relative to this window
(e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
An example use case is data augmentation: where we are given groundtruth
boxes (boxlist) and would like to randomly crop the image to some
window (window). In this case we need to change the coordinate frame of
each groundtruth box to be relative to this new window.
Args:
boxlist: A BoxList object holding N boxes.
window: a size 4 1-D numpy array.
Returns:
a BoxList object with N boxes.
"""
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(
np_box_list.BoxList(boxlist.get() -
[window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
_copy_extra_fields(boxlist_new, boxlist)
return boxlist_new
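# Example (an illustrative sketch, not part of the original file): a box
# re-expressed relative to a centered crop window; the window's min corner
# maps to (0, 0) and its max corner to (1, 1).
example_gt = np_box_list.BoxList(
    np.array([[0.25, 0.25, 0.5, 0.5]], dtype=np.float32))
example_window = np.array([0.25, 0.25, 0.75, 0.75], dtype=np.float32)
print(change_coordinate_frame(example_gt, example_window).get())
# [[0. 0. 0.5 0.5]]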
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
"""Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
Args:
boxlist_to_copy_to: BoxList to which extra fields are copied.
boxlist_to_copy_from: BoxList from which fields are copied.
Returns:
boxlist_to_copy_to with extra fields.
"""
for field in boxlist_to_copy_from.get_extra_fields():
boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
return boxlist_to_copy_to
def _update_valid_indices_by_removing_high_iou_boxes(
selected_indices, is_index_valid, intersect_over_union, threshold):
  """Invalidates indices whose max IOU with any selected box exceeds threshold."""
max_iou = np.max(intersect_over_union[:, selected_indices], axis=1)
return np.logical_and(is_index_valid, max_iou <= threshold)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
class AreaRelatedTest(tf.test.TestCase):
def setUp(self):
boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
dtype=float)
boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist1 = np_box_list.BoxList(boxes1)
self.boxlist2 = np_box_list.BoxList(boxes2)
def test_area(self):
areas = np_box_list_ops.area(self.boxlist1)
expected_areas = np.array([6.0, 5.0], dtype=float)
self.assertAllClose(expected_areas, areas)
def test_intersection(self):
intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2)
expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
dtype=float)
self.assertAllClose(intersection, expected_intersection)
def test_iou(self):
iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2)
expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
[1.0 / 16.0, 0.0, 5.0 / 400.0]],
dtype=float)
self.assertAllClose(iou, expected_iou)
def test_ioa(self):
boxlist1 = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist2 = np_box_list.BoxList(
np.array(
[[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1)
expected_ioa21 = np.array([[0.5, 0.0],
[1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(ioa21, expected_ioa21)
def test_scale(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0)
expected_boxlist_scaled = np_box_list.BoxList(
np.array(
[[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]], dtype=np.float32))
self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get())
def test_clip_to_window(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[-0.2, -0.3, 0.7, 1.5]],
dtype=np.float32))
boxlist_clipped = np_box_list_ops.clip_to_window(boxlist,
[0.0, 0.0, 1.0, 1.0])
expected_boxlist_clipped = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[0.0, 0.0, 0.7, 1.0]],
dtype=np.float32))
self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get())
def test_prune_outside_window(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[-0.2, -0.3, 0.7, 1.5]],
dtype=np.float32))
boxlist_pruned, _ = np_box_list_ops.prune_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
expected_boxlist_pruned = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get())
def test_concatenate(self):
boxlist1 = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist2 = np_box_list.BoxList(
np.array(
[[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], dtype=np.float32))
boxlists = [boxlist1, boxlist2]
boxlist_concatenated = np_box_list_ops.concatenate(boxlists)
boxlist_concatenated_expected = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
dtype=np.float32))
self.assertAllClose(boxlist_concatenated_expected.get(),
boxlist_concatenated.get())
def test_change_coordinate_frame(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist_coord = np_box_list_ops.change_coordinate_frame(
boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32))
expected_boxlist_coord = np_box_list.BoxList(
np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32))
self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get())
def test_filter_scores_greater_than(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=
np.float32))
boxlist.add_field('scores', np.array([0.8, 0.2], np.float32))
boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5)
expected_boxlist_greater = np_box_list.BoxList(
np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32))
self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get())
class GatherOpsTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float))
self.boxlist.add_field('labels',
np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
[0, 0, 0, 0, 1]],
dtype=int))
def test_gather_with_out_of_range_indices(self):
indices = np.array([3, 1], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices)
def test_gather_with_invalid_multidimensional_indices(self):
indices = np.array([[0, 1], [1, 2]], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices)
def test_gather_without_fields_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
subboxlist = np_box_list_ops.gather(boxlist, indices)
expected_scores = np.array([0.9, 0.5, 0.7], dtype=float)
self.assertAllClose(expected_scores, subboxlist.get_field('scores'))
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0]],
dtype=float)
self.assertAllClose(expected_boxes, subboxlist.get())
expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
[0, 1, 0, 0, 0]],
dtype=int)
self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
def test_gather_with_invalid_field_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices, 'labels')
with self.assertRaises(ValueError):
np_box_list_ops.gather(boxlist, indices, ['objectness'])
def test_gather_with_fields_specified(self):
indices = np.array([2, 0, 1], dtype=int)
boxlist = self.boxlist
subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels'])
self.assertFalse(subboxlist.has_field('scores'))
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0]],
dtype=float)
self.assertAllClose(expected_boxes, subboxlist.get())
expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
[0, 1, 0, 0, 0]],
dtype=int)
self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
class SortByFieldTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float))
self.boxlist.add_field('labels',
np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
[0, 0, 0, 0, 1]],
dtype=int))
def test_with_invalid_field(self):
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'objectness')
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'labels')
def test_with_invalid_sorting_order(self):
with self.assertRaises(ValueError):
np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending')
def test_with_descending_sorting(self):
sorted_boxlist = np_box_list_ops.sort_by_field(self.boxlist, 'scores')
expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.assertAllClose(expected_boxes, sorted_boxlist.get())
expected_scores = np.array([0.9, 0.5, 0.4], dtype=float)
self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
def test_with_ascending_sorting(self):
sorted_boxlist = np_box_list_ops.sort_by_field(
self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND)
expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0],
[3.0, 4.0, 6.0, 8.0],
[14.0, 14.0, 15.0, 15.0],],
dtype=float)
self.assertAllClose(expected_boxes, sorted_boxlist.get())
expected_scores = np.array([0.4, 0.5, 0.9], dtype=float)
self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
class NonMaximumSuppressionTest(tf.test.TestCase):
def setUp(self):
self._boxes = np.array([[0, 0, 1, 1],
[0, 0.1, 1, 1.1],
[0, -0.1, 1, 0.9],
[0, 10, 1, 11],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101]],
dtype=float)
self._boxlist = np_box_list.BoxList(self._boxes)
def test_with_no_scores_field(self):
boxlist = np_box_list.BoxList(self._boxes)
max_output_size = 3
iou_threshold = 0.5
with self.assertRaises(ValueError):
np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
def test_nms_disabled_max_output_size_equals_three(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .2, .3], dtype=float))
max_output_size = 3
iou_threshold = 1. # No NMS
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .2, .3], dtype=float))
max_output_size = 3
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_at_most_two_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .5, .3], dtype=float))
max_output_size = 2
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_at_most_thirty_from_three_clusters(self):
boxlist = np_box_list.BoxList(self._boxes)
boxlist.add_field('scores',
np.array([.9, .75, .6, .95, .5, .3], dtype=float))
max_output_size = 30
iou_threshold = 0.5
expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_select_from_ten_identical_boxes(self):
boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float)
boxlist = np_box_list.BoxList(boxes)
boxlist.add_field('scores', np.array(10 * [0.8]))
iou_threshold = .5
max_output_size = 3
expected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_different_iou_threshold(self):
boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80], [200, 200, 210, 300],
[200, 200, 210, 250]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6]))
max_output_size = 4
iou_threshold = .4
expected_boxes = np.array([[0, 0, 20, 100],
[200, 200, 210, 300],],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
iou_threshold = .5
expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300],
[200, 200, 210, 250]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
iou_threshold = .8
expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
[200, 200, 210, 300], [200, 200, 210, 250]],
dtype=float)
nms_boxlist = np_box_list_ops.non_max_suppression(
boxlist, max_output_size, iou_threshold)
self.assertAllClose(nms_boxlist.get(), expected_boxes)
def test_multiclass_nms(self):
boxlist = np_box_list.BoxList(
np.array(
[[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
dtype=np.float32))
scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
[0.7, -0.7, 0.6, 0.2, -0.9],
[0.4, 0.34, -0.9, 0.2, 0.31]],
dtype=np.float32)
boxlist.add_field('scores', scores)
boxlist_clean = np_box_list_ops.multi_class_non_max_suppression(
boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
scores_clean = boxlist_clean.get_field('scores')
classes_clean = boxlist_clean.get_field('classes')
boxes = boxlist_clean.get()
expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
expected_classes = np.array([0, 2, 1, 4])
expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
[0.4, 0.2, 0.8, 0.8],
[0.6, 0.0, 1.0, 1.0],
[0.6, 0.0, 1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(scores_clean, expected_scores)
self.assertAllClose(classes_clean, expected_classes)
self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_test."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_list
class BoxListTest(tf.test.TestCase):
def test_invalid_box_data(self):
with self.assertRaises(ValueError):
np_box_list.BoxList([0, 0, 1, 1])
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int))
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float))
with self.assertRaises(ValueError):
np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]], dtype=float))
def test_has_field_with_existing_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertTrue(boxlist.has_field('boxes'))
def test_has_field_with_nonexistent_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertFalse(boxlist.has_field('scores'))
def test_get_field_with_existing_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes))
def test_get_field_with_nonexistent_field(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
boxlist = np_box_list.BoxList(boxes)
with self.assertRaises(ValueError):
boxlist.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
def setUp(self):
boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxlist = np_box_list.BoxList(boxes)
def test_add_already_existing_field(self):
with self.assertRaises(ValueError):
self.boxlist.add_field('boxes', np.array([[0, 0, 0, 1, 0]], dtype=float))
def test_add_invalid_field_data(self):
with self.assertRaises(ValueError):
self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float))
with self.assertRaises(ValueError):
self.boxlist.add_field('scores',
np.array([0.5, 0.7, 0.9, 0.1], dtype=float))
def test_add_single_dimensional_field_data(self):
boxlist = self.boxlist
scores = np.array([0.5, 0.7, 0.9], dtype=float)
boxlist.add_field('scores', scores)
self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores')))
def test_add_multi_dimensional_field_data(self):
boxlist = self.boxlist
labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
dtype=int)
boxlist.add_field('labels', labels)
self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels')))
def test_get_extra_fields(self):
boxlist = self.boxlist
self.assertSameElements(boxlist.get_extra_fields(), [])
scores = np.array([0.5, 0.7, 0.9], dtype=float)
boxlist.add_field('scores', scores)
self.assertSameElements(boxlist.get_extra_fields(), ['scores'])
labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
dtype=int)
boxlist.add_field('labels', labels)
self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])
def test_get_coordinates(self):
y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()
expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)
self.assertTrue(np.allclose(y_min, expected_y_min))
self.assertTrue(np.allclose(x_min, expected_x_min))
self.assertTrue(np.allclose(y_max, expected_y_max))
self.assertTrue(np.allclose(x_max, expected_x_max))
def test_num_boxes(self):
boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
boxlist = np_box_list.BoxList(boxes)
expected_num_boxes = 2
self.assertEqual(boxlist.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, 4] numpy arrays representing bounding boxes.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
def area(boxes):
"""Computes area of boxes.
Args:
boxes: Numpy array with shape [N, 4] holding N boxes
Returns:
a numpy array with shape [N] representing box areas
"""
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection(boxes1, boxes2):
"""Compute pairwise intersection areas between boxes.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes
boxes2: a numpy array with shape [M, 4] holding M boxes
Returns:
a numpy array with shape [N, M] representing pairwise intersection areas
"""
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
intersect_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
intersect_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxes1, boxes2):
"""Computes pairwise intersection-over-union between box collections.
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
intersect = intersection(boxes1, boxes2)
area1 = area(boxes1)
area2 = area(boxes2)
union = np.expand_dims(area1, axis=1) + np.expand_dims(
area2, axis=0) - intersect
return intersect / union
def ioa(boxes1, boxes2):
"""Computes pairwise intersection-over-area between box collections.
Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
their intersection area over box2's area. Note that ioa is not symmetric,
that is, IOA(box1, box2) != IOA(box2, box1).
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
intersect = intersection(boxes1, boxes2)
areas = np.expand_dims(area(boxes2), axis=0)
return intersect / areas
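# Worked example (an illustrative sketch, not part of the original file): two
# 2x2 boxes offset by one unit in y overlap in a 1x2 strip of area 2, so
# IOU = 2 / (4 + 4 - 2) = 1/3.
example_boxes1 = np.array([[0., 0., 2., 2.]])
example_boxes2 = np.array([[1., 0., 3., 2.]])
print(intersection(example_boxes1, example_boxes2))  # [[2.]]
print(iou(example_boxes1, example_boxes2))           # [[0.33333333]]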
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_box_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_ops
class BoxOpsTests(tf.test.TestCase):
def setUp(self):
boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
dtype=float)
boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
self.boxes1 = boxes1
self.boxes2 = boxes2
def testArea(self):
areas = np_box_ops.area(self.boxes1)
expected_areas = np.array([6.0, 5.0], dtype=float)
self.assertAllClose(expected_areas, areas)
def testIntersection(self):
intersection = np_box_ops.intersection(self.boxes1, self.boxes2)
expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
dtype=float)
self.assertAllClose(intersection, expected_intersection)
def testIOU(self):
iou = np_box_ops.iou(self.boxes1, self.boxes2)
expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
[1.0 / 16.0, 0.0, 5.0 / 400.0]],
dtype=float)
self.assertAllClose(iou, expected_iou)
def testIOA(self):
boxes1 = np.array([[0.25, 0.25, 0.75, 0.75],
[0.0, 0.0, 0.5, 0.75]],
dtype=np.float32)
boxes2 = np.array([[0.5, 0.25, 1.0, 1.0],
[0.0, 0.0, 1.0, 1.0]],
dtype=np.float32)
ioa21 = np_box_ops.ioa(boxes2, boxes1)
expected_ioa21 = np.array([[0.5, 0.0],
[1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(ioa21, expected_ioa21)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""object_detection_evaluation module.
ObjectDetectionEvaluation is a class that manages the ground truth information
of an object detection dataset and computes frequently used detection metrics,
such as Precision, Recall, and CorLoc, for the provided detection results.
It supports the following operations:
1) Add ground truth information of images sequentially.
2) Add detection result of images sequentially.
3) Evaluate detection metrics on already inserted detection results.
4) Write evaluation result into a pickle file for future processing or
visualization.
Note: This module operates on numpy boxes and box lists.
"""
import logging
import numpy as np
from object_detection.utils import metrics
from object_detection.utils import per_image_evaluation
class ObjectDetectionEvaluation(object):
"""Evaluate Object Detection Result."""
def __init__(self,
num_groundtruth_classes,
matching_iou_threshold=0.5,
nms_iou_threshold=1.0,
nms_max_output_boxes=10000):
self.per_image_eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
self.num_class = num_groundtruth_classes
self.groundtruth_boxes = {}
self.groundtruth_class_labels = {}
self.groundtruth_is_difficult_list = {}
self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.empty(self.num_class, dtype=float)
self.average_precision_per_class.fill(np.nan)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
def clear_detections(self):
self.detection_keys = set()
self.scores_per_class = [[] for _ in range(self.num_class)]
self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
self.average_precision_per_class = np.zeros(self.num_class, dtype=float)
self.precisions_per_class = []
self.recalls_per_class = []
self.corloc_per_class = np.ones(self.num_class, dtype=float)
def add_single_ground_truth_image_info(self,
image_key,
groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_list=None):
"""Add ground truth info of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
groundtruth_boxes: A numpy array of shape [M, 4] representing object box
coordinates[y_min, x_min, y_max, x_max]
groundtruth_class_labels: A 1-d numpy array of length M representing class
labels
groundtruth_is_difficult_list: A length M numpy boolean array denoting
whether a ground truth box is a difficult instance or not. To support
the case that no boxes are difficult, it is by default set as None.
"""
if image_key in self.groundtruth_boxes:
logging.warning(
'image %s has already been added to the ground truth database.',
image_key)
return
self.groundtruth_boxes[image_key] = groundtruth_boxes
self.groundtruth_class_labels[image_key] = groundtruth_class_labels
if groundtruth_is_difficult_list is None:
num_boxes = groundtruth_boxes.shape[0]
groundtruth_is_difficult_list = np.zeros(num_boxes, dtype=bool)
self.groundtruth_is_difficult_list[
image_key] = groundtruth_is_difficult_list.astype(dtype=bool)
self._update_ground_truth_statistics(groundtruth_class_labels,
groundtruth_is_difficult_list)
def add_single_detected_image_info(self, image_key, detected_boxes,
detected_scores, detected_class_labels):
"""Add detected result of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates[y_min, x_min, y_max, x_max]
detected_scores: A 1-d numpy array of length N representing classification
scores
detected_class_labels: A 1-d numpy array of length N representing class
labels
Raises:
ValueError: if detected_boxes, detected_scores and detected_class_labels
do not have the same length.
"""
if (len(detected_boxes) != len(detected_scores) or
len(detected_boxes) != len(detected_class_labels)):
raise ValueError('detected_boxes, detected_scores and '
                 'detected_class_labels should all have the same lengths. '
                 'Got [%d, %d, %d]' % (len(detected_boxes),
                                       len(detected_scores),
                                       len(detected_class_labels)))
if image_key in self.detection_keys:
logging.warning(
'image %s has already been added to the detection result database',
image_key)
return
self.detection_keys.add(image_key)
if image_key in self.groundtruth_boxes:
groundtruth_boxes = self.groundtruth_boxes[image_key]
groundtruth_class_labels = self.groundtruth_class_labels[image_key]
groundtruth_is_difficult_list = self.groundtruth_is_difficult_list[
image_key]
else:
groundtruth_boxes = np.empty(shape=[0, 4], dtype=float)
groundtruth_class_labels = np.array([], dtype=int)
groundtruth_is_difficult_list = np.array([], dtype=bool)
scores, tp_fp_labels, is_class_correctly_detected_in_image = (
self.per_image_eval.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_list))
for i in range(self.num_class):
self.scores_per_class[i].append(scores[i])
self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
(self.num_images_correctly_detected_per_class
) += is_class_correctly_detected_in_image
def _update_ground_truth_statistics(self, groundtruth_class_labels,
groundtruth_is_difficult_list):
"""Update grouth truth statitistics.
1. Difficult boxes are ignored when counting the number of ground truth
instances as done in Pascal VOC devkit.
2. Difficult boxes are treated as normal boxes when computing CorLoc related
statistics.
Args:
groundtruth_class_labels: An integer numpy array of length M,
representing M class labels of object instances in ground truth
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
"""
for class_index in range(self.num_class):
num_gt_instances = np.sum(groundtruth_class_labels[
~groundtruth_is_difficult_list] == class_index)
self.num_gt_instances_per_class[class_index] += num_gt_instances
if np.any(groundtruth_class_labels == class_index):
self.num_gt_imgs_per_class[class_index] += 1
def evaluate(self):
"""Compute evaluation result.
Returns:
average_precision_per_class: float numpy array of average precision for
each class.
mean_ap: mean average precision of all classes, float scalar
precisions_per_class: List of precisions, each precision is a float numpy
array
recalls_per_class: List of recalls, each recall is a float numpy array
corloc_per_class: numpy float array
mean_corloc: mean CorLoc score over all classes, float scalar
"""
if (self.num_gt_instances_per_class == 0).any():
logging.warning(
'The following classes have no ground truth examples: %s',
np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)))
for class_index in range(self.num_class):
if self.num_gt_instances_per_class[class_index] == 0:
continue
scores = np.concatenate(self.scores_per_class[class_index])
tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
precision, recall = metrics.compute_precision_recall(
scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
self.precisions_per_class.append(precision)
self.recalls_per_class.append(recall)
average_precision = metrics.compute_average_precision(precision, recall)
self.average_precision_per_class[class_index] = average_precision
self.corloc_per_class = metrics.compute_cor_loc(
self.num_gt_imgs_per_class,
self.num_images_correctly_detected_per_class)
mean_ap = np.nanmean(self.average_precision_per_class)
mean_corloc = np.nanmean(self.corloc_per_class)
return (self.average_precision_per_class, mean_ap,
self.precisions_per_class, self.recalls_per_class,
self.corloc_per_class, mean_corloc)
def get_eval_result(self):
return EvalResult(self.average_precision_per_class,
self.precisions_per_class, self.recalls_per_class,
self.corloc_per_class)
class EvalResult(object):
def __init__(self, average_precisions, precisions, recalls, all_corloc):
self.precisions = precisions
self.recalls = recalls
self.all_corloc = all_corloc
self.average_precisions = average_precisions
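# A minimal usage sketch (not part of the original file): one class, one
# image, and a single detection that exactly matches the ground truth box;
# all keys and values are made up for demonstration.
example_evaluator = ObjectDetectionEvaluation(num_groundtruth_classes=1)
example_evaluator.add_single_ground_truth_image_info(
    'img1',
    groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
    groundtruth_class_labels=np.array([0], dtype=int))
example_evaluator.add_single_detected_image_info(
    'img1',
    detected_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
    detected_scores=np.array([0.9], dtype=float),
    detected_class_labels=np.array([0], dtype=int))
(_, example_mean_ap, _, _, _,
 example_mean_corloc) = example_evaluator.evaluate()
print(example_mean_ap, example_mean_corloc)  # expected: 1.0 1.0 here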
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.object_detection_evaluation."""
import numpy as np
import tensorflow as tf
from object_detection.utils import object_detection_evaluation
class ObjectDetectionEvaluationTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 3
self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
num_groundtruth_classes)
image_key1 = "img1"
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
self.od_eval.add_single_ground_truth_image_info(
image_key1, groundtruth_boxes1, groundtruth_class_labels1)
image_key2 = "img2"
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
self.od_eval.add_single_ground_truth_image_info(
image_key2, groundtruth_boxes2, groundtruth_class_labels2,
groundtruth_is_difficult_list2)
image_key3 = "img3"
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([1], dtype=int)
self.od_eval.add_single_ground_truth_image_info(
image_key3, groundtruth_boxes3, groundtruth_class_labels3)
image_key = "img2"
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([0, 0, 2], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
self.od_eval.add_single_detected_image_info(
image_key, detected_boxes, detected_scores, detected_class_labels)
def test_add_single_ground_truth_image_info(self):
expected_num_gt_instances_per_class = np.array([3, 1, 2], dtype=int)
expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
self.od_eval.num_gt_instances_per_class))
self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class,
self.od_eval.num_gt_imgs_per_class))
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes["img2"],
groundtruth_boxes2))
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(
self.od_eval.groundtruth_is_difficult_list["img2"],
groundtruth_is_difficult_list2))
groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
"img1"], groundtruth_class_labels1))
def test_add_single_detected_image_info(self):
expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
[np.array([0.9], dtype=float)]]
expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [],
[np.array([0], dtype=bool)]]
expected_num_images_correctly_detected_per_class = np.array([0, 0, 0],
dtype=int)
for i in range(self.od_eval.num_class):
for j in range(len(expected_scores_per_class[i])):
self.assertTrue(np.allclose(expected_scores_per_class[i][j],
self.od_eval.scores_per_class[i][j]))
self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][
j], self.od_eval.tp_fp_labels_per_class[i][j]))
self.assertTrue(np.array_equal(
expected_num_images_correctly_detected_per_class,
self.od_eval.num_images_correctly_detected_per_class))
def test_evaluate(self):
(average_precision_per_class, mean_ap, precisions_per_class,
recalls_per_class, corloc_per_class,
mean_corloc) = self.od_eval.evaluate()
expected_precisions_per_class = [np.array([0, 0.5], dtype=float),
np.array([], dtype=float),
np.array([0], dtype=float)]
expected_recalls_per_class = [
np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float),
np.array([0], dtype=float)
]
expected_average_precision_per_class = np.array([1. / 6., 0, 0],
dtype=float)
expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float)
expected_mean_ap = 1. / 18
expected_mean_corloc = 0.0
for i in range(self.od_eval.num_class):
self.assertTrue(np.allclose(expected_precisions_per_class[i],
precisions_per_class[i]))
self.assertTrue(np.allclose(expected_recalls_per_class[i],
recalls_per_class[i]))
self.assertTrue(np.allclose(expected_average_precision_per_class,
average_precision_per_class))
self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class))
self.assertAlmostEqual(expected_mean_ap, mean_ap)
self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops."""
import math
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
from object_detection.utils import static_shape
def expanded_shape(orig_shape, start_dim, num_dims):
"""Inserts multiple ones into a shape vector.
Inserts an all-1 vector of length num_dims at position start_dim into a shape.
Can be combined with tf.reshape to generalize tf.expand_dims.
Args:
orig_shape: the shape into which the all-1 vector is added (int32 vector)
start_dim: insertion position (int scalar)
num_dims: length of the inserted all-1 vector (int scalar)
Returns:
An int32 vector of length tf.size(orig_shape) + num_dims.
"""
with tf.name_scope('ExpandedShape'):
start_dim = tf.expand_dims(start_dim, 0) # scalar to rank-1
before = tf.slice(orig_shape, [0], start_dim)
add_shape = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32)
after = tf.slice(orig_shape, start_dim, [-1])
new_shape = tf.concat([before, add_shape, after], 0)
return new_shape
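# Illustrative sketch (not part of the original module): inserting two 1s at
# position 1 of the shape [2, 3] yields [2, 1, 1, 3]; reshaping a tensor to
# this shape generalizes tf.expand_dims. Assuming a TF1 graph/session setup:
#   shape = tf.constant([2, 3], dtype=tf.int32)
#   with tf.Session() as sess:
#     print(sess.run(expanded_shape(shape, 1, 2)))  # => [2 1 1 3]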
def normalized_to_image_coordinates(normalized_boxes, image_shape,
parallel_iterations=32):
"""Converts a batch of boxes from normal to image coordinates.
Args:
normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
normalized coordinates.
    image_shape: an int32 tensor of shape [4] containing the input image
      shape.
parallel_iterations: parallelism for the map_fn op.
Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
"""
def _to_absolute_coordinates(normalized_boxes):
return box_list_ops.to_absolute_coordinates(
box_list.BoxList(normalized_boxes),
image_shape[1], image_shape[2], check_range=False).get()
absolute_boxes = tf.map_fn(
_to_absolute_coordinates,
elems=(normalized_boxes),
dtype=tf.float32,
parallel_iterations=parallel_iterations,
back_prop=True)
return absolute_boxes
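# Illustrative sketch (mirrors the unit test for this op): with an image of
# shape [1, 4, 4, 3], the normalized box [0.5, 0.5, 1.0, 1.0] maps to the
# absolute box [2, 2, 4, 4]:
#   boxes = tf.constant([[[0.5, 0.5, 1.0, 1.0]]], dtype=tf.float32)
#   image_shape = tf.constant([1, 4, 4, 3], dtype=tf.int32)
#   with tf.Session() as sess:
#     print(sess.run(normalized_to_image_coordinates(boxes, image_shape)))
#     # => [[[2. 2. 4. 4.]]]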
def meshgrid(x, y):
"""Tiles the contents of x and y into a pair of grids.
Multidimensional analog of numpy.meshgrid, giving the same behavior if x and y
are vectors. Generally, this will give:
  xgrid(i_1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
  ygrid(i_1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)
  Keep in mind that the order of the arguments and outputs is reversed relative
  to the order of the indices they go into, done for compatibility with numpy.
The output tensors have the same shapes. Specifically:
xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())
Args:
x: A tensor of arbitrary shape and rank. xgrid will contain these values
varying in its last dimensions.
y: A tensor of arbitrary shape and rank. ygrid will contain these values
varying in its first dimensions.
Returns:
A tuple of tensors (xgrid, ygrid).
"""
with tf.name_scope('Meshgrid'):
x = tf.convert_to_tensor(x)
y = tf.convert_to_tensor(y)
x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))
xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape)
ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape)
new_shape = y.get_shape().concatenate(x.get_shape())
xgrid.set_shape(new_shape)
ygrid.set_shape(new_shape)
return xgrid, ygrid
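# Illustrative sketch (not part of the original module): for 1-D inputs this
# matches numpy.meshgrid. With x = [0, 1, 2] and y = [0, 1], both outputs have
# shape [2, 3]:
#   xgrid = [[0, 1, 2],        ygrid = [[0, 0, 0],
#            [0, 1, 2]]                 [1, 1, 1]]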
def pad_to_multiple(tensor, multiple):
"""Returns the tensor zero padded to the specified multiple.
Appends 0s to the end of the first and second dimension (height and width) of
the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, pad_to_multiple will append 0s so that the resulting tensor
  will be of shape [1, 4, 8, 1].
Args:
tensor: rank 4 float32 tensor, where
tensor -> [batch_size, height, width, channels].
multiple: the multiple to pad to.
Returns:
padded_tensor: the tensor zero padded to the specified multiple.
"""
tensor_shape = tensor.get_shape()
batch_size = static_shape.get_batch_size(tensor_shape)
tensor_height = static_shape.get_height(tensor_shape)
tensor_width = static_shape.get_width(tensor_shape)
tensor_depth = static_shape.get_depth(tensor_shape)
if batch_size is None:
batch_size = tf.shape(tensor)[0]
if tensor_height is None:
tensor_height = tf.shape(tensor)[1]
padded_tensor_height = tf.to_int32(
tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
else:
padded_tensor_height = int(
math.ceil(float(tensor_height) / multiple) * multiple)
if tensor_width is None:
tensor_width = tf.shape(tensor)[2]
padded_tensor_width = tf.to_int32(
tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
else:
padded_tensor_width = int(
math.ceil(float(tensor_width) / multiple) * multiple)
if tensor_depth is None:
tensor_depth = tf.shape(tensor)[3]
# Use tf.concat instead of tf.pad to preserve static shape
height_pad = tf.zeros([
batch_size, padded_tensor_height - tensor_height, tensor_width,
tensor_depth
])
padded_tensor = tf.concat([tensor, height_pad], 1)
width_pad = tf.zeros([
batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
tensor_depth
])
padded_tensor = tf.concat([padded_tensor, width_pad], 2)
return padded_tensor
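# Illustrative sketch (matches the example in the docstring above): the height
# and width dimensions are rounded up to the next multiple of 4, so a
# [1, 3, 5, 1] tensor becomes [1, 4, 8, 1]:
#   padded = pad_to_multiple(tf.zeros([1, 3, 5, 1]), 4)
#   print(padded.get_shape())  # => (1, 4, 8, 1)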
def padded_one_hot_encoding(indices, depth, left_pad):
"""Returns a zero padded one-hot tensor.
  This function converts a sparse representation of indices (e.g., [3]) to a
  zero padded one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result will simply be a tensor of
  shape (0, depth + left_pad). If depth = 0, then this function just returns
  `None`.
Args:
indices: an integer tensor of shape [num_indices].
depth: depth for the one-hot tensor (integer).
left_pad: number of zeros to left pad the one-hot tensor with (integer).
Returns:
padded_onehot: a tensor with shape (num_indices, depth + left_pad). Returns
`None` if the depth is zero.
Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth`
      are either negative or non-integers.
TODO: add runtime checks for depth and indices.
"""
if depth < 0 or not isinstance(depth, (int, long)):
raise ValueError('`depth` must be a non-negative integer.')
if left_pad < 0 or not isinstance(left_pad, (int, long)):
raise ValueError('`left_pad` must be a non-negative integer.')
if depth == 0:
return None
if len(indices.get_shape().as_list()) != 1:
raise ValueError('`indices` must have rank 1')
def one_hot_and_pad():
one_hot = tf.cast(tf.one_hot(tf.cast(indices, tf.int64), depth,
on_value=1, off_value=0), tf.float32)
return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')
result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
lambda: tf.zeros((depth + left_pad, 0)))
return tf.reshape(result, [-1, depth + left_pad])
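# Illustrative sketch (not part of the original module): with depth=4 and
# left_pad=1, index 3 encodes to the padded one-hot row [0, 0, 0, 0, 1]:
#   onehot = padded_one_hot_encoding(tf.constant([3]), depth=4, left_pad=1)
#   with tf.Session() as sess:
#     print(sess.run(onehot))  # => [[0. 0. 0. 0. 1.]]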
def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes):
"""Converts bounding boxes from dense to sparse form.
Args:
dense_locations: a [max_num_boxes, 4] tensor in which only the first k rows
are valid bounding box location coordinates, where k is the sum of
elements in dense_num_boxes.
dense_num_boxes: a [max_num_classes] tensor indicating the counts of
various bounding box classes e.g. [1, 0, 0, 2] means that the first
bounding box is of class 0 and the second and third bounding boxes are
of class 3. The sum of elements in this tensor is the number of valid
bounding boxes.
num_classes: number of classes
Returns:
box_locations: a [num_boxes, 4] tensor containing only valid bounding
boxes (i.e. the first num_boxes rows of dense_locations)
box_classes: a [num_boxes] tensor containing the classes of each bounding
box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3]
"""
num_valid_boxes = tf.reduce_sum(dense_num_boxes)
box_locations = tf.slice(dense_locations,
tf.constant([0, 0]), tf.stack([num_valid_boxes, 4]))
tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0))
for i in range(num_classes)]
box_classes = tf.concat(tiled_classes, 0)
box_locations.set_shape([None, 4])
return box_locations, box_classes
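# Illustrative sketch (not part of the original module): with
# dense_num_boxes = [1, 0, 0, 2], the first three rows of dense_locations are
# kept and labeled with classes [0, 3, 3]:
#   locations = tf.random_uniform([10, 4])
#   counts = tf.constant([1, 0, 0, 2], dtype=tf.int32)
#   box_locations, box_classes = dense_to_sparse_boxes(locations, counts, 4)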
def indices_to_dense_vector(indices,
size,
indices_value=1.,
default_value=0,
dtype=tf.float32):
"""Creates dense vector with indices set to specific value and rest to zeros.
This function exists because it is unclear if it is safe to use
tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
with indices which are not ordered.
This function accepts a dynamic size (e.g. tf.shape(tensor)[0])
Args:
indices: 1d Tensor with integer indices which are to be set to
indices_values.
size: scalar with size (integer) of output Tensor.
indices_value: values of elements specified by indices in the output vector
default_value: values of other elements in the output vector.
dtype: data type.
Returns:
dense 1D Tensor of shape [size] with indices set to indices_values and the
rest set to default_value.
"""
size = tf.to_int32(size)
zeros = tf.ones([size], dtype=dtype) * default_value
values = tf.ones_like(indices, dtype=dtype) * indices_value
return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
[zeros, values])
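# Illustrative sketch (not part of the original module): scattering 1.0 into
# positions [0, 2] of a length-4 vector:
#   v = indices_to_dense_vector(tf.constant([0, 2]), size=4)
#   with tf.Session() as sess:
#     print(sess.run(v))  # => [1. 0. 1. 0.]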
def retain_groundtruth(tensor_dict, valid_indices):
"""Retains groundtruth by valid indices.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
fields.InputDataFields.groundtruth_difficult
valid_indices: a tensor with valid indices for the box-level groundtruth.
Returns:
a dictionary of tensors containing only the groundtruth for valid_indices.
Raises:
ValueError: If the shape of valid_indices is invalid.
ValueError: field fields.InputDataFields.groundtruth_boxes is
not present in tensor_dict.
"""
input_shape = valid_indices.get_shape().as_list()
if not (len(input_shape) == 1 or
(len(input_shape) == 2 and input_shape[1] == 1)):
raise ValueError('The shape of valid_indices is invalid.')
valid_indices = tf.reshape(valid_indices, [-1])
valid_dict = {}
if fields.InputDataFields.groundtruth_boxes in tensor_dict:
# Prevents reshape failure when num_boxes is 0.
num_boxes = tf.maximum(tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
for key in tensor_dict:
if key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes]:
valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
# Input decoder returns empty tensor when these fields are not provided.
# Needs to reshape into [num_boxes, -1] for tf.gather() to work.
elif key in [fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_label_types]:
valid_dict[key] = tf.reshape(
tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]),
valid_indices), [-1])
# Fields that are not associated with boxes.
else:
valid_dict[key] = tensor_dict[key]
else:
raise ValueError('%s not present in input tensor dict.' % (
fields.InputDataFields.groundtruth_boxes))
return valid_dict
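# Illustrative sketch (not part of the original module; the dict keys follow
# the fields listed in the docstring above): keeping only the first box and
# its class label:
#   tensors = {
#       fields.InputDataFields.groundtruth_boxes:
#           tf.constant([[0., 0., 1., 1.], [0., 0., .5, .5]]),
#       fields.InputDataFields.groundtruth_classes: tf.constant([1, 2]),
#   }
#   filtered = retain_groundtruth(tensors, tf.constant([0], dtype=tf.int32))
#   # groundtruth_boxes -> [[0., 0., 1., 1.]]; groundtruth_classes -> [1]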
def retain_groundtruth_with_positive_classes(tensor_dict):
"""Retains only groundtruth with positive class ids.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
fields.InputDataFields.groundtruth_difficult
Returns:
a dictionary of tensors containing only the groundtruth with positive
classes.
Raises:
ValueError: If groundtruth_classes tensor is not in tensor_dict.
"""
if fields.InputDataFields.groundtruth_classes not in tensor_dict:
    raise ValueError('`groundtruth_classes` not in tensor_dict.')
keep_indices = tf.where(tf.greater(
tensor_dict[fields.InputDataFields.groundtruth_classes], 0))
return retain_groundtruth(tensor_dict, keep_indices)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
"""Filters out groundtruth with no bounding boxes.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
Returns:
a dictionary of tensors containing only the groundtruth that have bounding
boxes.
"""
groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
valid_indicator_vector = tf.logical_not(nan_indicator_vector)
valid_indices = tf.where(valid_indicator_vector)
return retain_groundtruth(tensor_dict, valid_indices)
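# Illustrative sketch (not part of the original module; assumes numpy is
# imported as np in the caller): rows whose coordinates contain NaN are
# dropped together with their associated fields:
#   tensors = {
#       fields.InputDataFields.groundtruth_boxes:
#           tf.constant([[np.nan] * 4, [0.2, 0.4, 0.6, 0.8]]),
#       fields.InputDataFields.groundtruth_classes: tf.constant([1, 2]),
#   }
#   filtered = filter_groundtruth_with_nan_box_coordinates(tensors)
#   # keeps only the second box (class 2).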
def normalize_to_target(inputs,
target_norm_value,
dim,
epsilon=1e-7,
trainable=True,
scope='NormalizeToTarget',
summarize=True):
"""L2 normalizes the inputs across the specified dimension to a target norm.
This op implements the L2 Normalization layer introduced in
Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
"Parsenet: Looking wider to see better." and is useful for bringing
activations from multiple layers in a convnet to a standard scale.
Note that the rank of `inputs` must be known and the dimension to which
normalization is to be applied should be statically defined.
TODO: Add option to scale by L2 norm of the entire input.
Args:
inputs: A `Tensor` of arbitrary size.
target_norm_value: A float value that specifies an initial target norm or
a list of floats (whose length must be equal to the depth along the
dimension to be normalized) specifying a per-dimension multiplier
after normalization.
dim: The dimension along which the input is normalized.
epsilon: A small value to add to the inputs to avoid dividing by zero.
trainable: Whether the norm is trainable or not
scope: Optional scope for variable_scope.
summarize: Whether or not to add a tensorflow summary for the op.
Returns:
The input tensor normalized to the specified target norm.
Raises:
    ValueError: If dim is negative or not smaller than the rank of `inputs`.
ValueError: If target_norm_value is not a float or a list of floats with
length equal to the depth along the dimension to be normalized.
"""
with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
if not inputs.get_shape():
raise ValueError('The input rank must be known.')
input_shape = inputs.get_shape().as_list()
input_rank = len(input_shape)
if dim < 0 or dim >= input_rank:
raise ValueError(
'dim must be non-negative but smaller than the input rank.')
if not input_shape[dim]:
raise ValueError('input shape should be statically defined along '
'the specified dimension.')
depth = input_shape[dim]
if not (isinstance(target_norm_value, float) or
(isinstance(target_norm_value, list) and
len(target_norm_value) == depth) and
all([isinstance(val, float) for val in target_norm_value])):
raise ValueError('target_norm_value must be a float or a list of floats '
'with length equal to the depth along the dimension to '
'be normalized.')
if isinstance(target_norm_value, float):
initial_norm = depth * [target_norm_value]
else:
initial_norm = target_norm_value
target_norm = tf.contrib.framework.model_variable(
name='weights', dtype=tf.float32,
initializer=tf.constant(initial_norm, dtype=tf.float32),
trainable=trainable)
if summarize:
mean = tf.reduce_mean(target_norm)
mean = tf.Print(mean, ['NormalizeToTarget:', mean])
tf.summary.scalar(tf.get_variable_scope().name, mean)
lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True))
mult_shape = input_rank*[1]
mult_shape[dim] = depth
return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths)
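# Illustrative sketch (mirrors the unit test for this op): normalizing the
# channel vector [3, 4] (L2 norm 5) to a target norm of 10 initially yields
# [6, 8]; the per-channel 'weights' variable is trainable by default:
#   out = normalize_to_target(tf.constant([[[[3., 4.]]]]),
#                             target_norm_value=10.0, dim=3)
#   with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run(out))  # ~> [[[[6. 8.]]]]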
def position_sensitive_crop_regions(image,
boxes,
box_ind,
crop_size,
num_spatial_bins,
global_pool,
extrapolation_value=None):
"""Position-sensitive crop and pool rectangular regions from a feature grid.
The output crops are split into `spatial_bins_y` vertical bins
and `spatial_bins_x` horizontal bins. For each intersection of a vertical
and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
channels of the image. This reduces `depth` by a factor of
`(spatial_bins_y * spatial_bins_x)`.
When global_pool is True, this function implements a differentiable version
of position-sensitive RoI pooling used in
[R-FCN detection system](https://arxiv.org/abs/1605.06409).
When global_pool is False, this function implements a differentiable version
of position-sensitive assembling operation used in
[instance FCN](https://arxiv.org/abs/1603.08678).
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case we use `extrapolation_value` to extrapolate the input image values.
box_ind: A `Tensor` of type `int32`.
A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
The value of `box_ind[i]` specifies the image that the `i`-th box refers
to.
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
Represents the number of position-sensitive bins in y and x directions.
Both values should be >= 1. `crop_height` should be divisible by
`spatial_bins_y`, and similarly for width.
The number of image channels should be divisible by
(spatial_bins_y * spatial_bins_x).
Suggested value from R-FCN paper: [3, 3].
global_pool: A boolean variable.
If True, we perform average global pooling on the features assembled from
the position-sensitive score maps.
If False, we keep the position-pooled features without global pooling
over the spatial coordinates.
Note that using global_pool=True is equivalent to but more efficient than
running the function with global_pool=False and then performing global
average pooling.
extrapolation_value: An optional `float`. Defaults to `0`.
Value used for extrapolation, when applicable.
Returns:
position_sensitive_features: A 4-D tensor of shape
`[num_boxes, K, K, crop_channels]`,
where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
where K = 1 when global_pool is True (Average-pooled cropped regions),
and K = crop_size when global_pool is False.
Raises:
ValueError: Raised in four situations:
`num_spatial_bins` is not >= 1;
`num_spatial_bins` does not divide `crop_size`;
`(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
`bin_crop_size` is not square when global_pool=False due to the
constraint in function space_to_depth.
"""
total_bins = 1
bin_crop_size = []
for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
if num_bins < 1:
raise ValueError('num_spatial_bins should be >= 1')
if crop_dim % num_bins != 0:
raise ValueError('crop_size should be divisible by num_spatial_bins')
total_bins *= num_bins
bin_crop_size.append(crop_dim / num_bins)
if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
raise ValueError('Only support square bin crop size for now.')
ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
spatial_bins_y, spatial_bins_x = num_spatial_bins
# Split each box into spatial_bins_y * spatial_bins_x bins.
position_sensitive_boxes = []
for bin_y in range(spatial_bins_y):
step_y = (ymax - ymin) / spatial_bins_y
for bin_x in range(spatial_bins_x):
step_x = (xmax - xmin) / spatial_bins_x
box_coordinates = [ymin + bin_y * step_y,
xmin + bin_x * step_x,
ymin + (bin_y + 1) * step_y,
xmin + (bin_x + 1) * step_x,
]
position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))
image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)
image_crops = []
for (split, box) in zip(image_splits, position_sensitive_boxes):
crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size,
extrapolation_value=extrapolation_value)
image_crops.append(crop)
if global_pool:
# Average over all bins.
position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
# Then average over spatial positions within the bins.
position_sensitive_features = tf.reduce_mean(
position_sensitive_features, [1, 2], keep_dims=True)
else:
# Reorder height/width to depth channel.
block_size = bin_crop_size[0]
if block_size >= 2:
image_crops = [tf.space_to_depth(
crop, block_size=block_size) for crop in image_crops]
    # Pack image_crops so that first dimension is for position-sensitive boxes.
position_sensitive_features = tf.stack(image_crops, axis=0)
# Unroll the position-sensitive boxes to spatial positions.
position_sensitive_features = tf.squeeze(
tf.batch_to_space_nd(position_sensitive_features,
block_shape=[1] + num_spatial_bins,
crops=tf.zeros((3, 2), dtype=tf.int32)),
squeeze_dims=[0])
# Reorder back the depth channel.
if block_size >= 2:
position_sensitive_features = tf.depth_to_space(
position_sensitive_features, block_size=block_size)
return position_sensitive_features
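# Illustrative sketch (mirrors the unit tests for this op): for a [1, 3, 2, 6]
# feature map whose c-th channel is the constant c + 1, num_spatial_bins of
# [3, 2] assigns one channel per bin, so global average pooling over a
# whole-image box yields (1 + 2 + ... + 6) / 6 = 3.5:
#   image = tf.constant(list(range(1, 7)) * 6, dtype=tf.float32,
#                       shape=[1, 3, 2, 6])
#   out = position_sensitive_crop_regions(
#       image, tf.constant([[0., 0., 1., 1.]]),
#       tf.constant([0], dtype=tf.int32), crop_size=[3, 2],
#       num_spatial_bins=[3, 2], global_pool=True)
#   # out has shape [1, 1, 1, 1] and value 3.5.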
def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
image_width):
"""Transforms the box masks back to full image masks.
Embeds masks in bounding boxes of larger masks whose shapes correspond to
image shape.
Args:
box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
corners. Row i contains [ymin, xmin, ymax, xmax] of the box
corresponding to mask i. Note that the box corners are in
normalized coordinates.
image_height: Image height. The output mask will have the same height as
the image height.
image_width: Image width. The output mask will have the same width as the
image width.
Returns:
A tf.float32 tensor of size [num_masks, image_height, image_width].
"""
# TODO: Make this a public function.
def transform_boxes_relative_to_boxes(boxes, reference_boxes):
boxes = tf.reshape(boxes, [-1, 2, 2])
min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
return tf.reshape(transformed_boxes, [-1, 4])
box_masks = tf.expand_dims(box_masks, axis=3)
num_boxes = tf.shape(box_masks)[0]
unit_boxes = tf.concat(
[tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
image_masks = tf.image.crop_and_resize(image=box_masks,
boxes=reverse_boxes,
box_ind=tf.range(num_boxes),
crop_size=[image_height, image_width],
extrapolation_value=0.0)
return tf.squeeze(image_masks, axis=3)
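# Illustrative sketch (not part of the original module): a 2x2 mask of ones
# for the normalized box [0.25, 0.25, 0.75, 0.75] on an 8x8 canvas yields a
# [1, 8, 8] float mask that is approximately 1 inside the central region and
# 0 elsewhere; values near the box boundary are softened by the bilinear
# resampling in tf.image.crop_and_resize:
#   image_masks = reframe_box_masks_to_image_masks(
#       tf.ones([1, 2, 2]), tf.constant([[0.25, 0.25, 0.75, 0.75]]),
#       image_height=8, image_width=8)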
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.ops."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
def test_normalized_to_image_coordinates(self):
normalized_boxes = tf.placeholder(tf.float32, shape=(None, 1, 4))
normalized_boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
[[0.5, 0.5, 1.0, 1.0]]])
image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
absolute_boxes = ops.normalized_to_image_coordinates(normalized_boxes,
image_shape,
parallel_iterations=2)
expected_boxes = np.array([[[0, 0, 4, 4]],
[[2, 2, 4, 4]]])
with self.test_session() as sess:
absolute_boxes = sess.run(absolute_boxes,
feed_dict={normalized_boxes:
normalized_boxes_np})
self.assertAllEqual(absolute_boxes, expected_boxes)
class MeshgridTest(tf.test.TestCase):
def test_meshgrid_numpy_comparison(self):
"""Tests meshgrid op with vectors, for which it should match numpy."""
x = np.arange(4)
y = np.arange(6)
exp_xgrid, exp_ygrid = np.meshgrid(x, y)
xgrid, ygrid = ops.meshgrid(x, y)
with self.test_session() as sess:
xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
self.assertAllEqual(xgrid_output, exp_xgrid)
self.assertAllEqual(ygrid_output, exp_ygrid)
def test_meshgrid_multidimensional(self):
np.random.seed(18)
x = np.random.rand(4, 1, 2).astype(np.float32)
y = np.random.rand(2, 3).astype(np.float32)
xgrid, ygrid = ops.meshgrid(x, y)
grid_shape = list(y.shape) + list(x.shape)
self.assertEqual(xgrid.get_shape().as_list(), grid_shape)
self.assertEqual(ygrid.get_shape().as_list(), grid_shape)
with self.test_session() as sess:
xgrid_output, ygrid_output = sess.run([xgrid, ygrid])
# Check the shape of the output grids
self.assertEqual(xgrid_output.shape, tuple(grid_shape))
self.assertEqual(ygrid_output.shape, tuple(grid_shape))
# Check a few elements
test_elements = [((3, 0, 0), (1, 2)),
((2, 0, 1), (0, 0)),
((0, 0, 0), (1, 1))]
for xind, yind in test_elements:
# These are float equality tests, but the meshgrid op should not introduce
# rounding.
self.assertEqual(xgrid_output[yind + xind], x[xind])
self.assertEqual(ygrid_output[yind + xind], y[yind])
class OpsTestPadToMultiple(tf.test.TestCase):
def test_zero_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 1)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
def test_no_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 2)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 2, 2, 1), padded_tensor_out.shape)
def test_padding(self):
tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
padded_tensor = ops.pad_to_multiple(tensor, 4)
with self.test_session() as sess:
padded_tensor_out = sess.run(padded_tensor)
self.assertEqual((1, 4, 4, 1), padded_tensor_out.shape)
class OpsTestPaddedOneHotEncoding(tf.test.TestCase):
def test_correct_one_hot_tensor_with_no_pad(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_one_hot_tensor_with_pad_one(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_one_hot_tensor_with_pad_three(self):
indices = tf.constant([1, 2, 3, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_correct_padded_one_hot_tensor_with_empty_indices(self):
depth = 6
pad = 2
indices = tf.constant([])
one_hot_tensor = ops.padded_one_hot_encoding(
indices, depth=depth, left_pad=pad)
expected_tensor = np.zeros((0, depth + pad))
with self.test_session() as sess:
out_one_hot_tensor = sess.run(one_hot_tensor)
self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
atol=1e-10)
def test_return_none_on_zero_depth(self):
indices = tf.constant([1, 2, 3, 4, 5])
one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
self.assertEqual(one_hot_tensor, None)
def test_raise_value_error_on_rank_two_input(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)
def test_raise_value_error_on_negative_pad(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)
def test_raise_value_error_on_float_pad(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)
def test_raise_value_error_on_float_depth(self):
indices = tf.constant(1.0, shape=(2, 3))
with self.assertRaises(ValueError):
ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)
class OpsDenseToSparseBoxesTest(tf.test.TestCase):
def test_return_all_boxes_when_all_input_boxes_are_valid(self):
num_classes = 4
num_valid_boxes = 3
code_size = 4
dense_location_placeholder = tf.placeholder(tf.float32,
shape=(num_valid_boxes,
code_size))
dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
box_locations, box_classes = ops.dense_to_sparse_boxes(
dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
feed_dict = {dense_location_placeholder: np.random.uniform(
size=[num_valid_boxes, code_size]),
dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
dtype=np.int32)}
expected_box_locations = feed_dict[dense_location_placeholder]
    expected_box_classes = np.array([0, 3, 3])
with self.test_session() as sess:
box_locations, box_classes = sess.run([box_locations, box_classes],
feed_dict=feed_dict)
self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)
def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
num_classes = 4
num_valid_boxes = 3
num_boxes = 10
code_size = 4
dense_location_placeholder = tf.placeholder(tf.float32, shape=(num_boxes,
code_size))
dense_num_boxes_placeholder = tf.placeholder(tf.int32, shape=(num_classes))
box_locations, box_classes = ops.dense_to_sparse_boxes(
dense_location_placeholder, dense_num_boxes_placeholder, num_classes)
feed_dict = {dense_location_placeholder: np.random.uniform(
size=[num_boxes, code_size]),
dense_num_boxes_placeholder: np.array([1, 0, 0, 2],
dtype=np.int32)}
expected_box_locations = (feed_dict[dense_location_placeholder]
[:num_valid_boxes])
    expected_box_classes = np.array([0, 3, 3])
with self.test_session() as sess:
box_locations, box_classes = sess.run([box_locations, box_classes],
feed_dict=feed_dict)
self.assertAllClose(box_locations, expected_box_locations, rtol=1e-6,
atol=1e-6)
      self.assertAllEqual(box_classes, expected_box_classes)
class OpsTestIndicesToDenseVector(tf.test.TestCase):
def test_indices_to_dense_vector(self):
size = 10000
num_indices = np.random.randint(size)
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.zeros(size, dtype=np.float32)
expected_output[rand_indices] = 1.
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_size_at_inference(self):
size = 5000
num_indices = 250
all_indices = np.arange(size)
rand_indices = np.random.permutation(all_indices)[0:num_indices]
expected_output = np.zeros(size, dtype=np.float32)
expected_output[rand_indices] = 1.
tf_all_indices = tf.placeholder(tf.int32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices,
tf.shape(tf_all_indices)[0])
feed_dict = {tf_all_indices: all_indices}
with self.test_session() as sess:
output = sess.run(indicator, feed_dict=feed_dict)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_int(self):
size = 500
num_indices = 25
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.zeros(size, dtype=np.int64)
expected_output[rand_indices] = 1
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(
tf_rand_indices, size, 1, dtype=tf.int64)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_custom_values(self):
size = 100
num_indices = 10
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
indices_value = np.random.rand(1)
default_value = np.random.rand(1)
expected_output = np.float32(np.ones(size) * default_value)
expected_output[rand_indices] = indices_value
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(
tf_rand_indices,
size,
indices_value=indices_value,
default_value=default_value)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllClose(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_all_indices_as_input(self):
size = 500
num_indices = 500
rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
expected_output = np.ones(size, dtype=np.float32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
def test_indices_to_dense_vector_empty_indices_as_input(self):
size = 500
rand_indices = []
expected_output = np.zeros(size, dtype=np.float32)
tf_rand_indices = tf.constant(rand_indices)
indicator = ops.indices_to_dense_vector(tf_rand_indices, size)
with self.test_session() as sess:
output = sess.run(indicator)
self.assertAllEqual(output, expected_output)
self.assertEqual(output.dtype, expected_output.dtype)
class GroundtruthFilterTest(tf.test.TestCase):
def test_filter_groundtruth(self):
input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
input_label_types = tf.placeholder(tf.string, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.image: input_image,
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult,
fields.InputDataFields.groundtruth_label_types: input_label_types
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
image_tensor = np.random.rand(224, 224, 3)
feed_dict = {
input_image: image_tensor,
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([32, 48], dtype=np.float32),
input_difficult:
np.array([True, False], dtype=np.bool),
input_label_types:
np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.image:
image_tensor,
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[32],
fields.InputDataFields.groundtruth_difficult:
[True],
fields.InputDataFields.groundtruth_label_types:
['APPROPRIATE']
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_label_types]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_missing_fields(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes
}
valid_indices = tf.placeholder(tf.int32, shape=(None,))
feed_dict = {
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1]
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.groundtruth_boxes]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_empty_fields(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
feed_dict = {
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 2], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([], dtype=np.float32),
input_difficult:
np.array([], dtype=np.float32),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[],
fields.InputDataFields.groundtruth_difficult:
[]
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
def test_filter_with_empty_groundtruth_boxes(self):
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult
}
output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
feed_dict = {
input_boxes:
np.array([], dtype=np.float).reshape(0, 4),
input_classes:
np.array([], dtype=np.int32),
input_is_crowd:
np.array([], dtype=np.bool),
input_area:
np.array([], dtype=np.float32),
input_difficult:
np.array([], dtype=np.float32),
valid_indices:
np.array([], dtype=np.int32)
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in input_tensors:
if key == fields.InputDataFields.groundtruth_boxes:
self.assertAllEqual([0, 4], output_tensors[key].shape)
else:
self.assertAllEqual([0], output_tensors[key].shape)
class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
def test_filter_groundtruth_with_positive_classes(self):
input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
input_classes = tf.placeholder(tf.int32, shape=(None,))
input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
input_area = tf.placeholder(tf.float32, shape=(None,))
input_difficult = tf.placeholder(tf.float32, shape=(None,))
input_label_types = tf.placeholder(tf.string, shape=(None,))
valid_indices = tf.placeholder(tf.int32, shape=(None,))
input_tensors = {
fields.InputDataFields.image: input_image,
fields.InputDataFields.groundtruth_boxes: input_boxes,
fields.InputDataFields.groundtruth_classes: input_classes,
fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
fields.InputDataFields.groundtruth_area: input_area,
fields.InputDataFields.groundtruth_difficult: input_difficult,
fields.InputDataFields.groundtruth_label_types: input_label_types
}
output_tensors = ops.retain_groundtruth_with_positive_classes(input_tensors)
image_tensor = np.random.rand(224, 224, 3)
feed_dict = {
input_image: image_tensor,
input_boxes:
np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]], dtype=np.float),
input_classes:
np.array([1, 0], dtype=np.int32),
input_is_crowd:
np.array([False, True], dtype=np.bool),
input_area:
np.array([32, 48], dtype=np.float32),
input_difficult:
np.array([True, False], dtype=np.bool),
input_label_types:
np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.string_),
valid_indices:
np.array([0], dtype=np.int32)
}
expected_tensors = {
fields.InputDataFields.image:
image_tensor,
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[32],
fields.InputDataFields.groundtruth_difficult:
[True],
fields.InputDataFields.groundtruth_label_types:
['APPROPRIATE']
}
with self.test_session() as sess:
output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
for key in [fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_label_types]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):
def test_filter_groundtruth_with_nan_box_coordinates(self):
input_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[np.nan, np.nan, np.nan, np.nan], [0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1, 2],
fields.InputDataFields.groundtruth_is_crowd:
[False, True],
fields.InputDataFields.groundtruth_area:
[100.0, 238.7]
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[2],
fields.InputDataFields.groundtruth_is_crowd:
[True],
fields.InputDataFields.groundtruth_area:
[238.7]
}
output_tensors = ops.filter_groundtruth_with_nan_box_coordinates(
input_tensors)
with self.test_session() as sess:
output_tensors = sess.run(output_tensors)
for key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
class OpsTestNormalizeToTarget(tf.test.TestCase):
def test_create_normalize_to_target(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 3
with self.test_session():
output = ops.normalize_to_target(inputs, target_norm_value, dim)
self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
var_name = tf.contrib.framework.get_variables()[0].name
self.assertEqual(var_name, 'NormalizeToTarget/weights:0')
def test_invalid_dim(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 10
with self.assertRaisesRegexp(
ValueError,
'dim must be non-negative but smaller than the input rank.'):
ops.normalize_to_target(inputs, target_norm_value, dim)
def test_invalid_target_norm_values(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = [4.0, 4.0]
dim = 3
with self.assertRaisesRegexp(
ValueError, 'target_norm_value must be a float or a list of floats'):
ops.normalize_to_target(inputs, target_norm_value, dim)
def test_correct_output_shape(self):
inputs = tf.random_uniform([5, 10, 12, 3])
target_norm_value = 4.0
dim = 3
with self.test_session():
output = ops.normalize_to_target(inputs, target_norm_value, dim)
self.assertEqual(output.get_shape().as_list(),
inputs.get_shape().as_list())
def test_correct_initial_output_values(self):
inputs = tf.constant([[[[3, 4], [7, 24]],
[[5, -12], [-1, 0]]]], tf.float32)
target_norm_value = 10.0
dim = 3
expected_output = [[[[30/5.0, 40/5.0], [70/25.0, 240/25.0]],
[[50/13.0, -120/13.0], [-10, 0]]]]
with self.test_session() as sess:
normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
dim)
sess.run(tf.global_variables_initializer())
output = normalized_inputs.eval()
self.assertAllClose(output, expected_output)
def test_multiple_target_norm_values(self):
inputs = tf.constant([[[[3, 4], [7, 24]],
[[5, -12], [-1, 0]]]], tf.float32)
target_norm_value = [10.0, 20.0]
dim = 3
expected_output = [[[[30/5.0, 80/5.0], [70/25.0, 480/25.0]],
[[50/13.0, -240/13.0], [-10, 0]]]]
with self.test_session() as sess:
normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
dim)
sess.run(tf.global_variables_initializer())
output = normalized_inputs.eval()
self.assertAllClose(output, expected_output)
class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):
def test_position_sensitive(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
# First channel is 1's, second channel is 2's, etc.
image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
shape=image_shape)
boxes = tf.random_uniform((2, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
# The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
# before averaging.
expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
with self.test_session() as sess:
output = sess.run(ps_crop_and_pool)
self.assertAllClose(output, expected_output)
def test_position_sensitive_with_equal_channels(self):
num_spatial_bins = [2, 2]
image_shape = [1, 3, 3, 4]
crop_size = [2, 2]
image = tf.constant(range(1, 3 * 3 + 1), dtype=tf.float32,
shape=[1, 3, 3, 1])
tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
boxes = tf.random_uniform((3, 4))
box_ind = tf.constant([0, 0, 0], dtype=tf.int32)
# All channels are equal so position-sensitive crop and resize should
# work as the usual crop and resize for just one channel.
crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
ps_crop_and_pool = ops.position_sensitive_crop_regions(
tiled_image,
boxes,
box_ind,
crop_size,
num_spatial_bins,
global_pool=True)
with self.test_session() as sess:
expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
self.assertAllClose(output, expected_output)
def test_position_sensitive_with_single_bin(self):
num_spatial_bins = [1, 1]
image_shape = [2, 3, 3, 4]
crop_size = [2, 2]
image = tf.random_uniform(image_shape)
boxes = tf.random_uniform((6, 4))
box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
# When a single bin is used, position-sensitive crop and pool should be
# the same as non-position sensitive crop and pool.
crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
with self.test_session() as sess:
expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
self.assertAllClose(output, expected_output)
def test_raise_value_error_on_num_bins_less_than_one(self):
num_spatial_bins = [1, -1]
image_shape = [1, 1, 1, 2]
crop_size = [2, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_raise_value_error_on_non_divisible_crop_size(self):
num_spatial_bins = [2, 3]
image_shape = [1, 1, 1, 6]
crop_size = [3, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'crop_size should be divisible by num_spatial_bins'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_raise_value_error_on_non_divisible_num_channels(self):
num_spatial_bins = [2, 2]
image_shape = [1, 1, 1, 5]
crop_size = [2, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
def test_position_sensitive_with_global_pool_false(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
num_boxes = 2
# First channel is 1's, second channel is 2's, etc.
image = tf.constant(range(1, 3 * 2 + 1) * 6, dtype=tf.float32,
shape=image_shape)
boxes = tf.random_uniform((num_boxes, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
expected_output = []
# Expected output, when crop_size = [3, 2].
expected_output.append(np.expand_dims(
np.tile(np.array([[1, 2],
[3, 4],
[5, 6]]), (num_boxes, 1, 1)),
axis=-1))
# Expected output, when crop_size = [6, 4].
expected_output.append(np.expand_dims(
np.tile(np.array([[1, 1, 2, 2],
[1, 1, 2, 2],
[3, 3, 4, 4],
[3, 3, 4, 4],
[5, 5, 6, 6],
[5, 5, 6, 6]]), (num_boxes, 1, 1)),
axis=-1))
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
output = sess.run(ps_crop)
self.assertAllEqual(output, expected_output[crop_size_mult - 1])
def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
num_spatial_bins = [2, 2]
image_shape = [2, 2, 2, 4]
crop_size = [2, 2]
    image = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2, dtype=tf.float32,
                        shape=image_shape)
# First box contains whole image, and second box contains only first row.
boxes = tf.constant(np.array([[0., 0., 1., 1.],
[0., 0., 0.5, 1.]]), dtype=tf.float32)
box_ind = tf.constant([0, 1], dtype=tf.int32)
expected_output = []
    # Expected output for the box containing the whole image.
expected_output.append(
np.reshape(np.array([[4, 7],
[10, 13]]),
(1, 2, 2, 1))
)
    # Expected output for the box containing only the first row.
expected_output.append(
np.reshape(np.array([[3, 6],
[7, 10]]),
(1, 2, 2, 1))
)
expected_output = np.concatenate(expected_output, axis=0)
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
output = sess.run(ps_crop)
self.assertAllEqual(output, expected_output)
def test_position_sensitive_with_global_pool_false_and_single_bin(self):
num_spatial_bins = [1, 1]
image_shape = [2, 3, 3, 4]
crop_size = [1, 1]
image = tf.random_uniform(image_shape)
boxes = tf.random_uniform((6, 4))
box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)
    # Since a single bin is used and crop_size = [1, 1] (i.e., no crop resize),
    # the outputs are the same regardless of the global_pool value.
ps_crop_and_pool = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
with self.test_session() as sess:
pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
self.assertAllClose(pooled_output, unpooled_output)
def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
num_boxes = 2
# First channel is 1's, second channel is 2's, etc.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
boxes = tf.random_uniform((num_boxes, 4))
box_ind = tf.constant([0, 0], dtype=tf.int32)
expected_output = []
# Expected output, when crop_size = [3, 2].
expected_output.append(np.mean(
np.expand_dims(
np.tile(np.array([[1, 2],
[3, 4],
[5, 6]]), (num_boxes, 1, 1)),
axis=-1),
axis=(1, 2), keepdims=True))
# Expected output, when crop_size = [6, 4].
expected_output.append(np.mean(
np.expand_dims(
np.tile(np.array([[1, 1, 2, 2],
[1, 1, 2, 2],
[3, 3, 4, 4],
[3, 3, 4, 4],
[5, 5, 6, 6],
[5, 5, 6, 6]]), (num_boxes, 1, 1)),
axis=-1),
axis=(1, 2), keepdims=True))
for crop_size_mult in range(1, 3):
crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
# Perform global_pooling after running the function with
# global_pool=False.
ps_crop = ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
ps_crop_and_pool = tf.reduce_mean(
ps_crop, reduction_indices=(1, 2), keep_dims=True)
with self.test_session() as sess:
output = sess.run(ps_crop_and_pool)
self.assertAllEqual(output, expected_output[crop_size_mult - 1])
def test_raise_value_error_on_non_square_block_size(self):
num_spatial_bins = [3, 2]
image_shape = [1, 3, 2, 6]
crop_size = [6, 2]
image = tf.constant(1, dtype=tf.float32, shape=image_shape)
boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
box_ind = tf.constant([0], dtype=tf.int32)
with self.assertRaisesRegexp(
ValueError, 'Only support square bin crop size for now.'):
ops.position_sensitive_crop_regions(
image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)
class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
def testZeroImageOnEmptyMask(self):
box_masks = tf.constant([[[0, 0],
[0, 0]]], dtype=tf.float32)
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskIsCenteredInImageWhenBoxIsCentered(self):
box_masks = tf.constant([[[1, 1],
[1, 1]]], dtype=tf.float32)
boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskOffCenterRemainsOffCenterInImage(self):
box_masks = tf.constant([[[1, 0],
[0, 1]]], dtype=tf.float32)
boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0.6111111, 0.16666669],
[0, 0, 0.3888889, 0.83333337],
[0, 0, 0, 0]]], dtype=np.float32)
with self.test_session() as sess:
np_image_masks = sess.run(image_masks)
self.assertAllClose(np_image_masks, np_expected_image_masks)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Supression is used by default. Multi class
detection is supported by default.
"""
import numpy as np
from object_detection.utils import np_box_list
from object_detection.utils import np_box_list_ops
class PerImageEvaluation(object):
"""Evaluate detection result of a single image."""
def __init__(self,
num_groundtruth_classes,
matching_iou_threshold=0.5,
nms_iou_threshold=0.3,
nms_max_output_boxes=50):
"""Initialized PerImageEvaluation by evaluation parameters.
Args:
num_groundtruth_classes: Number of ground truth object classes
matching_iou_threshold: A ratio of area intersection to union, which is
the threshold to consider whether a detection is true positive or not
nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
nms_max_output_boxes: Number of maximum output boxes in NMS.
"""
self.matching_iou_threshold = matching_iou_threshold
self.nms_iou_threshold = nms_iou_threshold
self.nms_max_output_boxes = nms_max_output_boxes
self.num_groundtruth_classes = num_groundtruth_classes
def compute_object_detection_metrics(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels,
groundtruth_is_difficult_lists):
"""Compute Object Detection related metrics from a single image.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
scores: A list of C float numpy arrays. Each numpy array is of
shape [K, 1], representing K scores detected with object class
label c
tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
is of shape [K, 1], representing K True/False positive label of
object instances detected with class label c
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance correctly detected in the image
"""
detected_boxes, detected_scores, detected_class_labels = (
self._remove_invalid_boxes(detected_boxes, detected_scores,
detected_class_labels))
scores, tp_fp_labels = self._compute_tp_fp(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_is_difficult_lists)
is_class_correctly_detected_in_image = self._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
return scores, tp_fp_labels, is_class_correctly_detected_in_image
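  # Illustrative usage sketch (not part of the original module); the inputs
  # below are made up for demonstration and assume two classes:
  #
  #   evaluator = PerImageEvaluation(num_groundtruth_classes=2)
  #   scores, tp_fp_labels, corloc = evaluator.compute_object_detection_metrics(
  #       detected_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       detected_scores=np.array([0.9], dtype=float),
  #       detected_class_labels=np.array([0], dtype=int),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       groundtruth_class_labels=np.array([0], dtype=int),
  #       groundtruth_is_difficult_lists=np.zeros(1, dtype=bool))
  #   # scores[0] == [0.9], tp_fp_labels[0] == [True], corloc == [1, 0].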
def _compute_cor_loc(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels):
"""Compute CorLoc score for object detection result.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
Returns:
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance correctly detected in the image
"""
is_class_correctly_detected_in_image = np.zeros(
self.num_groundtruth_classes, dtype=int)
for i in range(self.num_groundtruth_classes):
gt_boxes_at_ith_class = groundtruth_boxes[
groundtruth_class_labels == i, :]
detected_boxes_at_ith_class = detected_boxes[
detected_class_labels == i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
is_class_correctly_detected_in_image[i] = (
          self._compute_is_a_class_correctly_detected_in_image(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class))
return is_class_correctly_detected_in_image
  def _compute_is_a_class_correctly_detected_in_image(
self, detected_boxes, detected_scores, groundtruth_boxes):
"""Compute CorLoc score for a single class.
Args:
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates
detected_scores: A 1-d numpy array of length N representing classification
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
Returns:
is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
class is correctly detected in the image or not
"""
if detected_boxes.size > 0:
if groundtruth_boxes.size > 0:
max_score_id = np.argmax(detected_scores)
detected_boxlist = np_box_list.BoxList(
np.expand_dims(detected_boxes[max_score_id, :], axis=0))
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
if np.max(iou) >= self.matching_iou_threshold:
return 1
return 0
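  # Illustrative sketch (hypothetical values): given an instance
  # `evaluator = PerImageEvaluation(num_groundtruth_classes=1)`,
  #
  #   evaluator._compute_is_a_class_correctly_detected_in_image(
  #       detected_boxes=np.array([[0., 0., 1., 1.], [0., 0., 2., 2.]],
  #                               dtype=float),
  #       detected_scores=np.array([0.9, 0.2], dtype=float),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float))
  #
  # returns 1: only the top-scoring box ([0, 0, 1, 1], score 0.9) is compared
  # against the ground truth, and its IOU of 1.0 clears the 0.5 threshold.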
def _compute_tp_fp(self, detected_boxes, detected_scores,
detected_class_labels, groundtruth_boxes,
groundtruth_class_labels, groundtruth_is_difficult_lists):
"""Labels true/false positives of detections of an image across all classes.
Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
groundtruth_boxes: A float numpy array of shape [M, 4], representing M
regions of object instances in ground truth
groundtruth_class_labels: An integer numpy array of shape [M, 1],
representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
result_scores: A list of float numpy arrays. Each numpy array is of
shape [K, 1], representing K scores detected with object class
label c
result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
shape [K, 1], representing K True/False positive label of object
instances detected with class label c
"""
result_scores = []
result_tp_fp_labels = []
for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          groundtruth_class_labels == i, :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[groundtruth_class_labels == i])
      detected_boxes_at_ith_class = detected_boxes[
          detected_class_labels == i, :]
detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
detected_boxes_at_ith_class, detected_scores_at_ith_class,
gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class)
result_scores.append(scores)
result_tp_fp_labels.append(tp_fp_labels)
return result_scores, result_tp_fp_labels
def _remove_invalid_boxes(self, detected_boxes, detected_scores,
detected_class_labels):
valid_indices = np.logical_and(detected_boxes[:, 0] < detected_boxes[:, 2],
detected_boxes[:, 1] < detected_boxes[:, 3])
return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
detected_class_labels[valid_indices])
def _compute_tp_fp_for_single_class(self, detected_boxes, detected_scores,
groundtruth_boxes,
groundtruth_is_difficult_list):
"""Labels boxes detected with the same class from the same image as tp/fp.
Args:
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates
detected_scores: A 1-d numpy array of length N representing classification
score
groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
box coordinates
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
Returns:
scores: A numpy array representing the detection scores
tp_fp_labels: a boolean numpy array indicating whether a detection is a
true positive.
"""
if detected_boxes.size == 0:
return np.array([], dtype=float), np.array([], dtype=bool)
detected_boxlist = np_box_list.BoxList(detected_boxes)
detected_boxlist.add_field('scores', detected_scores)
detected_boxlist = np_box_list_ops.non_max_suppression(
detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
scores = detected_boxlist.get_field('scores')
if groundtruth_boxes.size == 0:
return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)
gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
max_overlap_gt_ids = np.argmax(iou, axis=1)
is_gt_box_detected = np.zeros(gt_boxlist.num_boxes(), dtype=bool)
tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_difficult_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
for i in range(detected_boxlist.num_boxes()):
gt_id = max_overlap_gt_ids[i]
if iou[i, gt_id] >= self.matching_iou_threshold:
if not groundtruth_is_difficult_list[gt_id]:
if not is_gt_box_detected[gt_id]:
tp_fp_labels[i] = True
is_gt_box_detected[gt_id] = True
else:
is_matched_to_difficult_box[i] = True
return scores[~is_matched_to_difficult_box], tp_fp_labels[
~is_matched_to_difficult_box]
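  # Matching sketch (illustrative, made-up numbers): with one ground truth box
  # and two detections that both clear the matching IOU threshold, the
  # higher-scoring detection is labeled a true positive and the lower-scoring
  # duplicate a false positive:
  #
  #   evaluator = PerImageEvaluation(num_groundtruth_classes=1,
  #                                  matching_iou_threshold=0.5,
  #                                  nms_iou_threshold=1.0,
  #                                  nms_max_output_boxes=10)
  #   scores, tp_fp = evaluator._compute_tp_fp_for_single_class(
  #       detected_boxes=np.array([[0., 0., 1., 1.], [0., 0., 1.1, 1.1]],
  #                               dtype=float),
  #       detected_scores=np.array([0.5, 0.9], dtype=float),
  #       groundtruth_boxes=np.array([[0., 0., 1., 1.]], dtype=float),
  #       groundtruth_is_difficult_list=np.zeros(1, dtype=bool))
  #   # scores == [0.9, 0.5]; tp_fp == [True, False].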
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.per_image_evaluation."""
import numpy as np
import tensorflow as tf
from object_detection.utils import per_image_evaluation
class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
dtype=float)
def test_match_to_not_difficult_box(self):
groundtruth_groundtruth_is_difficult_list = np.array([False, True],
dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_match_to_difficult_box(self):
groundtruth_groundtruth_is_difficult_list = np.array([True, False],
dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold1 = 0.5
matching_iou_threshold2 = 0.1
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval1 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
nms_max_output_boxes)
self.eval2 = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
nms_max_output_boxes)
self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
def test_no_true_positives(self):
groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
  def test_one_true_positive_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
  def test_one_true_positive_with_very_small_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_two_true_positives_with_large_iou_threshold(self):
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list)
expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class MultiClassesTpFpTest(tf.test.TestCase):
def test_tp_fp(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
[5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
dtype=float)
detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
groundtruth_class_labels = np.array([0, 2], dtype=int)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels,
groundtruth_groundtruth_is_difficult_list)
expected_scores = [np.array([0.8], dtype=float)] * 3
    expected_tp_fp_labels = [np.array([True]), np.array([False]),
                             np.array([True])]
for i in range(len(expected_scores)):
self.assertTrue(np.allclose(expected_scores[i], scores[i]))
self.assertTrue(np.array_equal(expected_tp_fp_labels[i], tp_fp_labels[i]))
class CorLocTest(tf.test.TestCase):
def test_compute_corloc_with_normal_iou_threshold(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
[0, 0, 5, 5]], dtype=float)
detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
dtype=float)
groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
is_class_correctly_detected_in_image = eval1._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
expected_result = np.array([1, 0, 1], dtype=int)
self.assertTrue(np.array_equal(expected_result,
is_class_correctly_detected_in_image))
def test_compute_corloc_with_very_large_iou_threshold(self):
num_groundtruth_classes = 3
matching_iou_threshold = 0.9
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
matching_iou_threshold,
nms_iou_threshold,
nms_max_output_boxes)
detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
[0, 0, 5, 5]], dtype=float)
detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
dtype=float)
groundtruth_class_labels = np.array([0, 0, 2], dtype=int)
is_class_correctly_detected_in_image = eval1._compute_cor_loc(
detected_boxes, detected_scores, detected_class_labels,
groundtruth_boxes, groundtruth_class_labels)
expected_result = np.array([1, 0, 0], dtype=int)
self.assertTrue(np.array_equal(expected_result,
is_class_correctly_detected_in_image))
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import tensorflow as tf
def _is_tensor(t):
"""Returns a boolean indicating whether the input is a tensor.
Args:
t: the input to be tested.
Returns:
a boolean that indicates whether t is a tensor.
"""
return isinstance(t, (tf.Tensor, tf.SparseTensor, tf.Variable))
def _set_dim_0(t, d0):
"""Sets the 0-th dimension of the input tensor.
Args:
t: the input tensor, assuming the rank is at least 1.
d0: an integer indicating the 0-th dimension of the input tensor.
Returns:
the tensor t with the 0-th dimension set.
"""
t_shape = t.get_shape().as_list()
t_shape[0] = d0
t.set_shape(t_shape)
return t
def pad_tensor(t, length):
"""Pads the input tensor with 0s along the first dimension up to the length.
Args:
t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after padding, assuming length >= t.shape[0].
Returns:
padded_t: the padded tensor, whose first dimension is length. If the length
is an integer, the first dimension of padded_t is set to length
statically.
"""
t_rank = tf.rank(t)
t_shape = tf.shape(t)
t_d0 = t_shape[0]
pad_d0 = tf.expand_dims(length - t_d0, 0)
pad_shape = tf.cond(
tf.greater(t_rank, 1), lambda: tf.concat([pad_d0, t_shape[1:]], 0),
lambda: tf.expand_dims(length - t_d0, 0))
padded_t = tf.concat([t, tf.zeros(pad_shape, dtype=t.dtype)], 0)
if not _is_tensor(length):
padded_t = _set_dim_0(padded_t, length)
return padded_t
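# Illustrative usage (a sketch, assuming a TF 1.x session):
#
#   t = tf.constant([[0.1, 0.2]])     # shape [1, 2]
#   padded = pad_tensor(t, 3)         # static shape [3, 2]
#   with tf.Session() as sess:
#     sess.run(padded)                # [[0.1, 0.2], [0., 0.], [0., 0.]]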
def clip_tensor(t, length):
"""Clips the input tensor along the first dimension up to the length.
Args:
t: the input tensor, assuming the rank is at least 1.
length: a tensor of shape [1] or an integer, indicating the first dimension
of the input tensor t after clipping, assuming length <= t.shape[0].
Returns:
clipped_t: the clipped tensor, whose first dimension is length. If the
length is an integer, the first dimension of clipped_t is set to length
statically.
"""
clipped_t = tf.gather(t, tf.range(length))
if not _is_tensor(length):
clipped_t = _set_dim_0(clipped_t, length)
return clipped_t
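# Illustrative usage (a sketch): clip_tensor(tf.constant([1, 2, 3]), 2)
# evaluates to [1, 2], and because the length is a Python integer the static
# first dimension of the result is set to 2.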
def pad_or_clip_tensor(t, length):
"""Pad or clip the input tensor along the first dimension.
Args:
t: the input tensor, assuming the rank is at least 1.
length: a tensor of shape [1] or an integer, indicating the first dimension
of the input tensor t after processing.
Returns:
processed_t: the processed tensor, whose first dimension is length. If the
length is an integer, the first dimension of the processed tensor is set
to length statically.
"""
processed_t = tf.cond(
tf.greater(tf.shape(t)[0], length),
lambda: clip_tensor(t, length),
lambda: pad_tensor(t, length))
if not _is_tensor(length):
processed_t = _set_dim_0(processed_t, length)
return processed_t
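# Illustrative usage (a sketch): pad_or_clip_tensor dispatches on the runtime
# first dimension, so the same call pads short tensors and clips long ones:
#
#   short = pad_or_clip_tensor(tf.constant([1]), 2)        # -> [1, 0]
#   long = pad_or_clip_tensor(tf.constant([1, 2, 3]), 2)   # -> [1, 2]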
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.shape_utils."""
import tensorflow as tf
from object_detection.utils import shape_utils
class UtilTest(tf.test.TestCase):
def test_pad_tensor_using_integer_input(self):
t1 = tf.constant([1], dtype=tf.int32)
pad_t1 = shape_utils.pad_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
pad_t2 = shape_utils.pad_tensor(t2, 2)
self.assertEqual(2, pad_t1.get_shape()[0])
self.assertEqual(2, pad_t2.get_shape()[0])
with self.test_session() as sess:
pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
self.assertAllEqual([1, 0], pad_t1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
def test_pad_tensor_using_tensor_input(self):
t1 = tf.constant([1], dtype=tf.int32)
pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2))
with self.test_session() as sess:
pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
self.assertAllEqual([1, 0], pad_t1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)
def test_clip_tensor_using_integer_input(self):
t1 = tf.constant([1, 2, 3], dtype=tf.int32)
clip_t1 = shape_utils.clip_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
clip_t2 = shape_utils.clip_tensor(t2, 2)
self.assertEqual(2, clip_t1.get_shape()[0])
self.assertEqual(2, clip_t2.get_shape()[0])
with self.test_session() as sess:
clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
self.assertAllEqual([1, 2], clip_t1_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
def test_clip_tensor_using_tensor_input(self):
t1 = tf.constant([1, 2, 3], dtype=tf.int32)
clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2))
with self.test_session() as sess:
clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
self.assertAllEqual([1, 2], clip_t1_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)
def test_pad_or_clip_tensor_using_integer_input(self):
t1 = tf.constant([1], dtype=tf.int32)
tt1 = shape_utils.pad_or_clip_tensor(t1, 2)
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
tt2 = shape_utils.pad_or_clip_tensor(t2, 2)
t3 = tf.constant([1, 2, 3], dtype=tf.int32)
tt3 = shape_utils.clip_tensor(t3, 2)
t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
tt4 = shape_utils.clip_tensor(t4, 2)
self.assertEqual(2, tt1.get_shape()[0])
self.assertEqual(2, tt2.get_shape()[0])
self.assertEqual(2, tt3.get_shape()[0])
self.assertEqual(2, tt4.get_shape()[0])
with self.test_session() as sess:
tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
[tt1, tt2, tt3, tt4])
self.assertAllEqual([1, 0], tt1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
self.assertAllEqual([1, 2], tt3_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
def test_pad_or_clip_tensor_using_tensor_input(self):
t1 = tf.constant([1], dtype=tf.int32)
tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2))
t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2))
t3 = tf.constant([1, 2, 3], dtype=tf.int32)
tt3 = shape_utils.clip_tensor(t3, tf.constant(2))
t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
tt4 = shape_utils.clip_tensor(t4, tf.constant(2))
with self.test_session() as sess:
tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
[tt1, tt2, tt3, tt4])
self.assertAllEqual([1, 0], tt1_result)
self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
self.assertAllEqual([1, 2], tt3_result)
self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to access TensorShape values.
The rank 4 tensor_shape must be of the form [batch_size, height, width, depth].
"""
def get_batch_size(tensor_shape):
"""Returns batch size from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the batch size of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[0].value
def get_height(tensor_shape):
"""Returns height from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the height of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[1].value
def get_width(tensor_shape):
"""Returns width from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the width of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[2].value
def get_depth(tensor_shape):
"""Returns depth from the tensor shape.
Args:
tensor_shape: A rank 4 TensorShape.
Returns:
An integer representing the depth of the tensor.
"""
tensor_shape.assert_has_rank(rank=4)
return tensor_shape[3].value
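# Illustrative usage (a sketch; the placeholder below is made up and assumes
# TF 1.x static shapes):
#
#   images = tf.placeholder(tf.float32, shape=[8, 300, 300, 3])
#   get_batch_size(images.get_shape())   # 8
#   get_height(images.get_shape())       # 300
#   get_width(images.get_shape())        # 300
#   get_depth(images.get_shape())        # 3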
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.static_shape."""
import tensorflow as tf
from object_detection.utils import static_shape
class StaticShapeTest(tf.test.TestCase):
  def test_return_correct_batch_size(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(32, static_shape.get_batch_size(tensor_shape))
def test_return_correct_height(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(299, static_shape.get_height(tensor_shape))
def test_return_correct_width(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(384, static_shape.get_width(tensor_shape))
def test_return_correct_depth(self):
tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
self.assertEqual(3, static_shape.get_depth(tensor_shape))
  def test_die_on_tensor_shape_with_rank_three(self):
    # Each accessor is checked in its own context; in the original version
    # only the first call could ever run, since the first raise exited the
    # assertRaises block.
    tensor_shape = tf.TensorShape(dims=[32, 299, 384])
    with self.assertRaises(ValueError):
      static_shape.get_batch_size(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_height(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_width(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_depth(tensor_shape)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions which are convenient for unit testing."""
import numpy as np
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_predictor
from object_detection.core import matcher
class MockBoxCoder(box_coder.BoxCoder):
"""Simple `difference` BoxCoder."""
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
return boxes.get() - anchors.get()
def _decode(self, rel_codes, anchors):
return box_list.BoxList(rel_codes + anchors.get())
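# Illustrative sketch (assuming the public encode/decode wrappers on
# box_coder.BoxCoder delegate to _encode/_decode): encoding then decoding with
# the same anchors is the identity,
#
#   coder = MockBoxCoder()
#   rel_codes = coder.encode(boxes, anchors)    # boxes.get() - anchors.get()
#   decoded = coder.decode(rel_codes, anchors)  # BoxList equal to `boxes`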
class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
def _predict(self, image_features, num_predictions_per_location):
batch_size = image_features.get_shape().as_list()[0]
num_anchors = (image_features.get_shape().as_list()[1]
* image_features.get_shape().as_list()[2])
code_size = 4
zero = tf.reduce_sum(0 * image_features)
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
return {box_predictor.BOX_ENCODINGS: box_encodings,
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background}
class MockAnchorGenerator(anchor_generator.AnchorGenerator):
"""Mock anchor generator."""
def name_scope(self):
return 'MockAnchorGenerator'
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list):
num_anchors = sum([shape[0] * shape[1] for shape in feature_map_shape_list])
return box_list.BoxList(tf.zeros((num_anchors, 4), dtype=tf.float32))
class MockMatcher(matcher.Matcher):
"""Simple matcher that matches first anchor to first groundtruth box."""
def _match(self, similarity_matrix):
return tf.constant([0, -1, -1, -1], dtype=tf.int32)
def create_diagonal_gradient_image(height, width, depth):
"""Creates pyramid image. Useful for testing.
For example, pyramid_image(5, 6, 1) looks like:
# [[[ 5. 4. 3. 2. 1. 0.]
# [ 6. 5. 4. 3. 2. 1.]
# [ 7. 6. 5. 4. 3. 2.]
# [ 8. 7. 6. 5. 4. 3.]
# [ 9. 8. 7. 6. 5. 4.]]]
Args:
height: height of image
width: width of image
depth: depth of image
Returns:
pyramid image
"""
row = np.arange(height)
col = np.arange(width)[::-1]
image_layer = np.expand_dims(row, 1) + col
image_layer = np.expand_dims(image_layer, 2)
image = image_layer
for i in range(1, depth):
image = np.concatenate((image, image_layer * pow(10, i)), 2)
return image.astype(np.float32)
def create_random_boxes(num_boxes, max_height, max_width):
"""Creates random bounding boxes of specific maximum height and width.
Args:
num_boxes: number of boxes.
max_height: maximum height of boxes.
max_width: maximum width of boxes.
Returns:
boxes: numpy array of shape [num_boxes, 4]. Each row is in form
[y_min, x_min, y_max, x_max].
"""
y_1 = np.random.uniform(size=(1, num_boxes)) * max_height
y_2 = np.random.uniform(size=(1, num_boxes)) * max_height
x_1 = np.random.uniform(size=(1, num_boxes)) * max_width
x_2 = np.random.uniform(size=(1, num_boxes)) * max_width
boxes = np.zeros(shape=(num_boxes, 4))
boxes[:, 0] = np.minimum(y_1, y_2)
boxes[:, 1] = np.minimum(x_1, x_2)
boxes[:, 2] = np.maximum(y_1, y_2)
boxes[:, 3] = np.maximum(x_1, x_2)
return boxes.astype(np.float32)
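# Illustrative usage (a sketch): draw boxes bounded by a 3x5 canvas and check
# the coordinate ordering guaranteed by the minimum/maximum above:
#
#   boxes = create_random_boxes(4, max_height=3, max_width=5)
#   assert (boxes[:, 0] <= boxes[:, 2]).all()   # y_min <= y_max
#   assert (boxes[:, 1] <= boxes[:, 3]).all()   # x_min <= x_max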
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.test_utils."""
import numpy as np
import tensorflow as tf
from object_detection.utils import test_utils
class TestUtilsTest(tf.test.TestCase):
def test_diagonal_gradient_image(self):
"""Tests if a good pyramid image is created."""
pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2)
# Test which is easy to understand.
expected_first_channel = np.array([[3, 2, 1, 0],
[4, 3, 2, 1],
[5, 4, 3, 2]], dtype=np.float32)
self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]),
expected_first_channel)
# Actual test.
expected_image = np.array([[[3, 30],
[2, 20],
[1, 10],
[0, 0]],
[[4, 40],
[3, 30],
[2, 20],
[1, 10]],
[[5, 50],
[4, 40],
[3, 30],
[2, 20]]], dtype=np.float32)
self.assertAllEqual(pyramid_image, expected_image)
def test_random_boxes(self):
"""Tests if valid random boxes are created."""
num_boxes = 1000
max_height = 3
max_width = 5
boxes = test_utils.create_random_boxes(num_boxes,
max_height,
max_width)
true_column = np.ones(shape=(num_boxes)) == 1
self.assertAllEqual(boxes[:, 0] < boxes[:, 2], true_column)
self.assertAllEqual(boxes[:, 1] < boxes[:, 3], true_column)
self.assertTrue(boxes[:, 0].min() >= 0)
self.assertTrue(boxes[:, 1].min() >= 0)
self.assertTrue(boxes[:, 2].max() <= max_height)
self.assertTrue(boxes[:, 3].max() <= max_width)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for manipulating collections of variables during training.
"""
import logging
import re
import tensorflow as tf
slim = tf.contrib.slim
# TODO: Consider replacing with tf.contrib.filter_variables in
# tensorflow/contrib/framework/python/ops/variables.py
def filter_variables(variables, filter_regex_list, invert=False):
"""Filters out the variables matching the filter_regex.
  Filters out the variables whose names match any of the regular
  expressions in filter_regex_list and returns the remaining variables.
Optionally, if invert=True, the complement set is returned.
Args:
variables: a list of tensorflow variables.
filter_regex_list: a list of string regular expressions.
invert: (boolean). If True, returns the complement of the filter set; that
is, all variables matching filter_regex are kept and all others discarded.
Returns:
a list of filtered variables.
"""
kept_vars = []
  # Materialize as a list so the patterns can be re-used across variables
  # (in Python 3, `filter` returns a one-shot iterator).
  variables_to_ignore_patterns = list(filter(None, filter_regex_list))
for var in variables:
add = True
for pattern in variables_to_ignore_patterns:
if re.match(pattern, var.op.name):
add = False
break
if add != invert:
kept_vars.append(var)
return kept_vars
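# Illustrative usage (a sketch; the variable names are made up):
#
#   variables = [tf.Variable(1.0, name='FeatureExtractor/weights'),
#                tf.Variable(1.0, name='BoxPredictor/weights')]
#   filter_variables(variables, ['FeatureExtractor/.*'])
#   # -> keeps only BoxPredictor/weights; with invert=True, keeps only
#   # FeatureExtractor/weights.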
def multiply_gradients_matching_regex(grads_and_vars, regex_list, multiplier):
"""Multiply gradients whose variable names match a regular expression.
Args:
grads_and_vars: A list of gradient to variable pairs (tuples).
regex_list: A list of string regular expressions.
multiplier: A (float) multiplier to apply to each gradient matching the
regular expression.
Returns:
grads_and_vars: A list of gradient to variable pairs (tuples).
"""
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
for var in matching_vars:
logging.info('Applying multiplier %f to variable [%s]',
multiplier, var.op.name)
grad_multipliers = {var: float(multiplier) for var in matching_vars}
return slim.learning.multiply_gradients(grads_and_vars,
grad_multipliers)
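# Illustrative usage (a sketch; `opt` and `loss` are assumed to exist):
#
#   grads_and_vars = opt.compute_gradients(loss)
#   grads_and_vars = multiply_gradients_matching_regex(
#       grads_and_vars, ['FeatureExtractor/.*'], multiplier=10.0)
#   # Gradients of all FeatureExtractor/* variables are scaled by 10.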
def freeze_gradients_matching_regex(grads_and_vars, regex_list):
"""Freeze gradients whose variable names match a regular expression.
Args:
grads_and_vars: A list of gradient to variable pairs (tuples).
regex_list: A list of string regular expressions.
Returns:
grads_and_vars: A list of gradient to variable pairs (tuples) that do not
contain the variables and gradients matching the regex.
"""
variables = [pair[1] for pair in grads_and_vars]
matching_vars = filter_variables(variables, regex_list, invert=True)
kept_grads_and_vars = [pair for pair in grads_and_vars
if pair[1] not in matching_vars]
for var in matching_vars:
logging.info('Freezing variable [%s]', var.op.name)
return kept_grads_and_vars
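# Illustrative usage (a sketch): drop the gradients of every bias variable so
# those variables stay fixed during training:
#
#   grads_and_vars = freeze_gradients_matching_regex(
#       grads_and_vars, ['.*/biases'])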
def get_variables_available_in_checkpoint(variables, checkpoint_path):
"""Returns the subset of variables available in the checkpoint.
Inspects given checkpoint and returns the subset of variables that are
available in it.
TODO: force input and output to be a dictionary.
Args:
variables: a list or dictionary of variables to find in checkpoint.
checkpoint_path: path to the checkpoint to restore variables from.
Returns:
A list or dictionary of variables.
Raises:
ValueError: if `variables` is not a list or dict.
"""
if isinstance(variables, list):
variable_names_map = {variable.op.name: variable for variable in variables}
elif isinstance(variables, dict):
variable_names_map = variables
else:
raise ValueError('`variables` is expected to be a list or dict.')
ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
ckpt_vars = ckpt_reader.get_variable_to_shape_map().keys()
vars_in_ckpt = {}
  for variable_name, variable in sorted(variable_names_map.items()):
if variable_name in ckpt_vars:
vars_in_ckpt[variable_name] = variable
else:
logging.warning('Variable [%s] not available in checkpoint',
variable_name)
if isinstance(variables, list):
    return list(vars_in_ckpt.values())
return vars_in_ckpt
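# Illustrative usage (a sketch; the checkpoint path is made up):
#
#   restorable = get_variables_available_in_checkpoint(
#       tf.global_variables(), '/tmp/model.ckpt')
#   saver = tf.train.Saver(restorable)
#   # The saver then restores only the variables actually present in the
#   # checkpoint, ignoring any newly added ones.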
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.variables_helper."""
import os
import tensorflow as tf
from object_detection.utils import variables_helper
class FilterVariablesTest(tf.test.TestCase):
def _create_variables(self):
return [tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights'),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/biases'),
tf.Variable(1.0, name='StackProposalGenerator/weights'),
tf.Variable(1.0, name='StackProposalGenerator/biases')]
def test_return_all_variables_when_empty_regex(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables, [''])
self.assertItemsEqual(out_variables, variables)
def test_return_variables_which_do_not_match_single_regex(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables,
['FeatureExtractor/.*'])
self.assertItemsEqual(out_variables, variables[2:])
def test_return_variables_which_do_not_match_any_regex_in_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(variables, [
'FeatureExtractor.*biases', 'StackProposalGenerator.*biases'
])
self.assertItemsEqual(out_variables, [variables[0], variables[2]])
def test_return_variables_matching_empty_regex_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(
variables, [''], invert=True)
self.assertItemsEqual(out_variables, [])
def test_return_variables_matching_some_regex_in_list(self):
variables = self._create_variables()
out_variables = variables_helper.filter_variables(
variables,
['FeatureExtractor.*biases', 'StackProposalGenerator.*biases'],
invert=True)
self.assertItemsEqual(out_variables, [variables[1], variables[3]])
class MultiplyGradientsMatchingRegexTest(tf.test.TestCase):
def _create_grads_and_vars(self):
return [(tf.constant(1.0),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
(tf.constant(2.0),
tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
(tf.constant(3.0),
tf.Variable(3.0, name='StackProposalGenerator/weights')),
(tf.constant(4.0),
tf.Variable(4.0, name='StackProposalGenerator/biases'))]
def test_multiply_all_feature_extractor_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['FeatureExtractor/.*']
multiplier = 0.0
grads_and_vars = variables_helper.multiply_gradients_matching_regex(
grads_and_vars, regex_list, multiplier)
exp_output = [(0.0, 1.0), (0.0, 2.0), (3.0, 3.0), (4.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
def test_multiply_all_bias_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['.*/biases']
multiplier = 0.0
grads_and_vars = variables_helper.multiply_gradients_matching_regex(
grads_and_vars, regex_list, multiplier)
exp_output = [(1.0, 1.0), (0.0, 2.0), (3.0, 3.0), (0.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
class FreezeGradientsMatchingRegexTest(tf.test.TestCase):
def _create_grads_and_vars(self):
return [(tf.constant(1.0),
tf.Variable(1.0, name='FeatureExtractor/InceptionV3/weights')),
(tf.constant(2.0),
tf.Variable(2.0, name='FeatureExtractor/InceptionV3/biases')),
(tf.constant(3.0),
tf.Variable(3.0, name='StackProposalGenerator/weights')),
(tf.constant(4.0),
tf.Variable(4.0, name='StackProposalGenerator/biases'))]
def test_freeze_all_feature_extractor_variables(self):
grads_and_vars = self._create_grads_and_vars()
regex_list = ['FeatureExtractor/.*']
grads_and_vars = variables_helper.freeze_gradients_matching_regex(
grads_and_vars, regex_list)
exp_output = [(3.0, 3.0), (4.0, 4.0)]
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
output = sess.run(grads_and_vars)
self.assertItemsEqual(output, exp_output)
class GetVariablesAvailableInCheckpointTest(tf.test.TestCase):
def test_return_all_variables_from_checkpoint(self):
variables = [
tf.Variable(1.0, name='weights'),
tf.Variable(1.0, name='biases')
]
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
out_variables = variables_helper.get_variables_available_in_checkpoint(
variables, checkpoint_path)
self.assertItemsEqual(out_variables, variables)
def test_return_variables_available_in_checkpoint(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
graph1_variables = [
tf.Variable(1.0, name='weights'),
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
graph2_variables = graph1_variables + [tf.Variable(1.0, name='biases')]
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables, checkpoint_path)
self.assertItemsEqual(out_variables, graph1_variables)
  def test_return_variables_available_in_checkpoint_with_dict_inputs(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'graph.pb')
graph1_variables = [
tf.Variable(1.0, name='ckpt_weights'),
]
init_op = tf.global_variables_initializer()
saver = tf.train.Saver(graph1_variables)
with self.test_session() as sess:
sess.run(init_op)
saver.save(sess, checkpoint_path)
graph2_variables_dict = {
'ckpt_weights': tf.Variable(1.0, name='weights'),
'ckpt_biases': tf.Variable(1.0, name='biases')
}
out_variables = variables_helper.get_variables_available_in_checkpoint(
graph2_variables_dict, checkpoint_path)
self.assertTrue(isinstance(out_variables, dict))
self.assertItemsEqual(out_variables.keys(), ['ckpt_weights'])
self.assertTrue(out_variables['ckpt_weights'].op.name == 'weights')
if __name__ == '__main__':
tf.test.main()