Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
f282f6ef
Commit
f282f6ef
authored
Jul 05, 2017
by
Alexander Gorban
Browse files
Merge branch 'master' of github.com:tensorflow/models
parents
58a5da7b
a2970b03
Changes
302
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4705 additions
and
0 deletions
+4705
-0
object_detection/utils/metrics.py
object_detection/utils/metrics.py
+145
-0
object_detection/utils/metrics_test.py
object_detection/utils/metrics_test.py
+79
-0
object_detection/utils/np_box_list.py
object_detection/utils/np_box_list.py
+134
-0
object_detection/utils/np_box_list_ops.py
object_detection/utils/np_box_list_ops.py
+555
-0
object_detection/utils/np_box_list_ops_test.py
object_detection/utils/np_box_list_ops_test.py
+414
-0
object_detection/utils/np_box_list_test.py
object_detection/utils/np_box_list_test.py
+135
-0
object_detection/utils/np_box_ops.py
object_detection/utils/np_box_ops.py
+97
-0
object_detection/utils/np_box_ops_test.py
object_detection/utils/np_box_ops_test.py
+68
-0
object_detection/utils/object_detection_evaluation.py
object_detection/utils/object_detection_evaluation.py
+233
-0
object_detection/utils/object_detection_evaluation_test.py
object_detection/utils/object_detection_evaluation_test.py
+125
-0
object_detection/utils/ops.py
object_detection/utils/ops.py
+651
-0
object_detection/utils/ops_test.py
object_detection/utils/ops_test.py
+1033
-0
object_detection/utils/per_image_evaluation.py
object_detection/utils/per_image_evaluation.py
+260
-0
object_detection/utils/per_image_evaluation_test.py
object_detection/utils/per_image_evaluation_test.py
+212
-0
object_detection/utils/shape_utils.py
object_detection/utils/shape_utils.py
+113
-0
object_detection/utils/shape_utils_test.py
object_detection/utils/shape_utils_test.py
+120
-0
object_detection/utils/static_shape.py
object_detection/utils/static_shape.py
+71
-0
object_detection/utils/static_shape_test.py
object_detection/utils/static_shape_test.py
+50
-0
object_detection/utils/test_utils.py
object_detection/utils/test_utils.py
+137
-0
object_detection/utils/test_utils_test.py
object_detection/utils/test_utils_test.py
+73
-0
No files found.
Too many changes to show.
To preserve performance only
302 of 302+
files are displayed.
Plain diff
Email patch
object_detection/utils/metrics.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for computing metrics like precision, recall, CorLoc and etc."""
from
__future__
import
division
import
numpy
as
np
from
six
import
moves
def compute_precision_recall(scores, labels, num_gt):
  """Compute precision and recall.

  Args:
    scores: A float numpy array representing detection score
    labels: A boolean numpy array representing true/false positive labels
    num_gt: Number of ground truth instances

  Raises:
    ValueError: if the input is not of the correct format

  Returns:
    precision: Fraction of positive instances over detected ones. This value is
      None if no ground truth labels are present.
    recall: Fraction of detected positive instance over all positive instances.
      This value is None if no ground truth labels are present.
  """
  # NOTE: the `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
  # is the canonical dtype spelling and matches np.array(..., dtype=bool).
  if not isinstance(labels, np.ndarray) or labels.dtype != bool or len(
      labels.shape) != 1:
    raise ValueError("labels must be single dimension bool numpy array")

  if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
    raise ValueError("scores must be single dimension numpy array")

  if num_gt < np.sum(labels):
    raise ValueError("Number of true positives must be smaller than num_gt.")

  if len(scores) != len(labels):
    raise ValueError("scores and labels must be of the same size.")

  if num_gt == 0:
    return None, None

  # Walk detections from most to least confident; the cumulative true/false
  # positive counts then trace out the precision/recall curve.
  sorted_indices = np.argsort(scores)
  sorted_indices = sorted_indices[::-1]
  labels = labels.astype(int)
  true_positive_labels = labels[sorted_indices]
  false_positive_labels = 1 - true_positive_labels
  cum_true_positives = np.cumsum(true_positive_labels)
  cum_false_positives = np.cumsum(false_positive_labels)
  precision = cum_true_positives.astype(float) / (
      cum_true_positives + cum_false_positives)
  recall = cum_true_positives.astype(float) / num_gt
  return precision, recall
def compute_average_precision(precision, recall):
  """Compute Average Precision according to the definition in VOCdevkit.

  Precision is modified to ensure that it does not decrease as recall
  decrease.

  Args:
    precision: A float [N, 1] numpy array of precisions
    recall: A float [N, 1] numpy array of recalls

  Raises:
    ValueError: if the input is not of the correct format

  Returns:
    average_precison: The area under the precision recall curve. NaN if
      precision and recall are None.
  """
  if precision is None:
    if recall is not None:
      raise ValueError("If precision is None, recall must also be None")
    # np.NAN was removed in NumPy 2.0; np.nan is the canonical spelling.
    return np.nan

  if not isinstance(precision, np.ndarray) or not isinstance(
      recall, np.ndarray):
    raise ValueError("precision and recall must be numpy array")
  # np.float (removed in NumPy 1.24) was an alias of the builtin float,
  # whose numpy dtype is float64 — the check is behaviorally identical.
  if precision.dtype != np.float64 or recall.dtype != np.float64:
    raise ValueError("input must be float numpy array.")
  if len(precision) != len(recall):
    raise ValueError("precision and recall must be of the same size.")
  if not precision.size:
    return 0.0
  if np.amin(precision) < 0 or np.amax(precision) > 1:
    raise ValueError("Precision must be in the range of [0, 1].")
  if np.amin(recall) < 0 or np.amax(recall) > 1:
    raise ValueError("recall must be in the range of [0, 1].")
  if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)):
    raise ValueError("recall must be a non-decreasing array")

  # Pad the curve so it spans the full [0, 1] recall range.
  recall = np.concatenate([[0], recall, [1]])
  precision = np.concatenate([[0], precision, [0]])

  # Preprocess precision to be a non-decreasing array (right-to-left running
  # maximum, per the VOC evaluation protocol).
  for i in range(len(precision) - 2, -1, -1):
    precision[i] = np.maximum(precision[i], precision[i + 1])

  # Integrate precision over the recall steps where recall actually changes.
  indices = np.where(recall[1:] != recall[:-1])[0] + 1
  average_precision = np.sum(
      (recall[indices] - recall[indices - 1]) * precision[indices])
  return average_precision
def compute_cor_loc(num_gt_imgs_per_class,
                    num_images_correctly_detected_per_class):
  """Compute CorLoc according to the definition in the following paper.

  https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf

  Returns nans if there are no ground truth images for a class.

  Args:
    num_gt_imgs_per_class: 1D array, representing number of images containing
      at least one object instance of a particular class
    num_images_correctly_detected_per_class: 1D array, representing number of
      images that are correctly detected at least one object instance of a
      particular class

  Returns:
    corloc_per_class: A float numpy array represents the corloc score of each
      class
  """
  # np.where evaluates the division branch eagerly, so classes with zero
  # ground truth images would trigger spurious divide-by-zero/invalid
  # RuntimeWarnings even though those entries are replaced by NaN. Suppress
  # the warnings; the returned values are unchanged.
  with np.errstate(divide="ignore", invalid="ignore"):
    return np.where(
        num_gt_imgs_per_class == 0, np.nan,
        num_images_correctly_detected_per_class / num_gt_imgs_per_class)
object_detection/utils/metrics_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.metrics."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
metrics
class MetricsTest(tf.test.TestCase):
  """Unit tests for the functions in object_detection.utils.metrics."""

  def test_compute_cor_loc(self):
    num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
    num_images_correctly_detected_per_class = np.array(
        [10, 0, 1, 0, 0], dtype=int)
    corloc = metrics.compute_cor_loc(
        num_gt_imgs_per_class, num_images_correctly_detected_per_class)
    expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
    self.assertTrue(np.allclose(corloc, expected_corloc))

  def test_compute_cor_loc_nans(self):
    # Classes with zero ground truth images must yield NaN CorLoc.
    num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
    num_images_correctly_detected_per_class = np.array(
        [10, 0, 1, 0, 0], dtype=int)
    corloc = metrics.compute_cor_loc(
        num_gt_imgs_per_class, num_images_correctly_detected_per_class)
    expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
    self.assertAllClose(corloc, expected_corloc)

  def test_compute_precision_recall(self):
    num_gt = 10
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
    # True-positive count accumulated along detections sorted by
    # descending score.
    accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
    expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
    expected_recall = accumulated_tp_count / num_gt
    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_gt)
    self.assertAllClose(precision, expected_precision)
    self.assertAllClose(recall, expected_recall)

  def test_compute_average_precision(self):
    precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0],
                         dtype=float)
    recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5],
                      dtype=float)
    # Precision after the right-to-left running-maximum preprocessing.
    processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
                                   dtype=float)
    recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0],
                               dtype=float)
    expected_mean_ap = np.sum(recall_interval * processed_precision)
    mean_ap = metrics.compute_average_precision(precision, recall)
    self.assertAlmostEqual(expected_mean_ap, mean_ap)

  def test_compute_precision_recall_and_ap_no_groundtruth(self):
    num_gt = 0
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
    expected_precision = None
    expected_recall = None
    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_gt)
    # assertEquals is a deprecated alias removed in Python 3.12; use
    # assertEqual.
    self.assertEqual(precision, expected_precision)
    self.assertEqual(recall, expected_recall)
    ap = metrics.compute_average_precision(precision, recall)
    self.assertTrue(np.isnan(ap))
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
  tf.test.main()
object_detection/utils/np_box_list.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxList classes and functions."""
import
numpy
as
np
from
six
import
moves
class BoxList(object):
  """Box collection.

  BoxList represents a list of bounding boxes as numpy array, where each
  bounding box is represented as a row of 4 numbers,
  [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
  given list correspond to a single image.

  Optionally, users can add additional related fields (such as
  objectness/classification scores).
  """

  def __init__(self, data):
    """Constructs box collection.

    Args:
      data: a numpy array of shape [N, 4] representing box coordinates

    Raises:
      ValueError: if bbox data is not a numpy array
      ValueError: if invalid dimensions for bbox data
    """
    if not isinstance(data, np.ndarray):
      raise ValueError('data must be a numpy array.')
    if len(data.shape) != 2 or data.shape[1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    if data.dtype != np.float32 and data.dtype != np.float64:
      raise ValueError('Invalid data type for box data: float is required.')
    if not self._is_valid_boxes(data):
      raise ValueError('Invalid box data. data must be a numpy array of '
                       'N*[y_min, x_min, y_max, x_max]')
    self.data = {'boxes': data}

  def num_boxes(self):
    """Return number of boxes held in collections."""
    return self.data['boxes'].shape[0]

  def get_extra_fields(self):
    """Return all non-box fields."""
    return [k for k in self.data.keys() if k != 'boxes']

  def has_field(self, field):
    """Return True if the named field is present in the collection."""
    return field in self.data

  def add_field(self, field, field_data):
    """Add data to a specified field.

    Args:
      field: a string parameter used to speficy a related field to be accessed.
      field_data: a numpy array of [N, ...] representing the data associated
          with the field.

    Raises:
      ValueError: if the field is already exist or the dimension of the field
          data does not matches the number of boxes.
    """
    if self.has_field(field):
      # BUGFIX: the original message lacked a separating space and rendered
      # as e.g. "Field scoresalready exists".
      raise ValueError('Field ' + field + ' already exists')
    if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
      raise ValueError('Invalid dimensions for field data')
    self.data[field] = field_data

  def get(self):
    """Convenience function for accesssing box coordinates.

    Returns:
      a numpy array of shape [N, 4] representing box corners
    """
    return self.get_field('boxes')

  def get_field(self, field):
    """Accesses data associated with the specified field in the box collection.

    Args:
      field: a string parameter used to speficy a related field to be accessed.

    Returns:
      a numpy 1-d array representing data of an associated field

    Raises:
      ValueError: if invalid field
    """
    if not self.has_field(field):
      raise ValueError('field {} does not exist'.format(field))
    return self.data[field]

  def get_coordinates(self):
    """Get corner coordinates of boxes.

    Returns:
      a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
    """
    box_coordinates = self.get()
    y_min = box_coordinates[:, 0]
    x_min = box_coordinates[:, 1]
    y_max = box_coordinates[:, 2]
    x_max = box_coordinates[:, 3]
    return [y_min, x_min, y_max, x_max]

  def _is_valid_boxes(self, data):
    """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].

    Args:
      data: a numpy array of shape [N, 4] representing box coordinates

    Returns:
      a boolean indicating whether all ymax of boxes are equal or greater than
          ymin, and all xmax of boxes are equal or greater than xmin.
    """
    if data.shape[0] > 0:
      # Builtin range replaces six.moves.range; identical on Python 3.
      for i in range(data.shape[0]):
        if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
          return False
    return True
object_detection/utils/np_box_list_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations for Numpy BoxLists.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import
numpy
as
np
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_ops
class SortOrder(object):
  """Enum class for sort order.

  Attributes:
    ascend: ascend order.
    descend: descend order.
  """
  # Integer tags compared against in sort_by_field; values are arbitrary
  # but distinct.
  ASCEND = 1
  DESCEND = 2
def area(boxlist):
  """Computes area of boxes.

  Args:
    boxlist: BoxList holding N boxes

  Returns:
    a numpy array with shape [N*1] representing box areas
  """
  y_min, x_min, y_max, x_max = boxlist.get_coordinates()
  heights = y_max - y_min
  widths = x_max - x_min
  return heights * widths
def intersection(boxlist1, boxlist2):
  """Compute pairwise intersection areas between boxes.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N*M] representing pairwise intersection area
  """
  # Thin wrapper: unwrap the coordinate arrays and delegate to the
  # raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.intersection(coords1, coords2)
def iou(boxlist1, boxlist2):
  """Computes pairwise intersection-over-union between box collections.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.
  """
  # Thin wrapper around the raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.iou(coords1, coords2)
def ioa(boxlist1, boxlist2):
  """Computes pairwise intersection-over-area between box collections.

  Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
  their intersection area over box2's area. Note that ioa is not symmetric,
  that is, IOA(box1, box2) != IOA(box2, box1).

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.
  """
  # Thin wrapper around the raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.ioa(coords1, coords2)
def gather(boxlist, indices, fields=None):
  """Gather boxes from BoxList according to indices and return new BoxList.

  By default, Gather returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the
  first dimension).  However one can optionally only gather from a
  subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indices: a 1-d numpy array of type int_
    fields: (optional) list of fields to also gather from.  If None (default),
        all fields are gathered from.  Pass an empty fields list to only gather
        the box coordinates.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indices

  Raises:
    ValueError: if specified field is not contained in boxlist or if the
        indices are not of type int_
  """
  if indices.size:
    # Bounds check only when there is at least one index to gather.
    out_of_range = (np.amax(indices) >= boxlist.num_boxes() or
                    np.amin(indices) < 0)
    if out_of_range:
      raise ValueError('indices are out of valid range.')
  subset = np_box_list.BoxList(boxlist.get()[indices, :])
  if fields is None:
    fields = boxlist.get_extra_fields()
  for field in fields:
    subset.add_field(field, boxlist.get_field(field)[indices, ...])
  return subset
def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
  """Sort boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    boxlist: BoxList holding N boxes.
    field: A BoxList field for sorting and reordering the BoxList.
    order: (Optional) 'descend' or 'ascend'. Default is descend.

  Returns:
    sorted_boxlist: A sorted BoxList with the field in the specified order.

  Raises:
    ValueError: if specified field does not exist or is not of single
        dimension.
    ValueError: if the order is not either descend or ascend.
  """
  if not boxlist.has_field(field):
    raise ValueError('Field ' + field + ' does not exist')
  if len(boxlist.get_field(field).shape) != 1:
    # BUGFIX: the original message lacked a separating space and rendered
    # as e.g. "Field scoresshould be single dimension.".
    raise ValueError('Field ' + field + ' should be single dimension.')
  if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
    raise ValueError('Invalid sort order')

  field_to_sort = boxlist.get_field(field)
  sorted_indices = np.argsort(field_to_sort)
  if order == SortOrder.DESCEND:
    # np.argsort is ascending; reverse for descending order.
    sorted_indices = sorted_indices[::-1]
  return gather(boxlist, sorted_indices)
def non_max_suppression(boxlist,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
  """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. In each iteration, the detected bounding box
  with highest score in the available pool is selected.

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
      representing detection scores. All scores belong to the same class.
    max_output_size: maximum number of retained boxes
    iou_threshold: intersection over union threshold.
    score_threshold: minimum score threshold. Remove the boxes with scores
                     less than this value. Default value is set to -10. A very
                     low threshold to pass pretty much all the boxes, unless
                     the user sets a different score threshold.

  Returns:
    a BoxList holding M boxes where M <= max_output_size

  Raises:
    ValueError: if 'scores' field does not exist
    ValueError: if threshold is not in [0, 1]
    ValueError: if max_output_size < 0
  """
  if not boxlist.has_field('scores'):
    raise ValueError('Field scores does not exist')
  if iou_threshold < 0. or iou_threshold > 1.0:
    raise ValueError('IOU threshold must be in [0, 1]')
  if max_output_size < 0:
    raise ValueError('max_output_size must be bigger than 0.')

  boxlist = filter_scores_greater_than(boxlist, score_threshold)
  if boxlist.num_boxes() == 0:
    return boxlist

  boxlist = sort_by_field(boxlist, 'scores')

  # Prevent further computation if NMS is disabled.
  if iou_threshold == 1.0:
    if boxlist.num_boxes() > max_output_size:
      selected_indices = np.arange(max_output_size)
      return gather(boxlist, selected_indices)
    else:
      return boxlist

  boxes = boxlist.get()
  num_boxes = boxlist.num_boxes()
  # is_index_valid is True only for all remaining valid boxes,
  is_index_valid = np.full(num_boxes, 1, dtype=bool)
  selected_indices = []
  num_output = 0
  # BUGFIX: the original used `xrange`, which does not exist on Python 3 and
  # is not imported from six in this module (NameError at runtime).
  for i in range(num_boxes):
    if num_output < max_output_size:
      if is_index_valid[i]:
        num_output += 1
        selected_indices.append(i)
        is_index_valid[i] = False
        valid_indices = np.where(is_index_valid)[0]
        if valid_indices.size == 0:
          break
        # Suppress all remaining boxes that overlap the newly selected box
        # more than iou_threshold.
        intersect_over_union = np_box_ops.iou(
            np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
        intersect_over_union = np.squeeze(intersect_over_union, axis=0)
        is_index_valid[valid_indices] = np.logical_and(
            is_index_valid[valid_indices],
            intersect_over_union <= iou_threshold)
  return gather(boxlist, np.array(selected_indices))
def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
                                    max_output_size):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
      representing detection scores.  This scores field is a tensor that can
      be 1 dimensional (in the case of a single class) or 2-dimensional, which
      which case we assume that it takes the shape [num_boxes, num_classes].
      We further assume that this rank is known statically and that
      scores.shape[1] is also known (i.e., the number of classes is fixed
      and known at graph construction time).
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
      with previously selected boxes are removed).
    max_output_size: maximum number of retained boxes per class.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not
      have a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('thresh must be between 0 and 1')
  if not isinstance(boxlist, np_box_list.BoxList):
    raise ValueError('boxlist must be a BoxList')
  if not boxlist.has_field('scores'):
    raise ValueError('input boxlist must have \'scores\' field')

  scores = boxlist.get_field('scores')
  score_rank = len(scores.shape)
  if score_rank == 1:
    # Single-class case: promote to a [num_boxes, 1] column.
    scores = np.reshape(scores, [-1, 1])
  elif score_rank == 2:
    if scores.shape[1] is None:
      raise ValueError('scores field must have statically defined second '
                       'dimension')
  else:
    raise ValueError('scores field must be of rank 1 or 2')

  num_boxes = boxlist.num_boxes()
  num_scores = scores.shape[0]
  num_classes = scores.shape[1]
  if num_boxes != num_scores:
    raise ValueError('Incorrect scores field length: actual vs expected.')

  per_class_results = []
  for class_idx in range(num_classes):
    # Run single-class NMS on this class's score column.
    candidates = np_box_list.BoxList(boxlist.get())
    candidates.add_field(
        'scores', np.reshape(scores[0:num_scores, class_idx], [-1]))
    above_thresh = filter_scores_greater_than(candidates, score_thresh)
    nms_result = non_max_suppression(above_thresh,
                                     max_output_size=max_output_size,
                                     iou_threshold=iou_thresh,
                                     score_threshold=score_thresh)
    # Tag every surviving box with its class label.
    class_labels = np.zeros_like(nms_result.get_field('scores')) + class_idx
    nms_result.add_field('classes', class_labels)
    per_class_results.append(nms_result)

  merged = concatenate(per_class_results)
  return sort_by_field(merged, 'scores')
def scale(boxlist, y_scale, x_scale):
  """Scale box coordinates in x and y dimensions.

  Args:
    boxlist: BoxList holding N boxes
    y_scale: float
    x_scale: float

  Returns:
    boxlist: BoxList holding N boxes
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  # Scale each coordinate column by its axis factor and re-assemble.
  scaled_coords = np.hstack([y_scale * y_min,
                             x_scale * x_min,
                             y_scale * y_max,
                             x_scale * x_max])
  scaled_boxlist = np_box_list.BoxList(scaled_coords)
  # Carry every extra field over unchanged.
  for field in boxlist.get_extra_fields():
    scaled_boxlist.add_field(field, boxlist.get_field(field))
  return scaled_boxlist
def clip_to_window(boxlist, window):
  """Clip bounding boxes to a window.

  This op clips input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a numpy array of shape [4] representing the
            [y_min, x_min, y_max, x_max] window to which the op
            should clip boxes.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  win_y_min = window[0]
  win_x_min = window[1]
  win_y_max = window[2]
  win_x_max = window[3]

  def _clamp(values, low, high):
    # Clamp a coordinate column into [low, high] using fmin/fmax, keeping
    # the original's NaN-propagation semantics.
    return np.fmax(np.fmin(values, high), low)

  clipped = np_box_list.BoxList(
      np.hstack([_clamp(y_min, win_y_min, win_y_max),
                 _clamp(x_min, win_x_min, win_x_max),
                 _clamp(y_max, win_y_min, win_y_max),
                 _clamp(x_max, win_x_min, win_x_max)]))
  clipped = _copy_extra_fields(clipped, boxlist)
  # Drop boxes that collapsed to zero area after clipping.
  areas = area(clipped)
  nonzero_area_indices = np.reshape(
      np.nonzero(np.greater(areas, 0.0)), [-1]).astype(np.int32)
  return gather(clipped, nonzero_area_indices)
def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
  """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.

  For each box in boxlist1, we want its IOA to be more than minoverlap with
  at least one of the boxes in boxlist2. If it does not, we remove it.

  Args:
    boxlist1: BoxList holding N boxes.
    boxlist2: BoxList holding M boxes.
    minoverlap: Minimum required overlap between boxes, to count them as
                overlapping.

  Returns:
    A pruned boxlist with size [N', 4].
  """
  # [M, N] matrix: IOA of each boxlist1 box with every boxlist2 box.
  overlap = ioa(boxlist2, boxlist1)
  # Best overlap per boxlist1 box — [N] vector.
  best_overlap = np.amax(overlap, axis=0)
  keep_mask = np.greater_equal(best_overlap, np.array(minoverlap))
  keep_indices = np.nonzero(keep_mask)[0]
  return gather(boxlist1, keep_indices)
def prune_outside_window(boxlist, window):
  """Prunes bounding boxes that fall outside a given window.

  This function prunes bounding boxes that even partially fall outside the
  given window. See also ClipToWindow which only prunes bounding boxes that
  fall completely outside the window, and clips any bounding boxes that
  partially overflow.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax]
            of the window.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding
      boxes in the input tensor.
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  win_y_min = window[0]
  win_x_min = window[1]
  win_y_max = window[2]
  win_x_max = window[3]
  # One violation column per window edge; a box survives only if none of
  # its edges crosses outside the window.
  violations = np.hstack([np.less(y_min, win_y_min),
                          np.less(x_min, win_x_min),
                          np.greater(y_max, win_y_max),
                          np.greater(x_max, win_x_max)])
  valid_indices = np.reshape(
      np.where(np.logical_not(np.max(violations, axis=1))), [-1])
  return gather(boxlist, valid_indices), valid_indices
def concatenate(boxlists, fields=None):
  """Concatenate list of BoxLists.

  This op concatenates a list of input BoxLists into a larger BoxList.  It
  also handles concatenation of BoxList fields as long as the field tensor
  shapes are equal except for the first dimension.

  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate.  By default, all
      fields from the first BoxList in the list are included in the
      concatenation.

  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in BoxList])
  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non BoxList objects), or if requested fields are not contained
      in all boxlists
  """
  if not isinstance(boxlists, list):
    raise ValueError('boxlists should be a list')
  if not boxlists:
    raise ValueError('boxlists should have nonzero length')
  for boxlist in boxlists:
    if not isinstance(boxlist, np_box_list.BoxList):
      raise ValueError('all elements of boxlists should be BoxList objects')

  merged = np_box_list.BoxList(
      np.vstack([boxlist.get() for boxlist in boxlists]))
  if fields is None:
    fields = boxlists[0].get_extra_fields()
  for field in fields:
    # All boxlists must carry the field with identical trailing dimensions.
    first_field_shape = boxlists[0].get_field(field).shape[1:]
    for boxlist in boxlists:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all requested fields')
      field_shape = boxlist.get_field(field).shape[1:]
      if field_shape != first_field_shape:
        raise ValueError('field %s must have same shape for all boxlists '
                         'except for the 0th dimension.' % field)
    merged.add_field(
        field,
        np.concatenate([boxlist.get_field(field) for boxlist in boxlists],
                       axis=0))
  return merged
def filter_scores_greater_than(boxlist, thresh):
  """Filter to keep only boxes with score exceeding a given threshold.

  This op keeps the collection of boxes whose corresponding scores are
  greater than the input threshold.

  Args:
    boxlist: BoxList holding N boxes. Must contain a 'scores' field
      representing detection scores.
    thresh: scalar threshold

  Returns:
    a BoxList holding M boxes where M <= N

  Raises:
    ValueError: if boxlist not a BoxList object or if it does not
      have a scores field
  """
  if not isinstance(boxlist, np_box_list.BoxList):
    raise ValueError('boxlist must be a BoxList')
  if not boxlist.has_field('scores'):
    raise ValueError('input boxlist must have \'scores\' field')
  scores = boxlist.get_field('scores')
  if len(scores.shape) > 2:
    raise ValueError('Scores should have rank 1 or 2')
  if len(scores.shape) == 2 and scores.shape[1] != 1:
    raise ValueError('Scores should have rank 1 or have shape '
                     'consistent with [None, 1]')
  # np.flatnonzero gives flat indices of passing scores. The previous
  # np.reshape(np.where(...), [-1]) flattened the (rows, cols) tuple returned
  # by np.where for rank-2 [N, 1] scores, appending the all-zero column
  # indices and thereby gathering box 0 spuriously; flatnonzero is correct
  # for both rank-1 and [N, 1] inputs.
  high_score_indices = np.flatnonzero(
      np.greater(scores, thresh)).astype(np.int32)
  return gather(boxlist, high_score_indices)
def change_coordinate_frame(boxlist, window):
  """Change coordinate frame of the boxlist to be relative to window's frame.

  Given a window of the form [ymin, xmin, ymax, xmax], changes bounding box
  coordinates from boxlist to be relative to this window (e.g., the min
  corner maps to (0,0) and the max corner maps to (1,1)).

  An example use case is data augmentation: where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some window
  (window). In this case we need to change the coordinate frame of each
  groundtruth box to be relative to this new window.

  Args:
    boxlist: A BoxList object holding N boxes.
    window: a size 4 1-D numpy array.

  Returns:
    Returns a BoxList object with N boxes.
  """
  win_height = window[2] - window[0]
  win_width = window[3] - window[1]
  # Translate so the window's min corner becomes the origin, then rescale
  # so the window spans [0, 1] in both dimensions.
  corner_offset = [window[0], window[1], window[0], window[1]]
  shifted = np_box_list.BoxList(boxlist.get() - corner_offset)
  boxlist_new = scale(shifted, 1.0 / win_height, 1.0 / win_width)
  _copy_extra_fields(boxlist_new, boxlist)
  return boxlist_new
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.

  Args:
    boxlist_to_copy_to: BoxList to which extra fields are copied.
    boxlist_to_copy_from: BoxList from which fields are copied.

  Returns:
    boxlist_to_copy_to with extra fields.
  """
  source = boxlist_to_copy_from
  for name in source.get_extra_fields():
    boxlist_to_copy_to.add_field(name, source.get_field(name))
  return boxlist_to_copy_to
def _update_valid_indices_by_removing_high_iou_boxes(
    selected_indices, is_index_valid, intersect_over_union, threshold):
  """Invalidates boxes whose IOU with any selected box exceeds threshold.

  Args:
    selected_indices: indices (into the columns of intersect_over_union) of
      boxes already selected.
    is_index_valid: boolean numpy array marking currently-valid boxes.
    intersect_over_union: [N, N] pairwise IOU matrix.
    threshold: scalar IOU threshold.

  Returns:
    boolean numpy array; True only where the box was valid and its maximum
    IOU with the selected boxes does not exceed threshold.
  """
  overlap_with_selected = intersect_over_union[:, selected_indices].max(axis=1)
  return np.logical_and(is_index_valid, overlap_with_selected <= threshold)
object_detection/utils/np_box_list_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_list_ops
class AreaRelatedTest(tf.test.TestCase):
  """Tests for area/intersection/iou/ioa and geometric BoxList ops."""

  def setUp(self):
    # Fixed fixtures in [y_min, x_min, y_max, x_max] order.
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0],
                       [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    self.boxlist1 = np_box_list.BoxList(boxes1)
    self.boxlist2 = np_box_list.BoxList(boxes2)

  def test_area(self):
    areas = np_box_list_ops.area(self.boxlist1)
    expected_areas = np.array([6.0, 5.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def test_intersection(self):
    intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2)
    expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def test_iou(self):
    iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2)
    expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
                             [1.0 / 16.0, 0.0, 5.0 / 400.0]],
                            dtype=float)
    self.assertAllClose(iou, expected_iou)

  def test_ioa(self):
    # ioa is asymmetric: intersection is normalized by the SECOND argument.
    boxlist1 = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist2 = np_box_list.BoxList(
        np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1)
    expected_ioa21 = np.array([[0.5, 0.0], [1.0, 1.0]], dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)

  def test_scale(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    # scale(boxlist, y_scale=2.0, x_scale=3.0).
    boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0)
    expected_boxlist_scaled = np_box_list.BoxList(
        np.array([[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get())

  def test_clip_to_window(self):
    # Third box extends past the [0, 0, 1, 1] window and gets clipped.
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [-0.2, -0.3, 0.7, 1.5]],
                 dtype=np.float32))
    boxlist_clipped = np_box_list_ops.clip_to_window(boxlist,
                                                     [0.0, 0.0, 1.0, 1.0])
    expected_boxlist_clipped = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [0.0, 0.0, 0.7, 1.0]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get())

  def test_prune_outside_window(self):
    # Boxes not fully inside the window are removed (not clipped).
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [-0.2, -0.3, 0.7, 1.5]],
                 dtype=np.float32))
    boxlist_pruned, _ = np_box_list_ops.prune_outside_window(
        boxlist, [0.0, 0.0, 1.0, 1.0])
    expected_boxlist_pruned = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get())

  def test_concatenate(self):
    boxlist1 = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist2 = np_box_list.BoxList(
        np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    boxlists = [boxlist1, boxlist2]
    boxlist_concatenated = np_box_list_ops.concatenate(boxlists)
    boxlist_concatenated_expected = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    self.assertAllClose(boxlist_concatenated_expected.get(),
                        boxlist_concatenated.get())

  def test_change_coordinate_frame(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist_coord = np_box_list_ops.change_coordinate_frame(
        boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32))
    expected_boxlist_coord = np_box_list.BoxList(
        np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32))
    self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get())

  def test_filter_scores_greater_than(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist.add_field('scores', np.array([0.8, 0.2], np.float32))
    boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5)
    expected_boxlist_greater = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32))
    self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get())
class GatherOpsTest(tf.test.TestCase):
  """Tests for np_box_list_ops.gather with and without explicit fields."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)
    self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float))
    self.boxlist.add_field('labels',
                           np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                     [0, 0, 0, 0, 1]],
                                    dtype=int))

  def test_gather_with_out_of_range_indices(self):
    # Index 3 exceeds the 3-box list.
    indices = np.array([3, 1], dtype=int)
    boxlist = self.boxlist
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices)

  def test_gather_with_invalid_multidimensional_indices(self):
    # Indices must be rank 1.
    indices = np.array([[0, 1], [1, 2]], dtype=int)
    boxlist = self.boxlist
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices)

  def test_gather_without_fields_specified(self):
    # With fields=None, all extra fields are gathered along with the boxes.
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist
    subboxlist = np_box_list_ops.gather(boxlist, indices)

    expected_scores = np.array([0.9, 0.5, 0.7], dtype=float)
    self.assertAllClose(expected_scores, subboxlist.get_field('scores'))

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, subboxlist.get())

    expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
                                [0, 1, 0, 0, 0]],
                               dtype=int)
    self.assertAllClose(expected_labels, subboxlist.get_field('labels'))

  def test_gather_with_invalid_field_specified(self):
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist

    # A bare string field (not a list) is rejected — presumably fields must
    # be a list; see the gather implementation.
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices, 'labels')

    # A field name not present on the boxlist is rejected.
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices, ['objectness'])

  def test_gather_with_fields_specified(self):
    # Only the requested field survives; 'scores' is dropped.
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist
    subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels'])

    self.assertFalse(subboxlist.has_field('scores'))

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, subboxlist.get())

    expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
                                [0, 1, 0, 0, 0]],
                               dtype=int)
    self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
class SortByFieldTest(tf.test.TestCase):
  """Tests for np_box_list_ops.sort_by_field ordering and validation."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)
    self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float))
    self.boxlist.add_field('labels',
                           np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                     [0, 0, 0, 0, 1]],
                                    dtype=int))

  def test_with_invalid_field(self):
    # Missing field rejected.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'objectness')
    # Multi-dimensional field rejected — sorting needs a scalar per box.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'labels')

  def test_with_invalid_sorting_order(self):
    # Order must be a SortOrder enum value, not a string.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending')

  def test_with_descending_sorting(self):
    # Descending is the default order.
    sorted_boxlist = np_box_list_ops.sort_by_field(self.boxlist, 'scores')

    expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0],
                               [0.0, 0.0, 20.0, 20.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, sorted_boxlist.get())

    expected_scores = np.array([0.9, 0.5, 0.4], dtype=float)
    self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))

  def test_with_ascending_sorting(self):
    sorted_boxlist = np_box_list_ops.sort_by_field(
        self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND)

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0],
                               [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0],],
                              dtype=float)
    self.assertAllClose(expected_boxes, sorted_boxlist.get())

    expected_scores = np.array([0.4, 0.5, 0.9], dtype=float)
    self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
class NonMaximumSuppressionTest(tf.test.TestCase):
  """Tests for non_max_suppression and multi_class_non_max_suppression."""

  def setUp(self):
    # Three clusters: boxes 0-2 overlap around x in [0, 1], boxes 3-4 around
    # x in [10, 11], and box 5 stands alone at x in [100, 101].
    self._boxes = np.array([[0, 0, 1, 1],
                            [0, 0.1, 1, 1.1],
                            [0, -0.1, 1, 0.9],
                            [0, 10, 1, 11],
                            [0, 10.1, 1, 11.1],
                            [0, 100, 1, 101]],
                           dtype=float)
    self._boxlist = np_box_list.BoxList(self._boxes)

  def test_with_no_scores_field(self):
    boxlist = np_box_list.BoxList(self._boxes)
    max_output_size = 3
    iou_threshold = 0.5
    with self.assertRaises(ValueError):
      np_box_list_ops.non_max_suppression(
          boxlist, max_output_size, iou_threshold)

  def test_nms_disabled_max_output_size_equals_three(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .2, .3], dtype=float))
    max_output_size = 3
    iou_threshold = 1.  # No NMS
    # With IOU threshold 1 nothing is suppressed; the top 3 by score remain.
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_from_three_clusters(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .2, .3], dtype=float))
    max_output_size = 3
    iou_threshold = 0.5
    # One survivor per cluster: the highest-scoring box of each.
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_at_most_two_from_three_clusters(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .5, .3], dtype=float))
    max_output_size = 2
    iou_threshold = 0.5
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_at_most_thirty_from_three_clusters(self):
    # max_output_size larger than the number of survivors is a no-op cap.
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .5, .3], dtype=float))
    max_output_size = 30
    iou_threshold = 0.5
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_from_ten_indentical_boxes(self):
    # NOTE(review): "indentical" is a typo for "identical" in this method
    # name; kept as-is since renaming would alter the test id.
    boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    boxlist.add_field('scores', np.array(10 * [0.8]))
    iou_threshold = .5
    max_output_size = 3
    # All duplicates collapse to a single box.
    expected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_different_iou_threshold(self):
    # Two pairs of boxes whose within-pair IOUs fall between the thresholds
    # exercised below (0.4, 0.5, 0.8), so each threshold keeps a different
    # subset.
    boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
                      [200, 200, 210, 300], [200, 200, 210, 250]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6]))
    max_output_size = 4

    iou_threshold = .4
    expected_boxes = np.array([[0, 0, 20, 100],
                               [200, 200, 210, 300],],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

    iou_threshold = .5
    expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300],
                               [200, 200, 210, 250]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

    iou_threshold = .8
    expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
                               [200, 200, 210, 300], [200, 200, 210, 250]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_multiclass_nms(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8],
                  [0.6, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    # One score column per class; NMS runs independently per class, which is
    # why more than max_output_size boxes can come back in total.
    scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
                       [0.7, -0.7, 0.6, 0.2, -0.9],
                       [0.4, 0.34, -0.9, 0.2, 0.31]],
                      dtype=np.float32)
    boxlist.add_field('scores', scores)
    boxlist_clean = np_box_list_ops.multi_class_non_max_suppression(
        boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)

    scores_clean = boxlist_clean.get_field('scores')
    classes_clean = boxlist_clean.get_field('classes')
    boxes = boxlist_clean.get()
    expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
    expected_classes = np.array([0, 2, 1, 4])
    expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
                               [0.4, 0.2, 0.8, 0.8],
                               [0.6, 0.0, 1.0, 1.0],
                               [0.6, 0.0, 1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(scores_clean, expected_scores)
    self.assertAllClose(classes_clean, expected_classes)
    self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/np_box_list_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_test."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_list
class BoxListTest(tf.test.TestCase):
  """Tests for BoxList construction and basic field access."""

  def test_invalid_box_data(self):
    # Plain list (not an ndarray) is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList([0, 0, 1, 1])

    # Integer dtype is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int))

    # Rank-1 data (not [N, 4]) is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float))

    # Second row has y_min > y_max — presumably the degenerate-box case
    # being exercised here.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]],
                                   dtype=float))

  def test_has_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    # The coordinates themselves live in the implicit 'boxes' field.
    self.assertTrue(boxlist.has_field('boxes'))

  def test_has_field_with_nonexisted_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    self.assertFalse(boxlist.has_field('scores'))

  def test_get_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes))

  def test_get_field_with_nonexited_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    with self.assertRaises(ValueError):
      boxlist.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
  """Tests for BoxList.add_field, field accessors and num_boxes."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)

  def test_add_already_existed_field(self):
    # 'boxes' is the implicit field holding the coordinates; re-adding it
    # must fail.
    with self.assertRaises(ValueError):
      self.boxlist.add_field('boxes',
                             np.array([[0, 0, 0, 1, 0]], dtype=float))

  def test_add_invalid_field_data(self):
    # Field data must have the same leading dimension as the number of boxes.
    with self.assertRaises(ValueError):
      self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float))
    with self.assertRaises(ValueError):
      self.boxlist.add_field('scores',
                             np.array([0.5, 0.7, 0.9, 0.1], dtype=float))

  def test_add_single_dimensional_field_data(self):
    boxlist = self.boxlist
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    boxlist.add_field('scores', scores)
    self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores')))

  def test_add_multi_dimensional_field_data(self):
    boxlist = self.boxlist
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    boxlist.add_field('labels', labels)
    self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels')))

  def test_get_extra_fields(self):
    boxlist = self.boxlist
    self.assertSameElements(boxlist.get_extra_fields(), [])

    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    boxlist.add_field('scores', scores)
    self.assertSameElements(boxlist.get_extra_fields(), ['scores'])

    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    boxlist.add_field('labels', labels)
    self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])

  def test_get_coordinates(self):
    y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()

    expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
    expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
    expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
    expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)

    self.assertTrue(np.allclose(y_min, expected_y_min))
    self.assertTrue(np.allclose(x_min, expected_x_min))
    self.assertTrue(np.allclose(y_max, expected_y_max))
    self.assertTrue(np.allclose(x_max, expected_x_max))

  def test_num_boxes(self):
    boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    expected_num_boxes = 2
    # assertEqual: assertEquals is a deprecated alias (removed in
    # Python 3.12's unittest).
    self.assertEqual(boxlist.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/np_box_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, 4] numpy arrays representing bounding boxes.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import
numpy
as
np
def area(boxes):
  """Computes area of boxes.

  Args:
    boxes: Numpy array with shape [N, 4] holding N boxes in
      [y_min, x_min, y_max, x_max] order.

  Returns:
    a numpy array with shape [N] representing box areas
  """
  heights = boxes[:, 2] - boxes[:, 0]
  widths = boxes[:, 3] - boxes[:, 1]
  return heights * widths
def intersection(boxes1, boxes2):
  """Compute pairwise intersection areas between boxes.

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes
    boxes2: a numpy array with shape [M, 4] holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise intersection area
  """
  y_min1, x_min1, y_max1, x_max1 = np.split(boxes1, 4, axis=1)
  y_min2, x_min2, y_max2, x_max2 = np.split(boxes2, 4, axis=1)

  # Broadcasting [N, 1] coordinates against transposed [1, M] coordinates
  # yields all-pairs overlap extents; negative extents mean no overlap and
  # are clamped to zero.
  overlap_heights = np.maximum(
      0.0,
      np.minimum(y_max1, y_max2.T) - np.maximum(y_min1, y_min2.T))
  overlap_widths = np.maximum(
      0.0,
      np.minimum(x_max1, x_max2.T) - np.maximum(x_min1, x_min2.T))
  return overlap_heights * overlap_widths
def iou(boxes1, boxes2):
  """Computes pairwise intersection-over-union between box collections.

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.
  """
  intersect = intersection(boxes1, boxes2)
  # union(a, b) = area(a) + area(b) - intersection(a, b), broadcast to [N, M].
  areas1 = np.expand_dims(area(boxes1), axis=1)
  areas2 = np.expand_dims(area(boxes2), axis=0)
  return intersect / (areas1 + areas2 - intersect)
def ioa(boxes1, boxes2):
  """Computes pairwise intersection-over-area between box collections.

  Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
  their intersection area over box2's area. Note that ioa is not symmetric,
  that is, IOA(box1, box2) != IOA(box2, box1).

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.
  """
  intersect = intersection(boxes1, boxes2)
  # Normalize each column by the area of the corresponding boxes2 entry.
  return intersect / np.expand_dims(area(boxes2), axis=0)
object_detection/utils/np_box_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_box_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_ops
class BoxOpsTests(tf.test.TestCase):
  """Tests for the pure-numpy box ops: area, intersection, iou, ioa."""

  def setUp(self):
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    self.boxes1 = boxes1
    self.boxes2 = boxes2

  def testArea(self):
    areas = np_box_ops.area(self.boxes1)
    expected_areas = np.array([6.0, 5.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def testIntersection(self):
    intersection = np_box_ops.intersection(self.boxes1, self.boxes2)
    expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def testIOU(self):
    iou = np_box_ops.iou(self.boxes1, self.boxes2)
    expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
                             [1.0 / 16.0, 0.0, 5.0 / 400.0]],
                            dtype=float)
    self.assertAllClose(iou, expected_iou)

  def testIOA(self):
    # ioa normalizes by the area of the SECOND argument's boxes.
    boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                      dtype=np.float32)
    boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                      dtype=np.float32)
    ioa21 = np_box_ops.ioa(boxes2, boxes1)
    expected_ioa21 = np.array([[0.5, 0.0], [1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/object_detection_evaluation.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""object_detection_evaluation module.
ObjectDetectionEvaluation is a class which manages ground truth information of a
object detection dataset, and computes frequently used detection metrics such as
Precision, Recall, CorLoc of the provided detection results.
It supports the following operations:
1) Add ground truth information of images sequentially.
2) Add detection result of images sequentially.
3) Evaluate detection metrics on already inserted detection results.
4) Write evaluation result into a pickle file for future processing or
visualization.
Note: This module operates on numpy boxes and box lists.
"""
import
logging
import
numpy
as
np
from
object_detection.utils
import
metrics
from
object_detection.utils
import
per_image_evaluation
class
ObjectDetectionEvaluation
(
object
):
"""Evaluate Object Detection Result."""
def __init__(self,
             num_groundtruth_classes,
             matching_iou_threshold=0.5,
             nms_iou_threshold=1.0,
             nms_max_output_boxes=10000):
  """Initializes evaluation state.

  Args:
    num_groundtruth_classes: number of ground truth classes; sizes all
      per-class accumulators.
    matching_iou_threshold: forwarded to
      per_image_evaluation.PerImageEvaluation.
    nms_iou_threshold: forwarded to PerImageEvaluation.
    nms_max_output_boxes: forwarded to PerImageEvaluation.
  """
  self.per_image_eval = per_image_evaluation.PerImageEvaluation(
      num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
      nms_max_output_boxes)
  self.num_class = num_groundtruth_classes

  # Ground truth state: per-image dicts keyed by image key, plus per-class
  # instance/image counts.
  self.groundtruth_boxes = {}
  self.groundtruth_class_labels = {}
  self.groundtruth_is_difficult_list = {}
  self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
  self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

  # Detection state and per-class metric accumulators.
  self.detection_keys = set()
  self.scores_per_class = [[] for _ in range(self.num_class)]
  self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
  self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
  # Filled with NaN — presumably marking "not yet evaluated"; note that
  # clear_detections() resets this array with zeros instead (TODO confirm
  # whether that asymmetry is intentional).
  self.average_precision_per_class = np.empty(self.num_class, dtype=float)
  self.average_precision_per_class.fill(np.nan)
  self.precisions_per_class = []
  self.recalls_per_class = []
  self.corloc_per_class = np.ones(self.num_class, dtype=float)
def
clear_detections
(
self
):
self
.
detection_keys
=
{}
self
.
scores_per_class
=
[[]
for
_
in
range
(
self
.
num_class
)]
self
.
tp_fp_labels_per_class
=
[[]
for
_
in
range
(
self
.
num_class
)]
self
.
num_images_correctly_detected_per_class
=
np
.
zeros
(
self
.
num_class
)
self
.
average_precision_per_class
=
np
.
zeros
(
self
.
num_class
,
dtype
=
float
)
self
.
precisions_per_class
=
[]
self
.
recalls_per_class
=
[]
self
.
corloc_per_class
=
np
.
ones
(
self
.
num_class
,
dtype
=
float
)
def
add_single_ground_truth_image_info
(
self
,
image_key
,
groundtruth_boxes
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
=
None
):
"""Add ground truth info of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
groundtruth_boxes: A numpy array of shape [M, 4] representing object box
coordinates[y_min, x_min, y_max, x_max]
groundtruth_class_labels: A 1-d numpy array of length M representing class
labels
groundtruth_is_difficult_list: A length M numpy boolean array denoting
whether a ground truth box is a difficult instance or not. To support
the case that no boxes are difficult, it is by default set as None.
"""
if
image_key
in
self
.
groundtruth_boxes
:
logging
.
warn
(
'image %s has already been added to the ground truth database.'
,
image_key
)
return
self
.
groundtruth_boxes
[
image_key
]
=
groundtruth_boxes
self
.
groundtruth_class_labels
[
image_key
]
=
groundtruth_class_labels
if
groundtruth_is_difficult_list
is
None
:
num_boxes
=
groundtruth_boxes
.
shape
[
0
]
groundtruth_is_difficult_list
=
np
.
zeros
(
num_boxes
,
dtype
=
bool
)
self
.
groundtruth_is_difficult_list
[
image_key
]
=
groundtruth_is_difficult_list
.
astype
(
dtype
=
bool
)
self
.
_update_ground_truth_statistics
(
groundtruth_class_labels
,
groundtruth_is_difficult_list
)
def
add_single_detected_image_info
(
self
,
image_key
,
detected_boxes
,
detected_scores
,
detected_class_labels
):
"""Add detected result of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates[y_min, x_min, y_max, x_max]
detected_scores: A 1-d numpy array of length N representing classification
score
detected_class_labels: A 1-d numpy array of length N representing class
labels
Raises:
ValueError: if detected_boxes, detected_scores and detected_class_labels
do not have the same length.
"""
if
(
len
(
detected_boxes
)
!=
len
(
detected_scores
)
or
len
(
detected_boxes
)
!=
len
(
detected_class_labels
)):
raise
ValueError
(
'detected_boxes, detected_scores and '
'detected_class_labels should all have same lengths. Got'
'[%d, %d, %d]'
%
len
(
detected_boxes
),
len
(
detected_scores
),
len
(
detected_class_labels
))
if
image_key
in
self
.
detection_keys
:
logging
.
warn
(
'image %s has already been added to the detection result database'
,
image_key
)
return
self
.
detection_keys
.
add
(
image_key
)
if
image_key
in
self
.
groundtruth_boxes
:
groundtruth_boxes
=
self
.
groundtruth_boxes
[
image_key
]
groundtruth_class_labels
=
self
.
groundtruth_class_labels
[
image_key
]
groundtruth_is_difficult_list
=
self
.
groundtruth_is_difficult_list
[
image_key
]
else
:
groundtruth_boxes
=
np
.
empty
(
shape
=
[
0
,
4
],
dtype
=
float
)
groundtruth_class_labels
=
np
.
array
([],
dtype
=
int
)
groundtruth_is_difficult_list
=
np
.
array
([],
dtype
=
bool
)
scores
,
tp_fp_labels
,
is_class_correctly_detected_in_image
=
(
self
.
per_image_eval
.
compute_object_detection_metrics
(
detected_boxes
,
detected_scores
,
detected_class_labels
,
groundtruth_boxes
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
))
for
i
in
range
(
self
.
num_class
):
self
.
scores_per_class
[
i
].
append
(
scores
[
i
])
self
.
tp_fp_labels_per_class
[
i
].
append
(
tp_fp_labels
[
i
])
(
self
.
num_images_correctly_detected_per_class
)
+=
is_class_correctly_detected_in_image
def
_update_ground_truth_statistics
(
self
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
):
"""Update grouth truth statitistics.
1. Difficult boxes are ignored when counting the number of ground truth
instances as done in Pascal VOC devkit.
2. Difficult boxes are treated as normal boxes when computing CorLoc related
statitistics.
Args:
groundtruth_class_labels: An integer numpy array of length M,
representing M class labels of object instances in ground truth
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
"""
for
class_index
in
range
(
self
.
num_class
):
num_gt_instances
=
np
.
sum
(
groundtruth_class_labels
[
~
groundtruth_is_difficult_list
]
==
class_index
)
self
.
num_gt_instances_per_class
[
class_index
]
+=
num_gt_instances
if
np
.
any
(
groundtruth_class_labels
==
class_index
):
self
.
num_gt_imgs_per_class
[
class_index
]
+=
1
def
evaluate
(
self
):
"""Compute evaluation result.
Returns:
average_precision_per_class: float numpy array of average precision for
each class.
mean_ap: mean average precision of all classes, float scalar
precisions_per_class: List of precisions, each precision is a float numpy
array
recalls_per_class: List of recalls, each recall is a float numpy array
corloc_per_class: numpy float array
mean_corloc: Mean CorLoc score for each class, float scalar
"""
if
(
self
.
num_gt_instances_per_class
==
0
).
any
():
logging
.
warn
(
'The following classes have no ground truth examples: %s'
,
np
.
squeeze
(
np
.
argwhere
(
self
.
num_gt_instances_per_class
==
0
)))
for
class_index
in
range
(
self
.
num_class
):
if
self
.
num_gt_instances_per_class
[
class_index
]
==
0
:
continue
scores
=
np
.
concatenate
(
self
.
scores_per_class
[
class_index
])
tp_fp_labels
=
np
.
concatenate
(
self
.
tp_fp_labels_per_class
[
class_index
])
precision
,
recall
=
metrics
.
compute_precision_recall
(
scores
,
tp_fp_labels
,
self
.
num_gt_instances_per_class
[
class_index
])
self
.
precisions_per_class
.
append
(
precision
)
self
.
recalls_per_class
.
append
(
recall
)
average_precision
=
metrics
.
compute_average_precision
(
precision
,
recall
)
self
.
average_precision_per_class
[
class_index
]
=
average_precision
self
.
corloc_per_class
=
metrics
.
compute_cor_loc
(
self
.
num_gt_imgs_per_class
,
self
.
num_images_correctly_detected_per_class
)
mean_ap
=
np
.
nanmean
(
self
.
average_precision_per_class
)
mean_corloc
=
np
.
nanmean
(
self
.
corloc_per_class
)
return
(
self
.
average_precision_per_class
,
mean_ap
,
self
.
precisions_per_class
,
self
.
recalls_per_class
,
self
.
corloc_per_class
,
mean_corloc
)
def
get_eval_result
(
self
):
return
EvalResult
(
self
.
average_precision_per_class
,
self
.
precisions_per_class
,
self
.
recalls_per_class
,
self
.
corloc_per_class
)
class EvalResult(object):
  """Plain value container bundling per-class detection metrics."""

  def __init__(self, average_precisions, precisions, recalls, all_corloc):
    """Stores one evaluation run's results.

    Args:
      average_precisions: per-class average precision values.
      precisions: list of per-class precision arrays.
      recalls: list of per-class recall arrays.
      all_corloc: per-class CorLoc scores.
    """
    self.average_precisions = average_precisions
    self.precisions = precisions
    self.recalls = recalls
    self.all_corloc = all_corloc
object_detection/utils/object_detection_evaluation_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.object_detection_evaluation."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
object_detection_evaluation
class ObjectDetectionEvaluationTest(tf.test.TestCase):
  """Unit tests for ObjectDetectionEvaluation.

  setUp builds a 3-class evaluator, registers ground truth for three images
  (img2 has one box marked difficult) and detections for img2 only; each test
  then inspects the evaluator's internal state or the final metrics.
  """

  def setUp(self):
    # Evaluator under test: 3 ground truth classes, default thresholds.
    num_groundtruth_classes = 3
    self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
        num_groundtruth_classes)

    # img1: three boxes with class labels [0, 2, 0], none difficult.
    image_key1 = "img1"
    groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                  dtype=float)
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key1, groundtruth_boxes1, groundtruth_class_labels1)
    # img2: three boxes [0, 0, 2]; the second (class 0) is difficult, so it
    # is excluded from the per-class instance counts.
    image_key2 = "img2"
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    self.od_eval.add_single_ground_truth_image_info(
        image_key2, groundtruth_boxes2, groundtruth_class_labels2,
        groundtruth_is_difficult_list2)
    # img3: a single class-1 box.
    image_key3 = "img3"
    groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_class_labels3 = np.array([1], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key3, groundtruth_boxes3, groundtruth_class_labels3)

    # Detections for img2: the first box exactly matches a difficult ground
    # truth box; the other two match nothing.
    image_key = "img2"
    detected_boxes = np.array(
        [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
        dtype=float)
    detected_class_labels = np.array([0, 0, 2], dtype=int)
    detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
    self.od_eval.add_single_detected_image_info(
        image_key, detected_boxes, detected_scores, detected_class_labels)

  def test_add_single_ground_truth_image_info(self):
    # Instance counts exclude the difficult class-0 box of img2:
    # class 0 -> 2 (img1) + 1 (img2), class 1 -> 1 (img3), class 2 -> 2.
    expected_num_gt_instances_per_class = np.array([3, 1, 2], dtype=int)
    # Image counts include difficult boxes (CorLoc convention).
    expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
    self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
                                   self.od_eval.num_gt_instances_per_class))
    self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class,
                                   self.od_eval.num_gt_imgs_per_class))
    # Ground truth boxes and difficult flags are stored verbatim per image.
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes["img2"],
                                groundtruth_boxes2))
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(
        self.od_eval.groundtruth_is_difficult_list["img2"],
        groundtruth_is_difficult_list2))
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
        "img1"], groundtruth_class_labels1))

  def test_add_single_detected_image_info(self):
    # Scores are expected sorted descending within each class; all detections
    # are false positives (tp_fp label 0) except none here are true positives
    # since the only matching ground truth box is difficult.
    expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
                                 [np.array([0.9], dtype=float)]]
    expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [],
                                       [np.array([0], dtype=bool)]]
    expected_num_images_correctly_detected_per_class = np.array([0, 0, 0],
                                                                dtype=int)
    for i in range(self.od_eval.num_class):
      for j in range(len(expected_scores_per_class[i])):
        self.assertTrue(np.allclose(expected_scores_per_class[i][j],
                                    self.od_eval.scores_per_class[i][j]))
        self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][
            j], self.od_eval.tp_fp_labels_per_class[i][j]))
    self.assertTrue(np.array_equal(
        expected_num_images_correctly_detected_per_class,
        self.od_eval.num_images_correctly_detected_per_class))

  def test_evaluate(self):
    (average_precision_per_class, mean_ap, precisions_per_class,
     recalls_per_class, corloc_per_class,
     mean_corloc) = self.od_eval.evaluate()
    # Class 1 has no detections, so its precision/recall arrays are empty.
    expected_precisions_per_class = [np.array([0, 0.5], dtype=float),
                                     np.array([], dtype=float),
                                     np.array([0], dtype=float)]
    expected_recalls_per_class = [
        np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float),
        np.array([0], dtype=float)
    ]
    expected_average_precision_per_class = np.array([1. / 6., 0, 0],
                                                    dtype=float)
    # NOTE(review): np.divide(0, 0) yields NaN, and np.allclose does not
    # treat NaN as equal by default — confirm this expectation actually
    # matches what metrics.compute_cor_loc returns for class 1.
    expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float)
    expected_mean_ap = 1. / 18
    expected_mean_corloc = 0.0
    for i in range(self.od_eval.num_class):
      self.assertTrue(np.allclose(expected_precisions_per_class[i],
                                  precisions_per_class[i]))
      self.assertTrue(np.allclose(expected_recalls_per_class[i],
                                  recalls_per_class[i]))
    self.assertTrue(np.allclose(expected_average_precision_per_class,
                                average_precision_per_class))
    self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class))
    self.assertAlmostEqual(expected_mean_ap, mean_ap)
    self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
# Standard TF test entry point: runs all tf.test.TestCase tests in this
# module when the file is executed directly.
if __name__ == "__main__":
  tf.test.main()
object_detection/utils/ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops."""
import
math
import
six
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
static_shape
def expanded_shape(orig_shape, start_dim, num_dims):
  """Inserts multiple ones into a shape vector.

  Inserts an all-1 vector of length num_dims at position start_dim into a
  shape. Can be combined with tf.reshape to generalize tf.expand_dims.

  Args:
    orig_shape: the shape into which the all-1 vector is added (int32 vector)
    start_dim: insertion position (int scalar)
    num_dims: length of the inserted all-1 vector (int scalar)

  Returns:
    An int32 vector of length tf.size(orig_shape) + num_dims.
  """
  with tf.name_scope('ExpandedShape'):
    # Promote start_dim to rank 1 so it can act as a slice begin/size.
    insert_at = tf.expand_dims(start_dim, 0)
    head = tf.slice(orig_shape, [0], insert_at)
    ones_block = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32)
    tail = tf.slice(orig_shape, insert_at, [-1])
    return tf.concat([head, ones_block, tail], 0)
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
  """
  def _to_absolute_coordinates(normalized_boxes):
    # image_shape[1] / image_shape[2] are the height / width used to scale
    # the boxes; check_range=False skips the [0, 1] range assertion.
    return box_list_ops.to_absolute_coordinates(
        box_list.BoxList(normalized_boxes),
        image_shape[1], image_shape[2], check_range=False).get()

  # Map the per-image conversion over the batch dimension.
  absolute_boxes = tf.map_fn(
      _to_absolute_coordinates,
      elems=(normalized_boxes),
      dtype=tf.float32,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
def meshgrid(x, y):
  """Tiles the contents of x and y into a pair of grids.

  Multidimensional analog of numpy.meshgrid, giving the same behavior if x
  and y are vectors. Generally, this will give:

  xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
  ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)

  Keep in mind that the order of the arguments and outputs is reverse relative
  to the order of the indices they go into, done for compatibility with numpy.
  The output tensors have the same shapes.  Specifically:

  xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
  ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())

  Args:
    x: A tensor of arbitrary shape and rank. xgrid will contain these values
       varying in its last dimensions.
    y: A tensor of arbitrary shape and rank. ygrid will contain these values
       varying in its first dimensions.

  Returns:
    A tuple of tensors (xgrid, ygrid).
  """
  with tf.name_scope('Meshgrid'):
    x = tf.convert_to_tensor(x)
    y = tf.convert_to_tensor(y)
    # Prepend rank(y) ones to x's shape and append rank(x) ones to y's shape
    # so that tiling can broadcast each against the other.
    x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
    y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))

    xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape)
    ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape)
    # Restore the static shape lost through the dynamic reshape/tile above.
    new_shape = y.get_shape().concatenate(x.get_shape())
    xgrid.set_shape(new_shape)
    ygrid.set_shape(new_shape)

    return xgrid, ygrid
def pad_to_multiple(tensor, multiple):
  """Returns the tensor zero padded to the specified multiple.

  Appends 0s to the end of the first and second dimension (height and width)
  of the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, PadToMultiple will append 0s so that the resulting tensor
  will be of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
      tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
  # Prefer static (Python int) dimensions when available; fall back to
  # dynamic tf.shape() values when a dimension is unknown at graph-build time.
  tensor_shape = tensor.get_shape()
  batch_size = static_shape.get_batch_size(tensor_shape)
  tensor_height = static_shape.get_height(tensor_shape)
  tensor_width = static_shape.get_width(tensor_shape)
  tensor_depth = static_shape.get_depth(tensor_shape)

  if batch_size is None:
    batch_size = tf.shape(tensor)[0]

  if tensor_height is None:
    # Dynamic height: compute the padded height with tensor ops.
    tensor_height = tf.shape(tensor)[1]
    padded_tensor_height = tf.to_int32(
        tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
  else:
    # Static height: compute the padded height in Python.
    padded_tensor_height = int(
        math.ceil(float(tensor_height) / multiple) * multiple)

  if tensor_width is None:
    tensor_width = tf.shape(tensor)[2]
    padded_tensor_width = tf.to_int32(
        tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_width = int(
        math.ceil(float(tensor_width) / multiple) * multiple)

  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
  height_pad = tf.zeros(
      [batch_size, padded_tensor_height - tensor_height, tensor_width,
       tensor_depth])
  padded_tensor = tf.concat([tensor, height_pad], 1)
  width_pad = tf.zeros(
      [batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
       tensor_depth])
  padded_tensor = tf.concat([padded_tensor, width_pad], 2)

  return padded_tensor
def padded_one_hot_encoding(indices, depth, left_pad):
  """Returns a zero padded one-hot tensor.

  Converts a sparse representation of indices (e.g., [4]) to a zero padded
  one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result is a tensor of shape
  (0, depth + left_pad). If depth = 0, this function returns `None`.

  Args:
    indices: an integer tensor of shape [num_indices].
    depth: depth for the one-hot tensor (integer).
    left_pad: number of zeros to left pad the one-hot tensor with (integer).

  Returns:
    padded_onehot: a tensor with shape (num_indices, depth + left_pad), or
      `None` if the depth is zero.

  Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth`
      are either negative or non-integers.
  """
  # `long` only exists on Python 2; the conditional keeps Python 3 from
  # ever evaluating it.
  integer_types = (int, long) if six.PY2 else int
  if depth < 0 or not isinstance(depth, integer_types):
    raise ValueError('`depth` must be a non-negative integer.')
  if left_pad < 0 or not isinstance(left_pad, integer_types):
    raise ValueError('`left_pad` must be a non-negative integer.')
  if depth == 0:
    return None
  if len(indices.get_shape().as_list()) != 1:
    raise ValueError('`indices` must have rank 1')

  def one_hot_and_pad():
    # Build the one-hot matrix, then prepend left_pad zero columns.
    one_hot = tf.cast(
        tf.one_hot(tf.cast(indices, tf.int64), depth,
                   on_value=1, off_value=0), tf.float32)
    return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')

  # An empty `indices` tensor would make tf.one_hot fail, so branch on size.
  result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
                   lambda: tf.zeros((depth + left_pad, 0)))
  return tf.reshape(result, [-1, depth + left_pad])
def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes):
  """Converts bounding boxes from dense to sparse form.

  Args:
    dense_locations: a [max_num_boxes, 4] tensor in which only the first k
      rows are valid bounding box location coordinates, where k is the sum of
      elements in dense_num_boxes.
    dense_num_boxes: a [max_num_classes] tensor indicating the counts of
       various bounding box classes e.g. [1, 0, 0, 2] means that the first
       bounding box is of class 0 and the second and third bounding boxes are
       of class 3. The sum of elements in this tensor is the number of valid
       bounding boxes.
    num_classes: number of classes

  Returns:
    box_locations: a [num_boxes, 4] tensor containing only valid bounding
       boxes (i.e. the first num_boxes rows of dense_locations)
    box_classes: a [num_boxes] tensor containing the classes of each bounding
       box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3]
  """
  # Total number of valid boxes = sum of per-class counts.
  num_valid_boxes = tf.reduce_sum(dense_num_boxes)
  # Keep only the first num_valid_boxes rows of the dense box tensor.
  box_locations = tf.slice(dense_locations,
                           tf.constant([0, 0]), tf.stack([num_valid_boxes, 4]))
  # Repeat each class id i dense_num_boxes[i] times, then concatenate.
  tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0))
                   for i in range(num_classes)]
  box_classes = tf.concat(tiled_classes, 0)
  # tf.slice loses the static column count; restore it.
  box_locations.set_shape([None, 4])
  return box_locations, box_classes
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Creates dense vector with indices set to specific value and rest to zeros.

  This function exists because it is unclear if it is safe to use
    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  with indices which are not ordered.
  This function accepts a dynamic size (e.g. tf.shape(tensor)[0]).

  Args:
    indices: 1d Tensor with integer indices which are to be set to
        indices_values.
    size: scalar with size (integer) of output Tensor.
    indices_value: values of elements specified by indices in the output
        vector.
    default_value: values of other elements in the output vector.
    dtype: data type.

  Returns:
    dense 1D Tensor of shape [size] with indices set to indices_values and
        the rest set to default_value.
  """
  size = tf.to_int32(size)
  # Values to scatter at the given indices.
  foreground = tf.ones_like(indices, dtype=dtype) * indices_value
  # Baseline vector filled with the default value.
  background = tf.ones([size], dtype=dtype) * default_value
  # dynamic_stitch lets the indexed values overwrite the baseline entries.
  return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
                           [background, foreground])
def retain_groundtruth(tensor_dict, valid_indices):
  """Retains groundtruth by valid indices.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types
      fields.InputDataFields.groundtruth_difficult
    valid_indices: a tensor with valid indices for the box-level groundtruth.

  Returns:
    a dictionary of tensors containing only the groundtruth for valid_indices.

  Raises:
    ValueError: If the shape of valid_indices is invalid.
    ValueError: field fields.InputDataFields.groundtruth_boxes is
      not present in tensor_dict.
  """
  # Accept either a rank-1 index vector or a [N, 1] column (e.g. tf.where
  # output); anything else is rejected.
  input_shape = valid_indices.get_shape().as_list()
  if not (len(input_shape) == 1 or
          (len(input_shape) == 2 and input_shape[1] == 1)):
    raise ValueError('The shape of valid_indices is invalid.')
  valid_indices = tf.reshape(valid_indices, [-1])
  valid_dict = {}
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    # Prevents reshape failure when num_boxes is 0.
    num_boxes = tf.maximum(tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
    for key in tensor_dict:
      # Box-aligned fields that can be gathered directly.
      if key in [fields.InputDataFields.groundtruth_boxes,
                 fields.InputDataFields.groundtruth_classes]:
        valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
      # Input decoder returns empty tensor when these fields are not provided.
      # Needs to reshape into [num_boxes, -1] for tf.gather() to work.
      elif key in [fields.InputDataFields.groundtruth_is_crowd,
                   fields.InputDataFields.groundtruth_area,
                   fields.InputDataFields.groundtruth_difficult,
                   fields.InputDataFields.groundtruth_label_types]:
        valid_dict[key] = tf.reshape(
            tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]),
                      valid_indices), [-1])
      # Fields that are not associated with boxes.
      else:
        valid_dict[key] = tensor_dict[key]
  else:
    raise ValueError('%s not present in input tensor dict.' % (
        fields.InputDataFields.groundtruth_boxes))
  return valid_dict
def retain_groundtruth_with_positive_classes(tensor_dict):
  """Retains only groundtruth with positive class ids.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types
      fields.InputDataFields.groundtruth_difficult

  Returns:
    a dictionary of tensors containing only the groundtruth with positive
    classes.

  Raises:
    ValueError: If groundtruth_classes tensor is not in tensor_dict.
  """
  class_field = fields.InputDataFields.groundtruth_classes
  if class_field not in tensor_dict:
    raise ValueError('`groundtruth classes` not in tensor_dict.')
  # Indices of boxes whose class label is strictly positive.
  keep_indices = tf.where(tf.greater(tensor_dict[class_field], 0))
  return retain_groundtruth(tensor_dict, keep_indices)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have
    bounding boxes.
  """
  boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  # A box row is invalid when at least one of its coordinates is NaN.
  row_has_nan = tf.greater(
      tf.reduce_sum(tf.to_int32(tf.is_nan(boxes)), reduction_indices=[1]), 0)
  valid_indices = tf.where(tf.logical_not(row_has_nan))
  return retain_groundtruth(tensor_dict, valid_indices)
def normalize_to_target(inputs,
                        target_norm_value,
                        dim,
                        epsilon=1e-7,
                        trainable=True,
                        scope='NormalizeToTarget',
                        summarize=True):
  """L2 normalizes the inputs across the specified dimension to a target norm.

  This op implements the L2 Normalization layer introduced in
  Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
  and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
  "Parsenet: Looking wider to see better." and is useful for bringing
  activations from multiple layers in a convnet to a standard scale.

  Note that the rank of `inputs` must be known and the dimension to which
  normalization is to be applied should be statically defined.

  TODO: Add option to scale by L2 norm of the entire input.

  Args:
    inputs: A `Tensor` of arbitrary size.
    target_norm_value: A float value that specifies an initial target norm or
      a list of floats (whose length must be equal to the depth along the
      dimension to be normalized) specifying a per-dimension multiplier
      after normalization.
    dim: The dimension along which the input is normalized.
    epsilon: A small value to add to the inputs to avoid dividing by zero.
    trainable: Whether the norm is trainable or not
    scope: Optional scope for variable_scope.
    summarize: Whether or not to add a tensorflow summary for the op.

  Returns:
    The input tensor normalized to the specified target norm.

  Raises:
    ValueError: If dim is smaller than the number of dimensions in 'inputs'.
    ValueError: If target_norm_value is not a float or a list of floats with
      length equal to the depth along the dimension to be normalized.
  """
  with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
    if not inputs.get_shape():
      raise ValueError('The input rank must be known.')
    input_shape = inputs.get_shape().as_list()
    input_rank = len(input_shape)
    if dim < 0 or dim >= input_rank:
      raise ValueError(
          'dim must be non-negative but smaller than the input rank.')
    if not input_shape[dim]:
      raise ValueError('input shape should be statically defined along '
                       'the specified dimension.')
    depth = input_shape[dim]
    # NOTE(review): due to and/or precedence this reads as
    # float_ok or ((list_ok and len_ok) and all_floats); confirm the grouping
    # matches the intended validation before relying on edge cases here.
    if not (isinstance(target_norm_value, float) or
            (isinstance(target_norm_value, list) and
             len(target_norm_value) == depth) and
            all([isinstance(val, float) for val in target_norm_value])):
      raise ValueError('target_norm_value must be a float or a list of floats '
                       'with length equal to the depth along the dimension to '
                       'be normalized.')
    if isinstance(target_norm_value, float):
      # A scalar target is broadcast to one entry per channel.
      initial_norm = depth * [target_norm_value]
    else:
      initial_norm = target_norm_value
    # Learnable (if trainable=True) per-channel target norm multiplier.
    target_norm = tf.contrib.framework.model_variable(
        name='weights', dtype=tf.float32,
        initializer=tf.constant(initial_norm, dtype=tf.float32),
        trainable=trainable)
    if summarize:
      mean = tf.reduce_mean(target_norm)
      # tf.Print logs the running mean of the target norm at graph execution.
      mean = tf.Print(mean, ['NormalizeToTarget:', mean])
      tf.summary.scalar(tf.get_variable_scope().name, mean)
    # L2 norm along `dim` (kept as a broadcastable axis); epsilon guards the
    # subsequent division.
    lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True))
    # Reshape the per-channel norms so they broadcast along `dim` only.
    mult_shape = input_rank * [1]
    mult_shape[dim] = depth
    return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths)
def
position_sensitive_crop_regions
(
image
,
boxes
,
box_ind
,
crop_size
,
num_spatial_bins
,
global_pool
,
extrapolation_value
=
None
):
"""Position-sensitive crop and pool rectangular regions from a feature grid.
The output crops are split into `spatial_bins_y` vertical bins
and `spatial_bins_x` horizontal bins. For each intersection of a vertical
and a horizontal bin the output values are gathered by performing
`tf.image.crop_and_resize` (bilinear resampling) on a a separate subset of
channels of the image. This reduces `depth` by a factor of
`(spatial_bins_y * spatial_bins_x)`.
When global_pool is True, this function implements a differentiable version
of position-sensitive RoI pooling used in
[R-FCN detection system](https://arxiv.org/abs/1605.06409).
When global_pool is False, this function implements a differentiable version
of position-sensitive assembling operation used in
[instance FCN](https://arxiv.org/abs/1603.08678).
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
`y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
height is mapped to `[0, image_height - 1] in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case we use `extrapolation_value` to extrapolate the input image values.
box_ind: A `Tensor` of type `int32`.
A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
The value of `box_ind[i]` specifies the image that the `i`-th box refers
to.
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
Represents the number of position-sensitive bins in y and x directions.
Both values should be >= 1. `crop_height` should be divisible by
`spatial_bins_y`, and similarly for width.
The number of image channels should be divisible by
(spatial_bins_y * spatial_bins_x).
Suggested value from R-FCN paper: [3, 3].
global_pool: A boolean variable.
If True, we perform average global pooling on the features assembled from
the position-sensitive score maps.
If False, we keep the position-pooled features without global pooling
over the spatial coordinates.
Note that using global_pool=True is equivalent to but more efficient than
running the function with global_pool=False and then performing global
average pooling.
extrapolation_value: An optional `float`. Defaults to `0`.
Value used for extrapolation, when applicable.
Returns:
position_sensitive_features: A 4-D tensor of shape
`[num_boxes, K, K, crop_channels]`,
where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
where K = 1 when global_pool is True (Average-pooled cropped regions),
and K = crop_size when global_pool is False.
Raises:
ValueError: Raised in four situations:
`num_spatial_bins` is not >= 1;
`num_spatial_bins` does not divide `crop_size`;
`(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
`bin_crop_size` is not square when global_pool=False due to the
constraint in function space_to_depth.
"""
total_bins
=
1
bin_crop_size
=
[]
for
(
num_bins
,
crop_dim
)
in
zip
(
num_spatial_bins
,
crop_size
):
if
num_bins
<
1
:
raise
ValueError
(
'num_spatial_bins should be >= 1'
)
if
crop_dim
%
num_bins
!=
0
:
raise
ValueError
(
'crop_size should be divisible by num_spatial_bins'
)
total_bins
*=
num_bins
bin_crop_size
.
append
(
crop_dim
//
num_bins
)
if
not
global_pool
and
bin_crop_size
[
0
]
!=
bin_crop_size
[
1
]:
raise
ValueError
(
'Only support square bin crop size for now.'
)
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
unstack
(
boxes
,
axis
=
1
)
spatial_bins_y
,
spatial_bins_x
=
num_spatial_bins
# Split each box into spatial_bins_y * spatial_bins_x bins.
position_sensitive_boxes
=
[]
for
bin_y
in
range
(
spatial_bins_y
):
step_y
=
(
ymax
-
ymin
)
/
spatial_bins_y
for
bin_x
in
range
(
spatial_bins_x
):
step_x
=
(
xmax
-
xmin
)
/
spatial_bins_x
box_coordinates
=
[
ymin
+
bin_y
*
step_y
,
xmin
+
bin_x
*
step_x
,
ymin
+
(
bin_y
+
1
)
*
step_y
,
xmin
+
(
bin_x
+
1
)
*
step_x
,
]
position_sensitive_boxes
.
append
(
tf
.
stack
(
box_coordinates
,
axis
=
1
))
image_splits
=
tf
.
split
(
value
=
image
,
num_or_size_splits
=
total_bins
,
axis
=
3
)
image_crops
=
[]
for
(
split
,
box
)
in
zip
(
image_splits
,
position_sensitive_boxes
):
crop
=
tf
.
image
.
crop_and_resize
(
split
,
box
,
box_ind
,
bin_crop_size
,
extrapolation_value
=
extrapolation_value
)
image_crops
.
append
(
crop
)
if
global_pool
:
# Average over all bins.
position_sensitive_features
=
tf
.
add_n
(
image_crops
)
/
len
(
image_crops
)
# Then average over spatial positions within the bins.
position_sensitive_features
=
tf
.
reduce_mean
(
position_sensitive_features
,
[
1
,
2
],
keep_dims
=
True
)
else
:
# Reorder height/width to depth channel.
block_size
=
bin_crop_size
[
0
]
if
block_size
>=
2
:
image_crops
=
[
tf
.
space_to_depth
(
crop
,
block_size
=
block_size
)
for
crop
in
image_crops
]
# Pack image_crops so that first dimension is for position-senstive boxes.
position_sensitive_features
=
tf
.
stack
(
image_crops
,
axis
=
0
)
# Unroll the position-sensitive boxes to spatial positions.
position_sensitive_features
=
tf
.
squeeze
(
tf
.
batch_to_space_nd
(
position_sensitive_features
,
block_shape
=
[
1
]
+
num_spatial_bins
,
crops
=
tf
.
zeros
((
3
,
2
),
dtype
=
tf
.
int32
)),
squeeze_dims
=
[
0
])
# Reorder back the depth channel.
if
block_size
>=
2
:
position_sensitive_features
=
tf
.
depth_to_space
(
position_sensitive_features
,
block_size
=
block_size
)
return
position_sensitive_features
def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
                                     image_width):
  """Transforms the box masks back to full image masks.

  Embeds masks in bounding boxes of larger masks whose shapes correspond to
  image shape.

  Args:
    box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
    boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
      corners. Row i contains [ymin, xmin, ymax, xmax] of the box
      corresponding to mask i. Note that the box corners are in
      normalized coordinates.
    image_height: Image height. The output mask will have the same height as
      the image height.
    image_width: Image width. The output mask will have the same width as the
      image width.

  Returns:
    A tf.float32 tensor of size [num_masks, image_height, image_width].
  """
  # TODO: Make this a public function.
  def transform_boxes_relative_to_boxes(boxes, reference_boxes):
    # Re-expresses `boxes` in the coordinate frame of `reference_boxes`,
    # where each reference box spans the unit square.
    # NOTE(review): divides by (max_corner - min_corner); a zero-area
    # reference box would yield inf/nan — callers presumably pass
    # non-degenerate boxes. TODO confirm.
    boxes = tf.reshape(boxes, [-1, 2, 2])
    min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
    max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
    transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
    return tf.reshape(transformed_boxes, [-1, 4])

  # Add a trailing channel dimension so crop_and_resize sees a 4-D image
  # batch of shape [num_masks, mask_height, mask_width, 1].
  box_masks = tf.expand_dims(box_masks, axis=3)
  num_boxes = tf.shape(box_masks)[0]
  # One [0, 0, 1, 1] (whole-image) box per mask.
  unit_boxes = tf.concat(
      [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
  # The whole-image extent expressed in each mask's own box frame;
  # cropping mask i with reverse_boxes[i] pastes it back at its image
  # location (crop_and_resize extrapolates 0.0 outside the mask).
  reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
  image_masks = tf.image.crop_and_resize(image=box_masks,
                                         boxes=reverse_boxes,
                                         box_ind=tf.range(num_boxes),
                                         crop_size=[image_height, image_width],
                                         extrapolation_value=0.0)
  # Drop the channel dimension added above.
  return tf.squeeze(image_masks, axis=3)
object_detection/utils/ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
ops
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
  """Tests for ops.normalized_to_image_coordinates."""

  def test_normalized_to_image_coordinates(self):
    # Two normalized [ymin, xmin, ymax, xmax] boxes scaled onto a 4x4
    # image (batch of 1, 3 channels).
    boxes_placeholder = tf.placeholder(tf.float32, shape=(None, 1, 4))
    image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
    absolute_boxes = ops.normalized_to_image_coordinates(
        boxes_placeholder, image_shape, parallel_iterations=2)

    boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
                         [[0.5, 0.5, 1.0, 1.0]]])
    expected = np.array([[[0, 0, 4, 4]],
                         [[2, 2, 4, 4]]])
    with self.test_session() as sess:
      absolute_boxes_out = sess.run(
          absolute_boxes, feed_dict={boxes_placeholder: boxes_np})

    self.assertAllEqual(absolute_boxes_out, expected)
class MeshgridTest(tf.test.TestCase):
  """Tests for ops.meshgrid."""

  def test_meshgrid_numpy_comparison(self):
    """For plain 1-D vectors the op must match np.meshgrid exactly."""
    vec_x = np.arange(4)
    vec_y = np.arange(6)
    expected_x, expected_y = np.meshgrid(vec_x, vec_y)
    tf_x, tf_y = ops.meshgrid(vec_x, vec_y)
    with self.test_session() as sess:
      actual_x, actual_y = sess.run([tf_x, tf_y])
      self.assertAllEqual(actual_x, expected_x)
      self.assertAllEqual(actual_y, expected_y)

  def test_meshgrid_multidimensional(self):
    """Multidimensional inputs produce grids of shape y.shape + x.shape."""
    np.random.seed(18)
    arr_x = np.random.rand(4, 1, 2).astype(np.float32)
    arr_y = np.random.rand(2, 3).astype(np.float32)

    tf_x, tf_y = ops.meshgrid(arr_x, arr_y)

    # Static shapes must be fully inferred at graph construction time.
    full_shape = list(arr_y.shape) + list(arr_x.shape)
    self.assertEqual(tf_x.get_shape().as_list(), full_shape)
    self.assertEqual(tf_y.get_shape().as_list(), full_shape)
    with self.test_session() as sess:
      actual_x, actual_y = sess.run([tf_x, tf_y])

    # Runtime shapes agree with the static ones.
    self.assertEqual(actual_x.shape, tuple(full_shape))
    self.assertEqual(actual_y.shape, tuple(full_shape))

    # Spot-check elements: the x-grid repeats x across y's axes and vice
    # versa. These are float equality tests, but the meshgrid op should
    # not introduce rounding.
    spot_checks = [((3, 0, 0), (1, 2)),
                   ((2, 0, 1), (0, 0)),
                   ((0, 0, 0), (1, 1))]
    for x_index, y_index in spot_checks:
      self.assertEqual(actual_x[y_index + x_index], arr_x[x_index])
      self.assertEqual(actual_y[y_index + x_index], arr_y[y_index])
class OpsTestPadToMultiple(tf.test.TestCase):
  """Tests for ops.pad_to_multiple.

  The three original tests repeated identical graph-building boilerplate;
  it is factored into _padded_shape (behavior of each test is unchanged).
  """

  def _padded_shape(self, multiple):
    """Pads a fixed 1x2x2x1 zero tensor to `multiple`; returns the shape.

    Args:
      multiple: Integer multiple passed through to ops.pad_to_multiple.

    Returns:
      The concrete (tuple) shape of the padded tensor after evaluation.
    """
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, multiple)
    with self.test_session() as sess:
      return sess.run(padded_tensor).shape

  def test_zero_padding(self):
    # multiple=1 never requires padding.
    self.assertEqual((1, 2, 2, 1), self._padded_shape(1))

  def test_no_padding(self):
    # Spatial dims (2, 2) are already a multiple of 2 — shape unchanged.
    self.assertEqual((1, 2, 2, 1), self._padded_shape(2))

  def test_padding(self):
    # Spatial dims (2, 2) are padded up to the next multiple of 4.
    self.assertEqual((1, 4, 4, 1), self._padded_shape(4))
class OpsTestPaddedOneHotEncoding(tf.test.TestCase):
  """Tests for ops.padded_one_hot_encoding."""

  def test_correct_one_hot_tensor_with_no_pad(self):
    # left_pad=0 is a plain one-hot encoding of width `depth`.
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
    expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
                                [0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_one(self):
    # One zero column is prepended, shifting every hot index right by 1.
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
    expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_three(self):
    # Three zero columns are prepended (total width depth + 3 = 9).
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
    expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_padded_one_hot_tensor_with_empty_indices(self):
    # Empty indices produce an empty [0, depth + pad] tensor, keeping the
    # padded width in the shape.
    depth = 6
    pad = 2
    indices = tf.constant([])
    one_hot_tensor = ops.padded_one_hot_encoding(
        indices, depth=depth, left_pad=pad)
    expected_tensor = np.zeros((0, depth + pad))
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_return_none_on_zero_depth(self):
    # depth=0 is a documented degenerate case: the op returns None rather
    # than a tensor.
    indices = tf.constant([1, 2, 3, 4, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
    self.assertEqual(one_hot_tensor, None)

  def test_raise_value_error_on_rank_two_input(self):
    # Only rank-1 index vectors are accepted.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)

  def test_raise_value_error_on_negative_pad(self):
    # left_pad must be a non-negative integer.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)

  def test_raise_value_error_on_float_pad(self):
    # left_pad must be integral, not float.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)

  def test_raise_value_error_on_float_depth(self):
    # depth must be integral, not float.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)
class OpsDenseToSparseBoxesTest(tf.test.TestCase):
  """Tests for ops.dense_to_sparse_boxes."""

  def test_return_all_boxes_when_all_input_boxes_are_valid(self):
    # Three valid boxes over four classes with per-class counts
    # [1, 0, 0, 2]: one box in class 0 and two in class 3.
    num_classes = 4
    num_valid_boxes = 3
    code_size = 4
    loc_input = tf.placeholder(tf.float32,
                               shape=(num_valid_boxes, code_size))
    counts_input = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        loc_input, counts_input, num_classes)

    random_locations = np.random.uniform(size=[num_valid_boxes, code_size])
    feed_dict = {
        loc_input: random_locations,
        counts_input: np.array([1, 0, 0, 2], dtype=np.int32),
    }
    expected_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      locations_out, classes_out = sess.run(
          [box_locations, box_classes], feed_dict=feed_dict)
      # All input rows are valid, so locations pass through unchanged.
      self.assertAllClose(locations_out, random_locations,
                          rtol=1e-6, atol=1e-6)
      self.assertAllEqual(classes_out, expected_classes)

  def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
    # Ten candidate rows but only three are valid per the class counts
    # [1, 0, 0, 2]; the trailing seven rows must be discarded.
    num_classes = 4
    num_valid_boxes = 3
    num_boxes = 10
    code_size = 4
    loc_input = tf.placeholder(tf.float32, shape=(num_boxes, code_size))
    counts_input = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        loc_input, counts_input, num_classes)

    random_locations = np.random.uniform(size=[num_boxes, code_size])
    feed_dict = {
        loc_input: random_locations,
        counts_input: np.array([1, 0, 0, 2], dtype=np.int32),
    }
    expected_locations = random_locations[:num_valid_boxes]
    expected_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      locations_out, classes_out = sess.run(
          [box_locations, box_classes], feed_dict=feed_dict)
      self.assertAllClose(locations_out, expected_locations,
                          rtol=1e-6, atol=1e-6)
      self.assertAllEqual(classes_out, expected_classes)
class OpsTestIndicesToDenseVector(tf.test.TestCase):
  """Tests for ops.indices_to_dense_vector."""

  def test_indices_to_dense_vector(self):
    # Default behavior: selected positions become 1., everything else 0.,
    # with float32 output.
    size = 10000
    num_indices = np.random.randint(size)
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_size_at_inference(self):
    # `size` is only known at run time, supplied as tf.shape of a fed
    # placeholder rather than a Python integer.
    size = 5000
    num_indices = 250
    all_indices = np.arange(size)
    rand_indices = np.random.permutation(all_indices)[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_all_indices = tf.placeholder(tf.int32)
    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices,
                                            tf.shape(tf_all_indices)[0])
    feed_dict = {tf_all_indices: all_indices}

    with self.test_session() as sess:
      output = sess.run(indicator, feed_dict=feed_dict)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_int(self):
    # Integer output via the dtype argument (indices_value passed
    # positionally as 1).
    size = 500
    num_indices = 25
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.int64)
    expected_output[rand_indices] = 1

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices, size, 1, dtype=tf.int64)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_custom_values(self):
    # Custom fill values for selected (indices_value) and unselected
    # (default_value) positions.
    size = 100
    num_indices = 10
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
    indices_value = np.random.rand(1)
    default_value = np.random.rand(1)

    expected_output = np.float32(np.ones(size) * default_value)
    expected_output[rand_indices] = indices_value

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices,
        size,
        indices_value=indices_value,
        default_value=default_value)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllClose(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_all_indices_as_input(self):
    # Every position selected: the result is all ones.
    size = 500
    num_indices = 500
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.ones(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_empty_indices_as_input(self):
    # No indices selected: the result is all zeros.
    size = 500
    rand_indices = []

    expected_output = np.zeros(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)
class GroundtruthFilterTest(tf.test.TestCase):
  """Tests for ops.retain_groundtruth.

  NOTE: the original tests built feed arrays with the deprecated
  ``np.float`` / ``np.bool`` / ``np.string_`` aliases, which were removed
  in NumPy 1.24 / 2.0. They are replaced with the exact equivalents the
  aliases pointed at (``float``, ``bool``, ``np.bytes_``), so the fed
  arrays are byte-identical to before.
  """

  def test_filter_groundtruth(self):
    # Retaining index [0] must keep only the first entry of every field;
    # the image itself passes through untouched.
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.bytes_),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE']
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      # Float-valued fields use approximate comparison.
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      # Integer / bool / string fields must match exactly.
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_missing_fields(self):
    # Only a subset of groundtruth fields is supplied; filtering must
    # operate on whatever fields are present.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes
    }
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1]
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_fields(self):
    # Area/difficult are fed as empty vectors; they must stay empty after
    # filtering while the populated fields are filtered normally.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [],
        fields.InputDataFields.groundtruth_difficult: []
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_groundtruth_boxes(self):
    # With no groundtruth at all, every filtered field keeps length 0 and
    # boxes keep their trailing coordinate dimension of 4.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    feed_dict = {
        input_boxes: np.array([], dtype=float).reshape(0, 4),
        input_classes: np.array([], dtype=np.int32),
        input_is_crowd: np.array([], dtype=bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        valid_indices: np.array([], dtype=np.int32)
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in input_tensors:
        if key == fields.InputDataFields.groundtruth_boxes:
          self.assertAllEqual([0, 4], output_tensors[key].shape)
        else:
          self.assertAllEqual([0], output_tensors[key].shape)
class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
  """Tests for ops.retain_groundtruth_with_positive_classes.

  NOTE: deprecated ``np.float`` / ``np.bool`` / ``np.string_`` aliases
  (removed in NumPy 1.24 / 2.0) are replaced with the exact equivalents
  they pointed at (``float``, ``bool``, ``np.bytes_``); the fed arrays
  are unchanged. An unused ``valid_indices`` placeholder that was fed but
  never consumed by the op under test has also been removed.
  """

  def test_filter_groundtruth_with_positive_classes(self):
    # The second entry has class 0 (non-positive) and must be dropped
    # from every groundtruth field; the image passes through untouched.
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types
    }
    output_tensors = ops.retain_groundtruth_with_positive_classes(
        input_tensors)
    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 0], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.bytes_)
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE']
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      # Float-valued fields use approximate comparison.
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      # Integer / bool / string fields must match exactly.
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])
class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):
  """Tests for ops.filter_groundtruth_with_nan_box_coordinates."""

  def test_filter_groundtruth_with_nan_box_coordinates(self):
    # The first entry has all-NaN box coordinates and must be dropped
    # from every field; the second entry is kept intact.
    nan_box = [np.nan, np.nan, np.nan, np.nan]
    good_box = [0.2, 0.4, 0.1, 0.8]
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: [nan_box, good_box],
        fields.InputDataFields.groundtruth_classes: [1, 2],
        fields.InputDataFields.groundtruth_is_crowd: [False, True],
        fields.InputDataFields.groundtruth_area: [100.0, 238.7]
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [good_box],
        fields.InputDataFields.groundtruth_classes: [2],
        fields.InputDataFields.groundtruth_is_crowd: [True],
        fields.InputDataFields.groundtruth_area: [238.7]
    }
    filtered = ops.filter_groundtruth_with_nan_box_coordinates(input_tensors)
    with self.test_session() as sess:
      filtered = sess.run(filtered)
      for float_key in (fields.InputDataFields.groundtruth_boxes,
                        fields.InputDataFields.groundtruth_area):
        self.assertAllClose(expected_tensors[float_key], filtered[float_key])
      for exact_key in (fields.InputDataFields.groundtruth_classes,
                        fields.InputDataFields.groundtruth_is_crowd):
        self.assertAllEqual(expected_tensors[exact_key], filtered[exact_key])
class OpsTestNormalizeToTarget(tf.test.TestCase):
  """Tests for ops.normalize_to_target."""

  def test_create_normalize_to_target(self):
    # Checks the op and variable names created under the
    # 'NormalizeToTarget' scope.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
      var_name = tf.contrib.framework.get_variables()[0].name
      self.assertEqual(var_name, 'NormalizeToTarget/weights:0')

  def test_invalid_dim(self):
    # dim must lie within the input rank (here rank 4, so 10 is invalid).
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 10
    with self.assertRaisesRegexp(
        ValueError,
        'dim must be non-negative but smaller than the input rank.'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_invalid_target_norm_values(self):
    # A list target must have one value per channel of `dim` (3 here), so
    # a two-element list is rejected.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = [4.0, 4.0]
    dim = 3
    with self.assertRaisesRegexp(
        ValueError, 'target_norm_value must be a float or a list of floats'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_correct_output_shape(self):
    # Normalization is element-wise scaling, so the shape is preserved.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.get_shape().as_list(),
                       inputs.get_shape().as_list())

  def test_correct_initial_output_values(self):
    # With freshly initialized weights, each vector along `dim` is scaled
    # to the target norm: L2 norms along dim 3 are 5, 25, 13 and 1, so
    # every vector is multiplied by 10/norm.
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = 10.0
    dim = 3
    expected_output = [[[[30 / 5.0, 40 / 5.0],
                         [70 / 25.0, 240 / 25.0]],
                        [[50 / 13.0, -120 / 13.0],
                         [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)

  def test_multiple_target_norm_values(self):
    # Per-channel targets: channel 0 scaled to norm 10, channel 1 to 20.
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = [10.0, 20.0]
    dim = 3
    expected_output = [[[[30 / 5.0, 80 / 5.0],
                         [70 / 25.0, 480 / 25.0]],
                        [[50 / 13.0, -240 / 13.0],
                         [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)
class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):
  """Tests for ops.position_sensitive_crop_regions.

  Position-sensitive crop-and-pool (as used in R-FCN) splits the channel
  dimension into num_spatial_bins[0] * num_spatial_bins[1] groups and crops
  each spatial bin of a box from its own channel group.

  Fix over the original: the original built constant images with
  `range(1, n) * k`, which only works on Python 2 where `range` returns a
  list. All such expressions are wrapped in `list(...)` so the tests also run
  on Python 3; behavior on Python 2 is unchanged.
  """

  def test_position_sensitive(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]

    # First channel is 1's, second channel is 2's, etc.
    # list() is required under Python 3, where range() is not a list and
    # cannot be repeated with `*`.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((2, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    # The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
    # before averaging.
    expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop_and_pool = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)

      self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_equal_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [1, 3, 3, 4]
    crop_size = [2, 2]

    # list() keeps the constant construction Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 3 + 1)), dtype=tf.float32,
                        shape=[1, 3, 3, 1])
    tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
    boxes = tf.random_uniform((3, 4))
    box_ind = tf.constant([0, 0, 0], dtype=tf.int32)

    # All channels are equal so position-sensitive crop and resize should
    # work as the usual crop and resize for just one channel.
    crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        tiled_image, boxes, box_ind, crop_size, num_spatial_bins,
        global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [2, 2]

    image = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((6, 4))
    box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # When a single bin is used, position-sensitive crop and pool should be
    # the same as non-position sensitive crop and pool.
    crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_raise_value_error_on_num_bins_less_than_one(self):
    num_spatial_bins = [1, -1]
    image_shape = [1, 1, 1, 2]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_crop_size(self):
    num_spatial_bins = [2, 3]
    image_shape = [1, 1, 1, 6]
    crop_size = [3, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(
        ValueError, 'crop_size should be divisible by num_spatial_bins'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_num_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [1, 1, 1, 5]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    # 5 channels cannot be split evenly into 2 * 2 = 4 bin groups.
    with self.assertRaisesRegexp(
        ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_position_sensitive_with_global_pool_false(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.expand_dims(
        np.tile(np.array([[1, 2],
                          [3, 4],
                          [5, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.expand_dims(
        np.tile(
            np.array([[1, 1, 2, 2],
                      [1, 1, 2, 2],
                      [3, 3, 4, 4],
                      [3, 3, 4, 4],
                      [5, 5, 6, 6],
                      [5, 5, 6, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
      with self.test_session() as sess:
        output = sess.run(ps_crop)

      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
    num_spatial_bins = [2, 2]
    image_shape = [2, 2, 2, 4]
    crop_size = [2, 2]

    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2, dtype=tf.float32,
                        shape=image_shape)

    # First box contains whole image, and second box contains only first row.
    boxes = tf.constant(np.array([[0., 0., 1., 1.],
                                  [0., 0., 0.5, 1.]]), dtype=tf.float32)
    box_ind = tf.constant([0, 1], dtype=tf.int32)

    expected_output = []

    # Expected output, when the box containing whole image.
    expected_output.append(
        np.reshape(np.array([[4, 7],
                             [10, 13]]),
                   (1, 2, 2, 1))
    )

    # Expected output, when the box containing only first row.
    expected_output.append(
        np.reshape(np.array([[3, 6],
                             [7, 10]]),
                   (1, 2, 2, 1))
    )
    expected_output = np.concatenate(expected_output, axis=0)

    ps_crop = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      output = sess.run(ps_crop)
      self.assertAllEqual(output, expected_output)

  def test_position_sensitive_with_global_pool_false_and_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [1, 1]

    image = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((6, 4))
    box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # Since single_bin is used and crop_size = [1, 1] (i.e., no crop resize),
    # the outputs are the same whatever the global_pool value is.
    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
    ps_crop = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
      self.assertAllClose(pooled_output, unpooled_output)

  def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(np.array([[1, 2],
                              [3, 4],
                              [5, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(
                np.array([[1, 1, 2, 2],
                          [1, 1, 2, 2],
                          [3, 3, 4, 4],
                          [3, 3, 4, 4],
                          [5, 5, 6, 6],
                          [5, 5, 6, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]

      # Perform global_pooling after running the function with
      # global_pool=False.
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
      ps_crop_and_pool = tf.reduce_mean(
          ps_crop, reduction_indices=(1, 2), keep_dims=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)

      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_raise_value_error_on_non_square_block_size(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    crop_size = [6, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(
        ValueError, 'Only support square bin crop size for now.'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
  """Tests for ops.reframe_box_masks_to_image_masks.

  The op projects a mask expressed in box coordinates back into full-image
  coordinates for a given normalized box.
  """

  def testZeroImageOnEmptyMask(self):
    # An all-zero box mask must map to an all-zero image mask regardless of
    # the box extent.
    box_masks = tf.constant([[[0, 0],
                              [0, 0]]], dtype=tf.float32)
    boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testMaskIsCenteredInImageWhenBoxIsCentered(self):
    # A full box mask over the centered box [0.25, 0.25, 0.75, 0.75] should
    # light up exactly the central 2x2 patch of a 4x4 image.
    box_masks = tf.constant([[[1, 1],
                              [1, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 1, 1, 0],
                                         [0, 1, 1, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testMaskOffCenterRemainsOffCenterInImage(self):
    # A diagonal mask in an off-center box stays off-center; the fractional
    # expected values come from bilinear resampling at the box boundary.
    box_masks = tf.constant([[[1, 0],
                              [0, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array(
        [[[0, 0, 0, 0],
          [0, 0, 0.6111111, 0.16666669],
          [0, 0, 0.3888889, 0.83333337],
          [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)
# Standard TensorFlow test entry point: discovers and runs the TestCase
# classes defined above.
if __name__ == '__main__':
  tf.test.main()
object_detection/utils/per_image_evaluation.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Supression is used by default. Multi class
detection is supported by default.
"""
import
numpy
as
np
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_list_ops
class PerImageEvaluation(object):
  """Evaluate detection result of a single image.

  Computes per-class true/false-positive labels (after non-maximum
  suppression) and a per-class CorLoc indicator for one image's detections
  against its ground truth.
  """

  def __init__(self,
               num_groundtruth_classes,
               matching_iou_threshold=0.5,
               nms_iou_threshold=0.3,
               nms_max_output_boxes=50):
    """Initialized PerImageEvaluation by evaluation parameters.

    Args:
      num_groundtruth_classes: Number of ground truth object classes
      matching_iou_threshold: A ratio of area intersection to union, which is
        the threshold to consider whether a detection is true positive or not
      nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
      nms_max_output_boxes: Number of maximum output boxes in NMS.
    """
    self.matching_iou_threshold = matching_iou_threshold
    self.nms_iou_threshold = nms_iou_threshold
    self.nms_max_output_boxes = nms_max_output_boxes
    self.num_groundtruth_classes = num_groundtruth_classes

  def compute_object_detection_metrics(self, detected_boxes, detected_scores,
                                       detected_class_labels,
                                       groundtruth_boxes,
                                       groundtruth_class_labels,
                                       groundtruth_is_difficult_lists):
    """Compute Object Detection related metrics from a single image.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A list of C float numpy arrays. Each numpy array is of
        shape [K, 1], representing K scores detected with object class
        label c
      tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
        is of shape [K, 1], representing K True/False positive label of
        object instances detected with class label c
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance being correctly detected in the image
    """
    # Degenerate boxes (non-positive height or width) are dropped up front so
    # neither the tp/fp nor the CorLoc computation sees them.
    detected_boxes, detected_scores, detected_class_labels = (
        self._remove_invalid_boxes(detected_boxes, detected_scores,
                                   detected_class_labels))
    scores, tp_fp_labels = self._compute_tp_fp(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_is_difficult_lists)
    is_class_correctly_detected_in_image = self._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    return scores, tp_fp_labels, is_class_correctly_detected_in_image

  def _compute_cor_loc(self, detected_boxes, detected_scores,
                       detected_class_labels, groundtruth_boxes,
                       groundtruth_class_labels):
    """Compute CorLoc score for object detection result.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth

    Returns:
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance being correctly detected in the image
    """
    is_class_correctly_detected_in_image = np.zeros(
        self.num_groundtruth_classes, dtype=int)
    # Evaluate each class independently on its own detections/ground truth.
    for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          groundtruth_class_labels == i, :]
      detected_boxes_at_ith_class = detected_boxes[
          detected_class_labels == i, :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      is_class_correctly_detected_in_image[i] = (
          self._compute_is_aclass_correctly_detected_in_image(
              detected_boxes_at_ith_class, detected_scores_at_ith_class,
              gt_boxes_at_ith_class))
    return is_class_correctly_detected_in_image

  def _compute_is_aclass_correctly_detected_in_image(
      self, detected_boxes, detected_scores, groundtruth_boxes):
    """Compute CorLoc score for a single class.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
        coordinates
      detected_scores: A 1-d numpy array of length N representing
        classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
        truth box coordinates

    Returns:
      is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether
        a class is correctly detected in the image or not
    """
    if detected_boxes.size > 0:
      if groundtruth_boxes.size > 0:
        # CorLoc only considers the single highest-scoring detection: it
        # counts 1 iff that detection overlaps any ground truth box enough.
        max_score_id = np.argmax(detected_scores)
        detected_boxlist = np_box_list.BoxList(
            np.expand_dims(detected_boxes[max_score_id, :], axis=0))
        gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
        iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
        if np.max(iou) >= self.matching_iou_threshold:
          return 1
    return 0

  def _compute_tp_fp(self, detected_boxes, detected_scores,
                     detected_class_labels, groundtruth_boxes,
                     groundtruth_class_labels, groundtruth_is_difficult_lists):
    """Labels true/false positives of detections of an image across all classes.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      result_scores: A list of float numpy arrays. Each numpy array is of
        shape [K, 1], representing K scores detected with object class
        label c
      result_tp_fp_labels: A list of boolean numpy array. Each numpy array is
        of shape [K, 1], representing K True/False positive label of object
        instances detected with class label c
    """
    result_scores = []
    result_tp_fp_labels = []
    # Label each class independently; lists are indexed by class id.
    for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          (groundtruth_class_labels == i), :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[groundtruth_class_labels == i])
      detected_boxes_at_ith_class = detected_boxes[
          (detected_class_labels == i), :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
          detected_boxes_at_ith_class, detected_scores_at_ith_class,
          gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class)
      result_scores.append(scores)
      result_tp_fp_labels.append(tp_fp_labels)
    return result_scores, result_tp_fp_labels

  def _remove_invalid_boxes(self, detected_boxes, detected_scores,
                            detected_class_labels):
    # Keep only boxes with strictly positive height (y_min < y_max) and
    # width (x_min < x_max); scores and labels are filtered in lockstep.
    valid_indices = np.logical_and(
        detected_boxes[:, 0] < detected_boxes[:, 2],
        detected_boxes[:, 1] < detected_boxes[:, 3])
    return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
            detected_class_labels[valid_indices])

  def _compute_tp_fp_for_single_class(self, detected_boxes, detected_scores,
                                      groundtruth_boxes,
                                      groundtruth_is_difficult_list):
    """Labels boxes detected with the same class from the same image as tp/fp.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
        coordinates
      detected_scores: A 1-d numpy array of length N representing
        classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
        truth box coordinates
      groundtruth_is_difficult_list: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A numpy array representing the detection scores
      tp_fp_labels: a boolean numpy array indicating whether a detection is a
        true positive.
    """
    if detected_boxes.size == 0:
      return np.array([], dtype=float), np.array([], dtype=bool)
    detected_boxlist = np_box_list.BoxList(detected_boxes)
    detected_boxlist.add_field('scores', detected_scores)
    detected_boxlist = np_box_list_ops.non_max_suppression(
        detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)

    scores = detected_boxlist.get_field('scores')

    # No ground truth for this class: every surviving detection is a false
    # positive.
    if groundtruth_boxes.size == 0:
      return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)

    gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
    iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)

    max_overlap_gt_ids = np.argmax(iou, axis=1)
    is_gt_box_detected = np.zeros(gt_boxlist.num_boxes(), dtype=bool)
    tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
    is_matched_to_difficult_box = np.zeros(
        detected_boxlist.num_boxes(), dtype=bool)
    # Greedy matching in iteration order; this assumes NMS returns boxes
    # sorted by descending score so higher-scoring detections claim a ground
    # truth box first — TODO confirm ordering in np_box_list_ops.
    for i in range(detected_boxlist.num_boxes()):
      gt_id = max_overlap_gt_ids[i]
      if iou[i, gt_id] >= self.matching_iou_threshold:
        if not groundtruth_is_difficult_list[gt_id]:
          # Only the first detection matched to a ground truth box counts as
          # a true positive; duplicates stay false positives.
          if not is_gt_box_detected[gt_id]:
            tp_fp_labels[i] = True
            is_gt_box_detected[gt_id] = True
        else:
          is_matched_to_difficult_box[i] = True
    # Detections matched to "difficult" ground truth are excluded from the
    # returned arrays entirely (neither tp nor fp).
    return scores[~is_matched_to_difficult_box], tp_fp_labels[
        ~is_matched_to_difficult_box]
object_detection/utils/per_image_evaluation_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.per_image_evaluation."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
per_image_evaluation
class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
  """Single-class tp/fp labeling when some ground truth boxes are difficult.

  Detections matched to a difficult ground truth box are removed from the
  returned score/label arrays rather than counted as tp or fp.
  """

  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS,
    # so only score-sorting affects the outputs.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
        nms_max_output_boxes)

    self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
    self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
    self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
                                      dtype=float)

  def test_match_to_not_difficult_box(self):
    # Only the second gt box is difficult; the detection matching the easy
    # gt box [0, 0, 1, 1] becomes the single true positive.
    groundtruth_groundtruth_is_difficult_list = np.array([False, True],
                                                         dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    # Scores come back sorted by descending confidence.
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_match_to_difficult_box(self):
    # The first gt box is difficult: the detection matching it is dropped
    # from the outputs (only two entries returned), and nothing is a tp.
    groundtruth_groundtruth_is_difficult_list = np.array([True, False],
                                                         dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  """Single-class tp/fp labeling with no difficult ground truth boxes.

  Uses two evaluators that differ only in matching IOU threshold (0.5 vs
  0.1) to show how the threshold changes which detections count as tp.
  """

  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold1 = 0.5
    matching_iou_threshold2 = 0.1
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.eval1 = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
        nms_max_output_boxes)

    self.eval2 = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
        nms_max_output_boxes)

    self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
    self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)

  def test_no_true_positives(self):
    # Ground truth is far from every detection, so all labels are False.
    groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_one_true_positives_with_large_iou_threshold(self):
    # At threshold 0.5 only the exact-match box [0, 0, 1, 1] (score 0.6)
    # reaches sufficient IOU; the higher-scoring [0, 0, 2, 2] overlaps at
    # only IOU 0.25.
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_one_true_positives_with_very_small_iou_threshold(self):
    # At threshold 0.1 the highest-scoring detection claims the ground truth
    # box first, so the exact-match detection becomes a duplicate (False).
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_two_true_positives_with_large_iou_threshold(self):
    # Two ground truth boxes: [0, 0, 1, 1] matches the 0.6-score detection
    # exactly, and [0, 0, 3.5, 3.5] overlaps [0, 0, 3, 3] above 0.5.
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]],
                                 dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class MultiClassesTpFpTest(tf.test.TestCase):
  """End-to-end tp/fp computation across multiple classes in one call.

  Fix over the original: the difficulty mask was built with dtype=float,
  while compute_object_detection_metrics documents a boolean numpy array.
  The float version only worked because 0.0 is falsy; dtype=bool matches
  the API contract with identical test behavior.
  """

  def test_tp_fp(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.5
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    # Rows 1, 3 and 4 are degenerate (y_min >= y_max or x_min >= x_max) and
    # must be filtered out by _remove_invalid_boxes before labeling.
    detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
                               [5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
                              dtype=float)
    detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
    detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 2], dtype=int)
    # Boolean per the documented API contract (was dtype=float).
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
    scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_groundtruth_is_difficult_list)

    # One valid detection per class, each with score 0.8. Class 0 and class 2
    # match their ground truth boxes; class 1 has no ground truth.
    expected_scores = [np.array([0.8], dtype=float)] * 3
    expected_tp_fp_labels = [np.array([True]), np.array([False]),
                             np.array([True])]
    for i in range(len(expected_scores)):
      self.assertTrue(np.allclose(expected_scores[i], scores[i]))
      self.assertTrue(np.array_equal(expected_tp_fp_labels[i],
                                     tp_fp_labels[i]))
class CorLocTest(tf.test.TestCase):
  """Tests for PerImageEvaluation._compute_cor_loc.

  CorLoc only considers each class's single highest-scoring detection and
  checks whether it overlaps any ground truth box of that class above the
  matching IOU threshold.
  """

  def test_compute_corloc_with_normal_iou_threshold(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
                               [0, 0, 5, 5]], dtype=float)
    detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
    detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 0, 2], dtype=int)

    is_class_correctly_detected_in_image = eval1._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    # Class 0: top detection matches exactly. Class 1: no ground truth.
    # Class 2: [0, 0, 5, 5] vs [0, 0, 6, 6] has IOU 25/36 >= 0.5.
    expected_result = np.array([1, 0, 1], dtype=int)
    self.assertTrue(np.array_equal(expected_result,
                                   is_class_correctly_detected_in_image))

  def test_compute_corloc_with_very_large_iou_threshold(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.9
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
                               [0, 0, 5, 5]], dtype=float)
    detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
    detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 0, 2], dtype=int)

    is_class_correctly_detected_in_image = eval1._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    # At threshold 0.9 only class 0's exact match survives; class 2's IOU of
    # 25/36 is no longer sufficient.
    expected_result = np.array([1, 0, 0], dtype=int)
    self.assertTrue(np.array_equal(expected_result,
                                   is_class_correctly_detected_in_image))
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
object_detection/utils/shape_utils.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import
tensorflow
as
tf
def _is_tensor(t):
  """Returns a boolean indicating whether the input is a tensor.

  Args:
    t: the input to be tested.

  Returns:
    True if t is a tf.Tensor, tf.SparseTensor or tf.Variable, else False.
  """
  tensor_types = (tf.Tensor, tf.SparseTensor, tf.Variable)
  return isinstance(t, tensor_types)
def _set_dim_0(t, d0):
  """Statically fixes the 0-th dimension of the input tensor.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    d0: an integer indicating the 0-th dimension of the input tensor.

  Returns:
    the tensor t with its 0-th dimension set to d0 statically.
  """
  new_shape = t.get_shape().as_list()
  new_shape[0] = d0
  t.set_shape(new_shape)
  return t
def pad_tensor(t, length):
  """Pads the input tensor with 0s along the first dimension up to the length.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after padding, assuming length >= t.shape[0].

  Returns:
    padded_t: the padded tensor, whose first dimension is length. If the length
      is an integer, the first dimension of padded_t is set to length
      statically.
  """
  dynamic_shape = tf.shape(t)
  num_pad_rows = tf.expand_dims(length - dynamic_shape[0], 0)
  # For rank > 1 the zero-padding block must carry the same trailing
  # dimensions as t; for rank 1 its shape is just [length - t.shape[0]].
  pad_block_shape = tf.cond(
      tf.greater(tf.rank(t), 1),
      lambda: tf.concat([num_pad_rows, dynamic_shape[1:]], 0),
      lambda: num_pad_rows)
  padded_t = tf.concat([t, tf.zeros(pad_block_shape, dtype=t.dtype)], 0)
  if not _is_tensor(length):
    padded_t = _set_dim_0(padded_t, length)
  return padded_t
def clip_tensor(t, length):
  """Clips the input tensor along the first dimension up to the length.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after clipping, assuming length <= t.shape[0].

  Returns:
    clipped_t: the clipped tensor, whose first dimension is length. If the
      length is an integer, the first dimension of clipped_t is set to length
      statically.
  """
  # Keep only the first `length` rows.
  keep_indices = tf.range(length)
  clipped_t = tf.gather(t, keep_indices)
  if _is_tensor(length):
    return clipped_t
  return _set_dim_0(clipped_t, length)
def pad_or_clip_tensor(t, length):
  """Pad or clip the input tensor along the first dimension.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after processing.

  Returns:
    processed_t: the processed tensor, whose first dimension is length. If the
      length is an integer, the first dimension of the processed tensor is set
      to length statically.
  """
  needs_clipping = tf.greater(tf.shape(t)[0], length)
  processed_t = tf.cond(
      needs_clipping,
      lambda: clip_tensor(t, length),
      lambda: pad_tensor(t, length))
  if not _is_tensor(length):
    processed_t = _set_dim_0(processed_t, length)
  return processed_t
object_detection/utils/shape_utils_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.shape_utils."""
import
tensorflow
as
tf
from
object_detection.utils
import
shape_utils
class UtilTest(tf.test.TestCase):
  """Tests for shape_utils pad_tensor/clip_tensor/pad_or_clip_tensor."""

  def test_pad_tensor_using_integer_input(self):
    t1 = tf.constant([1], dtype=tf.int32)
    pad_t1 = shape_utils.pad_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    pad_t2 = shape_utils.pad_tensor(t2, 2)

    # An integer length must set the first dimension statically.
    self.assertEqual(2, pad_t1.get_shape()[0])
    self.assertEqual(2, pad_t2.get_shape()[0])

    with self.test_session() as sess:
      pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
      self.assertAllEqual([1, 0], pad_t1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)

  def test_pad_tensor_using_tensor_input(self):
    t1 = tf.constant([1], dtype=tf.int32)
    pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2))

    with self.test_session() as sess:
      pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
      self.assertAllEqual([1, 0], pad_t1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)

  def test_clip_tensor_using_integer_input(self):
    t1 = tf.constant([1, 2, 3], dtype=tf.int32)
    clip_t1 = shape_utils.clip_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    clip_t2 = shape_utils.clip_tensor(t2, 2)

    # An integer length must set the first dimension statically.
    self.assertEqual(2, clip_t1.get_shape()[0])
    self.assertEqual(2, clip_t2.get_shape()[0])

    with self.test_session() as sess:
      clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
      self.assertAllEqual([1, 2], clip_t1_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)

  def test_clip_tensor_using_tensor_input(self):
    t1 = tf.constant([1, 2, 3], dtype=tf.int32)
    clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2))

    with self.test_session() as sess:
      clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
      self.assertAllEqual([1, 2], clip_t1_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)

  def test_pad_or_clip_tensor_using_integer_input(self):
    # Padding path: inputs shorter than the target length.
    t1 = tf.constant([1], dtype=tf.int32)
    tt1 = shape_utils.pad_or_clip_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    tt2 = shape_utils.pad_or_clip_tensor(t2, 2)
    # Clipping path: inputs longer than the target length.
    # BUG FIX: tt3/tt4 previously called shape_utils.clip_tensor directly,
    # so the clipping branch of pad_or_clip_tensor was never exercised.
    t3 = tf.constant([1, 2, 3], dtype=tf.int32)
    tt3 = shape_utils.pad_or_clip_tensor(t3, 2)
    t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    tt4 = shape_utils.pad_or_clip_tensor(t4, 2)

    self.assertEqual(2, tt1.get_shape()[0])
    self.assertEqual(2, tt2.get_shape()[0])
    self.assertEqual(2, tt3.get_shape()[0])
    self.assertEqual(2, tt4.get_shape()[0])

    with self.test_session() as sess:
      tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
          [tt1, tt2, tt3, tt4])
      self.assertAllEqual([1, 0], tt1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
      self.assertAllEqual([1, 2], tt3_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)

  def test_pad_or_clip_tensor_using_tensor_input(self):
    # Padding path: inputs shorter than the target length.
    t1 = tf.constant([1], dtype=tf.int32)
    tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2))
    # Clipping path: inputs longer than the target length.
    # BUG FIX: tt3/tt4 previously called shape_utils.clip_tensor directly,
    # so the clipping branch of pad_or_clip_tensor was never exercised.
    t3 = tf.constant([1, 2, 3], dtype=tf.int32)
    tt3 = shape_utils.pad_or_clip_tensor(t3, tf.constant(2))
    t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    tt4 = shape_utils.pad_or_clip_tensor(t4, tf.constant(2))

    with self.test_session() as sess:
      tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
          [tt1, tt2, tt3, tt4])
      self.assertAllEqual([1, 0], tt1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
      self.assertAllEqual([1, 2], tt3_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)


if __name__ == '__main__':
  tf.test.main()
object_detection/utils/static_shape.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to access TensorShape values.
The rank 4 tensor_shape must be of the form [batch_size, height, width, depth].
"""
def get_batch_size(tensor_shape):
  """Returns batch size from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 0 (the batch size).
  """
  tensor_shape.assert_has_rank(rank=4)
  batch_dim = tensor_shape[0]
  return batch_dim.value
def get_height(tensor_shape):
  """Returns height from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 1 (the height).
  """
  tensor_shape.assert_has_rank(rank=4)
  height_dim = tensor_shape[1]
  return height_dim.value
def get_width(tensor_shape):
  """Returns width from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 2 (the width).
  """
  tensor_shape.assert_has_rank(rank=4)
  width_dim = tensor_shape[2]
  return width_dim.value
def get_depth(tensor_shape):
  """Returns depth from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 3 (the depth).
  """
  tensor_shape.assert_has_rank(rank=4)
  depth_dim = tensor_shape[3]
  return depth_dim.value
object_detection/utils/static_shape_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.static_shape."""
import
tensorflow
as
tf
from
object_detection.utils
import
static_shape
class StaticShapeTest(tf.test.TestCase):
  """Tests for the static_shape accessor helpers."""

  def test_return_correct_batchSize(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(32, static_shape.get_batch_size(tensor_shape))

  def test_return_correct_height(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(299, static_shape.get_height(tensor_shape))

  def test_return_correct_width(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(384, static_shape.get_width(tensor_shape))

  def test_return_correct_depth(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(3, static_shape.get_depth(tensor_shape))

  def test_die_on_tensor_shape_with_rank_three(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384])
    # BUG FIX: all four accessor calls previously shared a single
    # assertRaises block, so only the first call ever executed (the raise
    # exits the block and the remaining calls were dead code). Each
    # accessor must independently reject a non-rank-4 shape.
    with self.assertRaises(ValueError):
      static_shape.get_batch_size(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_height(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_width(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_depth(tensor_shape)


if __name__ == '__main__':
  tf.test.main()
object_detection/utils/test_utils.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions which are convenient for unit testing."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
anchor_generator
from
object_detection.core
import
box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_predictor
from
object_detection.core
import
matcher
class MockBoxCoder(box_coder.BoxCoder):
  """Trivial `difference` BoxCoder used in unit tests."""

  @property
  def code_size(self):
    # Four values per box code.
    return 4

  def _encode(self, boxes, anchors):
    # The code is simply the elementwise difference of box and anchor
    # coordinates.
    return boxes.get() - anchors.get()

  def _decode(self, rel_codes, anchors):
    # Invert the encoding by adding the anchor coordinates back.
    decoded_boxes = rel_codes + anchors.get()
    return box_list.BoxList(decoded_boxes)
class MockBoxPredictor(box_predictor.BoxPredictor):
  """Box predictor stub that ignores its input and outputs all zeros."""

  def __init__(self, is_training, num_classes):
    super(MockBoxPredictor, self).__init__(is_training, num_classes)

  def _predict(self, image_features, num_predictions_per_location):
    # Read the static shape once instead of re-fetching it per dimension.
    feature_dims = image_features.get_shape().as_list()
    batch_size = feature_dims[0]
    num_anchors = feature_dims[1] * feature_dims[2]
    code_size = 4
    # Zero-valued term that keeps the outputs connected to image_features
    # in the graph.
    zero = tf.reduce_sum(0 * image_features)
    box_encodings = zero + tf.zeros(
        (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
    class_predictions_with_background = zero + tf.zeros(
        (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
    return {
        box_predictor.BOX_ENCODINGS: box_encodings,
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background
    }
class MockAnchorGenerator(anchor_generator.AnchorGenerator):
  """Anchor generator stub that emits all-zero anchor boxes."""

  def name_scope(self):
    return 'MockAnchorGenerator'

  def num_anchors_per_location(self):
    return [1]

  def _generate(self, feature_map_shape_list):
    # One anchor per spatial cell, summed over all feature maps.
    total_anchors = sum(
        fm_shape[0] * fm_shape[1] for fm_shape in feature_map_shape_list)
    return box_list.BoxList(tf.zeros((total_anchors, 4), dtype=tf.float32))
class MockMatcher(matcher.Matcher):
  """Matcher stub: anchor 0 matches groundtruth 0; the rest are unmatched."""

  def _match(self, similarity_matrix):
    # Fixed match vector; -1 marks an unmatched entry.
    match_results = [0, -1, -1, -1]
    return tf.constant(match_results, dtype=tf.int32)
def create_diagonal_gradient_image(height, width, depth):
  """Creates pyramid image. Useful for testing.

  For example, pyramid_image(5, 6, 1) looks like:
  # [[[ 5.  4.  3.  2.  1.  0.]
  #   [ 6.  5.  4.  3.  2.  1.]
  #   [ 7.  6.  5.  4.  3.  2.]
  #   [ 8.  7.  6.  5.  4.  3.]
  #   [ 9.  8.  7.  6.  5.  4.]]]
  Channel i holds the base gradient scaled by 10**i.

  Args:
    height: height of image
    width: width of image
    depth: depth of image

  Returns:
    pyramid image of shape [height, width, depth] and dtype float32.
  """
  row = np.arange(height)
  col = np.arange(width)[::-1]
  # Base gradient: values increase down the rows and toward the left.
  image_layer = np.expand_dims(row, 1) + col  # shape [height, width]
  # PERF FIX: build all channels with one broadcasted multiply instead of
  # concatenating inside a loop, which copied the growing image on every
  # iteration (quadratic in depth). max(depth, 1) preserves the original
  # behavior of returning a single channel for depth <= 1.
  channel_scales = np.power(10, np.arange(max(depth, 1)))
  image = image_layer[:, :, np.newaxis] * channel_scales
  return image.astype(np.float32)
def create_random_boxes(num_boxes, max_height, max_width):
  """Creates random bounding boxes of specific maximum height and width.

  Args:
    num_boxes: number of boxes.
    max_height: maximum height of boxes.
    max_width: maximum width of boxes.

  Returns:
    boxes: numpy array of shape [num_boxes, 4]. Each row is in form
        [y_min, x_min, y_max, x_max].
  """
  # Draw two candidate coordinates per axis; sorting each pair guarantees
  # min <= max in every row.
  y_a = np.random.uniform(size=(1, num_boxes)) * max_height
  y_b = np.random.uniform(size=(1, num_boxes)) * max_height
  x_a = np.random.uniform(size=(1, num_boxes)) * max_width
  x_b = np.random.uniform(size=(1, num_boxes)) * max_width

  y_min = np.minimum(y_a, y_b).ravel()
  x_min = np.minimum(x_a, x_b).ravel()
  y_max = np.maximum(y_a, y_b).ravel()
  x_max = np.maximum(x_a, x_b).ravel()

  boxes = np.stack([y_min, x_min, y_max, x_max], axis=1)
  return boxes.astype(np.float32)
object_detection/utils/test_utils_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.test_utils."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
test_utils
class TestUtilsTest(tf.test.TestCase):
  """Tests for the test_utils helper functions."""

  def test_diagonal_gradient_image(self):
    """Tests if a good pyramid image is created."""
    pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2)

    # Sanity check on the first channel only.
    expected_first_channel = np.array([[3, 2, 1, 0],
                                       [4, 3, 2, 1],
                                       [5, 4, 3, 2]], dtype=np.float32)
    self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]),
                        expected_first_channel)

    # Full check: second channel is the first scaled by 10.
    expected_image = np.array([[[3, 30], [2, 20], [1, 10], [0, 0]],
                               [[4, 40], [3, 30], [2, 20], [1, 10]],
                               [[5, 50], [4, 40], [3, 30], [2, 20]]],
                              dtype=np.float32)
    self.assertAllEqual(pyramid_image, expected_image)

  def test_random_boxes(self):
    """Tests if valid random boxes are created."""
    num_boxes = 1000
    max_height = 3
    max_width = 5
    boxes = test_utils.create_random_boxes(num_boxes, max_height, max_width)

    # Every row must satisfy y_min < y_max and x_min < x_max.
    all_true = np.ones(shape=(num_boxes)) == 1
    self.assertAllEqual(boxes[:, 0] < boxes[:, 2], all_true)
    self.assertAllEqual(boxes[:, 1] < boxes[:, 3], all_true)

    # All coordinates stay within [0, max_height] x [0, max_width].
    self.assertTrue(boxes[:, 0].min() >= 0)
    self.assertTrue(boxes[:, 1].min() >= 0)
    self.assertTrue(boxes[:, 2].max() <= max_height)
    self.assertTrue(boxes[:, 3].max() <= max_width)


if __name__ == '__main__':
  tf.test.main()
Prev
1
…
11
12
13
14
15
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment