Merge branch 'master' of github.com:tensorflow/models

f282f6ef · Alexander Gorban · 58a5da7b · a2970b03 · f282f6ef · f282f6ef
Commit f282f6ef authored Jul 05, 2017 by Alexander Gorban
20 changed files
--- a/object_detection/core/box_list.py
+++ b/object_detection/core/box_list.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List definition.
+
+BoxList represents a list of bounding boxes as tensorflow
+tensors, where each bounding box is represented as a row of 4 numbers,
+[y_min, x_min, y_max, x_max].  It is assumed that all bounding boxes
+within a given list correspond to a single image.  See also
+box_list_ops.py for common box related operations (such as area, iou, etc).
+
+Optionally, users can add additional related fields (such as weights).
+We assume the following things to be true about fields:
+* they correspond to boxes in the box_list along the 0th dimension
+* they have inferrable rank at graph construction time
+* all dimensions except for possibly the 0th can be inferred
+  (i.e., not None) at graph construction time.
+
+Some other notes:
+  * Following tensorflow conventions, we use height, width ordering,
+  and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
+  * Tensors are always provided as (flat) [N, 4] tensors.
+"""
+
+import tensorflow as tf
+
+
+class BoxList(object):
+  """Holds N boxes together with any per-box fields attached to them.
+
+  Every tensor is stored in the `data` dict keyed by field name; the corner
+  coordinates are stored under the key 'boxes'.
+  """
+
+  def __init__(self, boxes):
+    """Creates a BoxList from a tensor of box corners.
+
+    Args:
+      boxes: a tf.float32 tensor of shape [N, 4] holding box corners.
+
+    Raises:
+      ValueError: if `boxes` is not rank 2 with a last dimension of 4, or if
+        its dtype is not tf.float32.
+    """
+    static_shape = boxes.get_shape()
+    if len(static_shape) != 2 or static_shape[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    if boxes.dtype != tf.float32:
+      raise ValueError('Invalid tensor type: should be tf.float32')
+    self.data = {'boxes': boxes}
+
+  def num_boxes(self):
+    """Returns the number of boxes as a tensor.
+
+    Returns:
+      the 0th entry of the dynamic shape of the 'boxes' tensor.
+    """
+    dynamic_shape = tf.shape(self.data['boxes'])
+    return dynamic_shape[0]
+
+  def num_boxes_static(self):
+    """Returns the number of boxes known at graph construction time.
+
+    Returns:
+      The 0th static dimension of the 'boxes' tensor (integer), or None when
+        that dimension cannot be inferred at graph construction time.
+    """
+    static_shape = self.data['boxes'].get_shape()
+    return static_shape[0].value
+
+  def get_all_fields(self):
+    """Returns the names of all stored fields, 'boxes' included."""
+    return self.data.keys()
+
+  def get_extra_fields(self):
+    """Returns a list with the name of every field other than 'boxes'."""
+    return [name for name in self.data if name != 'boxes']
+
+  def add_field(self, field, field_data):
+    """Stores `field_data` under the name `field`.
+
+    Intended for data related to the boxes, such as weights or labels. An
+    existing entry with the same name is overwritten.
+
+    Args:
+      field: a string key under which the data can later be fetched.
+      field_data: a tensor containing the data to store in the BoxList.
+    """
+    self.data[field] = field_data
+
+  def has_field(self, field):
+    """Returns True if a field named `field` is stored, False otherwise."""
+    return field in self.data
+
+  def get(self):
+    """Shortcut for fetching the box coordinates.
+
+    Returns:
+      the tensor stored under 'boxes' (shape [N, 4]).
+    """
+    return self.get_field('boxes')
+
+  def set(self, boxes):
+    """Shortcut for replacing the box coordinates.
+
+    Unlike the constructor, only the shape is validated here; the dtype is
+    not checked.
+
+    Args:
+      boxes: a tensor of shape [N, 4] holding box corners.
+
+    Raises:
+      ValueError: if `boxes` is not rank 2 with a last dimension of 4.
+    """
+    static_shape = boxes.get_shape()
+    if len(static_shape) != 2 or static_shape[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    self.data['boxes'] = boxes
+
+  def get_field(self, field):
+    """Fetches the tensor stored under `field`.
+
+    Args:
+      field: name of the field to fetch; pass 'boxes' for the coordinates.
+
+    Returns:
+      the tensor stored under `field`.
+
+    Raises:
+      ValueError: if no field named `field` is stored.
+    """
+    if self.has_field(field):
+      return self.data[field]
+    raise ValueError('field ' + str(field) + ' does not exist')
+
+  def set_field(self, field, value):
+    """Overwrites an existing field.
+
+    Unlike `add_field`, this refuses to create a new field.
+
+    Args:
+      field: (string) name of the field to overwrite.
+      value: the new value for the field.
+
+    Raises:
+      ValueError: if no field named `field` is stored.
+    """
+    if not self.has_field(field):
+      raise ValueError('field %s does not exist' % field)
+    self.data[field] = value
+
+  def get_center_coordinates_and_sizes(self, scope=None):
+    """Converts corner coordinates to centers and sizes.
+
+    Args:
+      scope: name scope of the function.
+
+    Returns:
+      a list of 4 1-D tensors [ycenter, xcenter, height, width].
+    """
+    with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
+      # Transposing [N, 4] -> [4, N] lets unstack yield one vector per corner.
+      corners_by_coordinate = tf.transpose(self.get())
+      ymin, xmin, ymax, xmax = tf.unstack(corners_by_coordinate)
+      width = xmax - xmin
+      height = ymax - ymin
+      ycenter = ymin + height / 2.
+      xcenter = xmin + width / 2.
+      return [ycenter, xcenter, height, width]
+
+  def transpose_coordinates(self, scope=None):
+    """Swaps the y and x columns of the stored boxes in place.
+
+    The stored [y_min, x_min, y_max, x_max] columns are rewritten as
+    [x_min, y_min, x_max, y_max].
+
+    Args:
+      scope: name scope of the function.
+    """
+    with tf.name_scope(scope, 'transpose_coordinates'):
+      y_min, x_min, y_max, x_max = tf.split(
+          value=self.get(), num_or_size_splits=4, axis=1)
+      swapped = tf.concat([x_min, y_min, x_max, y_max], 1)
+      self.set(swapped)
+
+  def as_tensor_dict(self, fields=None):
+    """Returns the requested fields as a dictionary of tensors.
+
+    Args:
+      fields: (optional) list of field names to include. If None (default),
+        every stored field is included.
+
+    Returns:
+      tensor_dict: a dictionary mapping each requested name to its tensor.
+
+    Raises:
+      ValueError: if a requested field is not stored in this boxlist.
+    """
+    requested = self.get_all_fields() if fields is None else fields
+    tensor_dict = {}
+    for name in requested:
+      if not self.has_field(name):
+        raise ValueError('boxlist must contain all specified fields')
+      tensor_dict[name] = self.get_field(name)
+    return tensor_dict
--- a/object_detection/core/box_list_ops.py
+++ b/object_detection/core/box_list_ops.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List operations.
+
+Example box operations that are supported:
+  * areas: compute bounding box areas
+  * iou: pairwise intersection-over-union scores
+  * sq_dist: pairwise squared distances between bounding boxes
+
+Whenever box_list_ops functions output a BoxList, the fields of the incoming
+BoxList are retained unless documented otherwise.
+"""
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.utils import shape_utils
+
+
+class SortOrder(object):
+  """Constants naming the two sort directions.
+
+  Attributes:
+    ascend: ascending order.
+    descend: descending order.
+  """
+  ascend, descend = 1, 2
+
+
+def area(boxlist, scope=None):
+  """Returns the area of every box in a BoxList.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] holding (y_max - y_min) * (x_max - x_min) per box.
+  """
+  with tf.name_scope(scope, 'Area'):
+    corners = boxlist.get()
+    top, left, bottom, right = tf.split(
+        value=corners, num_or_size_splits=4, axis=1)
+    # Each split column is [N, 1]; drop the trailing axis to get [N].
+    column_areas = (bottom - top) * (right - left)
+    return tf.squeeze(column_areas, [1])
+
+
+def height_width(boxlist, scope=None):
+  """Returns the heights and widths of the boxes in a BoxList.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    Height: A tensor with shape [N] holding y_max - y_min per box.
+    Width: A tensor with shape [N] holding x_max - x_min per box.
+  """
+  with tf.name_scope(scope, 'HeightWidth'):
+    corners = boxlist.get()
+    top, left, bottom, right = tf.split(
+        value=corners, num_or_size_splits=4, axis=1)
+    heights = tf.squeeze(bottom - top, [1])
+    widths = tf.squeeze(right - left, [1])
+    return heights, widths
+
+
+def scale(boxlist, y_scale, x_scale, scope=None):
+  """Multiplies y coordinates by y_scale and x coordinates by x_scale.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    boxlist: a new BoxList holding the N scaled boxes, with the extra fields
+      of the input copied over.
+  """
+  with tf.name_scope(scope, 'Scale'):
+    # Box coordinates are float32, so bring both factors to that dtype.
+    y_factor = tf.cast(y_scale, tf.float32)
+    x_factor = tf.cast(x_scale, tf.float32)
+    top, left, bottom, right = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    scaled_top = y_factor * top
+    scaled_bottom = y_factor * bottom
+    scaled_left = x_factor * left
+    scaled_right = x_factor * right
+    scaled_corners = tf.concat(
+        [scaled_top, scaled_left, scaled_bottom, scaled_right], 1)
+    result = box_list.BoxList(scaled_corners)
+    return _copy_extra_fields(result, boxlist)
+
+
+def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
+  """Clamps box corners into a window.
+
+  Every coordinate is clamped into the window's range along its axis. When
+  `filter_nonoverlapping` is set, boxes whose clipped area is not strictly
+  positive are dropped afterwards.
+
+  Args:
+    boxlist: BoxList holding M_in boxes
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window to which the op should clip boxes.
+    filter_nonoverlapping: whether to filter out boxes that do not overlap at
+      all with the window.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M_out boxes where M_out <= M_in
+  """
+  with tf.name_scope(scope, 'ClipToWindow'):
+    top, left, bottom, right = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_top, win_left, win_bottom, win_right = tf.unstack(window)
+    # Clamp: first cap at the window maximum, then floor at the window minimum.
+    top_clipped = tf.maximum(tf.minimum(top, win_bottom), win_top)
+    bottom_clipped = tf.maximum(tf.minimum(bottom, win_bottom), win_top)
+    left_clipped = tf.maximum(tf.minimum(left, win_right), win_left)
+    right_clipped = tf.maximum(tf.minimum(right, win_right), win_left)
+    clipped_corners = tf.concat(
+        [top_clipped, left_clipped, bottom_clipped, right_clipped], 1)
+    clipped = _copy_extra_fields(box_list.BoxList(clipped_corners), boxlist)
+    if not filter_nonoverlapping:
+      return clipped
+    # A box entirely outside the window collapses to zero area once clamped.
+    clipped_areas = area(clipped)
+    has_area = tf.greater(clipped_areas, 0.0)
+    nonzero_area_indices = tf.cast(
+        tf.reshape(tf.where(has_area), [-1]), tf.int32)
+    return gather(clipped, nonzero_area_indices)
+
+
+def prune_outside_window(boxlist, window, scope=None):
+  """Drops every box that is not fully contained in the window.
+
+  A box is dropped as soon as any of its sides lies strictly beyond the
+  matching window side. See also clip_to_window, which only drops boxes that
+  fall completely outside the window and clips the ones that partially
+  overflow.
+
+  Args:
+    boxlist: a BoxList holding M_in boxes.
+    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+      of the window
+    scope: name scope.
+
+  Returns:
+    pruned_boxlist: a BoxList holding the M_out kept boxes, M_out <= M_in
+    valid_indices: a tensor with shape [M_out] indexing the kept boxes in the
+     input boxlist.
+  """
+  with tf.name_scope(scope, 'PruneOutsideWindow'):
+    top, left, bottom, right = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_top, win_left, win_bottom, win_right = tf.unstack(window)
+    # One boolean column per side: True where that side pokes out.
+    sticks_out = tf.concat([
+        tf.less(top, win_top), tf.less(left, win_left),
+        tf.greater(bottom, win_bottom), tf.greater(right, win_right)
+    ], 1)
+    fully_inside = tf.logical_not(tf.reduce_any(sticks_out, 1))
+    valid_indices = tf.reshape(tf.where(fully_inside), [-1])
+    return gather(boxlist, valid_indices), valid_indices
+
+
+def prune_completely_outside_window(boxlist, window, scope=None):
+  """Drops boxes that have no overlap with the window.
+
+  A box is dropped when it lies entirely on or beyond one side of the window.
+  Unlike clip_to_window, the surviving boxes are left unclipped, even when
+  they partially overflow the window.
+
+  Args:
+    boxlist: a BoxList holding M_in boxes.
+    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+      of the window
+    scope: name scope.
+
+  Returns:
+    pruned_boxlist: a BoxList holding the M_out kept boxes, M_out <= M_in
+    valid_indices: a tensor with shape [M_out] indexing the kept boxes in the
+     input boxlist.
+  """
+  # NOTE: the default scope name is misspelt; it is kept unchanged because it
+  # determines the names of the ops created in the graph.
+  with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
+    top, left, bottom, right = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    win_top, win_left, win_bottom, win_right = tf.unstack(window)
+    # One boolean column per side: True where the whole box is past that side.
+    is_beyond = tf.concat([
+        tf.greater_equal(top, win_bottom), tf.greater_equal(left, win_right),
+        tf.less_equal(bottom, win_top), tf.less_equal(right, win_left)
+    ], 1)
+    touches_window = tf.logical_not(tf.reduce_any(is_beyond, 1))
+    valid_indices = tf.reshape(tf.where(touches_window), [-1])
+    return gather(boxlist, valid_indices), valid_indices
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+  """Returns the intersection area of every box pair across two BoxLists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise intersections
+  """
+  with tf.name_scope(scope, 'Intersection'):
+    top1, left1, bottom1, right1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    top2, left2, bottom2, right2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    # [N, 1] against a transposed [1, M] broadcasts to all N x M pairs.
+    lowest_bottom = tf.minimum(bottom1, tf.transpose(bottom2))
+    highest_top = tf.maximum(top1, tf.transpose(top2))
+    # Disjoint pairs give a negative extent; floor it at zero.
+    overlap_heights = tf.maximum(0.0, lowest_bottom - highest_top)
+    lowest_right = tf.minimum(right1, tf.transpose(right2))
+    highest_left = tf.maximum(left1, tf.transpose(left2))
+    overlap_widths = tf.maximum(0.0, lowest_right - highest_left)
+    return overlap_heights * overlap_widths
+
+
+def matched_intersection(boxlist1, boxlist2, scope=None):
+  """Returns the intersection area of the i-th boxes of two BoxLists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] holding the intersection of each matched pair
+  """
+  with tf.name_scope(scope, 'MatchedIntersection'):
+    top1, left1, bottom1, right1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    top2, left2, bottom2, right2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    lowest_bottom = tf.minimum(bottom1, bottom2)
+    highest_top = tf.maximum(top1, top2)
+    # Disjoint pairs give a negative extent; floor it at zero.
+    overlap_heights = tf.maximum(0.0, lowest_bottom - highest_top)
+    lowest_right = tf.minimum(right1, right2)
+    highest_left = tf.maximum(left1, left2)
+    overlap_widths = tf.maximum(0.0, lowest_right - highest_left)
+    overlap_areas = overlap_heights * overlap_widths
+    return tf.reshape(overlap_areas, [-1])
+
+
+def iou(boxlist1, boxlist2, scope=None):
+  """Returns intersection-over-union for every box pair across two BoxLists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise iou scores.
+  """
+  with tf.name_scope(scope, 'IOU'):
+    intersections = intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    # [N, 1] + [1, M] broadcasts the two area vectors over all pairs.
+    unions = (
+        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+    # Pairs without overlap are reported as exactly 0 instead of the quotient.
+    no_overlap = tf.equal(intersections, 0.0)
+    zeros = tf.zeros_like(intersections)
+    ratios = tf.truediv(intersections, unions)
+    return tf.where(no_overlap, zeros, ratios)
+
+
+def matched_iou(boxlist1, boxlist2, scope=None):
+  """Returns intersection-over-union of the i-th boxes of two BoxLists.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] holding the iou score of each matched pair.
+  """
+  with tf.name_scope(scope, 'MatchedIOU'):
+    intersections = matched_intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    unions = areas1 + areas2 - intersections
+    # Pairs without overlap are reported as exactly 0 instead of the quotient.
+    no_overlap = tf.equal(intersections, 0.0)
+    zeros = tf.zeros_like(intersections)
+    ratios = tf.truediv(intersections, unions)
+    return tf.where(no_overlap, zeros, ratios)
+
+
+def ioa(boxlist1, boxlist2, scope=None):
+  """Returns intersection-over-area for every box pair across two BoxLists.
+
+  The intersection-over-area (IOA) of box1 and box2 is their intersection
+  area divided by the area of box2. It is therefore not symmetric:
+  ioa(box1, box2) != ioa(box2, box1) in general.
+
+  The division is not guarded, so the result for a zero-area box in boxlist2
+  is whatever tf.truediv yields for a zero denominator.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise ioa scores.
+  """
+  with tf.name_scope(scope, 'IOA'):
+    pairwise_intersections = intersection(boxlist1, boxlist2)
+    # Shape [1, M] so each column is divided by the area of its boxlist2 box.
+    denominators = tf.expand_dims(area(boxlist2), 0)
+    return tf.truediv(pairwise_intersections, denominators)
+
+
+def prune_non_overlapping_boxes(
+    boxlist1, boxlist2, min_overlap=0.0, scope=None):
+  """Prunes the boxes in boxlist1 that overlap too little with boxlist2.
+
+  A box of boxlist1 is kept when its best score against any box of boxlist2
+  is at least `min_overlap`. The score is ioa(box2, box1), i.e. the
+  intersection area divided by the area of the boxlist1 box.
+
+  Args:
+    boxlist1: BoxList holding N boxes.
+    boxlist2: BoxList holding M boxes.
+    min_overlap: Minimum required overlap between boxes, to count them as
+                overlapping. May be a Python number or a scalar tensor.
+    scope: name scope.
+
+  Returns:
+    new_boxlist1: A pruned BoxList holding the N' kept boxes.
+    keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
+      first input BoxList `boxlist1`.
+  """
+  with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
+    ioa_ = ioa(boxlist2, boxlist1)  # [M, N] tensor
+    ioa_ = tf.reduce_max(ioa_, axis=[0])  # [N] tensor
+    # Cast rather than wrap in tf.constant: an integer threshold would become
+    # an int32 constant that cannot be compared with the float scores, and
+    # tf.constant does not accept a tensor threshold at all.
+    threshold = tf.cast(min_overlap, ioa_.dtype)
+    keep_bool = tf.greater_equal(ioa_, threshold)
+    # tf.where on a rank-1 mask returns [N', 1]; drop the trailing axis.
+    keep_inds = tf.squeeze(tf.where(keep_bool), axis=[1])
+    new_boxlist1 = gather(boxlist1, keep_inds)
+    return new_boxlist1, keep_inds
+
+
+def prune_small_boxes(boxlist, min_side, scope=None):
+  """Drops boxes whose width or height is below min_side.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    min_side: Minimum width AND height of box to survive pruning.
+    scope: name scope.
+
+  Returns:
+    A pruned boxlist.
+  """
+  with tf.name_scope(scope, 'PruneSmallBoxes'):
+    heights, widths = height_width(boxlist)
+    wide_enough = tf.greater_equal(widths, min_side)
+    tall_enough = tf.greater_equal(heights, min_side)
+    big_enough = tf.logical_and(wide_enough, tall_enough)
+    kept_indices = tf.reshape(tf.where(big_enough), [-1])
+    return gather(boxlist, kept_indices)
+
+
+def change_coordinate_frame(boxlist, window, scope=None):
+  """Re-expresses box coordinates relative to a window.
+
+  Given a window of the form [ymin, xmin, ymax, xmax], the boxes are shifted
+  by the window's min corner and divided by the window's height and width, so
+  the window's min corner maps to (0,0) and its max corner maps to (1,1).
+
+  A typical use is data augmentation: after randomly cropping an image to
+  `window`, the groundtruth boxes must be re-expressed relative to the crop.
+
+  Args:
+    boxlist: A BoxList object holding N boxes.
+    window: A rank 1 tensor [4].
+    scope: name scope.
+
+  Returns:
+    Returns a BoxList object with N boxes.
+  """
+  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
+    win_height = window[2] - window[0]
+    win_width = window[3] - window[1]
+    # Subtract the window's min corner from both corners of every box.
+    min_corner_offset = [window[0], window[1], window[0], window[1]]
+    shifted = box_list.BoxList(boxlist.get() - min_corner_offset)
+    rescaled = scale(shifted, 1.0 / win_height, 1.0 / win_width)
+    # `shifted` was built from bare coordinates, so restore the extra fields.
+    return _copy_extra_fields(rescaled, boxlist)
+
+
+def sq_dist(boxlist1, boxlist2, scope=None):
+  """Returns pairwise squared distances between box corner vectors.
+
+  Each box is treated as a point in 4d Euclidean space given by its four
+  corner coordinates.
+
+  With X and Y the coordinate matrices of boxlist1 and boxlist2, where X(i,:)
+  is the i'th box of boxlist1 and Y(j,:) the j'th box of boxlist2, the result
+  is
+  Z(i,j) = ||X(i,:) - Y(j,:)||^2
+         = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise squared distances
+  """
+  with tf.name_scope(scope, 'SqDist'):
+    # keep_dims leaves the norms as [N, 1] and [M, 1] for broadcasting.
+    row_sqnorms = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
+    col_sqnorms = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
+    cross_terms = tf.matmul(boxlist1.get(), boxlist2.get(),
+                            transpose_a=False, transpose_b=True)
+    return row_sqnorms + tf.transpose(col_sqnorms) - 2.0 * cross_terms
+
+
+def boolean_mask(boxlist, indicator, fields=None, scope=None):
+  """Builds a new BoxList from the boxes marked True by an indicator.
+
+  The same mask is applied along the first dimension of the box coordinates
+  and of every selected field. By default all extra fields of the input are
+  carried over; pass `fields` to restrict this to a subset.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    indicator: a rank-1 boolean tensor
+    fields: (optional) list of fields to also mask.  If None (default),
+      all extra fields are masked.  Pass an empty fields list to only keep
+      the box coordinates.
+    scope: name scope.
+
+  Returns:
+    subboxlist: a BoxList corresponding to the subset of the input BoxList
+      specified by indicator
+  Raises:
+    ValueError: if `indicator` is not a rank-1 boolean tensor, or if a
+      requested field is not contained in boxlist.
+  """
+  with tf.name_scope(scope, 'BooleanMask'):
+    if indicator.shape.ndims != 1:
+      raise ValueError('indicator should have rank 1')
+    if indicator.dtype != tf.bool:
+      raise ValueError('indicator should be a boolean tensor')
+    masked_boxes = tf.boolean_mask(boxlist.get(), indicator)
+    subboxlist = box_list.BoxList(masked_boxes)
+    selected = boxlist.get_extra_fields() if fields is None else fields
+    for name in selected:
+      if not boxlist.has_field(name):
+        raise ValueError('boxlist must contain all specified fields')
+      masked_field = tf.boolean_mask(boxlist.get_field(name), indicator)
+      subboxlist.add_field(name, masked_field)
+    return subboxlist
+
+
+def gather(boxlist, indices, fields=None, scope=None):
+  """Builds a new BoxList from the boxes at the given indices.
+
+  The same indices are applied along the first dimension of the box
+  coordinates and of every selected field. By default all extra fields of the
+  input are carried over; pass `fields` to restrict this to a subset.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    indices: a rank-1 tensor of type int32 / int64
+    fields: (optional) list of fields to also gather from.  If None (default),
+      all extra fields are gathered from.  Pass an empty fields list to only
+      gather the box coordinates.
+    scope: name scope.
+
+  Returns:
+    subboxlist: a BoxList corresponding to the subset of the input BoxList
+    specified by indices
+  Raises:
+    ValueError: if `indices` is not rank 1, is neither int32 nor int64, or if
+      a requested field is not contained in boxlist.
+  """
+  with tf.name_scope(scope, 'Gather'):
+    if len(indices.shape.as_list()) != 1:
+      raise ValueError('indices should have rank 1')
+    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
+      raise ValueError('indices should be an int32 / int64 tensor')
+    picked_boxes = tf.gather(boxlist.get(), indices)
+    subboxlist = box_list.BoxList(picked_boxes)
+    selected = boxlist.get_extra_fields() if fields is None else fields
+    for name in selected:
+      if not boxlist.has_field(name):
+        raise ValueError('boxlist must contain all specified fields')
+      picked_field = tf.gather(boxlist.get_field(name), indices)
+      subboxlist.add_field(name, picked_field)
+    return subboxlist
+
+
+def concatenate(boxlists, fields=None, scope=None):
+  """Joins several BoxLists into one along the box dimension.
+
+  Box coordinates and the selected fields are concatenated along dimension 0.
+  A field can only be concatenated when its shape is fully defined and equal
+  across all boxlists, except for the first dimension.
+
+  Args:
+    boxlists: list of BoxList objects
+    fields: optional list of fields to also concatenate.  By default, all
+      extra fields from the first BoxList in the list are included in the
+      concatenation.
+    scope: name scope.
+
+  Returns:
+    a BoxList with number of boxes equal to
+      sum([boxlist.num_boxes() for boxlist in boxlists])
+  Raises:
+    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
+      contains non BoxList objects), or if requested fields are not contained in
+      all boxlists
+  """
+  with tf.name_scope(scope, 'Concatenate'):
+    if not isinstance(boxlists, list):
+      raise ValueError('boxlists should be a list')
+    if not boxlists:
+      raise ValueError('boxlists should have nonzero length')
+    if not all(isinstance(entry, box_list.BoxList) for entry in boxlists):
+      raise ValueError('all elements of boxlists should be BoxList objects')
+    all_boxes = tf.concat([entry.get() for entry in boxlists], 0)
+    concatenated = box_list.BoxList(all_boxes)
+    selected = boxlists[0].get_extra_fields() if fields is None else fields
+    for name in selected:
+      # The first boxlist supplies the reference shape; dimension 0 is masked
+      # with -1 because it is allowed to differ between boxlists.
+      reference_shape = boxlists[0].get_field(name).get_shape().as_list()
+      reference_shape[0] = -1
+      if None in reference_shape:
+        raise ValueError('field %s must have fully defined shape except for the'
+                         ' 0th dimension.' % name)
+      for entry in boxlists:
+        if not entry.has_field(name):
+          raise ValueError('boxlist must contain all requested fields')
+        entry_shape = entry.get_field(name).get_shape().as_list()
+        entry_shape[0] = -1
+        if entry_shape != reference_shape:
+          raise ValueError('field %s must have same shape for all boxlists '
+                           'except for the 0th dimension.' % name)
+      joined_field = tf.concat(
+          [entry.get_field(name) for entry in boxlists], 0)
+      concatenated.add_field(name, joined_field)
+    return concatenated
+
+
+def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
+  """Reorders boxes and their fields by the values of a rank-1 field.
+
+  A typical use is ordering detections by descending score.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    field: A BoxList field for sorting and reordering the BoxList.
+    order: (Optional) descend or ascend. Default is descend.
+    scope: name scope.
+
+  Returns:
+    sorted_boxlist: A sorted BoxList with the field in the specified order.
+
+  Raises:
+    ValueError: if specified field does not exist
+    ValueError: if the order is not either descend or ascend
+    ValueError: if the field is not rank 1
+  """
+  with tf.name_scope(scope, 'SortByField'):
+    if order not in (SortOrder.descend, SortOrder.ascend):
+      raise ValueError('Invalid sort order')
+
+    sort_keys = boxlist.get_field(field)
+    if len(sort_keys.shape.as_list()) != 1:
+      raise ValueError('Field should have rank 1')
+
+    # The field length is only known at run time, so it is checked in-graph.
+    num_boxes = boxlist.num_boxes()
+    num_entries = tf.size(sort_keys)
+    sizes_match = tf.equal(num_boxes, num_entries)
+    length_assert = tf.Assert(
+        sizes_match,
+        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
+
+    with tf.control_dependencies([length_assert]):
+      # TODO: Remove with tf.device when top_k operation runs correctly on GPU.
+      with tf.device('/cpu:0'):
+        # Asking top_k for all N entries yields a full descending ordering.
+        _, sorted_indices = tf.nn.top_k(sort_keys, num_boxes, sorted=True)
+
+    if order == SortOrder.ascend:
+      sorted_indices = tf.reverse_v2(sorted_indices, [0])
+
+    return gather(boxlist, sorted_indices)
+
+
+def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
+  """Draws the boxes of a BoxList on top of an image.
+
+  Drawing is delegated to tf.image.draw_bounding_boxes. Currently this plots
+  a 1 pixel thick red bounding box on top of the image.  Note that
+  tf.image.draw_bounding_boxes essentially is 1 indexed.
+
+  Args:
+    image: an image tensor with shape [height, width, 3]
+    boxlist: a BoxList
+    normalized: (boolean) specify whether corners are to be interpreted
+      as absolute coordinates in image space or normalized with respect to the
+      image size.
+    scope: name scope.
+
+  Returns:
+    image_and_boxes: an image tensor with shape [height, width, 3]
+  """
+  with tf.name_scope(scope, 'VisualizeBoxesInImage'):
+    if not normalized:
+      # Absolute corners are divided by the image size before drawing.
+      height, width, _ = tf.unstack(tf.shape(image))
+      boxlist = scale(boxlist,
+                      1.0 / tf.cast(height, tf.float32),
+                      1.0 / tf.cast(width, tf.float32))
+    # Add a leading batch axis of size 1 to both inputs, then remove it again.
+    batched_corners = tf.expand_dims(boxlist.get(), 0)
+    batched_image = tf.expand_dims(image, 0)
+    drawn = tf.image.draw_bounding_boxes(batched_image, batched_corners)
+    return tf.squeeze(drawn, [0])
+
+
+def filter_field_value_equals(boxlist, field, value, scope=None):
+  """Keeps only the boxes whose field entry equals the given value.
+
+  Args:
+    boxlist: BoxList holding N boxes.
+    field: field name for filtering.
+    value: scalar value.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= N
+
+  Raises:
+    ValueError: if boxlist not a BoxList object or if it does not have
+      the specified field.
+  """
+  with tf.name_scope(scope, 'FilterFieldValueEquals'):
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field(field):
+      raise ValueError('boxlist must contain the specified field')
+    field_values = boxlist.get_field(field)
+    is_match = tf.equal(field_values, value)
+    matching_indices = tf.reshape(tf.where(is_match), [-1])
+    return gather(boxlist, matching_indices)
+
+
+def filter_greater_than(boxlist, thresh, scope=None):
+  """Filter to keep only boxes with score exceeding a given threshold.
+
+  This op keeps the collection of boxes whose corresponding scores are
+  strictly greater than the input threshold.
+
+  TODO: Change function name to FilterScoresGreaterThan
+
+  Args:
+    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
+      representing detection scores, of rank 1 or of a shape consistent with
+      [None, 1].
+    thresh: scalar threshold
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= N
+
+  Raises:
+    ValueError: if boxlist not a BoxList object, if it does not have a scores
+      field, or if the scores field is neither rank 1 nor of a shape
+      consistent with [None, 1].
+  """
+  with tf.name_scope(scope, 'FilterGreaterThan'):
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    scores = boxlist.get_field('scores')
+    scores_shape = scores.shape.as_list()
+    if len(scores_shape) > 2:
+      raise ValueError('Scores should have rank 1 or 2')
+    if len(scores_shape) == 2 and scores_shape[1] != 1:
+      raise ValueError('Scores should have rank 1 or have shape '
+                       'consistent with [None, 1]')
+    # Flatten before comparing.  For [N, 1] scores tf.where would otherwise
+    # return (row, column) pairs, and reshaping those to [-1] would interleave
+    # the column index 0 with the real box indices, so box 0 would be gathered
+    # once for every box that passes the threshold.
+    flat_scores = tf.reshape(scores, [-1])
+    high_score_indices = tf.cast(tf.reshape(
+        tf.where(tf.greater(flat_scores, thresh)),
+        [-1]), tf.int32)
+    return gather(boxlist, high_score_indices)
+
+
+def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
+  """Applies single-class non maximum suppression to a BoxList.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes.  Note that this only works for a single class ---
+  to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
+
+  Args:
+    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
+      representing detection scores.
+    thresh: scalar IOU threshold in [0, 1]
+    max_output_size: maximum number of retained boxes
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes where M <= max_output_size
+  Raises:
+    ValueError: if thresh is not in [0, 1], if boxlist is not a BoxList, or if
+      boxlist has no 'scores' field.
+  """
+  with tf.name_scope(scope, 'NonMaxSuppression'):
+    if not 0 <= thresh <= 1.0:
+      raise ValueError('thresh must be between 0 and 1')
+    if not isinstance(boxlist, box_list.BoxList):
+      raise ValueError('boxlist must be a BoxList')
+    if not boxlist.has_field('scores'):
+      raise ValueError('input boxlist must have \'scores\' field')
+    corners = boxlist.get()
+    detection_scores = boxlist.get_field('scores')
+    kept_indices = tf.image.non_max_suppression(
+        corners, detection_scores, max_output_size, iou_threshold=thresh)
+    return gather(boxlist, kept_indices)
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Adds every extra field of one BoxList to another BoxList.
+
+  Args:
+    boxlist_to_copy_to: BoxList that receives the extra fields.
+    boxlist_to_copy_from: BoxList whose extra fields are read.
+
+  Returns:
+    boxlist_to_copy_to (the same object), now carrying the extra fields.
+  """
+  for field_name in boxlist_to_copy_from.get_extra_fields():
+    field_values = boxlist_to_copy_from.get_field(field_name)
+    boxlist_to_copy_to.add_field(field_name, field_values)
+  return boxlist_to_copy_to
+
+
+def to_normalized_coordinates(boxlist, height, width,
+                              check_range=True, scope=None):
+  """Rescales absolute box coordinates to normalized coordinates in [0, 1].
+
+  Usually one uses the dynamic shape of the image or conv-layer tensor:
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(images)[1],
+                                                     tf.shape(images)[2]),
+
+  When check_range is True, an assertion fails at graph execution time unless
+  the maximum coordinate is greater than 1.01 (a smaller maximum means the
+  coordinates are already normalized). The value 1.01 is to deal with small
+  rounding errors.
+
+  Args:
+    boxlist: BoxList with coordinates in terms of pixel-locations.
+    height: Maximum value for height of absolute box coordinates.
+    width: Maximum value for width of absolute box coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    boxlist with normalized coordinates in [0, 1].
+  """
+  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    if check_range:
+      largest_coordinate = tf.reduce_max(boxlist.get())
+      not_normalized_assert = tf.Assert(
+          tf.greater(largest_coordinate, 1.01),
+          ['max value is lower than 1.01: ', largest_coordinate])
+      # Make width depend on the assert so the check runs before the scaling
+      # below can be evaluated.
+      with tf.control_dependencies([not_normalized_assert]):
+        width = tf.identity(width)
+
+    y_scale = 1 / height
+    x_scale = 1 / width
+    return scale(boxlist, y_scale, x_scale)
+
+
+def to_absolute_coordinates(boxlist, height, width,
+                            check_range=True, scope=None):
+  """Rescales normalized box coordinates to absolute pixel coordinates.
+
+  When check_range is True, an assertion fails at graph execution time if the
+  maximum box coordinate value is larger than 1.01 (in which case coordinates
+  are already absolute).
+
+  Args:
+    boxlist: BoxList with coordinates in range [0, 1].
+    height: Maximum value for height of absolute box coordinates.
+    width: Maximum value for width of absolute box coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    boxlist with absolute coordinates in terms of the image size.
+
+  """
+  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    if check_range:
+      # The inputs must still be normalized, i.e. no coordinate above 1.01.
+      largest_coordinate = tf.reduce_max(boxlist.get())
+      normalized_assert = tf.Assert(
+          tf.greater_equal(1.01, largest_coordinate),
+          ['maximum box coordinate value is larger '
+           'than 1.01: ', largest_coordinate])
+      # Make width depend on the assert so the check runs before the scaling
+      # below can be evaluated.
+      with tf.control_dependencies([normalized_assert]):
+        width = tf.identity(width)
+
+    return scale(boxlist, height, width)
+
+
+def refine_boxes_multi_class(pool_boxes,
+                             num_classes,
+                             nms_iou_thresh,
+                             nms_max_detections,
+                             voting_iou_thresh=0.5):
+  """Refines a pool of boxes class by class using NMS and box voting.
+
+  The boxes of each class in [0, num_classes) are selected from pool_boxes and
+  refined independently with refine_boxes; the per-class results are then
+  concatenated and sorted by their 'scores' field.
+
+  Args:
+    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+      have a rank 1 'scores' field and a rank 1 'classes' field.
+    num_classes: (int scalar) Number of classes.
+    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+    nms_max_detections: (int scalar) maximum output size for NMS.
+    voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+  Returns:
+    BoxList of refined boxes.
+
+  Raises:
+    ValueError: if
+      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+      b) pool_boxes is not a BoxList.
+      c) pool_boxes does not have a scores and classes field.
+  """
+  if not 0.0 <= nms_iou_thresh <= 1.0:
+    raise ValueError('nms_iou_thresh must be between 0 and 1')
+  if not 0.0 <= voting_iou_thresh <= 1.0:
+    raise ValueError('voting_iou_thresh must be between 0 and 1')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+  if not pool_boxes.has_field('classes'):
+    raise ValueError('pool_boxes must have a \'classes\' field')
+
+  per_class_refined = [
+      refine_boxes(
+          filter_field_value_equals(pool_boxes, 'classes', class_id),
+          nms_iou_thresh, nms_max_detections, voting_iou_thresh)
+      for class_id in range(num_classes)
+  ]
+  all_refined = concatenate(per_class_refined)
+  return sort_by_field(all_refined, 'scores')
+
+
+def refine_boxes(pool_boxes,
+                 nms_iou_thresh,
+                 nms_max_detections,
+                 voting_iou_thresh=0.5):
+  """Refines a pool of boxes with non max suppression followed by box voting.
+
+  Non max suppression first selects a subset of pool_boxes; box voting then
+  recomputes each selected box from the full pool.
+
+  Args:
+    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+      have a rank 1 'scores' field.
+    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+    nms_max_detections: (int scalar) maximum output size for NMS.
+    voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+  Returns:
+    BoxList of refined boxes.
+
+  Raises:
+    ValueError: if
+      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+      b) pool_boxes is not a BoxList.
+      c) pool_boxes does not have a scores field.
+  """
+  if not 0.0 <= nms_iou_thresh <= 1.0:
+    raise ValueError('nms_iou_thresh must be between 0 and 1')
+  if not 0.0 <= voting_iou_thresh <= 1.0:
+    raise ValueError('voting_iou_thresh must be between 0 and 1')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+
+  selected_boxes = non_max_suppression(
+      pool_boxes, nms_iou_thresh, nms_max_detections)
+  voted_boxes = box_voting(selected_boxes, pool_boxes, voting_iou_thresh)
+  return voted_boxes
+
+
+def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
+  """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
+
+  Performs box voting as described in 'Object detection via a multi-region &
+  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
+  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
+  with iou overlap strictly greater than iou_thresh. The location of B is set
+  to the weighted average location of boxes in S (scores are used for
+  weighting). And the score of B is set to the average score of boxes in S.
+
+  At graph execution time an assertion fails if any box in selected_boxes
+  matches no box in pool_boxes, or if any pool score is negative.
+
+  Args:
+    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
+      boxes are usually selected from pool_boxes using non max suppression.
+    pool_boxes: BoxList containing a set of (possibly redundant) boxes. Must
+      have a 'scores' field.
+    iou_thresh: (float scalar) iou threshold for matching boxes in
+      selected_boxes and pool_boxes.
+
+  Returns:
+    BoxList containing averaged locations and scores for each box in
+    selected_boxes.
+
+  Raises:
+    ValueError: if
+      a) selected_boxes or pool_boxes is not a BoxList.
+      b) if iou_thresh is not in [0, 1].
+      c) pool_boxes does not have a scores field.
+  """
+  if not 0.0 <= iou_thresh <= 1.0:
+    raise ValueError('iou_thresh must be between 0 and 1')
+  if not isinstance(selected_boxes, box_list.BoxList):
+    raise ValueError('selected_boxes must be a BoxList')
+  if not isinstance(pool_boxes, box_list.BoxList):
+    raise ValueError('pool_boxes must be a BoxList')
+  if not pool_boxes.has_field('scores'):
+    raise ValueError('pool_boxes must have a \'scores\' field')
+
+  # Pairwise overlaps: one row per selected box, one column per pool box.
+  iou_ = iou(selected_boxes, pool_boxes)
+  # 1.0 where a pool box overlaps a selected box by more than iou_thresh.
+  match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
+  # Number of matching pool boxes for every selected box.
+  num_matches = tf.reduce_sum(match_indicator, 1)
+  # TODO: Handle the case where some boxes in selected_boxes do not match to any
+  # boxes in pool_boxes. For such boxes without any matches, we should return
+  # the original boxes without voting.
+  match_assert = tf.Assert(
+      tf.reduce_all(tf.greater(num_matches, 0)),
+      ['Each box in selected_boxes must match with at least one box '
+       'in pool_boxes.'])
+
+  # Scores as a column so they can be multiplied into the match matrix.
+  scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
+  scores_assert = tf.Assert(
+      tf.reduce_all(tf.greater_equal(scores, 0)),
+      ['Scores must be non negative.'])
+
+  # Both asserts are attached to sum_scores; every output below is computed
+  # from sum_scores and therefore only evaluates after the checks have run.
+  with tf.control_dependencies([scores_assert, match_assert]):
+    sum_scores = tf.matmul(match_indicator, scores)
+  averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
+
+  # Score-weighted sum of the matched pool corners, divided by the summed
+  # scores of those matches.
+  # NOTE(review): scores_assert only enforces scores >= 0, so a selected box
+  # whose matches all have score 0 divides by zero here -- confirm whether
+  # callers can produce such input.
+  box_locations = tf.matmul(match_indicator,
+                            pool_boxes.get() * scores) / sum_scores
+  averaged_boxes = box_list.BoxList(box_locations)
+  _copy_extra_fields(averaged_boxes, selected_boxes)
+  # NOTE(review): presumably this replaces any 'scores' field copied over from
+  # selected_boxes on the line above -- confirm BoxList.add_field semantics.
+  averaged_boxes.add_field('scores', averaged_scores)
+  return averaged_boxes
+
+
+def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
+  """Pads or clips the boxes and every extra field of a BoxList.
+
+  Args:
+    boxlist: A BoxList with an arbitrary number of boxes.
+    num_boxes: First num_boxes in boxlist are kept.
+      The fields are zero-padded if num_boxes is bigger than the
+      actual number of boxes.
+    scope: name scope.
+
+  Returns:
+    BoxList with all fields padded or clipped.
+  """
+  with tf.name_scope(scope, 'PadOrClipBoxList'):
+    resized_corners = shape_utils.pad_or_clip_tensor(boxlist.get(), num_boxes)
+    resized_boxlist = box_list.BoxList(resized_corners)
+    for field_name in boxlist.get_extra_fields():
+      resized_field = shape_utils.pad_or_clip_tensor(
+          boxlist.get_field(field_name), num_boxes)
+      resized_boxlist.add_field(field_name, resized_field)
+    return resized_boxlist
--- a/object_detection/core/box_list_ops_test.py
+++ b/object_detection/core/box_list_ops_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_list_ops."""
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.framework import errors
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+
+
+class BoxListOpsTest(tf.test.TestCase):
+  """Tests for common bounding box operations."""
+
+  def test_area(self):
+    """Checks area() against hand-computed areas for two boxes."""
+    box_corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
+    expected_areas = [200.0, 4.0]
+    boxlist = box_list.BoxList(box_corners)
+    area_op = box_list_ops.area(boxlist)
+    with self.test_session() as sess:
+      actual_areas = sess.run(area_op)
+      self.assertAllClose(actual_areas, expected_areas)
+
+  def test_height_width(self):
+    """Checks height_width() against hand-computed values for two boxes."""
+    box_corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
+    expected_heights = [10., 2.]
+    expected_widths = [20., 2.]
+    boxlist = box_list.BoxList(box_corners)
+    height_op, width_op = box_list_ops.height_width(boxlist)
+    with self.test_session() as sess:
+      actual_heights, actual_widths = sess.run([height_op, width_op])
+      self.assertAllClose(actual_heights, expected_heights)
+      self.assertAllClose(actual_widths, expected_widths)
+
+  def test_scale(self):
+    """Checks scale() rescales corners and keeps the extra field unchanged."""
+    box_corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
+                              dtype=tf.float32)
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('extra_data', tf.constant([[1], [2]]))
+
+    y_factor = tf.constant(1.0/100)
+    x_factor = tf.constant(1.0/200)
+    scaled_boxlist = box_list_ops.scale(boxlist, y_factor, x_factor)
+    expected_corners = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
+    with self.test_session() as sess:
+      actual_corners = sess.run(scaled_boxlist.get())
+      self.assertAllClose(actual_corners, expected_corners)
+      actual_extra_data = sess.run(scaled_boxlist.get_field('extra_data'))
+      self.assertAllEqual(actual_extra_data, [[1], [2]])
+
+  def test_clip_to_window_filter_boxes_which_fall_outside_the_window(self):
+    """Checks clip_to_window() with filter_nonoverlapping=True.
+
+    The last input box lies entirely outside the window and is expected to be
+    missing from the output, together with its extra_data entry.
+    """
+    clip_window = tf.constant([0, 0, 9, 14], tf.float32)
+    box_corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+                               [-1.0, -2.0, 4.0, 5.0],
+                               [2.0, 3.0, 5.0, 9.0],
+                               [0.0, 0.0, 9.0, 14.0],
+                               [-100.0, -100.0, 300.0, 600.0],
+                               [-10.0, -10.0, -9.0, -9.0]])
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('extra_data',
+                      tf.constant([[1], [2], [3], [4], [5], [6]]))
+    expected_corners = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
+                        [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
+                        [0.0, 0.0, 9.0, 14.0]]
+    clipped = box_list_ops.clip_to_window(
+        boxlist, clip_window, filter_nonoverlapping=True)
+    with self.test_session() as sess:
+      actual_corners = sess.run(clipped.get())
+      self.assertAllClose(actual_corners, expected_corners)
+      actual_extra_data = sess.run(clipped.get_field('extra_data'))
+      self.assertAllEqual(actual_extra_data, [[1], [2], [3], [4], [5]])
+
+  def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
+      self):
+    """Checks clip_to_window() with filter_nonoverlapping=False.
+
+    The last input box lies entirely outside the window; it is expected to
+    stay in the output as an all-zero box, with its extra_data entry kept.
+    """
+    clip_window = tf.constant([0, 0, 9, 14], tf.float32)
+    box_corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+                               [-1.0, -2.0, 4.0, 5.0],
+                               [2.0, 3.0, 5.0, 9.0],
+                               [0.0, 0.0, 9.0, 14.0],
+                               [-100.0, -100.0, 300.0, 600.0],
+                               [-10.0, -10.0, -9.0, -9.0]])
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('extra_data',
+                      tf.constant([[1], [2], [3], [4], [5], [6]]))
+    expected_corners = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
+                        [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
+                        [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
+    clipped = box_list_ops.clip_to_window(
+        boxlist, clip_window, filter_nonoverlapping=False)
+    with self.test_session() as sess:
+      actual_corners = sess.run(clipped.get())
+      self.assertAllClose(actual_corners, expected_corners)
+      actual_extra_data = sess.run(clipped.get_field('extra_data'))
+      self.assertAllEqual(actual_extra_data, [[1], [2], [3], [4], [5], [6]])
+
+  def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
+      self):
+    """Checks prune_outside_window() output boxes, indices and extra field.
+
+    Only the boxes at input positions 0, 2 and 3 are expected to survive.
+    """
+    prune_window = tf.constant([0, 0, 9, 14], tf.float32)
+    box_corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+                               [-1.0, -2.0, 4.0, 5.0],
+                               [2.0, 3.0, 5.0, 9.0],
+                               [0.0, 0.0, 9.0, 14.0],
+                               [-10.0, -10.0, -9.0, -9.0],
+                               [-100.0, -100.0, 300.0, 600.0]])
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('extra_data',
+                      tf.constant([[1], [2], [3], [4], [5], [6]]))
+    expected_corners = [[5.0, 5.0, 6.0, 6.0],
+                        [2.0, 3.0, 5.0, 9.0],
+                        [0.0, 0.0, 9.0, 14.0]]
+    pruned_boxlist, kept_indices = box_list_ops.prune_outside_window(
+        boxlist, prune_window)
+    with self.test_session() as sess:
+      actual_corners = sess.run(pruned_boxlist.get())
+      self.assertAllClose(actual_corners, expected_corners)
+      actual_kept_indices = sess.run(kept_indices)
+      self.assertAllEqual(actual_kept_indices, [0, 2, 3])
+      actual_extra_data = sess.run(pruned_boxlist.get_field('extra_data'))
+      self.assertAllEqual(actual_extra_data, [[1], [3], [4]])
+
+  def test_prune_completely_outside_window(self):
+    """Checks prune_completely_outside_window() boxes, indices and field.
+
+    Only the box at input position 4 is expected to be removed; the surviving
+    boxes keep their original coordinates.
+    """
+    prune_window = tf.constant([0, 0, 9, 14], tf.float32)
+    box_corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+                               [-1.0, -2.0, 4.0, 5.0],
+                               [2.0, 3.0, 5.0, 9.0],
+                               [0.0, 0.0, 9.0, 14.0],
+                               [-10.0, -10.0, -9.0, -9.0],
+                               [-100.0, -100.0, 300.0, 600.0]])
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('extra_data',
+                      tf.constant([[1], [2], [3], [4], [5], [6]]))
+    expected_corners = [[5.0, 5.0, 6.0, 6.0],
+                        [-1.0, -2.0, 4.0, 5.0],
+                        [2.0, 3.0, 5.0, 9.0],
+                        [0.0, 0.0, 9.0, 14.0],
+                        [-100.0, -100.0, 300.0, 600.0]]
+    pruned_boxlist, kept_indices = (
+        box_list_ops.prune_completely_outside_window(boxlist, prune_window))
+    with self.test_session() as sess:
+      actual_corners = sess.run(pruned_boxlist.get())
+      self.assertAllClose(actual_corners, expected_corners)
+      actual_kept_indices = sess.run(kept_indices)
+      self.assertAllEqual(actual_kept_indices, [0, 1, 2, 3, 5])
+      actual_extra_data = sess.run(pruned_boxlist.get_field('extra_data'))
+      self.assertAllEqual(actual_extra_data, [[1], [2], [3], [4], [6]])
+
+  def test_intersection(self):
+    """Checks the pairwise intersection matrix of a 2-box and a 3-box list."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    expected_intersections = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    intersection_op = box_list_ops.intersection(first_boxlist, second_boxlist)
+    with self.test_session() as sess:
+      actual_intersections = sess.run(intersection_op)
+      self.assertAllClose(actual_intersections, expected_intersections)
+
+  def test_matched_intersection(self):
+    """Checks the row-by-row intersections of two equally sized box lists."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0]])
+    expected_intersections = [2.0, 0.0]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    intersection_op = box_list_ops.matched_intersection(
+        first_boxlist, second_boxlist)
+    with self.test_session() as sess:
+      actual_intersections = sess.run(intersection_op)
+      self.assertAllClose(actual_intersections, expected_intersections)
+
+  def test_iou(self):
+    """Checks the pairwise IOU matrix of a 2-box and a 3-box list."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    expected_iou = [[2.0 / 16.0, 0, 6.0 / 400.0],
+                    [1.0 / 16.0, 0.0, 5.0 / 400.0]]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    iou_op = box_list_ops.iou(first_boxlist, second_boxlist)
+    with self.test_session() as sess:
+      actual_iou = sess.run(iou_op)
+      self.assertAllClose(actual_iou, expected_iou)
+
+  def test_matched_iou(self):
+    """Checks the row-by-row IOU of two equally sized box lists."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0]])
+    expected_iou = [2.0 / 16.0, 0]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    iou_op = box_list_ops.matched_iou(first_boxlist, second_boxlist)
+    with self.test_session() as sess:
+      actual_iou = sess.run(iou_op)
+      self.assertAllClose(actual_iou, expected_iou)
+
+  def test_iouworks_on_empty_inputs(self):
+    """Checks the output shape of iou() when either or both inputs are empty."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    empty_boxlist = box_list.BoxList(tf.zeros((0, 4)))
+    iou_ops = [
+        box_list_ops.iou(first_boxlist, empty_boxlist),
+        box_list_ops.iou(empty_boxlist, second_boxlist),
+        box_list_ops.iou(empty_boxlist, empty_boxlist),
+    ]
+    with self.test_session() as sess:
+      second_empty, first_empty, both_empty = sess.run(iou_ops)
+      self.assertAllEqual(second_empty.shape, (2, 0))
+      self.assertAllEqual(first_empty.shape, (0, 3))
+      self.assertAllEqual(both_empty.shape, (0, 0))
+
+  def test_ioa(self):
+    """Checks the pairwise ioa() matrices for both argument orders."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    expected_first_vs_second = [[2.0 / 12.0, 0, 6.0 / 400.0],
+                                [1.0 / 12.0, 0.0, 5.0 / 400.0]]
+    expected_second_vs_first = [[2.0 / 6.0, 1.0 / 5.0],
+                                [0, 0],
+                                [6.0 / 6.0, 5.0 / 5.0]]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    first_vs_second_op = box_list_ops.ioa(first_boxlist, second_boxlist)
+    second_vs_first_op = box_list_ops.ioa(second_boxlist, first_boxlist)
+    with self.test_session() as sess:
+      actual_first_vs_second, actual_second_vs_first = sess.run(
+          [first_vs_second_op, second_vs_first_op])
+      self.assertAllClose(actual_first_vs_second, expected_first_vs_second)
+      self.assertAllClose(actual_second_vs_first, expected_second_vs_first)
+
+  def test_prune_non_overlapping_boxes(self):
+    """Checks prune_non_overlapping_boxes() for both argument orders.
+
+    With min_overlap 0.5, pruning the first list against the second is
+    expected to keep both boxes, and the reverse is expected to keep none.
+    """
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    overlap_threshold = 0.5
+
+    expected_first = first_boxlist
+    expected_second = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
+    pruned_first, kept_first = box_list_ops.prune_non_overlapping_boxes(
+        first_boxlist, second_boxlist, min_overlap=overlap_threshold)
+    pruned_second, kept_second = box_list_ops.prune_non_overlapping_boxes(
+        second_boxlist, first_boxlist, min_overlap=overlap_threshold)
+    with self.test_session() as sess:
+      fetches = [pruned_first.get(), kept_first,
+                 pruned_second.get(), kept_second,
+                 expected_first.get(), expected_second.get()]
+      (pruned_first_out, kept_first_out, pruned_second_out, kept_second_out,
+       expected_first_out, expected_second_out) = sess.run(fetches)
+      self.assertAllClose(pruned_first_out, expected_first_out)
+      self.assertAllClose(pruned_second_out, expected_second_out)
+      self.assertAllEqual(kept_first_out, [0, 1])
+      self.assertAllEqual(kept_second_out, [])
+
+  def test_prune_small_boxes(self):
+    """Checks which of five boxes prune_small_boxes() keeps at size 3."""
+    box_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
+                               [5.0, 6.0, 10.0, 7.0],
+                               [3.0, 4.0, 6.0, 8.0],
+                               [14.0, 14.0, 15.0, 15.0],
+                               [0.0, 0.0, 20.0, 20.0]])
+    expected_corners = [[3.0, 4.0, 6.0, 8.0],
+                        [0.0, 0.0, 20.0, 20.0]]
+    boxlist = box_list.BoxList(box_corners)
+    pruned_boxlist = box_list_ops.prune_small_boxes(boxlist, 3)
+    with self.test_session() as sess:
+      actual_corners = sess.run(pruned_boxlist.get())
+      self.assertAllEqual(actual_corners, expected_corners)
+
+  def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
+    """Checks prune_small_boxes() also drops boxes with a negative side."""
+    box_corners = tf.constant([[4.0, 3.0, 7.0, 5.0],
+                               [5.0, 6.0, 10.0, 7.0],
+                               [3.0, 4.0, 6.0, 8.0],
+                               [14.0, 14.0, 15.0, 15.0],
+                               [0.0, 0.0, 20.0, 20.0],
+                               [2.0, 3.0, 1.5, 7.0],  # negative height
+                               [2.0, 3.0, 5.0, 1.7]])  # negative width
+    expected_corners = [[3.0, 4.0, 6.0, 8.0],
+                        [0.0, 0.0, 20.0, 20.0]]
+    boxlist = box_list.BoxList(box_corners)
+    pruned_boxlist = box_list_ops.prune_small_boxes(boxlist, 3)
+    with self.test_session() as sess:
+      actual_corners = sess.run(pruned_boxlist.get())
+      self.assertAllEqual(actual_corners, expected_corners)
+
+  def test_change_coordinate_frame(self):
+    """Checks change_coordinate_frame() output for two boxes and one window."""
+    box_corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
+    frame_window = tf.constant([0.25, 0.25, 0.75, 0.75])
+    boxlist = box_list.BoxList(box_corners)
+
+    expected_boxlist = box_list.BoxList(
+        tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]]))
+    reframed_boxlist = box_list_ops.change_coordinate_frame(
+        boxlist, frame_window)
+
+    with self.test_session() as sess:
+      actual_corners, expected_corners = sess.run(
+          [reframed_boxlist.get(), expected_boxlist.get()])
+      self.assertAllClose(actual_corners, expected_corners)
+
+  def test_ioaworks_on_empty_inputs(self):
+    """Checks the output shape of ioa() when either or both inputs are empty."""
+    first_corners = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    second_corners = tf.constant([[3.0, 4.0, 6.0, 8.0],
+                                  [14.0, 14.0, 15.0, 15.0],
+                                  [0.0, 0.0, 20.0, 20.0]])
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    empty_boxlist = box_list.BoxList(tf.zeros((0, 4)))
+    ioa_ops = [
+        box_list_ops.ioa(first_boxlist, empty_boxlist),
+        box_list_ops.ioa(empty_boxlist, second_boxlist),
+        box_list_ops.ioa(empty_boxlist, empty_boxlist),
+    ]
+    with self.test_session() as sess:
+      second_empty, first_empty, both_empty = sess.run(ioa_ops)
+      self.assertAllEqual(second_empty.shape, (2, 0))
+      self.assertAllEqual(first_empty.shape, (0, 3))
+      self.assertAllEqual(both_empty.shape, (0, 0))
+
+  def test_pairwise_distances(self):
+    """Checks sq_dist() against hand-computed squared corner distances."""
+    first_corners = tf.constant([[0.0, 0.0, 0.0, 0.0],
+                                 [1.0, 1.0, 0.0, 2.0]])
+    second_corners = tf.constant([[3.0, 4.0, 1.0, 0.0],
+                                  [-4.0, 0.0, 0.0, 3.0],
+                                  [0.0, 0.0, 0.0, 0.0]])
+    expected_distances = [[26, 25, 0], [18, 27, 6]]
+    first_boxlist = box_list.BoxList(first_corners)
+    second_boxlist = box_list.BoxList(second_corners)
+    distance_op = box_list_ops.sq_dist(first_boxlist, second_boxlist)
+    with self.test_session() as sess:
+      actual_distances = sess.run(distance_op)
+      self.assertAllClose(actual_distances, expected_distances)
+
+  def test_boolean_mask(self):
+    """Checks boolean_mask() keeps exactly the boxes flagged True."""
+    box_corners = tf.constant(
+        [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+    keep_mask = tf.constant([True, False, True, False, True], tf.bool)
+    expected_corners = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+    boxlist = box_list.BoxList(box_corners)
+    masked_boxlist = box_list_ops.boolean_mask(boxlist, keep_mask)
+    with self.test_session() as sess:
+      actual_corners = sess.run(masked_boxlist.get())
+      self.assertAllClose(actual_corners, expected_corners)
+
+  def test_boolean_mask_with_field(self):
+    """Checks boolean_mask() masks the requested 'weights' field too."""
+    box_corners = tf.constant(
+        [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+    keep_mask = tf.constant([True, False, True, False, True], tf.bool)
+    box_weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
+    expected_corners = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+    expected_weights = [[.1], [.5], [.9]]
+
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('weights', box_weights)
+    masked_boxlist = box_list_ops.boolean_mask(boxlist, keep_mask, ['weights'])
+    with self.test_session() as sess:
+      actual_corners, actual_weights = sess.run(
+          [masked_boxlist.get(), masked_boxlist.get_field('weights')])
+      self.assertAllClose(actual_corners, expected_corners)
+      self.assertAllClose(actual_weights, expected_weights)
+
+  def test_gather(self):
+    """Checks gather() returns the boxes at the requested indices."""
+    box_corners = tf.constant(
+        [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+    gather_indices = tf.constant([0, 2, 4], tf.int32)
+    expected_corners = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+    boxlist = box_list.BoxList(box_corners)
+    gathered_boxlist = box_list_ops.gather(boxlist, gather_indices)
+    with self.test_session() as sess:
+      actual_corners = sess.run(gathered_boxlist.get())
+      self.assertAllClose(actual_corners, expected_corners)
+
+  def test_gather_with_field(self):
+    """Checks gather() gathers the requested 'weights' field too."""
+    box_corners = tf.constant(
+        [4*[0.0], 4*[1.0], 4*[2.0], 4*[3.0], 4*[4.0]])
+    gather_indices = tf.constant([0, 2, 4], tf.int32)
+    box_weights = tf.constant([[.1], [.3], [.5], [.7], [.9]], tf.float32)
+    expected_corners = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+    expected_weights = [[.1], [.5], [.9]]
+
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('weights', box_weights)
+    gathered_boxlist = box_list_ops.gather(
+        boxlist, gather_indices, ['weights'])
+    with self.test_session() as sess:
+      actual_corners, actual_weights = sess.run(
+          [gathered_boxlist.get(), gathered_boxlist.get_field('weights')])
+      self.assertAllClose(actual_corners, expected_corners)
+      self.assertAllClose(actual_weights, expected_weights)
+
+  def test_gather_with_invalid_field(self):
+    """Checks gather() raises ValueError for fields the BoxList lacks."""
+    box_corners = tf.constant([4 * [0.0], 4 * [1.0]])
+    gather_indices = tf.constant([0, 1], tf.int32)
+    box_weights = tf.constant([[.1], [.3]], tf.float32)
+
+    boxlist = box_list.BoxList(box_corners)
+    boxlist.add_field('weights', box_weights)
+    missing_fields = ['foo', 'bar']
+    with self.assertRaises(ValueError):
+      box_list_ops.gather(boxlist, gather_indices, missing_fields)
+
+  def test_gather_with_invalid_inputs(self):
+    corners = tf.constant(
+        [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+    indices_float32 = tf.constant([0, 2, 4], tf.float32)
+    boxes = box_list.BoxList(corners)
+    with self.assertRaises(ValueError):
+      _ = box_list_ops.gather(boxes, indices_float32)
+    indices_2d = tf.constant([[0, 2, 4]], tf.int32)
+    boxes = box_list.BoxList(corners)
+    with self.assertRaises(ValueError):
+      _ = box_list_ops.gather(boxes, indices_2d)
+
+  def test_gather_with_dynamic_indexing(self):
+    corners = tf.constant([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]
+                          ])
+    weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
+    indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
+    expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+    expected_weights = [.5, .7, .9]
+
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('weights', weights)
+    subset = box_list_ops.gather(boxes, indices, ['weights'])
+    with self.test_session() as sess:
+      subset_output, weights_output = sess.run([subset.get(), subset.get_field(
+          'weights')])
+      self.assertAllClose(subset_output, expected_subset)
+      self.assertAllClose(weights_output, expected_weights)
+
+  def test_sort_by_field_ascending_order(self):
+    exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                   [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    exp_scores = [.95, .9, .75, .6, .5, .3]
+    exp_weights = [.2, .45, .6, .75, .8, .92]
+    shuffle = [2, 4, 0, 5, 1, 3]
+    corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant(
+        [exp_scores[i] for i in shuffle], tf.float32))
+    boxes.add_field('weights', tf.constant(
+        [exp_weights[i] for i in shuffle], tf.float32))
+    sort_by_weight = box_list_ops.sort_by_field(
+        boxes,
+        'weights',
+        order=box_list_ops.SortOrder.ascend)
+    with self.test_session() as sess:
+      corners_out, scores_out, weights_out = sess.run([
+          sort_by_weight.get(),
+          sort_by_weight.get_field('scores'),
+          sort_by_weight.get_field('weights')])
+      self.assertAllClose(corners_out, exp_corners)
+      self.assertAllClose(scores_out, exp_scores)
+      self.assertAllClose(weights_out, exp_weights)
+
+  def test_sort_by_field_descending_order(self):
+    exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+                   [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+    exp_scores = [.95, .9, .75, .6, .5, .3]
+    exp_weights = [.2, .45, .6, .75, .8, .92]
+    shuffle = [2, 4, 0, 5, 1, 3]
+
+    corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant(
+        [exp_scores[i] for i in shuffle], tf.float32))
+    boxes.add_field('weights', tf.constant(
+        [exp_weights[i] for i in shuffle], tf.float32))
+
+    sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
+    with self.test_session() as sess:
+      corners_out, scores_out, weights_out = sess.run([sort_by_score.get(
+      ), sort_by_score.get_field('scores'), sort_by_score.get_field('weights')])
+      self.assertAllClose(corners_out, exp_corners)
+      self.assertAllClose(scores_out, exp_scores)
+      self.assertAllClose(weights_out, exp_weights)
+
+  def test_sort_by_field_invalid_inputs(self):
+    corners = tf.constant([4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 *
+                           [3.0], 4 * [4.0]])
+    misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
+    weights = tf.constant([.1, .2], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('misc', misc)
+    boxes.add_field('weights', weights)
+
+    with self.test_session() as sess:
+      with self.assertRaises(ValueError):
+        box_list_ops.sort_by_field(boxes, 'area')
+
+      with self.assertRaises(ValueError):
+        box_list_ops.sort_by_field(boxes, 'misc')
+
+      with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
+                                               'Incorrect field size'):
+        sess.run(box_list_ops.sort_by_field(boxes, 'weights').get())
+
+  def test_visualize_boxes_in_image(self):
+    image = tf.zeros((6, 4, 3))
+    corners = tf.constant([[0, 0, 5, 3],
+                           [0, 0, 3, 2]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
+    image_and_boxes_bw = tf.to_float(
+        tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
+    exp_result = [[1, 1, 1, 0],
+                  [1, 1, 1, 0],
+                  [1, 1, 1, 0],
+                  [1, 0, 1, 0],
+                  [1, 1, 1, 0],
+                  [0, 0, 0, 0]]
+    with self.test_session() as sess:
+      output = sess.run(image_and_boxes_bw)
+      self.assertAllEqual(output.astype(int), exp_result)
+
+  def test_filter_field_value_equals(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
+    exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
+    exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
+
+    filtered_boxes1 = box_list_ops.filter_field_value_equals(
+        boxes, 'classes', 1)
+    filtered_boxes2 = box_list_ops.filter_field_value_equals(
+        boxes, 'classes', 2)
+    with self.test_session() as sess:
+      filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
+                                                     filtered_boxes2.get()])
+      self.assertAllClose(filtered_output1, exp_output1)
+      self.assertAllClose(filtered_output2, exp_output2)
+
+  def test_filter_greater_than(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
+    thresh = .6
+    exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
+
+    filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
+    with self.test_session() as sess:
+      filtered_output = sess.run(filtered_boxes.get())
+      self.assertAllClose(filtered_output, exp_output)
+
+  def test_clip_box_list(self):
+    boxlist = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+                     [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
+    boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
+    boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
+    num_boxes = 2
+    clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
+
+    expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+    expected_classes = [0, 0]
+    expected_scores = [0.75, 0.65]
+    with self.test_session() as sess:
+      boxes_out, classes_out, scores_out = sess.run(
+          [clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
+           clipped_boxlist.get_field('scores')])
+
+      self.assertAllClose(expected_boxes, boxes_out)
+      self.assertAllEqual(expected_classes, classes_out)
+      self.assertAllClose(expected_scores, scores_out)
+
+  def test_pad_box_list(self):
+    boxlist = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+    boxlist.add_field('classes', tf.constant([0, 1]))
+    boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+    num_boxes = 4
+    padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
+
+    expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+                      [0, 0, 0, 0], [0, 0, 0, 0]]
+    expected_classes = [0, 1, 0, 0]
+    expected_scores = [0.75, 0.2, 0, 0]
+    with self.test_session() as sess:
+      boxes_out, classes_out, scores_out = sess.run(
+          [padded_boxlist.get(), padded_boxlist.get_field('classes'),
+           padded_boxlist.get_field('scores')])
+
+      self.assertAllClose(expected_boxes, boxes_out)
+      self.assertAllEqual(expected_classes, classes_out)
+      self.assertAllClose(expected_scores, scores_out)
+
+
+class ConcatenateTest(tf.test.TestCase):
+
+  def test_invalid_input_box_list_list(self):
+    with self.assertRaises(ValueError):
+      box_list_ops.concatenate(None)
+    with self.assertRaises(ValueError):
+      box_list_ops.concatenate([])
+    with self.assertRaises(ValueError):
+      corners = tf.constant([[0, 0, 0, 0]], tf.float32)
+      boxlist = box_list.BoxList(corners)
+      box_list_ops.concatenate([boxlist, 2])
+
+  def test_concatenate_with_missing_fields(self):
+    corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+    scores1 = tf.constant([1.0, 2.1])
+    corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
+    boxlist1 = box_list.BoxList(corners1)
+    boxlist1.add_field('scores', scores1)
+    boxlist2 = box_list.BoxList(corners2)
+    with self.assertRaises(ValueError):
+      box_list_ops.concatenate([boxlist1, boxlist2])
+
+  def test_concatenate_with_incompatible_field_shapes(self):
+    corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+    scores1 = tf.constant([1.0, 2.1])
+    corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
+    scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
+    boxlist1 = box_list.BoxList(corners1)
+    boxlist1.add_field('scores', scores1)
+    boxlist2 = box_list.BoxList(corners2)
+    boxlist2.add_field('scores', scores2)
+    with self.assertRaises(ValueError):
+      box_list_ops.concatenate([boxlist1, boxlist2])
+
+  def test_concatenate_is_correct(self):
+    corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+    scores1 = tf.constant([1.0, 2.1])
+    corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
+                           tf.float32)
+    scores2 = tf.constant([1.0, 2.1, 5.6])
+
+    exp_corners = [[0, 0, 0, 0],
+                   [1, 2, 3, 4],
+                   [0, 3, 1, 6],
+                   [2, 4, 3, 8],
+                   [1, 0, 5, 10]]
+    exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
+
+    boxlist1 = box_list.BoxList(corners1)
+    boxlist1.add_field('scores', scores1)
+    boxlist2 = box_list.BoxList(corners2)
+    boxlist2.add_field('scores', scores2)
+    result = box_list_ops.concatenate([boxlist1, boxlist2])
+    with self.test_session() as sess:
+      corners_output, scores_output = sess.run(
+          [result.get(), result.get_field('scores')])
+      self.assertAllClose(corners_output, exp_corners)
+      self.assertAllClose(scores_output, exp_scores)
+
+
+class NonMaxSuppressionTest(tf.test.TestCase):
+
+  def test_with_invalid_scores_field(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
+    iou_thresh = .5
+    max_output_size = 3
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      with self.assertRaisesWithPredicateMatch(
+          errors.InvalidArgumentError, 'scores has incompatible shape'):
+        sess.run(nms.get())
+
+  def test_select_from_three_clusters(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+    iou_thresh = .5
+    max_output_size = 3
+
+    exp_nms = [[0, 10, 1, 11],
+               [0, 0, 1, 1],
+               [0, 100, 1, 101]]
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_select_at_most_two_boxes_from_three_clusters(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+    iou_thresh = .5
+    max_output_size = 2
+
+    exp_nms = [[0, 10, 1, 11],
+               [0, 0, 1, 1]]
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_select_at_most_thirty_boxes_from_three_clusters(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1],
+                           [0, -0.1, 1, 0.9],
+                           [0, 10, 1, 11],
+                           [0, 10.1, 1, 11.1],
+                           [0, 100, 1, 101]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+    iou_thresh = .5
+    max_output_size = 30
+
+    exp_nms = [[0, 10, 1, 11],
+               [0, 0, 1, 1],
+               [0, 100, 1, 101]]
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_select_single_box(self):
+    corners = tf.constant([[0, 0, 1, 1]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant([.9]))
+    iou_thresh = .5
+    max_output_size = 3
+
+    exp_nms = [[0, 0, 1, 1]]
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_select_from_ten_identical_boxes(self):
+    corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    boxes.add_field('scores', tf.constant(10 * [.9]))
+    iou_thresh = .5
+    max_output_size = 3
+
+    exp_nms = [[0, 0, 1, 1]]
+    nms = box_list_ops.non_max_suppression(
+        boxes, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_copy_extra_fields(self):
+    corners = tf.constant([[0, 0, 1, 1],
+                           [0, 0.1, 1, 1.1]], tf.float32)
+    boxes = box_list.BoxList(corners)
+    tensor1 = np.array([[1], [4]])
+    tensor2 = np.array([[1, 1], [2, 2]])
+    boxes.add_field('tensor1', tf.constant(tensor1))
+    boxes.add_field('tensor2', tf.constant(tensor2))
+    new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
+                                              [1, 3, 5, 5]], tf.float32))
+    new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
+    with self.test_session() as sess:
+      self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
+      self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
+
+
+class CoordinatesConversionTest(tf.test.TestCase):
+
+  def test_to_normalized_coordinates(self):
+    coordinates = tf.constant([[0, 0, 100, 100],
+                               [25, 25, 75, 75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    normalized_boxlist = box_list_ops.to_normalized_coordinates(
+        boxlist, tf.shape(img)[1], tf.shape(img)[2])
+    expected_boxes = [[0, 0, 1, 1],
+                      [0.25, 0.25, 0.75, 0.75]]
+
+    with self.test_session() as sess:
+      normalized_boxes = sess.run(normalized_boxlist.get())
+      self.assertAllClose(normalized_boxes, expected_boxes)
+
+  def test_to_normalized_coordinates_already_normalized(self):
+    coordinates = tf.constant([[0, 0, 1, 1],
+                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    normalized_boxlist = box_list_ops.to_normalized_coordinates(
+        boxlist, tf.shape(img)[1], tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(normalized_boxlist.get())
+
+  def test_to_absolute_coordinates(self):
+    coordinates = tf.constant([[0, 0, 1, 1],
+                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                            tf.shape(img)[1],
+                                                            tf.shape(img)[2])
+    expected_boxes = [[0, 0, 100, 100],
+                      [25, 25, 75, 75]]
+
+    with self.test_session() as sess:
+      absolute_boxes = sess.run(absolute_boxlist.get())
+      self.assertAllClose(absolute_boxes, expected_boxes)
+
+  def test_to_absolute_coordinates_already_abolute(self):
+    coordinates = tf.constant([[0, 0, 100, 100],
+                               [25, 25, 75, 75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                            tf.shape(img)[1],
+                                                            tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(absolute_boxlist.get())
+
+  def test_convert_to_normalized_and_back(self):
+    coordinates = np.random.uniform(size=(100, 4))
+    coordinates = np.round(np.sort(coordinates) * 200)
+    coordinates[:, 2:4] += 1
+    coordinates[99, :] = [0, 0, 201, 201]
+    img = tf.ones((128, 202, 202, 3))
+
+    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(img)[1],
+                                                     tf.shape(img)[2])
+    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                   tf.shape(img)[1],
+                                                   tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      out = sess.run(boxlist.get())
+      self.assertAllClose(out, coordinates)
+
+  def test_convert_to_absolute_and_back(self):
+    coordinates = np.random.uniform(size=(100, 4))
+    coordinates = np.sort(coordinates)
+    coordinates[99, :] = [0, 0, 1, 1]
+    img = tf.ones((128, 202, 202, 3))
+
+    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
+    boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+                                                   tf.shape(img)[1],
+                                                   tf.shape(img)[2])
+    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+                                                     tf.shape(img)[1],
+                                                     tf.shape(img)[2])
+
+    with self.test_session() as sess:
+      out = sess.run(boxlist.get())
+      self.assertAllClose(out, coordinates)
+
+
+class BoxRefinementTest(tf.test.TestCase):
+
+  def test_box_voting(self):
+    candidates = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
+    candidates.add_field('ExtraField', tf.constant([1, 2]))
+    pool = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+                     [0.6, 0.6, 0.8, 0.8]], tf.float32))
+    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
+    averaged_boxes = box_list_ops.box_voting(candidates, pool)
+    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
+    expected_scores = [0.5, 0.3]
+    with self.test_session() as sess:
+      boxes_out, scores_out, extra_field_out = sess.run(
+          [averaged_boxes.get(), averaged_boxes.get_field('scores'),
+           averaged_boxes.get_field('ExtraField')])
+
+      self.assertAllClose(expected_boxes, boxes_out)
+      self.assertAllClose(expected_scores, scores_out)
+      self.assertAllEqual(extra_field_out, [1, 2])
+
+  def test_box_voting_fails_with_negative_scores(self):
+    candidates = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+    pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+    pool.add_field('scores', tf.constant([-0.2]))
+    averaged_boxes = box_list_ops.box_voting(candidates, pool)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('Scores must be non negative'):
+        sess.run([averaged_boxes.get()])
+
+  def test_box_voting_fails_when_unmatched(self):
+    candidates = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+    pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
+    pool.add_field('scores', tf.constant([0.2]))
+    averaged_boxes = box_list_ops.box_voting(candidates, pool)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('Each box in selected_boxes must match '
+                                    'with at least one box in pool_boxes.'):
+        sess.run([averaged_boxes.get()])
+
+  def test_refine_boxes(self):
+    pool = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+                     [0.6, 0.6, 0.8, 0.8]], tf.float32))
+    pool.add_field('ExtraField', tf.constant([1, 2, 3]))
+    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
+    refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
+
+    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
+    expected_scores = [0.5, 0.3]
+    with self.test_session() as sess:
+      boxes_out, scores_out, extra_field_out = sess.run(
+          [refined_boxes.get(), refined_boxes.get_field('scores'),
+           refined_boxes.get_field('ExtraField')])
+
+      self.assertAllClose(expected_boxes, boxes_out)
+      self.assertAllClose(expected_scores, scores_out)
+      self.assertAllEqual(extra_field_out, [1, 3])
+
+  def test_refine_boxes_multi_class(self):
+    pool = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+                     [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
+    pool.add_field('classes', tf.constant([0, 0, 1, 1]))
+    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
+    refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
+
+    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
+                      [0.2, 0.2, 0.3, 0.3]]
+    expected_scores = [0.5, 0.3, 0.2]
+    with self.test_session() as sess:
+      boxes_out, scores_out, extra_field_out = sess.run(
+          [refined_boxes.get(), refined_boxes.get_field('scores'),
+           refined_boxes.get_field('classes')])
+
+      self.assertAllClose(expected_boxes, boxes_out)
+      self.assertAllClose(expected_scores, scores_out)
+      self.assertAllEqual(extra_field_out, [0, 1, 1])
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/box_list_test.py
+++ b/object_detection/core/box_list_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_list."""
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+
+
+class BoxListTest(tf.test.TestCase):
+  """Tests for BoxList class."""
+
+  def test_num_boxes(self):
+    data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
+    expected_num_boxes = 3
+
+    boxes = box_list.BoxList(data)
+    with self.test_session() as sess:
+      num_boxes_output = sess.run(boxes.num_boxes())
+      self.assertEquals(num_boxes_output, expected_num_boxes)
+
+  def test_get_correct_center_coordinates_and_sizes(self):
+    boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    centers_sizes = boxes.get_center_coordinates_and_sizes()
+    expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
+    with self.test_session() as sess:
+      centers_sizes_out = sess.run(centers_sizes)
+      self.assertAllClose(centers_sizes_out, expected_centers_sizes)
+
+  def test_create_box_list_with_dynamic_shape(self):
+    data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
+    indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
+    data = tf.gather(data, indices)
+    assert data.get_shape().as_list() == [None, 4]
+    expected_num_boxes = 2
+
+    boxes = box_list.BoxList(data)
+    with self.test_session() as sess:
+      num_boxes_output = sess.run(boxes.num_boxes())
+      self.assertEquals(num_boxes_output, expected_num_boxes)
+
+  def test_transpose_coordinates(self):
+    boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    boxes.transpose_coordinates()
+    expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
+    with self.test_session() as sess:
+      corners_out = sess.run(boxes.get())
+      self.assertAllClose(corners_out, expected_corners)
+
+  def test_box_list_invalid_inputs(self):
+    data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
+    data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
+    data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
+
+    with self.assertRaises(ValueError):
+      _ = box_list.BoxList(data0)
+    with self.assertRaises(ValueError):
+      _ = box_list.BoxList(data1)
+    with self.assertRaises(ValueError):
+      _ = box_list.BoxList(data2)
+
+  def test_num_boxes_static(self):
+    box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+    self.assertEquals(boxes.num_boxes_static(), 2)
+    self.assertEquals(type(boxes.num_boxes_static()), int)
+
+  def test_num_boxes_static_for_uninferrable_shape(self):
+    placeholder = tf.placeholder(tf.float32, shape=[None, 4])
+    boxes = box_list.BoxList(placeholder)
+    self.assertEquals(boxes.num_boxes_static(), None)
+
+  def test_as_tensor_dict(self):
+    boxlist = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+    boxlist.add_field('classes', tf.constant([0, 1]))
+    boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+    tensor_dict = boxlist.as_tensor_dict()
+
+    expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+    expected_classes = [0, 1]
+    expected_scores = [0.75, 0.2]
+
+    with self.test_session() as sess:
+      tensor_dict_out = sess.run(tensor_dict)
+      self.assertAllEqual(3, len(tensor_dict_out))
+      self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
+      self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
+      self.assertAllClose(expected_scores, tensor_dict_out['scores'])
+
+  def test_as_tensor_dict_with_features(self):
+    boxlist = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+    boxlist.add_field('classes', tf.constant([0, 1]))
+    boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+    tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
+
+    expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+    expected_classes = [0, 1]
+    expected_scores = [0.75, 0.2]
+
+    with self.test_session() as sess:
+      tensor_dict_out = sess.run(tensor_dict)
+      self.assertAllEqual(3, len(tensor_dict_out))
+      self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
+      self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
+      self.assertAllClose(expected_scores, tensor_dict_out['scores'])
+
+  def test_as_tensor_dict_missing_field(self):
+    boxlist = box_list.BoxList(
+        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+    boxlist.add_field('classes', tf.constant([0, 1]))
+    boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+    with self.assertRaises(ValueError):
+      boxlist.as_tensor_dict(['foo', 'bar'])
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/box_predictor.py
+++ b/object_detection/core/box_predictor.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Box predictor for object detectors.
+
+Box predictors are classes that take a high level
+image feature map as input and produce two predictions,
+(1) a tensor encoding box locations, and
+(2) a tensor encoding classes for each box.
+
+These components are passed directly to loss functions
+in our detection models.
+
+These modules are separated from the main model since the same
+few box predictor architectures are shared across many models.
+"""
+from abc import abstractmethod
+import tensorflow as tf
+from object_detection.utils import ops
+from object_detection.utils import static_shape
+
+# Short module-level alias for the TF-Slim library.
+slim = tf.contrib.slim
+
+# Names of the prediction tensors a box predictor produces.
+# NOTE(review): presumably these are the keys of the dictionary returned by
+# BoxPredictor.predict -- confirm against the concrete implementations.
+BOX_ENCODINGS = 'box_encodings'
+CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
+MASK_PREDICTIONS = 'mask_predictions'
+
+
+class BoxPredictor(object):
+  """Common base for the box predictors defined in this module.
+
+  Holds the training-mode flag and the class count, and exposes `predict`,
+  which opens a variable scope and hands off to the `_predict` hook that
+  subclasses supply.
+
+  NOTE: this class is declared without an ABCMeta metaclass, so the
+  `@abstractmethod` marker on `_predict` documents intent only; Python does
+  not enforce it at instantiation time.
+  """
+
+  def __init__(self, is_training, num_classes):
+    """Stores the configuration shared by every predictor.
+
+    Args:
+      is_training: Whether the BoxPredictor operates in training mode.
+      num_classes: Number of object classes, *excluding* the background
+        category.  With groundtruth labels in {0, 1, .., K-1} this is K (and
+        not K+1), even though the assigned classification targets can range
+        over {0, ..., K}.
+    """
+    self._num_classes = num_classes
+    self._is_training = is_training
+
+  @property
+  def num_classes(self):
+    """The class count given at construction (background not included)."""
+    return self._num_classes
+
+  def predict(self, image_features, num_predictions_per_location, scope,
+              **params):
+    """Builds the prediction tensors inside the variable scope `scope`.
+
+    Takes a high level image feature map as input and produces two
+    predictions:
+    (1) a tensor encoding box locations, and
+    (2) a tensor encoding class scores for each corresponding box.
+    This interface only assumes that these two tensors are returned; it does
+    not assume anything about their shapes.
+
+    Args:
+      image_features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: an integer representing the number of box
+        predictions to be made per spatial location in the feature map.
+      scope: Variable and Op scope name.
+      **params: Additional keyword arguments, forwarded unchanged to the
+        subclass implementation of `_predict`.
+
+    Returns:
+      A dictionary containing at least the following tensors.
+        box_encodings: A float tensor of shape
+          [batch_size, num_anchors, q, code_size] representing the location of
+          the objects, where q is 1 or the number of classes.
+        class_predictions_with_background: A float tensor of shape
+          [batch_size, num_anchors, num_classes + 1] representing the class
+          predictions for the proposals.
+    """
+    with tf.variable_scope(scope):
+      predictions = self._predict(
+          image_features, num_predictions_per_location, **params)
+    return predictions
+
+  # TODO: num_predictions_per_location could be moved to constructor.
+  # This is currently only used by ConvolutionalBoxPredictor.
+  @abstractmethod
+  def _predict(self, image_features, num_predictions_per_location, **params):
+    """Subclass hook that actually builds the predictions.
+
+    Implementations must override this method; the base version does nothing.
+
+    Args:
+      image_features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: an integer representing the number of box
+        predictions to be made per spatial location in the feature map.
+      **params: Additional keyword arguments for specific implementations of
+        BoxPredictor.
+
+    Returns:
+      A dictionary containing at least the following tensors.
+        box_encodings: A float tensor of shape
+          [batch_size, num_anchors, q, code_size] representing the location of
+          the objects, where q is 1 or the number of classes.
+        class_predictions_with_background: A float tensor of shape
+          [batch_size, num_anchors, num_classes + 1] representing the class
+          predictions for the proposals.
+    """
+    return None
+
+
+class RfcnBoxPredictor(BoxPredictor):
+  """RFCN Box Predictor.
+
+  Applies a position sensitive ROI pooling on position sensitive feature maps
+  to predict classes and refined locations. See
+  https://arxiv.org/abs/1605.06409 for details.
+
+  This is used for the second stage of the RFCN meta architecture. Notice that
+  locations are *not* shared across classes, thus for each anchor, a separate
+  prediction is made for each class.
+  """
+
+  def __init__(self,
+               is_training,
+               num_classes,
+               conv_hyperparams,
+               num_spatial_bins,
+               depth,
+               crop_size,
+               box_code_size):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_classes: number of classes.  Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0,... K}).
+      conv_hyperparams: Slim arg_scope with hyperparameters for convolutional
+        layers.
+      num_spatial_bins: A list of two integers `[spatial_bins_y,
+        spatial_bins_x]`.
+      depth: Target depth to reduce the input feature maps to.
+      crop_size: A list of two integers `[crop_height, crop_width]`.
+      box_code_size: Size of encoding for each box.
+    """
+    super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
+    self._conv_hyperparams = conv_hyperparams
+    self._num_spatial_bins = num_spatial_bins
+    self._depth = depth
+    self._crop_size = crop_size
+    self._box_code_size = box_code_size
+
+  @property
+  def num_classes(self):
+    """Number of classes, not counting the background category."""
+    return self._num_classes
+
+  def _predict(self, image_features, num_predictions_per_location,
+               proposal_boxes):
+    """Computes encoded object locations and corresponding confidences.
+
+    The proposals of all images are flattened into a single leading
+    dimension, so the outputs have batch_size * num_proposals rows.
+
+    Args:
+      image_features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: an integer representing the number of box
+        predictions to be made per spatial location in the feature map.
+        Currently, this must be set to 1, or an error will be raised.
+      proposal_boxes: A float tensor of shape [batch_size, num_proposals,
+        box_code_size].
+
+    Returns:
+      A dictionary containing the following tensors.
+        box_encodings: A float tensor of shape
+          [batch_size * num_proposals, 1, num_classes, code_size]
+          representing the location of the objects.
+        class_predictions_with_background: A float tensor of shape
+          [batch_size * num_proposals, 1, num_classes + 1] representing the
+          class predictions for the proposals.
+
+    Raises:
+      ValueError: if num_predictions_per_location is not 1.
+    """
+    if num_predictions_per_location != 1:
+      raise ValueError('Currently RfcnBoxPredictor only supports '
+                       'predicting a single box per class per location.')
+
+    batch_size = tf.shape(proposal_boxes)[0]
+    num_boxes = tf.shape(proposal_boxes)[1]
+    def get_box_indices(proposals):
+      """Returns, for every flattened proposal, the index of its image."""
+      proposals_shape = proposals.get_shape().as_list()
+      # Fall back to the dynamic shape when any dimension is unknown
+      # statically.
+      if any(dim is None for dim in proposals_shape):
+        proposals_shape = tf.shape(proposals)
+      # Row i of (ones * [[0], [1], ...]) is filled with i, so flattening
+      # yields the image index of each proposal in row-major order.
+      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
+      multiplier = tf.expand_dims(
+          tf.range(start=0, limit=proposals_shape[0]), 1)
+      return tf.reshape(ones_mat * multiplier, [-1])
+
+    # Both prediction heads crop with the same boxes, so the flattened boxes,
+    # their image indices and the output row count are built only once.
+    flattened_boxes = tf.reshape(proposal_boxes, [-1, self._box_code_size])
+    box_indices = get_box_indices(proposal_boxes)
+    total_boxes = batch_size * num_boxes
+
+    net = image_features
+    with slim.arg_scope(self._conv_hyperparams):
+      net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
+      # Location predictions.
+      location_feature_map_depth = (self._num_spatial_bins[0] *
+                                    self._num_spatial_bins[1] *
+                                    self.num_classes *
+                                    self._box_code_size)
+      location_feature_map = slim.conv2d(net, location_feature_map_depth,
+                                         [1, 1], activation_fn=None,
+                                         scope='refined_locations')
+      box_encodings = ops.position_sensitive_crop_regions(
+          location_feature_map,
+          boxes=flattened_boxes,
+          box_ind=box_indices,
+          crop_size=self._crop_size,
+          num_spatial_bins=self._num_spatial_bins,
+          global_pool=True)
+      # Drop the two (presumably size-1 after global pooling) spatial axes.
+      box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
+      box_encodings = tf.reshape(box_encodings,
+                                 [total_boxes, 1, self.num_classes,
+                                  self._box_code_size])
+
+      # Class predictions.
+      total_classes = self.num_classes + 1  # Account for background class.
+      class_feature_map_depth = (self._num_spatial_bins[0] *
+                                 self._num_spatial_bins[1] *
+                                 total_classes)
+      class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
+                                      activation_fn=None,
+                                      scope='class_predictions')
+      class_predictions_with_background = ops.position_sensitive_crop_regions(
+          class_feature_map,
+          boxes=flattened_boxes,
+          box_ind=box_indices,
+          crop_size=self._crop_size,
+          num_spatial_bins=self._num_spatial_bins,
+          global_pool=True)
+      class_predictions_with_background = tf.squeeze(
+          class_predictions_with_background, squeeze_dims=[1, 2])
+      class_predictions_with_background = tf.reshape(
+          class_predictions_with_background,
+          [total_boxes, 1, total_classes])
+
+    return {BOX_ENCODINGS: box_encodings,
+            CLASS_PREDICTIONS_WITH_BACKGROUND:
+            class_predictions_with_background}
+
+
+class MaskRCNNBoxPredictor(BoxPredictor):
+  """Mask R-CNN Box Predictor.
+
+  See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
+  Mask R-CNN. arXiv preprint arXiv:1703.06870.
+
+  This is used for the second stage of the Mask R-CNN detector where proposals
+  cropped from an image are arranged along the batch dimension of the input
+  image_features tensor. Notice that locations are *not* shared across classes,
+  thus for each anchor, a separate prediction is made for each class.
+
+  In addition to predicting boxes and classes, this class can optionally
+  predict instance masks inside detection boxes.  Keypoint prediction is
+  accepted as an option but is not implemented yet (the constructor raises).
+
+  Currently this box predictor makes per-class predictions; that is, each
+  anchor makes a separate box prediction for each class.
+  """
+
+  def __init__(self,
+               is_training,
+               num_classes,
+               fc_hyperparams,
+               use_dropout,
+               dropout_keep_prob,
+               box_code_size,
+               conv_hyperparams=None,
+               predict_instance_masks=False,
+               mask_prediction_conv_depth=256,
+               predict_keypoints=False):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_classes: number of classes.  Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0,... K}).
+      fc_hyperparams: Slim arg_scope with hyperparameters for fully
+        connected ops.
+      use_dropout: Option to use dropout or not.  Note that a single dropout
+        op is applied here prior to both box and class predictions, which stands
+        in contrast to the ConvolutionalBoxPredictor below.
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      box_code_size: Size of encoding for each box.
+      conv_hyperparams: Slim arg_scope with hyperparameters for convolution
+        ops.  Required when predict_instance_masks is True.
+      predict_instance_masks: Whether to predict object masks inside detection
+        boxes.
+      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
+        applied to the image_features in the mask prediction branch.
+      predict_keypoints: Whether to predict keypoints inside detection boxes.
+        Not implemented yet; must be False.
+
+    Raises:
+      ValueError: If predict_keypoints is True (unimplemented), or if
+        predict_instance_masks is True but conv_hyperparams is None.
+    """
+    super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
+    self._fc_hyperparams = fc_hyperparams
+    self._use_dropout = use_dropout
+    self._box_code_size = box_code_size
+    self._dropout_keep_prob = dropout_keep_prob
+    self._conv_hyperparams = conv_hyperparams
+    self._predict_instance_masks = predict_instance_masks
+    self._mask_prediction_conv_depth = mask_prediction_conv_depth
+    self._predict_keypoints = predict_keypoints
+    if self._predict_keypoints:
+      raise ValueError('Keypoint prediction is unimplemented.')
+    # Keypoints were rejected above, so only the mask branch can still need
+    # the convolution hyperparameters.
+    if self._predict_instance_masks and self._conv_hyperparams is None:
+      raise ValueError('`conv_hyperparams` must be provided when predicting '
+                       'masks.')
+
+  @property
+  def num_classes(self):
+    """Number of classes, not counting the background category."""
+    return self._num_classes
+
+  def _predict(self, image_features, num_predictions_per_location):
+    """Computes encoded object locations and corresponding confidences.
+
+    Averages image_features over the spatial dimensions, flattens the result
+    and applies fully connected ops (with no non-linearity) to predict box
+    encodings and class predictions.  In this setting, anchors are not
+    spatially arranged in any way and are assumed to have been folded into the
+    batch dimension.  Thus we output 1 for the anchors dimension.
+
+    Args:
+      image_features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: an integer representing the number of box
+        predictions to be made per spatial location in the feature map.
+        Currently, this must be set to 1, or an error will be raised.
+
+    Returns:
+      A dictionary containing the following tensors.
+        box_encodings: A float tensor of shape
+          [batch_size, 1, num_classes, code_size] representing the
+          location of the objects.
+        class_predictions_with_background: A float tensor of shape
+          [batch_size, 1, num_classes + 1] representing the class
+          predictions for the proposals.
+      If predict_instance_masks is True the dictionary also contains:
+        mask_predictions: A float tensor of shape
+          [batch_size, 1, num_classes, mask_height, mask_width], where the
+          mask size is whatever the stride-2 transposed convolution produces
+          from image_features (presumably twice its height and width with
+          slim's default padding).
+
+    Raises:
+      ValueError: if num_predictions_per_location is not 1.
+    """
+    if num_predictions_per_location != 1:
+      raise ValueError('Currently MaskRCNNBoxPredictor only supports '
+                       'predicting a single box per class per location.')
+    spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
+                                                     keep_dims=True,
+                                                     name='AvgPool')
+    flattened_image_features = slim.flatten(spatial_averaged_image_features)
+    # A single dropout op feeds both the box and the class heads.
+    if self._use_dropout:
+      flattened_image_features = slim.dropout(flattened_image_features,
+                                              keep_prob=self._dropout_keep_prob,
+                                              is_training=self._is_training)
+    with slim.arg_scope(self._fc_hyperparams):
+      box_encodings = slim.fully_connected(
+          flattened_image_features,
+          self._num_classes * self._box_code_size,
+          activation_fn=None,
+          scope='BoxEncodingPredictor')
+      class_predictions_with_background = slim.fully_connected(
+          flattened_image_features,
+          self._num_classes + 1,
+          activation_fn=None,
+          scope='ClassPredictor')
+    # Insert the size-1 anchors dimension expected by callers.
+    box_encodings = tf.reshape(
+        box_encodings, [-1, 1, self._num_classes, self._box_code_size])
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background, [-1, 1, self._num_classes + 1])
+
+    predictions_dict = {
+        BOX_ENCODINGS: box_encodings,
+        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background
+    }
+
+    if self._predict_instance_masks:
+      with slim.arg_scope(self._conv_hyperparams):
+        upsampled_features = slim.conv2d_transpose(
+            image_features,
+            num_outputs=self._mask_prediction_conv_depth,
+            kernel_size=[2, 2],
+            stride=2)
+        mask_predictions = slim.conv2d(upsampled_features,
+                                       num_outputs=self.num_classes,
+                                       activation_fn=None,
+                                       kernel_size=[1, 1])
+        # Move the per-class channel axis ahead of the spatial axes, then add
+        # the size-1 anchors dimension.
+        instance_masks = tf.expand_dims(tf.transpose(mask_predictions,
+                                                     perm=[0, 3, 1, 2]),
+                                        axis=1,
+                                        name='MaskPredictor')
+      predictions_dict[MASK_PREDICTIONS] = instance_masks
+    return predictions_dict
+
+
+class ConvolutionalBoxPredictor(BoxPredictor):
+  """Convolutional Box Predictor.
+
+  Optionally add an intermediate 1x1 convolutional layer after features and
+  predict in parallel branches box_encodings and
+  class_predictions_with_background.
+
+  Currently this box predictor assumes that predictions are "shared" across
+  classes --- that is each anchor makes box predictions which do not depend
+  on class.
+  """
+
+  def __init__(self,
+               is_training,
+               num_classes,
+               conv_hyperparams,
+               min_depth,
+               max_depth,
+               num_layers_before_predictor,
+               use_dropout,
+               dropout_keep_prob,
+               kernel_size,
+               box_code_size,
+               apply_sigmoid_to_scores=False):
+    """Constructor.
+
+    Args:
+      is_training: Indicates whether the BoxPredictor is in training mode.
+      num_classes: number of classes.  Note that num_classes *does not*
+        include the background category, so if groundtruth labels take values
+        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+        assigned classification targets can range from {0,... K}).
+      conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
+      min_depth: Minimum feature depth prior to predicting box encodings
+        and class predictions.
+      max_depth: Maximum feature depth prior to predicting box encodings
+        and class predictions. If max_depth is set to 0, no additional
+        feature map will be inserted before location and class predictions.
+      num_layers_before_predictor: Number of the additional conv layers before
+        the predictor.
+      use_dropout: Option to use dropout for class prediction or not.
+      dropout_keep_prob: Keep probability for dropout.
+        This is only used if use_dropout is True.
+      kernel_size: Size of final convolution kernel.
+        NOTE(review): earlier documentation stated that the kernel size is
+        automatically reduced to min(feature_width, feature_height) for small
+        feature maps; this class passes kernel_size to the convolutions
+        unchanged, so any such clamping must happen elsewhere -- confirm.
+      box_code_size: Size of encoding for each box.
+      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
+        class_predictions.
+
+    Raises:
+      ValueError: if min_depth > max_depth.
+    """
+    super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
+    if min_depth > max_depth:
+      raise ValueError('min_depth should be less than or equal to max_depth')
+    self._conv_hyperparams = conv_hyperparams
+    self._min_depth = min_depth
+    self._max_depth = max_depth
+    self._num_layers_before_predictor = num_layers_before_predictor
+    self._use_dropout = use_dropout
+    self._kernel_size = kernel_size
+    self._box_code_size = box_code_size
+    self._dropout_keep_prob = dropout_keep_prob
+    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
+
+  def _predict(self, image_features, num_predictions_per_location):
+    """Computes encoded object locations and corresponding confidences.
+
+    Args:
+      image_features: A float tensor of shape [batch_size, height, width,
+        channels] containing features for a batch of images.
+      num_predictions_per_location: an integer representing the number of box
+        predictions to be made per spatial location in the feature map.
+
+    Returns:
+      A dictionary containing the following tensors.
+        box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
+          code_size] representing the location of the objects, where
+          num_anchors = feat_height * feat_width * num_predictions_per_location
+        class_predictions_with_background: A float tensor of shape
+          [batch_size, num_anchors, num_classes + 1] representing the class
+          predictions for the proposals.
+    """
+    features_depth = static_shape.get_depth(image_features.get_shape())
+    # Clamp the intermediate depth into [min_depth, max_depth].
+    depth = max(min(features_depth, self._max_depth), self._min_depth)
+
+    # Add a slot for the background class.
+    num_class_slots = self.num_classes + 1
+    net = image_features
+    with slim.arg_scope(self._conv_hyperparams), \
+         slim.arg_scope([slim.dropout], is_training=self._is_training):
+      # Add additional conv layers before the predictor.
+      if depth > 0 and self._num_layers_before_predictor > 0:
+        for i in range(self._num_layers_before_predictor):
+          net = slim.conv2d(
+              net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
+      # The two prediction heads are plain linear convolutions: no activation
+      # and no normalizer.
+      with slim.arg_scope([slim.conv2d], activation_fn=None,
+                          normalizer_fn=None, normalizer_params=None):
+        box_encodings = slim.conv2d(
+            net, num_predictions_per_location * self._box_code_size,
+            [self._kernel_size, self._kernel_size],
+            scope='BoxEncodingPredictor')
+        # Dropout is applied after the box head, so it only affects the class
+        # head.
+        if self._use_dropout:
+          net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
+        class_predictions_with_background = slim.conv2d(
+            net, num_predictions_per_location * num_class_slots,
+            [self._kernel_size, self._kernel_size], scope='ClassPredictor')
+        if self._apply_sigmoid_to_scores:
+          class_predictions_with_background = tf.sigmoid(
+              class_predictions_with_background)
+
+    # Pick the two leading output dimensions [batch, num_anchors].  Exactly
+    # one of them may be -1 so tf.reshape can infer it.
+    batch_size = static_shape.get_batch_size(image_features.get_shape())
+    if batch_size is not None:
+      leading_dims = [batch_size, -1]
+    else:
+      features_height = static_shape.get_height(image_features.get_shape())
+      features_width = static_shape.get_width(image_features.get_shape())
+      if features_height is None or features_width is None:
+        # Nothing is known statically (e.g. fully convolutional inference on
+        # variable-size images): use the dynamic batch size and let reshape
+        # infer the number of anchors instead of multiplying None values.
+        leading_dims = [tf.shape(image_features)[0], -1]
+      else:
+        flattened_predictions_size = (features_height * features_width *
+                                      num_predictions_per_location)
+        leading_dims = [-1, flattened_predictions_size]
+
+    box_encodings = tf.reshape(
+        box_encodings, leading_dims + [1, self._box_code_size])
+    class_predictions_with_background = tf.reshape(
+        class_predictions_with_background, leading_dims + [num_class_slots])
+    return {BOX_ENCODINGS: box_encodings,
+            CLASS_PREDICTIONS_WITH_BACKGROUND:
+            class_predictions_with_background}
--- a/object_detection/core/box_predictor_test.py
+++ b/object_detection/core/box_predictor_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_predictor."""
+
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import hyperparams_builder
+from object_detection.core import box_predictor
+from object_detection.protos import hyperparams_pb2
+
+
+class MaskRCNNBoxPredictorTest(tf.test.TestCase):
+  """Unit tests for box_predictor.MaskRCNNBoxPredictor."""
+
+  def _build_arg_scope_with_hyperparams(self,
+                                        op_type=hyperparams_pb2.Hyperparams.FC):
+    """Builds hyperparams for `op_type` from a minimal text proto.
+
+    The result of hyperparams_builder.build is what the tests hand to the
+    predictor as its fc/conv hyperparams.
+    """
+    text_proto = """
+      activation: NONE
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(text_proto, proto)
+    proto.op = op_type
+    return hyperparams_builder.build(proto, is_training=True)
+
+  def test_get_boxes_with_five_classes(self):
+    """Box and class outputs have the expected shapes for 5 classes."""
+    features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
+    predictor = box_predictor.MaskRCNNBoxPredictor(
+        is_training=False,
+        num_classes=5,
+        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
+        use_dropout=False,
+        dropout_keep_prob=0.5,
+        box_code_size=4,
+    )
+    predictions = predictor.predict(
+        features, num_predictions_per_location=1, scope='BoxPredictor')
+    encodings = predictions[box_predictor.BOX_ENCODINGS]
+    class_scores = predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      shapes = sess.run([tf.shape(encodings), tf.shape(class_scores)])
+      encodings_shape, class_scores_shape = shapes
+      # 2 images, 1 anchor, 5 classes (+1 background slot for the scores).
+      self.assertAllEqual(encodings_shape, [2, 1, 5, 4])
+      self.assertAllEqual(class_scores_shape, [2, 1, 6])
+
+  def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
+    """Requesting masks without conv_hyperparams is rejected."""
+    fc_hyperparams = self._build_arg_scope_with_hyperparams()
+    with self.assertRaises(ValueError):
+      box_predictor.MaskRCNNBoxPredictor(
+          is_training=False,
+          num_classes=5,
+          fc_hyperparams=fc_hyperparams,
+          use_dropout=False,
+          dropout_keep_prob=0.5,
+          box_code_size=4,
+          predict_instance_masks=True)
+
+  def test_get_instance_masks(self):
+    """Mask predictions are returned with the expected static shape."""
+    features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
+    predictor = box_predictor.MaskRCNNBoxPredictor(
+        is_training=False,
+        num_classes=5,
+        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
+        use_dropout=False,
+        dropout_keep_prob=0.5,
+        box_code_size=4,
+        conv_hyperparams=self._build_arg_scope_with_hyperparams(
+            op_type=hyperparams_pb2.Hyperparams.CONV),
+        predict_instance_masks=True)
+    predictions = predictor.predict(
+        features, num_predictions_per_location=1, scope='BoxPredictor')
+    masks = predictions[box_predictor.MASK_PREDICTIONS]
+    static_mask_shape = masks.get_shape().as_list()
+    self.assertListEqual([2, 1, 5, 14, 14], static_mask_shape)
+
+  def test_do_not_return_instance_masks_and_keypoints_without_request(self):
+    """Only the box and class entries are present by default."""
+    features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
+    predictor = box_predictor.MaskRCNNBoxPredictor(
+        is_training=False,
+        num_classes=5,
+        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
+        use_dropout=False,
+        dropout_keep_prob=0.5,
+        box_code_size=4)
+    predictions = predictor.predict(
+        features, num_predictions_per_location=1, scope='BoxPredictor')
+    self.assertEqual(len(predictions), 2)
+    self.assertIn(box_predictor.BOX_ENCODINGS, predictions)
+    self.assertIn(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND,
+                  predictions)
+
+  def test_value_error_on_predict_keypoints(self):
+    """Keypoint prediction is unimplemented and must be rejected."""
+    fc_hyperparams = self._build_arg_scope_with_hyperparams()
+    with self.assertRaises(ValueError):
+      box_predictor.MaskRCNNBoxPredictor(
+          is_training=False,
+          num_classes=5,
+          fc_hyperparams=fc_hyperparams,
+          use_dropout=False,
+          dropout_keep_prob=0.5,
+          box_code_size=4,
+          predict_keypoints=True)
+
+
+class RfcnBoxPredictorTest(tf.test.TestCase):
+  """Unit tests for box_predictor.RfcnBoxPredictor."""
+
+  def _build_arg_scope_with_conv_hyperparams(self):
+    """Builds conv hyperparams from a minimal text proto."""
+    text_proto = """
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(text_proto, proto)
+    return hyperparams_builder.build(proto, is_training=True)
+
+  def test_get_correct_box_encoding_and_class_prediction_shapes(self):
+    """Outputs have one row per proposal across the whole batch."""
+    features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
+    proposals = tf.random_normal([4, 2, 4], dtype=tf.float32)
+    predictor = box_predictor.RfcnBoxPredictor(
+        is_training=False,
+        num_classes=2,
+        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
+        num_spatial_bins=[3, 3],
+        depth=4,
+        crop_size=[12, 12],
+        box_code_size=4
+    )
+    predictions = predictor.predict(
+        features, num_predictions_per_location=1, scope='BoxPredictor',
+        proposal_boxes=proposals)
+    encodings = predictions[box_predictor.BOX_ENCODINGS]
+    class_scores = predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      shapes = sess.run([tf.shape(encodings), tf.shape(class_scores)])
+      encodings_shape, class_scores_shape = shapes
+      # 4 images x 2 proposals = 8 rows; 2 classes (+1 background slot).
+      self.assertAllEqual(encodings_shape, [8, 1, 2, 4])
+      self.assertAllEqual(class_scores_shape, [8, 1, 3])
+
+
+class ConvolutionalBoxPredictorTest(tf.test.TestCase):
+
+  def _build_arg_scope_with_conv_hyperparams(self):
+    """Builds conv hyperparams (RELU_6 activation) from a text proto."""
+    text_proto = """
+      activation: RELU_6
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        truncated_normal_initializer {
+        }
+      }
+    """
+    proto = hyperparams_pb2.Hyperparams()
+    text_format.Merge(text_proto, proto)
+    return hyperparams_builder.build(proto, is_training=True)
+
+  def test_get_boxes_for_five_aspect_ratios_per_location(self):
+    """Five predictions per location on an 8x8 map give 320 anchors."""
+    features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
+    predictor = box_predictor.ConvolutionalBoxPredictor(
+        is_training=False,
+        num_classes=0,
+        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
+        min_depth=0,
+        max_depth=32,
+        num_layers_before_predictor=1,
+        use_dropout=True,
+        dropout_keep_prob=0.8,
+        kernel_size=1,
+        box_code_size=4
+    )
+    predictions = predictor.predict(
+        features, num_predictions_per_location=5, scope='BoxPredictor')
+    encodings = predictions[box_predictor.BOX_ENCODINGS]
+    objectness = predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      shapes = sess.run([tf.shape(encodings), tf.shape(objectness)])
+      encodings_shape, objectness_shape = shapes
+      # 8 * 8 locations * 5 predictions each = 320 anchors per image.
+      self.assertAllEqual(encodings_shape, [4, 320, 1, 4])
+      self.assertAllEqual(objectness_shape, [4, 320, 1])
+
+  def test_get_boxes_for_one_aspect_ratio_per_location(self):
+    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
+    conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
+        is_training=False,
+        num_classes=0,
+        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
+        min_depth=0,
+        max_depth=32,
+        num_layers_before_predictor=1,
+        use_dropout=True,
+        dropout_keep_prob=0.8,
+        kernel_size=1,
+        box_code_size=4
+    )
+    box_predictions = conv_box_predictor.predict(
+        image_features, num_predictions_per_location=1, scope='BoxPredictor')
+    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
+    objectness_predictions = box_predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      (box_encodings_shape,
+       objectness_predictions_shape) = sess.run(
+           [tf.shape(box_encodings), tf.shape(objectness_predictions)])
+      self.assertAllEqual(box_encodings_shape, [4, 64, 1, 4])
+      self.assertAllEqual(objectness_predictions_shape, [4, 64, 1])
+
+  def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
+      self):
+    num_classes_without_background = 6
+    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
+    conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
+        is_training=False,
+        num_classes=num_classes_without_background,
+        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
+        min_depth=0,
+        max_depth=32,
+        num_layers_before_predictor=1,
+        use_dropout=True,
+        dropout_keep_prob=0.8,
+        kernel_size=1,
+        box_code_size=4
+    )
+    box_predictions = conv_box_predictor.predict(
+        image_features,
+        num_predictions_per_location=5,
+        scope='BoxPredictor')
+    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
+    class_predictions_with_background = box_predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+
+    init_op = tf.global_variables_initializer()
+    with self.test_session() as sess:
+      sess.run(init_op)
+      (box_encodings_shape, class_predictions_with_background_shape
+      ) = sess.run([
+          tf.shape(box_encodings), tf.shape(class_predictions_with_background)])
+      self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
+      self.assertAllEqual(class_predictions_with_background_shape,
+                          [4, 320, num_classes_without_background+1])
+
+  def test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional(
+      self):
+    image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
+    conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
+        is_training=False,
+        num_classes=0,
+        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
+        min_depth=0,
+        max_depth=32,
+        num_layers_before_predictor=1,
+        use_dropout=True,
+        dropout_keep_prob=0.8,
+        kernel_size=1,
+        box_code_size=4
+    )
+    box_predictions = conv_box_predictor.predict(
+        image_features, num_predictions_per_location=5, scope='BoxPredictor')
+    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
+    objectness_predictions = box_predictions[
+        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
+    init_op = tf.global_variables_initializer()
+
+    resolution = 32
+    expected_num_anchors = resolution*resolution*5
+    with self.test_session() as sess:
+      sess.run(init_op)
+      (box_encodings_shape,
+       objectness_predictions_shape) = sess.run(
+           [tf.shape(box_encodings), tf.shape(objectness_predictions)],
+           feed_dict={image_features:
+                      np.random.rand(4, resolution, resolution, 64)})
+      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
+      self.assertAllEqual(objectness_predictions_shape,
+                          [4, expected_num_anchors, 1])
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/data_decoder.py
+++ b/object_detection/core/data_decoder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Interface for data decoders.
+
+Data decoders decode the input data and return a dictionary of tensors keyed by
+the entries in core.reader.Fields.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+
+class DataDecoder(object):
+  """Abstract interface that every data decoder implements."""
+  __metaclass__ = ABCMeta
+
+  @abstractmethod
+  def decode(self, data):
+    """Decodes the serialized data for one image into a dictionary of tensors.
+
+    Implementations override this method; the base version does nothing.
+
+    Args:
+      data: a string tensor holding a serialized protocol buffer corresponding
+        to data for a single image.
+
+    Returns:
+      tensor_dict: a dictionary containing tensors. Possible keys are defined in
+          reader.Fields.
+    """
--- a/object_detection/core/keypoint_ops.py
+++ b/object_detection/core/keypoint_ops.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keypoint operations.
+
+Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
+where the last dimension holds the two coordinates of a keypoint in [y, x]
+order.
+"""
+import numpy as np
+import tensorflow as tf
+
+
+def scale(keypoints, y_scale, x_scale, scope=None):
+  """Multiplies keypoint y and x coordinates by separate scale factors.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    y_scale: (float) scalar tensor applied to the y coordinates
+    x_scale: (float) scalar tensor applied to the x coordinates
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_factor = tf.cast(y_scale, tf.float32)
+    x_factor = tf.cast(x_scale, tf.float32)
+    # Nested to rank 3 so the pair broadcasts against the [y, x] last axis.
+    factors = [[[y_factor, x_factor]]]
+    return keypoints * factors
+
+
+def clip_to_window(keypoints, window, scope=None):
+  """Clamps keypoint coordinates so that they lie inside a window.
+
+  Coordinates below the window minimum are raised to it and coordinates above
+  the window maximum are lowered to it; y and x are handled independently.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window to which the op should clip the keypoints.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'ClipToWindow'):
+    ys, xs = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    y_lo, x_lo, y_hi, x_hi = tf.unstack(window)
+    # Cap at the upper bound first, then lift to the lower bound.
+    ys_capped = tf.minimum(ys, y_hi)
+    clipped_ys = tf.maximum(ys_capped, y_lo)
+    xs_capped = tf.minimum(xs, x_hi)
+    clipped_xs = tf.maximum(xs_capped, x_lo)
+    return tf.concat([clipped_ys, clipped_xs], 2)
+
+
+def prune_outside_window(keypoints, window, scope=None):
+  """Replaces keypoints lying outside a window with nan.
+
+  A keypoint is kept only when both its y and x coordinates are within the
+  window (bounds inclusive); otherwise both of its coordinates become nan.
+  See also clip_to_window, which clips such keypoints instead of pruning them.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window outside of which the op should prune the keypoints.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'PruneOutsideWindow'):
+    ys, xs = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    y_lo, x_lo, y_hi, x_hi = tf.unstack(window)
+
+    y_inside = tf.logical_and(ys >= y_lo, ys <= y_hi)
+    x_inside = tf.logical_and(xs >= x_lo, xs <= x_hi)
+    inside = tf.logical_and(y_inside, x_inside)
+
+    # The same mask is applied to both coordinates, so a keypoint is either
+    # kept whole or replaced whole.
+    pruned_ys, pruned_xs = [
+        tf.where(inside, coords, np.nan * tf.ones_like(coords))
+        for coords in (ys, xs)
+    ]
+    return tf.concat([pruned_ys, pruned_xs], 2)
+
+
+def change_coordinate_frame(keypoints, window, scope=None):
+  """Re-expresses keypoints relative to the frame of a window.
+
+  Given a window of the form [y_min, x_min, y_max, x_max], the window's
+  [y_min, x_min] corner is subtracted from every keypoint and the result is
+  divided by the window's height and width.
+
+  A typical use is data augmentation: after randomly cropping an image to some
+  window, the groundtruth keypoints must be expressed relative to that window.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+      window we should change the coordinate frame to.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
+    height = window[2] - window[0]
+    width = window[3] - window[1]
+    origin = [window[0], window[1]]
+    shifted = keypoints - origin
+    inverse_height = 1.0 / height
+    inverse_width = 1.0 / width
+    return scale(shifted, inverse_height, inverse_width)
+
+
+def to_normalized_coordinates(keypoints, height, width,
+                              check_range=True, scope=None):
+  """Divides absolute keypoint coordinates by height and width.
+
+  Usually one uses the dynamic shape of the image or conv-layer tensor:
+    keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
+                                                       tf.shape(images)[1],
+                                                       tf.shape(images)[2]),
+
+  When check_range is True, an assertion fails at graph execution time if the
+  maximum coordinate is not greater than 1.01, which indicates that the
+  coordinates are already normalized. The value 1.01 leaves room for small
+  rounding errors.
+
+  Args:
+    keypoints: A tensor of shape [num_instances, num_keypoints, 2].
+    height: Maximum value for y coordinate of absolute keypoint coordinates.
+    width: Maximum value for x coordinate of absolute keypoint coordinates.
+    check_range: If True, checks if the coordinates are normalized.
+    scope: name scope.
+
+  Returns:
+    tensor of shape [num_instances, num_keypoints, 2] with normalized
+    coordinates in [0, 1].
+  """
+  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    if check_range:
+      largest = tf.reduce_max(keypoints)
+      looks_absolute = tf.greater(largest, 1.01)
+      range_check = tf.Assert(looks_absolute,
+                              ['max value is lower than 1.01: ', largest])
+      # Tie the assert to width so that it runs whenever the result is
+      # computed.
+      with tf.control_dependencies([range_check]):
+        width = tf.identity(width)
+
+    inverse_height = 1.0 / height
+    inverse_width = 1.0 / width
+    return scale(keypoints, inverse_height, inverse_width)
+
+
+def to_absolute_coordinates(keypoints, height, width,
+                            check_range=True, scope=None):
+  """Multiplies normalized keypoint coordinates by height and width.
+
+  When check_range is True, an assertion fails at graph execution time if the
+  maximum keypoint coordinate is larger than 1.01, in which case the
+  coordinates are taken to be absolute already.
+
+  Args:
+    keypoints: A tensor of shape [num_instances, num_keypoints, 2]
+    height: Maximum value for y coordinate of absolute keypoint coordinates.
+    width: Maximum value for x coordinate of absolute keypoint coordinates.
+    check_range: If True, checks if the coordinates are normalized or not.
+    scope: name scope.
+
+  Returns:
+    tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
+    in terms of the image size.
+
+  """
+  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
+    height = tf.cast(height, tf.float32)
+    width = tf.cast(width, tf.float32)
+
+    if check_range:
+      # Inputs must still be normalized, i.e. their maximum is at most 1.01.
+      largest = tf.reduce_max(keypoints)
+      looks_normalized = tf.greater_equal(1.01, largest)
+      range_check = tf.Assert(looks_normalized,
+                              ['maximum keypoint coordinate value is larger '
+                               'than 1.01: ', largest])
+      # Tie the assert to width so that it runs whenever the result is
+      # computed.
+      with tf.control_dependencies([range_check]):
+        width = tf.identity(width)
+
+    return scale(keypoints, height, width)
+
+
+def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
+  """Mirrors keypoints horizontally about flip_point and reorders them.
+
+  Each x coordinate is reflected around flip_point, and the keypoints of every
+  instance are reordered according to flip_permutation.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the x coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a horizontal flip.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'FlipHorizontal'):
+    # Move the keypoint axis to the front so tf.gather permutes keypoints
+    # rather than instances.
+    by_keypoint = tf.transpose(keypoints, [1, 0, 2])
+    permuted = tf.gather(by_keypoint, flip_permutation)
+    ys, xs = tf.split(value=permuted, num_or_size_splits=2, axis=2)
+    mirrored_xs = flip_point * 2.0 - xs
+    flipped = tf.concat([ys, mirrored_xs], 2)
+    # Restore the [num_instances, num_keypoints, 2] layout.
+    return tf.transpose(flipped, [1, 0, 2])
--- a/object_detection/core/keypoint_ops_test.py
+++ b/object_detection/core/keypoint_ops_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.keypoint_ops."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import keypoint_ops
+
+
+class KeypointOpsTest(tf.test.TestCase):
+  """Tests for common keypoint operations."""
+
+  def test_scale(self):
+    # y coordinates are scaled by 1/100 and x coordinates by 1/200.
+    keypoints = tf.constant([
+        [[0.0, 0.0], [100.0, 200.0]],
+        [[50.0, 120.0], [100.0, 140.0]]
+    ])
+    y_scale = tf.constant(1.0 / 100)
+    x_scale = tf.constant(1.0 / 200)
+
+    expected_keypoints = tf.constant([
+        [[0., 0.], [1.0, 1.0]],
+        [[0.5, 0.6], [1.0, 0.7]]
+    ])
+    output = keypoint_ops.scale(keypoints, y_scale, x_scale)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_clip_to_window(self):
+    # Keypoints of the first instance are already inside the window; those of
+    # the second instance are moved onto the window boundary.
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.25], [0.75, 0.75]]
+    ])
+    output = keypoint_ops.clip_to_window(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_prune_outside_window(self):
+    # Both keypoints of the second instance fall outside the window. Note that
+    # [0.5, 0.0] becomes [nan, nan] even though its y coordinate is inside.
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
+                                      [[np.nan, np.nan], [np.nan, np.nan]]])
+    output = keypoint_ops.prune_outside_window(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_change_coordinate_frame(self):
+    # The window starts at (0.25, 0.25) and is 0.5 high and wide, so each
+    # coordinate c maps to (c - 0.25) / 0.5.
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([
+        [[0, 0.5], [1.0, 1.0]],
+        [[0.5, -0.5], [1.5, 1.5]]
+    ])
+    output = keypoint_ops.change_coordinate_frame(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_normalized_coordinates(self):
+    # Absolute coordinates with height 40 and width 60: y / 40 and x / 60.
+    keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+    output = keypoint_ops.to_normalized_coordinates(
+        keypoints, 40, 60)
+    expected_keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_normalized_coordinates_already_normalized(self):
+    # The largest input coordinate is 1.0, so evaluating the op is expected
+    # to trip the range assertion.
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    output = keypoint_ops.to_normalized_coordinates(
+        keypoints, 40, 60)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(output)
+
+  def test_to_absolute_coordinates(self):
+    # Normalized coordinates with height 40 and width 60: y * 40 and x * 60.
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    output = keypoint_ops.to_absolute_coordinates(
+        keypoints, 40, 60)
+    expected_keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_absolute_coordinates_already_absolute(self):
+    # The largest input coordinate is 60, so evaluating the op is expected to
+    # trip the range assertion.
+    keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+    output = keypoint_ops.to_absolute_coordinates(
+        keypoints, 40, 60)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(output)
+
+  def test_flip_horizontal(self):
+    # Flipping around x = 0.5 maps x to 1 - x, and the permutation [0, 2, 1]
+    # swaps the last two keypoints of every instance.
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+    ])
+    flip_permutation = [0, 2, 1]
+
+    expected_keypoints = tf.constant([
+        [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
+        [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
+    ])
+    output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/losses.py
+++ b/object_detection/core/losses.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Classification and regression loss functions for object detection.
+
+Localization losses:
+ * WeightedL2LocalizationLoss
+ * WeightedSmoothL1LocalizationLoss
+ * WeightedIOULocalizationLoss
+
+Classification losses:
+ * WeightedSigmoidClassificationLoss
+ * WeightedSoftmaxClassificationLoss
+ * BootstrappedSigmoidClassificationLoss
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.utils import ops
+
+slim = tf.contrib.slim
+
+
+class Loss(object):
+  """Abstract base class for loss functions.
+
+  Instances are called directly; subclasses supply the actual computation by
+  implementing _compute_loss.
+  """
+  __metaclass__ = ABCMeta
+
+  def __call__(self,
+               prediction_tensor,
+               target_tensor,
+               ignore_nan_targets=False,
+               scope=None,
+               **params):
+    """Evaluates the loss inside a name scope.
+
+    Args:
+      prediction_tensor: a tensor representing predicted quantities.
+      target_tensor: a tensor representing regression or classification targets.
+      ignore_nan_targets: whether to ignore nan targets in the loss computation.
+        E.g. can be used if the target tensor is missing groundtruth data that
+        shouldn't be factored into the loss.
+      scope: Op scope name. Defaults to 'Loss' if None.
+      **params: Additional keyword arguments for specific implementations of
+              the Loss.
+
+    Returns:
+      loss: a tensor representing the value of the loss function.
+    """
+    scope_values = [prediction_tensor, target_tensor, params]
+    with tf.name_scope(scope, 'Loss', scope_values):
+      targets = target_tensor
+      if ignore_nan_targets:
+        # Wherever the target is nan, use the prediction itself as the target.
+        nan_mask = tf.is_nan(target_tensor)
+        targets = tf.where(nan_mask, prediction_tensor, target_tensor)
+      return self._compute_loss(prediction_tensor, targets, **params)
+
+  @abstractmethod
+  def _compute_loss(self, prediction_tensor, target_tensor, **params):
+    """Computes the loss; to be overridden by implementations.
+
+    Args:
+      prediction_tensor: a tensor representing predicted quantities
+      target_tensor: a tensor representing regression or classification targets
+      **params: Additional keyword arguments for specific implementations of
+              the Loss.
+
+    Returns:
+      loss: a tensor representing the value of the loss function
+    """
+
+
+class WeightedL2LocalizationLoss(Loss):
+  """Weighted L2 localization loss, optionally reported per anchor.
+
+  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
+  """
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: if True, the loss is returned per anchor instead of
+        being summed into a scalar. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Computes the weighted L2 loss.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    residual = prediction_tensor - target_tensor
+    # Give weights a trailing axis so they broadcast over code_size.
+    anchor_weights = tf.expand_dims(weights, 2)
+    half_squared = 0.5 * tf.square(residual * anchor_weights)
+    # Sum over the code dimension only, or over every dimension.
+    reduction_axis = 2 if self._anchorwise_output else None
+    return tf.reduce_sum(half_squared, reduction_axis)
+
+
+class WeightedSmoothL1LocalizationLoss(Loss):
+  """Weighted smooth L1 localization loss.
+
+  Elementwise, the smooth L1 loss of x is .5 x^2 if |x|<1 and |x|-.5 otherwise,
+  where x is the difference between predictions and target.
+
+  See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
+  """
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: if True, the loss is returned per anchor instead of
+        being summed into a scalar. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Computes the weighted smooth L1 loss.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or, with anchorwise output, a float tensor of shape
+            [batch_size, num_anchors]
+    """
+    residual = prediction_tensor - target_tensor
+    abs_residual = tf.abs(residual)
+    is_small = tf.less(abs_residual, 1)
+    quadratic_part = 0.5 * tf.square(abs_residual)
+    linear_part = abs_residual - 0.5
+    elementwise = tf.where(is_small, quadratic_part, linear_part)
+    # Weights are applied after summing over the code dimension.
+    per_anchor = tf.reduce_sum(elementwise, 2) * weights
+    if self._anchorwise_output:
+      return per_anchor
+    return tf.reduce_sum(per_anchor)
+
+
+class WeightedIOULocalizationLoss(Loss):
+  """IOU localization loss function.
+
+  Each predicted/groundtruth box pair is assigned a loss of 1 - IOU, and the
+  total loss is the weighted sum of these per-pair losses.
+  """
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Computes the weighted IOU loss.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded predicted boxes
+      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded target boxes
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+    """
+    # Flatten batch and anchor dimensions so every row is a single box.
+    flat_predictions = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
+    flat_targets = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
+    pairwise_iou = box_list_ops.matched_iou(flat_predictions, flat_targets)
+    iou_loss = 1.0 - pairwise_iou
+    flat_weights = tf.reshape(weights, [-1])
+    return tf.reduce_sum(flat_weights * iou_loss)
+
+
+class WeightedSigmoidClassificationLoss(Loss):
+  """Weighted sigmoid cross entropy classification loss."""
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: if True, the loss is returned per anchor instead of
+        being summed into a scalar. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Computes the weighted sigmoid cross entropy loss.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    # Give weights a trailing axis so they broadcast over num_classes.
+    entry_weights = tf.expand_dims(weights, 2)
+    if class_indices is not None:
+      num_classes = tf.shape(prediction_tensor)[2]
+      class_vector = ops.indices_to_dense_vector(class_indices, num_classes)
+      entry_weights = entry_weights * tf.reshape(class_vector, [1, 1, -1])
+    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor)
+    # Sum over the class dimension only, or over every dimension.
+    reduction_axis = 2 if self._anchorwise_output else None
+    return tf.reduce_sum(cross_entropy * entry_weights, reduction_axis)
+
+
+class WeightedSoftmaxClassificationLoss(Loss):
+  """Weighted softmax cross entropy classification loss."""
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Whether to output loss per anchor (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Computes the weighted softmax cross entropy loss.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or, with anchorwise output, a float tensor with the same shape
+            as weights
+    """
+    # The class count is read from the static shape of the predictions.
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    # Flatten batch and anchor dimensions: one row of logits per anchor.
+    flat_labels = tf.reshape(target_tensor, [-1, num_classes])
+    flat_logits = tf.reshape(prediction_tensor, [-1, num_classes])
+    row_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+        labels=flat_labels, logits=flat_logits)
+    if self._anchorwise_output:
+      return tf.reshape(row_cross_entropy, tf.shape(weights)) * weights
+    return tf.reduce_sum(row_cross_entropy * tf.reshape(weights, [-1]))
+
+
+class BootstrappedSigmoidClassificationLoss(Loss):
+  """Bootstrapped sigmoid cross entropy classification loss function.
+
+  This loss uses a convex combination of training labels and the current model's
+  predictions as training targets in the classification loss. The idea is that
+  as the model improves over time, its predictions can be trusted more and we
+  can use these predictions to mitigate the damage of noisy/incorrect labels,
+  because incorrect labels are likely to be eventually highly inconsistent with
+  other stimuli predicted to have the same label by the model.
+
+  In "soft" bootstrapping, we use all predicted class probabilities, whereas in
+  "hard" bootstrapping, we use the single class favored by the model.
+
+  See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
+  Reed et al. (ICLR 2015).
+  """
+
+  def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      alpha: a float32 scalar tensor between 0 and 1 representing interpolation
+        weight
+      bootstrap_type: set to either 'hard' or 'soft' (default)
+      anchorwise_output: Outputs loss per anchor. (default False)
+
+    Raises:
+      ValueError: if bootstrap_type is not either 'hard' or 'soft'
+    """
+    if bootstrap_type != 'hard' and bootstrap_type != 'soft':
+      raise ValueError('Unrecognized bootstrap_type: must be one of '
+                       '\'hard\' or \'soft.\'')
+    self._alpha = alpha
+    self._bootstrap_type = bootstrap_type
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    if self._bootstrap_type == 'soft':
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
+    else:
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.cast(
+              tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=bootstrap_target_tensor, logits=prediction_tensor))
+    if self._anchorwise_output:
+      return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
+    return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
+
+
+class HardExampleMiner(object):
+  """Hard example mining for regions in a list of images.
+
+  Implements hard example mining to select a subset of regions to be
+  back-propagated. For each image, selects the regions with highest losses,
+  subject to the condition that a newly selected region cannot have
+  an IOU > iou_threshold with any of the previously selected regions.
+  This can be achieved by re-using a greedy non-maximum suppression algorithm.
+  A constraint on the number of negatives mined per positive region can also be
+  enforced.
+
+  Reference papers: "Training Region-based Object Detectors with Online
+  Hard Example Mining" (CVPR 2016) by Srivastava et al., and
+  "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
+  """
+
+  def __init__(self,
+               num_hard_examples=64,
+               iou_threshold=0.7,
+               loss_type='both',
+               cls_loss_weight=0.05,
+               loc_loss_weight=0.06,
+               max_negatives_per_positive=None,
+               min_negatives_per_image=0):
+    """Constructor.
+
+    The hard example mining implemented by this class can replicate the behavior
+    in the two aforementioned papers (Srivastava et al., and Liu et al).
+    To replicate the A2 paper (Srivastava et al), num_hard_examples is set
+    to a fixed parameter (64 by default) and iou_threshold is set to .7 for
+    running non-max-suppression the predicted boxes prior to hard mining.
+    In order to replicate the SSD paper (Liu et al), num_hard_examples should
+    be set to None, max_negatives_per_positive should be 3 and iou_threshold
+    should be 1.0 (in order to effectively turn off NMS).
+
+    Args:
+      num_hard_examples: maximum number of hard examples to be
+        selected per image (prior to enforcing max negative to positive ratio
+        constraint).  If set to None, all examples obtained after NMS are
+        considered.
+      iou_threshold: minimum intersection over union for an example
+        to be discarded during NMS.
+      loss_type: use only classification losses ('cls', default),
+        localization losses ('loc') or both losses ('both').
+        In the last case, cls_loss_weight and loc_loss_weight are used to
+        compute weighted sum of the two losses.
+      cls_loss_weight: weight for classification loss.
+      loc_loss_weight: weight for location loss.
+      max_negatives_per_positive: maximum number of negatives to retain for
+        each positive anchor. By default, num_negatives_per_positive is None,
+        which means that we do not enforce a prespecified negative:positive
+        ratio.  Note also that num_negatives_per_positives can be a float
+        (and will be converted to be a float even if it is passed in otherwise).
+      min_negatives_per_image: minimum number of negative anchors to sample for
+        a given image. Setting this to a positive number allows sampling
+        negatives in an image without any positive anchors and thus not biased
+        towards at least one detection per image.
+    """
+    self._num_hard_examples = num_hard_examples
+    self._iou_threshold = iou_threshold
+    self._loss_type = loss_type
+    self._cls_loss_weight = cls_loss_weight
+    self._loc_loss_weight = loc_loss_weight
+    self._max_negatives_per_positive = max_negatives_per_positive
+    self._min_negatives_per_image = min_negatives_per_image
+    if self._max_negatives_per_positive is not None:
+      self._max_negatives_per_positive = float(self._max_negatives_per_positive)
+    self._num_positives_list = None
+    self._num_negatives_list = None
+
+  def __call__(self,
+               location_losses,
+               cls_losses,
+               decoded_boxlist_list,
+               match_list=None):
+    """Computes localization and classification losses after hard mining.
+
+    Args:
+      location_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise localization losses.
+      cls_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise classification losses.
+      decoded_boxlist_list: a list of decoded BoxList representing location
+        predictions for each image.
+      match_list: an optional list of matcher.Match objects encoding the match
+        between anchors and groundtruth boxes for each image of the batch,
+        with rows of the Match objects corresponding to groundtruth boxes
+        and columns corresponding to anchors.  Match objects in match_list are
+        used to reference which anchors are positive, negative or ignored.  If
+        self._max_negatives_per_positive exists, these are then used to enforce
+        a prespecified negative to positive ratio.
+
+    Returns:
+      mined_location_loss: a float scalar with sum of localization losses from
+        selected hard examples.
+      mined_cls_loss: a float scalar with sum of classification losses from
+        selected hard examples.
+    Raises:
+      ValueError: if location_losses, cls_losses and decoded_boxlist_list do
+        not have compatible shapes (i.e., they must correspond to the same
+        number of images).
+      ValueError: if match_list is specified but its length does not match
+        len(decoded_boxlist_list).
+    """
+    mined_location_losses = []
+    mined_cls_losses = []
+    location_losses = tf.unstack(location_losses)
+    cls_losses = tf.unstack(cls_losses)
+    num_images = len(decoded_boxlist_list)
+    if not match_list:
+      match_list = num_images * [None]
+    if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
+      raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
+                       'do not have compatible shapes.')
+    if not isinstance(match_list, list):
+      raise ValueError('match_list must be a list.')
+    if len(match_list) != len(decoded_boxlist_list):
+      raise ValueError('match_list must either be None or have '
+                       'length=len(decoded_boxlist_list).')
+    num_positives_list = []
+    num_negatives_list = []
+    for ind, detection_boxlist in enumerate(decoded_boxlist_list):
+      box_locations = detection_boxlist.get()
+      match = match_list[ind]
+      image_losses = cls_losses[ind]
+      if self._loss_type == 'loc':
+        image_losses = location_losses[ind]
+      elif self._loss_type == 'both':
+        image_losses *= self._cls_loss_weight
+        image_losses += location_losses[ind] * self._loc_loss_weight
+      if self._num_hard_examples is not None:
+        num_hard_examples = self._num_hard_examples
+      else:
+        num_hard_examples = detection_boxlist.num_boxes()
+      selected_indices = tf.image.non_max_suppression(
+          box_locations, image_losses, num_hard_examples, self._iou_threshold)
+      if self._max_negatives_per_positive is not None and match:
+        (selected_indices, num_positives,
+         num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
+             selected_indices, match, self._max_negatives_per_positive,
+             self._min_negatives_per_image)
+        num_positives_list.append(num_positives)
+        num_negatives_list.append(num_negatives)
+      mined_location_losses.append(
+          tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
+      mined_cls_losses.append(
+          tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
+    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
+    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
+    if match and self._max_negatives_per_positive:
+      self._num_positives_list = num_positives_list
+      self._num_negatives_list = num_negatives_list
+    return (location_loss, cls_loss)
+
+  def summarize(self):
+    """Summarize the number of positives and negatives after mining."""
+    if self._num_positives_list and self._num_negatives_list:
+      avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
+      avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
+      tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
+      tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
+
+  def _subsample_selection_to_desired_neg_pos_ratio(self,
+                                                    indices,
+                                                    match,
+                                                    max_negatives_per_positive,
+                                                    min_negatives_per_image=0):
+    """Subsample a collection of selected indices to a desired neg:pos ratio.
+
+    This function takes a subset of M indices (indexing into a large anchor
+    collection of N anchors where M<N) which are labeled as positive/negative
+    via a Match object (matched indices are positive, unmatched indices
+    are negative).  It returns a subset of the provided indices retaining all
+    positives as well as up to the first K negatives, where:
+      K=floor(num_negative_per_positive * num_positives).
+
+    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
+    with positives=[2, 5] and negatives=[4, 7, 9, 10] and
+    num_negatives_per_positive=1, then the returned subset of indices
+    is [2, 4, 5, 7].
+
+    Args:
+      indices: An integer tensor of shape [M] representing a collection
+        of selected anchor indices
+      match: A matcher.Match object encoding the match between anchors and
+        groundtruth boxes for a given image, with rows of the Match objects
+        corresponding to groundtruth boxes and columns corresponding to anchors.
+      max_negatives_per_positive: (float) maximum number of negatives for
+        each positive anchor.
+      min_negatives_per_image: minimum number of negative anchors for a given
+        image. Allow sampling negatives in image without any positive anchors.
+
+    Returns:
+      selected_indices: An integer tensor of shape [M'] representing a
+        collection of selected anchor indices with M' <= M.
+      num_positives: An integer tensor representing the number of positive
+        examples in selected set of indices.
+      num_negatives: An integer tensor representing the number of negative
+        examples in selected set of indices.
+    """
+    positives_indicator = tf.gather(match.matched_column_indicator(), indices)
+    negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
+    num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
+    max_negatives = tf.maximum(min_negatives_per_image,
+                               tf.to_int32(max_negatives_per_positive *
+                                           tf.to_float(num_positives)))
+    topk_negatives_indicator = tf.less_equal(
+        tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
+    subsampled_selection_indices = tf.where(
+        tf.logical_or(positives_indicator, topk_negatives_indicator))
+    num_negatives = tf.size(subsampled_selection_indices) - num_positives
+    return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
+            num_positives, num_negatives)
--- a/object_detection/core/losses_test.py
+++ b/object_detection/core/losses_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.losses."""
+import math
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import losses
+from object_detection.core import matcher
+
+
+class WeightedL2LocalizationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    batch_size = 3
+    num_anchors = 10
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
+    loss_op = losses.WeightedL2LocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    expected_loss = (3 * 5 * 4) / 2.0
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+  def testReturnsCorrectAnchorwiseLoss(self):
+    batch_size = 3
+    num_anchors = 16
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    expected_loss = np.ones((batch_size, num_anchors)) * 2
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+  def testReturnsCorrectLossSum(self):
+    batch_size = 3
+    num_anchors = 16
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=False)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      expected_loss_output = sess.run(expected_loss)
+      self.assertAllClose(loss_output, expected_loss_output)
+
+  def testReturnsCorrectNanLoss(self):
+    batch_size = 3
+    num_anchors = 10
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.concat([
+        tf.zeros([batch_size, num_anchors, code_size / 2]),
+        tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan
+    ], axis=2)
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
+                   ignore_nan_targets=True)
+
+    expected_loss = (3 * 5 * 4) / 2.0
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+
+class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    batch_size = 2
+    num_anchors = 3
+    code_size = 4
+    prediction_tensor = tf.constant([[[2.5, 0, .4, 0],
+                                      [0, 0, 0, 0],
+                                      [0, 2.5, 0, .4]],
+                                     [[3.5, 0, 0, 0],
+                                      [0, .4, 0, .9],
+                                      [0, 0, 1.5, 0]]], tf.float32)
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.constant([[2, 1, 1],
+                           [0, 3, 0]], tf.float32)
+    loss_op = losses.WeightedSmoothL1LocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = 7.695
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
+class WeightedIOULocalizationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1],
+                                      [0, 0, 1, 1],
+                                      [0, 0, .5, .25]]])
+    target_tensor = tf.constant([[[1.5, 0, 2.4, 1],
+                                  [0, 0, 1, 1],
+                                  [50, 50, 500.5, 100.25]]])
+    weights = [[1.0, .5, 2.0]]
+    loss_op = losses.WeightedIOULocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    exp_loss = 2.0
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
+class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSigmoidClassificationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = -2 * math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSigmoidClassificationLoss(True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectLossWithClassIndices(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100, 100],
+                                      [100, -100, -100, -100],
+                                      [100, 0, -100, 100],
+                                      [-100, -100, 100, -100]],
+                                     [[-100, 0, 100, 100],
+                                      [-100, 100, -100, 100],
+                                      [100, 100, 100, 100],
+                                      [0, 0, -1, 100]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0, 0],
+                                  [1, 0, 0, 1],
+                                  [1, 0, 0, 0],
+                                  [0, 0, 1, 1]],
+                                 [[0, 0, 1, 0],
+                                  [0, 1, 0, 0],
+                                  [1, 1, 1, 0],
+                                  [1, 0, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    # Ignores the last class.
+    class_indices = tf.constant([0, 1, 2], tf.int32)
+    loss_op = losses.WeightedSigmoidClassificationLoss(True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
+                   class_indices=class_indices)
+
+    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
+class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [0, 1, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = - 1.5 * math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [0, 1, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationLoss(True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
+class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLossSoftBootstrapping(self):
+    prediction_tensor = tf.constant([[[-100, 100, 0],
+                                      [100, -100, -100],
+                                      [100, -100, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, -100, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    alpha = tf.constant(.5, tf.float32)
+    loss_op = losses.BootstrappedSigmoidClassificationLoss(
+        alpha, bootstrap_type='soft')
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    exp_loss = -math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectLossHardBootstrapping(self):
+    prediction_tensor = tf.constant([[[-100, 100, 0],
+                                      [100, -100, -100],
+                                      [100, -100, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, -100, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    alpha = tf.constant(.5, tf.float32)
+    loss_op = losses.BootstrappedSigmoidClassificationLoss(
+        alpha, bootstrap_type='hard')
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    exp_loss = -math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    alpha = tf.constant(.5, tf.float32)
+    loss_op = losses.BootstrappedSigmoidClassificationLoss(
+        alpha, bootstrap_type='hard', anchorwise_output=True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
+class HardExampleMinerTest(tf.test.TestCase):
+  """Compares losses.HardExampleMiner outputs with hand-computed totals."""
+
+  def _check_losses(self, loc_loss, cls_loss, exp_loc_loss, exp_cls_loss):
+    """Runs each loss tensor in a test session and asserts its value."""
+    with self.test_session() as sess:
+      loc_loss_output = sess.run(loc_loss)
+      self.assertAllClose(loc_loss_output, exp_loc_loss)
+      cls_loss_output = sess.run(cls_loss)
+      self.assertAllClose(cls_loss_output, exp_cls_loss)
+
+  def _run_two_image_case(self, box_rows, num_hard_examples, iou_threshold,
+                          loss_type, exp_loc_loss, exp_cls_loss):
+    """Mines a fixed two-image, four-anchor batch and checks both losses.
+
+    Args:
+      box_rows: nested list of box corners; the same rows are used for both
+        images.
+      num_hard_examples: forwarded to losses.HardExampleMiner.
+      iou_threshold: forwarded to losses.HardExampleMiner.
+      loss_type: forwarded to losses.HardExampleMiner.
+      exp_loc_loss: expected value of the returned localization loss.
+      exp_cls_loss: expected value of the returned classification loss.
+    """
+    location_losses = tf.constant([[100, 90, 80, 0],
+                                   [0, 1, 2, 3]], tf.float32)
+    cls_losses = tf.constant([[0, 10, 50, 110],
+                              [9, 6, 3, 0]], tf.float32)
+    box_corners = tf.constant(box_rows, tf.float32)
+    decoded_boxlist_list = [box_list.BoxList(box_corners),
+                            box_list.BoxList(box_corners)]
+    miner = losses.HardExampleMiner(num_hard_examples=num_hard_examples,
+                                    iou_threshold=iou_threshold,
+                                    loss_type=loss_type,
+                                    cls_loss_weight=1,
+                                    loc_loss_weight=1)
+    (loc_loss, cls_loss) = miner(location_losses, cls_losses,
+                                 decoded_boxlist_list)
+    self._check_losses(loc_loss, cls_loss, exp_loc_loss, exp_cls_loss)
+
+  def _ratio_test_inputs(self):
+    """Builds the single-image, 12-anchor inputs shared by the ratio tests.
+
+    Returns:
+      A (location_losses, cls_losses, box_corners) tuple of float32 constants.
+    """
+    location_losses = tf.constant([[100, 90, 80, 0, 1, 2,
+                                    3, 10, 20, 100, 20, 3]], tf.float32)
+    cls_losses = tf.constant([[0, 0, 100, 0, 90, 70,
+                               0, 60, 0, 17, 13, 0]], tf.float32)
+    # Only the third coordinate differs from box to box.
+    third_coords = [0.2, 0.2, 0.2, 0.2, 0.5, 0.6,
+                    0.2, 0.8, 0.2, 1.0, 1.1, 0.2]
+    box_corners = tf.constant(
+        [[0.0, 0.0, coord, 0.1] for coord in third_coords], tf.float32)
+    return location_losses, cls_losses, box_corners
+
+  def testHardMiningWithSingleLossType(self):
+    """Selects with loss_type='loc' and one hard example."""
+    # Uses only location loss to select hard examples
+    self._run_two_image_case(
+        box_rows=[[0.1, 0.1, 0.9, 0.9]] * 4,
+        num_hard_examples=1,
+        iou_threshold=0.0,
+        loss_type='loc',
+        exp_loc_loss=100 + 3,
+        exp_cls_loss=0 + 0)
+
+  def testHardMiningWithBothLossType(self):
+    """Selects with loss_type='both' and one hard example."""
+    self._run_two_image_case(
+        box_rows=[[0.1, 0.1, 0.9, 0.9]] * 4,
+        num_hard_examples=1,
+        iou_threshold=0.0,
+        loss_type='both',
+        exp_loc_loss=80 + 0,
+        exp_cls_loss=50 + 9)
+
+  def testHardMiningNMS(self):
+    """Selects with loss_type='cls', two hard examples and IOU threshold 0.5."""
+    # The second box is the only one that differs from the other three.
+    self._run_two_image_case(
+        box_rows=[[0.1, 0.1, 0.9, 0.9],
+                  [0.9, 0.9, 0.99, 0.99],
+                  [0.1, 0.1, 0.9, 0.9],
+                  [0.1, 0.1, 0.9, 0.9]],
+        num_hard_examples=2,
+        iou_threshold=0.5,
+        loss_type='cls',
+        exp_loc_loss=0 + 90 + 0 + 1,
+        exp_cls_loss=110 + 10 + 9 + 6)
+
+  def testEnforceNegativesPerPositiveRatio(self):
+    """Sweeps max_negatives_per_positive over a fixed match assignment."""
+    location_losses, cls_losses, box_corners = self._ratio_test_inputs()
+    match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
+    match_list = [matcher.Match(match_results)]
+    decoded_boxlist_list = [box_list.BoxList(box_corners)]
+
+    # Each row: (max_negatives_per_positive, exp_loc_loss, exp_cls_loss).
+    cases = [
+        (0.0, 80 + 2, 100 + 70),
+        (0.5, 80 + 1 + 2, 100 + 90 + 70),
+        (1.0, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
+        (1.5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
+        (10, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
+    ]
+    for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in cases:
+      miner = losses.HardExampleMiner(
+          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
+          cls_loss_weight=1, loc_loss_weight=1,
+          max_negatives_per_positive=max_negatives_per_positive)
+      (loc_loss, cls_loss) = miner(location_losses, cls_losses,
+                                   decoded_boxlist_list, match_list)
+      miner.summarize()
+      self._check_losses(loc_loss, cls_loss, exp_loc_loss, exp_cls_loss)
+
+  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
+    """Sweeps min_negatives_per_image when every match result is -1."""
+    location_losses, cls_losses, box_corners = self._ratio_test_inputs()
+    match_results = tf.constant([-1] * 12)
+    match_list = [matcher.Match(match_results)]
+    decoded_boxlist_list = [box_list.BoxList(box_corners)]
+
+    # Each row: (min_negatives_per_image, exp_loc_loss, exp_cls_loss).
+    cases = [
+        (0, 0, 0),
+        (1, 80, 100),
+        (2, 80 + 1, 100 + 90),
+        (4, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
+        (5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
+        (6, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
+    ]
+    for min_negatives_per_image, exp_loc_loss, exp_cls_loss in cases:
+      miner = losses.HardExampleMiner(
+          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
+          cls_loss_weight=1, loc_loss_weight=1,
+          max_negatives_per_positive=3,
+          min_negatives_per_image=min_negatives_per_image)
+      (loc_loss, cls_loss) = miner(location_losses, cls_losses,
+                                   decoded_boxlist_list, match_list)
+      self._check_losses(loc_loss, cls_loss, exp_loc_loss, exp_cls_loss)
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/matcher.py
+++ b/object_detection/core/matcher.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor no_match.
+
+The ignore case is regularly encountered in object detection: when an anchor has
+a relatively small overlap with a ground-truth box, one neither wants to
+consider this box a positive example (match) nor a negative example (no match).
+
+The Match class is used to store the match results and it provides simple apis
+to query the results.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not an int32
+        tensor.
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    # Only int32 is accepted; the message states exactly what is checked.
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 tensor')
+    self._match_results = match_results
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+  def matched_column_indicator(self):
+    """Returns boolean column indicator where True means the column is matched.
+
+    Returns:
+      column_indicator: boolean vector which is True for all matched column
+      indices.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+  def unmatched_column_indicator(self):
+    """Returns boolean column indicator where True means the column is unmatched.
+
+    Returns:
+      column_indicator: boolean vector which is True for all unmatched column
+      indices.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the column is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+      indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of ignored columns."""
+    return tf.size(self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indices().  For example if
+    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        tf.gather(self._match_results, self.matched_column_indices()))
+
+  def _reshape_and_cast(self, t):
+    """Flattens `t` to rank 1 and casts it to int32."""
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+
+class Matcher(object):
+  """Abstract base class for matchers.
+
+  Implementations provide `_match`; callers use `match`, which wraps the raw
+  result of `_match` in a Match object.
+  """
+  __metaclass__ = ABCMeta
+
+  def match(self, similarity_matrix, scope=None, **params):
+    """Runs the matcher inside a name scope and wraps the result in a Match.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      scope: Op scope name. Defaults to 'Match' if None.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    scope_values = [similarity_matrix, params]
+    with tf.name_scope(scope, 'Match', scope_values):
+      raw_match_results = self._match(similarity_matrix, **params)
+      return Match(raw_match_results)
+
+  @abstractmethod
+  def _match(self, similarity_matrix, **params):
+    """Computes the raw match results; to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+        that column i is matched to row match_results[i], match_results[i]=-1
+        means that the column is not matched. match_results[i]=-2 means that
+        the column is ignored (usually this happens when there is a very weak
+        match which one neither wants as positive nor negative example).
+    """
--- a/object_detection/core/matcher_test.py
+++ b/object_detection/core/matcher_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.matcher."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import matcher
+
+
+class AnchorMatcherTest(tf.test.TestCase):
+  """Checks the query methods of matcher.Match on small fixed inputs."""
+
+  def test_get_correct_matched_columnIndices(self):
+    """matched_column_indices() lists the columns with non-negative entries."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [0, 1, 3, 5]
+    matched_column_indices = match.matched_column_indices()
+    self.assertEqual(matched_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      matched_column_indices = sess.run(matched_column_indices)
+      self.assertAllEqual(matched_column_indices, expected_column_indices)
+
+  def test_get_correct_counts(self):
+    """The three num_*_columns() counters return int32 counts per category."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    exp_num_matched_columns = 4
+    exp_num_unmatched_columns = 2
+    exp_num_ignored_columns = 1
+    num_matched_columns = match.num_matched_columns()
+    num_unmatched_columns = match.num_unmatched_columns()
+    num_ignored_columns = match.num_ignored_columns()
+    self.assertEqual(num_matched_columns.dtype, tf.int32)
+    self.assertEqual(num_unmatched_columns.dtype, tf.int32)
+    self.assertEqual(num_ignored_columns.dtype, tf.int32)
+    with self.test_session() as sess:
+      (num_matched_columns_out, num_unmatched_columns_out,
+       num_ignored_columns_out) = sess.run(
+           [num_matched_columns, num_unmatched_columns, num_ignored_columns])
+      self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
+      self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
+      self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
+
+  def testGetCorrectUnmatchedColumnIndices(self):
+    """unmatched_column_indices() lists the columns whose entry is -1."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [2, 4]
+    unmatched_column_indices = match.unmatched_column_indices()
+    self.assertEqual(unmatched_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      unmatched_column_indices = sess.run(unmatched_column_indices)
+      self.assertAllEqual(unmatched_column_indices, expected_column_indices)
+
+  def testGetCorrectMatchedRowIndices(self):
+    """matched_row_indices() returns the row of each matched column in order."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_row_indices = [3, 1, 0, 5]
+    matched_row_indices = match.matched_row_indices()
+    self.assertEqual(matched_row_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      matched_row_inds = sess.run(matched_row_indices)
+      self.assertAllEqual(matched_row_inds, expected_row_indices)
+
+  def test_get_correct_ignored_column_indices(self):
+    """ignored_column_indices() lists the columns whose entry is -2."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [6]
+    ignored_column_indices = match.ignored_column_indices()
+    self.assertEqual(ignored_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      ignored_column_indices = sess.run(ignored_column_indices)
+      self.assertAllEqual(ignored_column_indices, expected_column_indices)
+
+  def test_get_correct_matched_column_indicator(self):
+    """matched_column_indicator() is a bool mask over matched columns."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [True, True, False, True, False, True, False]
+    matched_column_indicator = match.matched_column_indicator()
+    self.assertEqual(matched_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      matched_column_indicator = sess.run(matched_column_indicator)
+      self.assertAllEqual(matched_column_indicator, expected_column_indicator)
+
+  def test_get_correct_unmatched_column_indicator(self):
+    """unmatched_column_indicator() is a bool mask over unmatched columns."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [False, False, True, False, True, False, False]
+    unmatched_column_indicator = match.unmatched_column_indicator()
+    self.assertEqual(unmatched_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      unmatched_column_indicator = sess.run(unmatched_column_indicator)
+      self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
+
+  def test_get_correct_ignored_column_indicator(self):
+    """ignored_column_indicator() is a bool mask over ignored columns."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [False, False, False, False, False, False, True]
+    ignored_column_indicator = match.ignored_column_indicator()
+    self.assertEqual(ignored_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      ignored_column_indicator = sess.run(ignored_column_indicator)
+      self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
+
+  def test_get_correct_unmatched_ignored_column_indices(self):
+    """unmatched_or_ignored_column_indices() lists all negative entries."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [2, 4, 6]
+    unmatched_ignored_column_indices = (match.
+                                        unmatched_or_ignored_column_indices())
+    self.assertEqual(unmatched_ignored_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      unmatched_ignored_column_indices = sess.run(
+          unmatched_ignored_column_indices)
+      self.assertAllEqual(unmatched_ignored_column_indices,
+                          expected_column_indices)
+
+  def test_all_columns_accounted_for(self):
+    """Matched, unmatched and ignored indices together cover every column."""
+    # Note: deliberately setting to small number so not always
+    # all possibilities appear (matched, unmatched, ignored)
+    num_matches = 10
+    match_results = tf.random_uniform(
+        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
+    match = matcher.Match(match_results)
+    matched_column_indices = match.matched_column_indices()
+    unmatched_column_indices = match.unmatched_column_indices()
+    ignored_column_indices = match.ignored_column_indices()
+    with self.test_session() as sess:
+      # A single run() so all three tensors see the same random draw.
+      matched, unmatched, ignored = sess.run([
+          matched_column_indices, unmatched_column_indices,
+          ignored_column_indices
+      ])
+      all_indices = np.hstack((matched, unmatched, ignored))
+      all_indices_sorted = np.sort(all_indices)
+      self.assertAllEqual(all_indices_sorted,
+                          np.arange(num_matches, dtype=np.int32))
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/minibatch_sampler.py
+++ b/object_detection/core/minibatch_sampler.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+    subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled.
+
+Subclasses should implement the subsample function and can make use of the
+@staticmethod subsample_indicator.
+"""
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class MinibatchSampler(object):
+  """Abstract base class for subsampling minibatches."""
+  __metaclass__ = ABCMeta
+
+  def __init__(self):
+    """Constructs a minibatch sampler."""
+
+  @abstractmethod
+  def subsample(self, indicator, batch_size, **params):
+    """Picks a subset of the True entries of `indicator`.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size.
+      **params: additional keyword arguments for specific implementations of
+          the MinibatchSampler.
+
+    Returns:
+      sample_indicator: boolean tensor of shape [N] whose True entries have been
+      sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
+    """
+
+  @staticmethod
+  def subsample_indicator(indicator, num_samples):
+    """Randomly keeps at most `num_samples` True entries of `indicator`.
+
+    With M entries of `indicator` set to `True`, all but `num_samples` of them
+    are switched to `False`. If `num_samples` is greater than M, the original
+    indicator vector is returned.
+
+    Args:
+      indicator: a 1-dimensional boolean tensor indicating which elements
+        are allowed to be sampled and which are not.
+      num_samples: int32 scalar tensor
+
+    Returns:
+      a boolean tensor with the same shape as input (indicator) tensor
+    """
+    # Positions of the True entries, in random order, flattened to rank 1.
+    candidate_positions = tf.reshape(
+        tf.random_shuffle(tf.where(indicator)), [-1])
+
+    # Never ask for more positions than there are candidates.
+    keep_count = tf.minimum(tf.size(candidate_positions), num_samples)
+    kept_positions = tf.slice(
+        candidate_positions, [0], tf.reshape(keep_count, [1]))
+
+    # NOTE(review): presumably the helper yields a length-N vector holding 1 at
+    # each kept position; confirm against ops.indices_to_dense_vector.
+    indicator_length = tf.shape(indicator)[0]
+    kept_dense = ops.indices_to_dense_vector(kept_positions, indicator_length)
+    return tf.equal(kept_dense, 1)
--- a/object_detection/core/minibatch_sampler_test.py
+++ b/object_detection/core/minibatch_sampler_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.minibatch_sampler."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+
+
+class MinibatchSamplerTest(tf.test.TestCase):
+  """Checks MinibatchSampler.subsample_indicator on small boolean vectors."""
+
+  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
+    """Exactly num_samples entries are kept when enough candidates exist."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      # assertEqual, not assertTrue: the latter would treat 3 as the message.
+      self.assertEqual(np.sum(samples_out), 3)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
+    """Same as above, with the indicator fed through a shapeless placeholder."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.placeholder(tf.bool)
+    feed_dict = {indicator: np_indicator}
+
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples, feed_dict=feed_dict)
+      self.assertEqual(np.sum(samples_out), 3)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
+    """All four True entries are kept when num_samples exceeds their count."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 5)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      self.assertEqual(np.sum(samples_out), 4)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_num_samples_is_zero(self):
+    """Requesting zero samples yields an all-False indicator."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 0)
+    with self.test_session() as sess:
+      samples_none_out = sess.run(samples_none)
+      self.assertAllEqual(
+          np.zeros_like(samples_none_out, dtype=bool),
+          samples_none_out)
+
+  def test_subsample_indicator_when_indicator_all_false(self):
+    """An empty indicator yields an empty result."""
+    indicator_empty = tf.zeros([0], dtype=tf.bool)
+    samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator_empty, 4)
+    with self.test_session() as sess:
+      samples_empty_out = sess.run(samples_empty)
+      self.assertEqual(0, samples_empty_out.size)
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/model.py
+++ b/object_detection/core/model.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Abstract detection model.
+
+This file defines a generic base class for detection models.  Programs that are
+designed to work with arbitrary detection models should only depend on this
+class.  We intend for the functions in this class to follow tensor-in/tensor-out
+design, thus all functions have tensors or lists/dictionaries holding tensors as
+inputs and outputs.
+
+Abstractly, detection models predict output tensors given input images
+which can be passed to a loss function at training time or passed to a
+postprocessing function at eval time.  The computation graphs at a high level
+consequently look as follows:
+
+Training time:
+inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
+
+Evaluation time:
+inputs (images tensor) -> preprocess -> predict -> postprocess
+ -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
+
+DetectionModels must thus implement four functions (1) preprocess, (2) predict,
+(3) postprocess and (4) loss.  DetectionModels should make no assumptions about
+the input size or aspect ratio --- they are responsible for doing any
+resize/reshaping necessary (see docstring for the preprocess function).
+Output classes are always integers in the range [0, num_classes).  Any mapping
+of these integers to semantic labels is to be handled outside of this class.
+
+By default, DetectionModels produce bounding box detections; however, we support
+a handful of auxiliary annotations associated with each bounding box, namely,
+instance masks and keypoints.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+from object_detection.core import standard_fields as fields
+
+
+class DetectionModel(object):
+  """Abstract base class for detection models.
+
+  Subclasses implement preprocess, predict, postprocess, loss and restore_fn.
+  Groundtruth handed to provide_groundtruth is stored per field and read back
+  through groundtruth_lists.
+  """
+  # NOTE(review): the `__metaclass__` attribute is only honored by Python 2;
+  # under Python 3 it is ignored and the abstract methods are not enforced.
+  __metaclass__ = ABCMeta
+
+  def __init__(self, num_classes):
+    """Constructor.
+
+    Args:
+      num_classes: number of classes.  Note that num_classes *does not* include
+      background categories that might be implicitly predicted in various
+      implementations.
+    """
+    self._num_classes = num_classes
+    # Maps a groundtruth field key to the list supplied via
+    # provide_groundtruth.
+    self._groundtruth_lists = {}
+
+  @property
+  def num_classes(self):
+    """Number of classes passed to the constructor."""
+    return self._num_classes
+
+  def groundtruth_lists(self, field):
+    """Access list of groundtruth tensors.
+
+    Args:
+      field: a string key, options are
+        fields.BoxListFields.{boxes,classes,masks,keypoints}
+
+    Returns:
+      a list of tensors holding groundtruth information (see also
+      provide_groundtruth function below), with one entry for each image in the
+      batch.
+    Raises:
+      RuntimeError: if the field has not been provided via provide_groundtruth.
+    """
+    if field not in self._groundtruth_lists:
+      # Interpolate the field name into the message; passing it as a second
+      # positional argument would leave the placeholder unformatted and make
+      # the exception carry a 2-tuple instead of a readable string.
+      raise RuntimeError(
+          'Groundtruth tensor {} has not been provided'.format(field))
+    return self._groundtruth_lists[field]
+
+  @abstractmethod
+  def preprocess(self, inputs):
+    """Input preprocessing.
+
+    To be overridden by implementations.
+
+    This function is responsible for any scaling/shifting of input values that
+    is necessary prior to running the detector on an input image.
+    It is also responsible for any resizing that might be necessary as images
+    are assumed to arrive in arbitrary sizes.  While this function could
+    conceivably be part of the predict method (below), it is often convenient
+    to keep these separate --- for example, we may want to preprocess on one
+    device, place onto a queue, and let another device (e.g., the GPU) handle
+    prediction.
+
+    A few important notes about the preprocess function:
+    + We assume that this operation does not have any trainable variables nor
+    does it affect the groundtruth annotations in any way (thus data
+    augmentation operations such as random cropping should be performed
+    externally).
+    + There is no assumption that the batchsize in this function is the same as
+    the batch size in the predict function.  In fact, we recommend calling the
+    preprocess function prior to calling any batching operations (which should
+    happen outside of the model) and thus assuming that batch sizes are equal
+    to 1 in the preprocess function.
+    + There is also no explicit assumption that the output resolutions
+    must be fixed across inputs --- this is to support "fully convolutional"
+    settings in which input images can have different shapes/resolutions.
+
+    Args:
+      inputs: a [batch, height_in, width_in, channels] float32 tensor
+        representing a batch of images with values between 0 and 255.0.
+
+    Returns:
+      preprocessed_inputs: a [batch, height_out, width_out, channels] float32
+        tensor representing a batch of images.
+    """
+    pass
+
+  @abstractmethod
+  def predict(self, preprocessed_inputs):
+    """Predict prediction tensors from inputs tensor.
+
+    Outputs of this function can be passed to loss or postprocess functions.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float32 tensor
+        representing a batch of images.
+
+    Returns:
+      prediction_dict: a dictionary holding prediction tensors to be
+        passed to the Loss or Postprocess functions.
+    """
+    pass
+
+  @abstractmethod
+  def postprocess(self, prediction_dict, **params):
+    """Convert predicted output tensors to final detections.
+
+    Outputs adhere to the following conventions:
+    * Classes are integers in [0, num_classes); background classes are removed
+      and the first non-background class is mapped to 0.
+    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
+      format and normalized relative to the image window.
+    * `num_detections` is provided for settings where detections are padded to a
+      fixed number of boxes.
+    * We do not specifically assume any kind of probabilistic interpretation
+      of the scores --- the only important thing is their relative ordering.
+      Thus implementations of the postprocess function are free to output
+      logits, probabilities, calibrated probabilities, or anything else.
+
+    Args:
+      prediction_dict: a dictionary holding prediction tensors.
+      **params: Additional keyword arguments for specific implementations of
+        DetectionModel.
+
+    Returns:
+      detections: a dictionary containing the following fields
+        detection_boxes: [batch, max_detections, 4]
+        detection_scores: [batch, max_detections]
+        detection_classes: [batch, max_detections]
+        instance_masks: [batch, max_detections, image_height, image_width]
+          (optional)
+        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+        num_detections: [batch]
+    """
+    pass
+
+  @abstractmethod
+  def loss(self, prediction_dict):
+    """Compute scalar loss tensors with respect to provided groundtruth.
+
+    Calling this function requires that groundtruth tensors have been
+    provided via the provide_groundtruth function.
+
+    Args:
+      prediction_dict: a dictionary holding predicted tensors
+
+    Returns:
+      a dictionary mapping strings (loss names) to scalar tensors representing
+        loss values.
+    """
+    pass
+
+  def provide_groundtruth(self,
+                          groundtruth_boxes_list,
+                          groundtruth_classes_list,
+                          groundtruth_masks_list=None,
+                          groundtruth_keypoints_list=None):
+    """Provide groundtruth tensors.
+
+    Boxes and classes are always stored.  Masks and keypoints are stored only
+    when a non-empty list is passed; passing None (or an empty list) leaves any
+    previously stored masks/keypoints entry untouched.
+
+    Args:
+      groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
+        [num_boxes, 4] containing coordinates of the groundtruth boxes.
+          Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
+          format and assumed to be normalized and clipped
+          relative to the image window with y_min <= y_max and x_min <= x_max.
+      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
+        tensors of shape [num_boxes, num_classes] containing the class targets
+        with the 0th index assumed to map to the first non-background class.
+      groundtruth_masks_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, height_in, width_in] containing instance
+        masks with values in {0, 1}.  If None, no masks are provided.
+        Mask resolution `height_in`x`width_in` must agree with the resolution
+        of the input image tensor provided to the `preprocess` function.
+      groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, num_keypoints, 2] containing keypoints.
+        Keypoints are assumed to be provided in normalized coordinates and
+        missing keypoints should be encoded as NaN.  If None, no keypoints are
+        provided.
+    """
+    self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
+    self._groundtruth_lists[
+        fields.BoxListFields.classes] = groundtruth_classes_list
+    if groundtruth_masks_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.masks] = groundtruth_masks_list
+    if groundtruth_keypoints_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.keypoints] = groundtruth_keypoints_list
+
+  @abstractmethod
+  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
+    """Return callable for loading a foreign checkpoint into tensorflow graph.
+
+    Loads variables from a different tensorflow graph (typically feature
+    extractor variables). This enables the model to initialize based on weights
+    from another task. For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with exception of
+    the num_classes parameter.
+
+    Args:
+      checkpoint_path: path to checkpoint to restore.
+      from_detection_checkpoint: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+
+    Returns:
+      a callable which takes a tf.Session as input and loads a checkpoint when
+        run.
+    """
+    pass
--- a/object_detection/core/post_processing.py
+++ b/object_detection/core/post_processing.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Post-processing operations on detected boxes."""
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import standard_fields as fields
+
+
+def multiclass_non_max_suppression(boxes,
+                                   scores,
+                                   score_thresh,
+                                   iou_thresh,
+                                   max_size_per_class,
+                                   max_total_size=0,
+                                   clip_window=None,
+                                   change_coordinate_frame=False,
+                                   masks=None,
+                                   additional_fields=None,
+                                   scope=None):
+  """Multi-class version of non maximum suppression.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes.  It operates independently for each class for
+  which scores are provided (via the scores field of the input box_list),
+  pruning boxes with score less than a provided threshold prior to
+  applying NMS.
+
+  Please note that this operation is performed on *all* classes, therefore any
+  background classes should be removed prior to calling this function.
+
+  Args:
+    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
+      number of classes or 1 depending on whether a separate box is predicted
+      per class.
+    scores: A [k, num_classes] float32 tensor containing the scores for each of
+      the k detections.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_size_per_class: maximum number of retained boxes per class.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default (0) returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip and normalize boxes to before performing
+      non-max suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window
+      is provided)
+    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
+      containing box masks. `q` can be either number of classes or 1 depending
+      on whether a separate mask is predicted per class.
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose first dimensions are all of size `k`. After non-maximum
+      suppression, all tensors corresponding to the selected boxes will be
+      added to resulting BoxList.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes with a rank-1 scores field representing
+      corresponding scores for each box with scores sorted in decreasing order
+      and a rank-1 classes field representing a class label for each box.
+      If masks or additional_fields are provided, the boxlist also carries the
+      corresponding fields for the selected boxes.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1]; if scores is not rank 2 or its
+      second dimension is not statically known; if boxes is not rank 3, its
+      second dimension is neither 1 nor num_classes, or its last dimension is
+      not 4; or if change_coordinate_frame is True without a clip_window.
+  """
+  if not 0 <= iou_thresh <= 1.0:
+    raise ValueError('iou_thresh must be between 0 and 1')
+  if scores.shape.ndims != 2:
+    raise ValueError('scores field must be of rank 2')
+  if scores.shape[1].value is None:
+    raise ValueError('scores must have statically defined second '
+                     'dimension')
+  if boxes.shape.ndims != 3:
+    raise ValueError('boxes must be of rank 3.')
+  if not (boxes.shape[1].value == scores.shape[1].value or
+          boxes.shape[1].value == 1):
+    raise ValueError('second dimension of boxes must be either 1 or equal '
+                     'to the second dimension of scores')
+  if boxes.shape[2].value != 4:
+    raise ValueError('last dimension of boxes must be of size 4.')
+  if change_coordinate_frame and clip_window is None:
+    # The trailing space keeps the two adjacent literals from fusing into
+    # "clip_windowmust".
+    raise ValueError('if change_coordinate_frame is True, then a clip_window '
+                     'must be specified.')
+
+  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
+    num_boxes = tf.shape(boxes)[0]
+    num_scores = tf.shape(scores)[0]
+    # Plain Python int (validated as statically known above), so range() and
+    # list repetition below do not rely on implicit Dimension conversion.
+    num_classes = scores.shape[1].value
+
+    # Checked at run time: the number of boxes and of score rows must agree.
+    length_assert = tf.Assert(
+        tf.equal(num_boxes, num_scores),
+        ['Incorrect scores field length: actual vs expected.',
+         num_scores, num_boxes])
+
+    selected_boxes_list = []
+    per_class_boxes_list = tf.unstack(boxes, axis=1)
+    if masks is not None:
+      per_class_masks_list = tf.unstack(masks, axis=1)
+    # With shared boxes (q == 1) every class reads box slice 0; otherwise class
+    # i reads box slice i.
+    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
+                 else [0] * num_classes)
+    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
+      per_class_boxes = per_class_boxes_list[boxes_idx]
+      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
+      # Tie the score slice to the length assert so the check actually runs.
+      with tf.control_dependencies([length_assert]):
+        class_scores = tf.reshape(
+            tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
+      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
+                                         class_scores)
+      if masks is not None:
+        per_class_masks = per_class_masks_list[boxes_idx]
+        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
+                                           per_class_masks)
+      if additional_fields is not None:
+        for key, tensor in additional_fields.items():
+          boxlist_and_class_scores.add_field(key, tensor)
+      boxlist_filtered = box_list_ops.filter_greater_than(
+          boxlist_and_class_scores, score_thresh)
+      if clip_window is not None:
+        boxlist_filtered = box_list_ops.clip_to_window(
+            boxlist_filtered, clip_window)
+        if change_coordinate_frame:
+          boxlist_filtered = box_list_ops.change_coordinate_frame(
+              boxlist_filtered, clip_window)
+      # Never ask NMS for more boxes than survived the score filter.
+      max_selection_size = tf.minimum(max_size_per_class,
+                                      boxlist_filtered.num_boxes())
+      selected_indices = tf.image.non_max_suppression(
+          boxlist_filtered.get(),
+          boxlist_filtered.get_field(fields.BoxListFields.scores),
+          max_selection_size,
+          iou_threshold=iou_thresh)
+      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
+      # Label every surviving box with the current class index, using the same
+      # dtype and shape as the scores field.
+      nms_result.add_field(
+          fields.BoxListFields.classes, (tf.zeros_like(
+              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
+      selected_boxes_list.append(nms_result)
+    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
+    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
+                                              fields.BoxListFields.scores)
+    # A max_total_size of 0 (the default) means "no overall cap".
+    if max_total_size:
+      max_total_size = tf.minimum(max_total_size,
+                                  sorted_boxes.num_boxes())
+      sorted_boxes = box_list_ops.gather(sorted_boxes,
+                                         tf.range(max_total_size))
+    return sorted_boxes
+
+
+def batch_multiclass_non_max_suppression(boxes,
+                                         scores,
+                                         score_thresh,
+                                         iou_thresh,
+                                         max_size_per_class,
+                                         max_total_size=0,
+                                         clip_window=None,
+                                         change_coordinate_frame=False,
+                                         num_valid_boxes=None,
+                                         masks=None,
+                                         scope=None):
+  """Multi-class version of non maximum suppression that operates on a batch.
+
+  This op is similar to `multiclass_non_max_suppression` but operates on a batch
+  of boxes and scores. See documentation for `multiclass_non_max_suppression`
+  for details.
+
+  Args:
+    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
+      detections. If `q` is 1 then same boxes are used for all classes
+        otherwise, if `q` is equal to number of classes, class-specific boxes
+        are used.
+    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
+      the scores for each of the `num_anchors` detections.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_size_per_class: maximum number of retained boxes per class.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip boxes to before performing non-max
+      suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window
+      is provided)
+    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
+      [batch_size] representing the number of valid boxes to be considered
+        for each image in the batch.  This parameter allows for ignoring zero
+        paddings.
+    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
+      float32 tensor containing box masks. `q` can be either number of classes
+      or 1 depending on whether a separate mask is predicted per class.
+    scope: tf scope name.
+
+  Returns:
+    A dictionary containing the following entries:
+    'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
+      containing the non-max suppressed boxes.
+    'detection_scores': A [batch_size, max_detections] float32 tensor
+      containing the scores for the boxes.
+    'detection_classes': A [batch_size, max_detections] float32 tensor
+      containing the class for boxes.
+    'num_detections': A [batch_size] float32 tensor indicating the number of
+      valid detections per batch item. Only the top num_detections[i] entries
+      in detection_boxes[i], detection_scores[i] and detection_classes[i] are
+      valid. The rest of the entries are zero paddings.
+    'detection_masks': (optional) a
+      [batch_size, max_detections, mask_height, mask_width] float32 tensor
+      containing masks for each selected box.
+
+  Raises:
+    ValueError: if the third dimension of boxes (`q`) is neither 1 nor the
+      third dimension of scores, or if `multiclass_non_max_suppression`
+      rejects the per-image inputs (e.g. iou_thresh not in [0, 1]).
+  """
+  q = boxes.shape[2].value
+  num_classes = scores.shape[2].value
+  if q != 1 and q != num_classes:
+    raise ValueError('third dimension of boxes must be either 1 or equal '
+                     'to the third dimension of scores')
+
+  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
+    # tf.unstack along axis 0 needs a statically known batch size.
+    per_image_boxes_list = tf.unstack(boxes)
+    per_image_scores_list = tf.unstack(scores)
+    # Placeholders so the zip below works when the optional inputs are absent.
+    num_valid_boxes_list = len(per_image_boxes_list) * [None]
+    per_image_masks_list = len(per_image_boxes_list) * [None]
+    if num_valid_boxes is not None:
+      num_valid_boxes_list = tf.unstack(num_valid_boxes)
+    if masks is not None:
+      per_image_masks_list = tf.unstack(masks)
+
+    detection_boxes_list = []
+    detection_scores_list = []
+    detection_classes_list = []
+    num_detections_list = []
+    detection_masks_list = []
+    # The per-image count gets its own name so the `num_valid_boxes` argument
+    # is not rebound inside the loop.
+    for (per_image_boxes, per_image_scores, per_image_masks,
+         per_image_num_valid_boxes) in zip(
+             per_image_boxes_list, per_image_scores_list,
+             per_image_masks_list, num_valid_boxes_list):
+      if per_image_num_valid_boxes is not None:
+        # Keep only the leading valid rows; the reshape restores the static
+        # trailing dimensions.
+        per_image_boxes = tf.reshape(
+            tf.slice(per_image_boxes, 3*[0],
+                     tf.stack([per_image_num_valid_boxes, -1, -1])),
+            [-1, q, 4])
+        per_image_scores = tf.reshape(
+            tf.slice(per_image_scores, [0, 0],
+                     tf.stack([per_image_num_valid_boxes, -1])),
+            [-1, num_classes])
+        if masks is not None:
+          per_image_masks = tf.reshape(
+              tf.slice(per_image_masks, 4*[0],
+                       tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
+              [-1, q, masks.shape[3].value, masks.shape[4].value])
+      nmsed_boxlist = multiclass_non_max_suppression(
+          per_image_boxes,
+          per_image_scores,
+          score_thresh,
+          iou_thresh,
+          max_size_per_class,
+          max_total_size,
+          masks=per_image_masks,
+          clip_window=clip_window,
+          change_coordinate_frame=change_coordinate_frame)
+      # Record the true detection count before padding/clipping.
+      num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))
+      # NOTE(review): with the default max_total_size=0 this pads/clips to a
+      # size of 0 -- confirm how pad_or_clip_box_list treats 0 and whether
+      # callers are expected to always pass an explicit max_total_size.
+      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
+                                                         max_total_size)
+      detection_boxes_list.append(padded_boxlist.get())
+      detection_scores_list.append(
+          padded_boxlist.get_field(fields.BoxListFields.scores))
+      detection_classes_list.append(
+          padded_boxlist.get_field(fields.BoxListFields.classes))
+      if masks is not None:
+        detection_masks_list.append(
+            padded_boxlist.get_field(fields.BoxListFields.masks))
+
+    nms_dict = {
+        'detection_boxes': tf.stack(detection_boxes_list),
+        'detection_scores': tf.stack(detection_scores_list),
+        'detection_classes': tf.stack(detection_classes_list),
+        'num_detections': tf.stack(num_detections_list)
+    }
+    if masks is not None:
+      nms_dict['detection_masks'] = tf.stack(detection_masks_list)
+    return nms_dict
--- a/object_detection/core/post_processing_test.py
+++ b/object_detection/core/post_processing_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow_models.object_detection.core.post_processing."""
+import numpy as np
+import tensorflow as tf
+from object_detection.core import post_processing
+from object_detection.core import standard_fields as fields
+
+
+class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
+
+  def test_with_invalid_scores_size(self):
+    """Six boxes paired with five score rows must trip the length assert."""
+    box_corners = tf.constant(
+        [[[0, 0, 1, 1]],
+         [[0, 0.1, 1, 1.1]],
+         [[0, -0.1, 1, 0.9]],
+         [[0, 10, 1, 11]],
+         [[0, 10.1, 1, 11.1]],
+         [[0, 100, 1, 101]]],
+        dtype=tf.float32)
+    # One row fewer than there are boxes.
+    class_scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
+    nms_boxlist = post_processing.multiclass_non_max_suppression(
+        box_corners,
+        class_scores,
+        score_thresh=0.6,
+        iou_thresh=.5,
+        max_size_per_class=3)
+    expected_error = tf.errors.InvalidArgumentError
+    # The mismatch is only detected when the graph is executed.
+    with self.test_session() as sess:
+      with self.assertRaisesWithPredicateMatch(
+          expected_error, 'Incorrect scores field length'):
+        sess.run(nms_boxlist.get())
+
+  def test_multiclass_nms_select_with_shared_boxes(self):
+    """Two-class NMS over class-agnostic boxes keeps the expected four boxes."""
+    box_corners = tf.constant(
+        [[[0, 0, 1, 1]],
+         [[0, 0.1, 1, 1.1]],
+         [[0, -0.1, 1, 0.9]],
+         [[0, 10, 1, 11]],
+         [[0, 10.1, 1, 11.1]],
+         [[0, 100, 1, 101]],
+         [[0, 1000, 1, 1002]],
+         [[0, 1000, 1, 1002.1]]],
+        dtype=tf.float32)
+    class_scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                                [.6, 0.01], [.95, 0],
+                                [.5, 0.01], [.3, 0.01],
+                                [.01, .85], [.01, .5]])
+
+    nms_boxlist = post_processing.multiclass_non_max_suppression(
+        box_corners,
+        class_scores,
+        score_thresh=0.1,
+        iou_thresh=.5,
+        max_size_per_class=4)
+
+    # Expected values, in the same order as the fetches below.
+    expectations = [
+        [[0, 10, 1, 11],
+         [0, 0, 1, 1],
+         [0, 1000, 1, 1002],
+         [0, 100, 1, 101]],
+        [.95, .9, .85, .3],
+        [0, 0, 1, 0],
+    ]
+    fetches = [
+        nms_boxlist.get(),
+        nms_boxlist.get_field(fields.BoxListFields.scores),
+        nms_boxlist.get_field(fields.BoxListFields.classes),
+    ]
+    with self.test_session() as sess:
+      outputs = sess.run(fetches)
+      for actual, expected in zip(outputs, expectations):
+        self.assertAllClose(actual, expected)
+
+  def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
+    """Keypoints passed as an additional field follow the selected boxes."""
+    box_corners = tf.constant(
+        [[[0, 0, 1, 1]],
+         [[0, 0.1, 1, 1.1]],
+         [[0, -0.1, 1, 0.9]],
+         [[0, 10, 1, 11]],
+         [[0, 10.1, 1, 11.1]],
+         [[0, 100, 1, 101]],
+         [[0, 1000, 1, 1002]],
+         [[0, 1000, 1, 1002.1]]],
+        dtype=tf.float32)
+    class_scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                                [.6, 0.01], [.95, 0],
+                                [.5, 0.01], [.3, 0.01],
+                                [.01, .85], [.01, .5]])
+    num_keypoints = 6
+    tile_multiples = [1, num_keypoints, 2]
+    # Every keypoint coordinate of box i holds the value i, so the surviving
+    # keypoints reveal which input rows were selected.
+    box_index_column = tf.reshape(tf.range(8), [8, 1, 1])
+    keypoints = tf.tile(box_index_column, tile_multiples)
+
+    expected_corners = [[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 1000, 1, 1002],
+                        [0, 100, 1, 101]]
+    expected_scores = [.95, .9, .85, .3]
+    expected_classes = [0, 0, 1, 0]
+    selected_rows = tf.reshape(
+        tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1])
+    expected_keypoints_tensor = tf.tile(selected_rows, tile_multiples)
+
+    nms_boxlist = post_processing.multiclass_non_max_suppression(
+        box_corners,
+        class_scores,
+        score_thresh=0.1,
+        iou_thresh=.5,
+        max_size_per_class=4,
+        additional_fields={fields.BoxListFields.keypoints: keypoints})
+
+    fetches = [
+        nms_boxlist.get(),
+        nms_boxlist.get_field(fields.BoxListFields.scores),
+        nms_boxlist.get_field(fields.BoxListFields.classes),
+        nms_boxlist.get_field(fields.BoxListFields.keypoints),
+        expected_keypoints_tensor,
+    ]
+    with self.test_session() as sess:
+      (corners_out, scores_out, classes_out, keypoints_out,
+       expected_keypoints) = sess.run(fetches)
+      self.assertAllClose(corners_out, expected_corners)
+      self.assertAllClose(scores_out, expected_scores)
+      self.assertAllClose(classes_out, expected_classes)
+      self.assertAllEqual(keypoints_out, expected_keypoints)
+
+  def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
+    """Keypoint heatmaps passed as an additional field survive NMS."""
+    box_corners = tf.constant(
+        [[[0, 0, 1, 1]],
+         [[0, 0.1, 1, 1.1]],
+         [[0, -0.1, 1, 0.9]],
+         [[0, 10, 1, 11]],
+         [[0, 10.1, 1, 11.1]],
+         [[0, 100, 1, 101]],
+         [[0, 1000, 1, 1002]],
+         [[0, 1000, 1, 1002.1]]],
+        dtype=tf.float32)
+    class_scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                                [.6, 0.01], [.95, 0],
+                                [.5, 0.01], [.3, 0.01],
+                                [.01, .85], [.01, .5]])
+
+    heatmap_height = 5
+    heatmap_width = 5
+    num_keypoints = 17
+    # All-ones heatmaps, one per input box.
+    input_heatmap_shape = [
+        tf.shape(box_corners)[0], heatmap_height, heatmap_width, num_keypoints]
+    keypoint_heatmaps = tf.ones(input_heatmap_shape, dtype=tf.float32)
+
+    expected_corners = [[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 1000, 1, 1002],
+                        [0, 100, 1, 101]]
+    expected_scores = [.95, .9, .85, .3]
+    expected_classes = [0, 0, 1, 0]
+    expected_heatmaps = np.ones(
+        (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
+
+    nms_boxlist = post_processing.multiclass_non_max_suppression(
+        box_corners,
+        class_scores,
+        score_thresh=0.1,
+        iou_thresh=.5,
+        max_size_per_class=4,
+        additional_fields={
+            fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
+
+    fetches = [
+        nms_boxlist.get(),
+        nms_boxlist.get_field(fields.BoxListFields.scores),
+        nms_boxlist.get_field(fields.BoxListFields.classes),
+        nms_boxlist.get_field(fields.BoxListFields.keypoint_heatmaps),
+    ]
+    with self.test_session() as sess:
+      corners_out, scores_out, classes_out, heatmaps_out = sess.run(fetches)
+
+      self.assertAllClose(corners_out, expected_corners)
+      self.assertAllClose(scores_out, expected_scores)
+      self.assertAllClose(classes_out, expected_classes)
+      self.assertAllEqual(heatmaps_out, expected_heatmaps)
+
+  def test_multiclass_nms_with_additional_fields(self):
+    """Checks that an extra field handed to multiclass NMS comes back aligned.
+
+    A second tensor of boxes is attached under the 'coarse_boxes' key through
+    `additional_fields`.  The rows expected back for that field are the ones
+    at the same input positions as the boxes expected to be kept.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+    extra_field = 'coarse_boxes'
+
+    # boxes is an [8, 1, 4] literal and scores an [8, 2] literal.
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], dtype=tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    # One coarse box per row of `boxes`.
+    coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
+                                [0.1, 0.2, 1.1, 1.2],
+                                [0.1, -0.2, 1.1, 1.0],
+                                [0.1, 10.1, 1.1, 11.1],
+                                [0.1, 10.2, 1.1, 11.2],
+                                [0.1, 100.1, 1.1, 101.1],
+                                [0.1, 1000.1, 1.1, 1002.1],
+                                [0.1, 1000.1, 1.1, 1002.2]], dtype=tf.float32)
+
+    expected_boxes = np.array([[0, 10, 1, 11],
+                               [0, 0, 1, 1],
+                               [0, 1000, 1, 1002],
+                               [0, 100, 1, 101]], dtype=np.float32)
+    # Rows 3, 0, 6 and 5 of `coarse_boxes`, i.e. the same input positions as
+    # the rows of `boxes` listed in `expected_boxes`.
+    expected_coarse_boxes = np.array([[0.1, 10.1, 1.1, 11.1],
+                                      [0.1, 0.1, 1.1, 1.1],
+                                      [0.1, 1000.1, 1.1, 1002.1],
+                                      [0.1, 100.1, 1.1, 101.1]],
+                                     dtype=np.float32)
+    expected_scores = [.95, .9, .85, .3]
+    expected_classes = [0, 0, 1, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        additional_fields={extra_field: coarse_boxes})
+
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes),
+                 nms.get_field(extra_field)]
+      out_boxes, out_scores, out_classes, out_coarse_boxes = sess.run(fetches)
+
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+      # The extra field is compared exactly rather than approximately.
+      self.assertAllEqual(out_coarse_boxes, expected_coarse_boxes)
+
+  def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
+    """Checks that masks passed to multiclass NMS follow the selected boxes.
+
+    The mask for input row i is filled with the value i, so the masks expected
+    back (filled with 3, 0, 6, 5) name the input rows expected to be kept.
+    """
+    num_classes = 2
+    mask_height = 3
+    mask_width = 3
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], dtype=tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    # Row index broadcast over [num_classes, mask_height, mask_width].
+    row_ids = tf.reshape(tf.range(8), [8, 1, 1, 1])
+    masks = tf.tile(row_ids, [1, num_classes, mask_height, mask_width])
+
+    expected_boxes = [[0, 10, 1, 11],
+                      [0, 0, 1, 1],
+                      [0, 1000, 1, 1002],
+                      [0, 100, 1, 101]]
+    expected_scores = [.95, .9, .85, .3]
+    expected_classes = [0, 0, 1, 0]
+    # Built as a tensor, so it is evaluated in the same session run below.
+    kept_row_ids = tf.reshape(
+        tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1])
+    expected_masks_tensor = tf.tile(kept_row_ids,
+                                    [1, mask_height, mask_width])
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        masks=masks)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes),
+                 nms.get_field(fields.BoxListFields.masks),
+                 expected_masks_tensor]
+      (out_boxes, out_scores, out_classes, out_masks,
+       expected_masks) = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+      self.assertAllEqual(out_masks, expected_masks)
+
+  def test_multiclass_nms_select_with_clip_window(self):
+    """Checks multiclass NMS output when a clip window is supplied.
+
+    Two boxes and the window [5, 4, 8, 7] are passed in; the test expects a
+    single box equal to the window, carrying the higher of the two scores.
+    """
+    score_thresh = 0.0
+    iou_thresh = 0.5
+    max_output_size = 100
+
+    boxes = tf.constant([[[0, 0, 10, 10]],
+                         [[1, 1, 11, 11]]], dtype=tf.float32)
+    scores = tf.constant([[.9], [.75]])
+    clip_window = tf.constant([5, 4, 8, 7], dtype=tf.float32)
+
+    expected_boxes = [[5, 4, 8, 7]]
+    expected_scores = [.9]
+    expected_classes = [0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        clip_window=clip_window)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes)]
+      out_boxes, out_scores, out_classes = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+
+  def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
+    """Checks multiclass NMS with a clip window and change_coordinate_frame.
+
+    Same inputs as the plain clip-window test, but with
+    change_coordinate_frame=True the single expected box is [0, 0, 1, 1]
+    instead of the window coordinates.
+    """
+    score_thresh = 0.0
+    iou_thresh = 0.5
+    max_output_size = 100
+
+    boxes = tf.constant([[[0, 0, 10, 10]],
+                         [[1, 1, 11, 11]]], dtype=tf.float32)
+    scores = tf.constant([[.9], [.75]])
+    clip_window = tf.constant([5, 4, 8, 7], dtype=tf.float32)
+
+    expected_boxes = [[0, 0, 1, 1]]
+    expected_scores = [.9]
+    expected_classes = [0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        clip_window=clip_window, change_coordinate_frame=True)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes)]
+      out_boxes, out_scores, out_classes = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+
+  def test_multiclass_nms_select_with_per_class_cap(self):
+    """Checks multiclass NMS when the per-class size argument is 2.
+
+    With the cap set to 2 the test expects three detections in total: two of
+    class 0 and one of class 1.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_size_per_class = 2
+
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], dtype=tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+
+    expected_boxes = [[0, 10, 1, 11],
+                      [0, 0, 1, 1],
+                      [0, 1000, 1, 1002]]
+    expected_scores = [.95, .9, .85]
+    expected_classes = [0, 0, 1]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_size_per_class)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes)]
+      out_boxes, out_scores, out_classes = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+
+  def test_multiclass_nms_select_with_total_cap(self):
+    """Checks multiclass NMS when a total-size argument of 2 is also passed.
+
+    The per-class size is 4 and the total size is 2; the test expects only the
+    two highest-scoring detections to be returned.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_size_per_class = 4
+    max_total_size = 2
+
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], dtype=tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+
+    expected_boxes = [[0, 10, 1, 11],
+                      [0, 0, 1, 1]]
+    expected_scores = [.95, .9]
+    expected_classes = [0, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_size_per_class,
+        max_total_size)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes)]
+      out_boxes, out_scores, out_classes = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+
+  def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
+    """Checks single-class multiclass NMS with a score threshold of 0.1.
+
+    Only the returned box coordinates are verified; three boxes are expected.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 3
+
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], dtype=tf.float32)
+    # One score column, i.e. a single class.
+    scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
+
+    expected_boxes = [[0, 10, 1, 11],
+                      [0, 0, 1, 1],
+                      [0, 100, 1, 101]]
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      out_boxes = sess.run(nms.get())
+      self.assertAllClose(out_boxes, expected_boxes)
+
+  def test_multiclass_nms_select_with_separate_boxes(self):
+    """Checks multiclass NMS when every class has its own box per row.
+
+    `boxes` is an [8, 2, 4] literal (one box per class per row).  The class-1
+    detection expected back, [0, 999, 2, 1004], is the second box of row 6.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
+                         [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                         [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11], [0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101], [0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                         [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
+                        dtype=tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+
+    expected_boxes = [[0, 10, 1, 11],
+                      [0, 0, 1, 1],
+                      [0, 999, 2, 1004],
+                      [0, 100, 1, 101]]
+    expected_scores = [.95, .9, .85, .3]
+    expected_classes = [0, 0, 1, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      fetches = [nms.get(),
+                 nms.get_field(fields.BoxListFields.scores),
+                 nms.get_field(fields.BoxListFields.classes)]
+      out_boxes, out_scores, out_classes = sess.run(fetches)
+      self.assertAllClose(out_boxes, expected_boxes)
+      self.assertAllClose(out_scores, expected_scores)
+      self.assertAllClose(out_classes, expected_classes)
+
+  def test_batch_multiclass_nms_with_batch_size_1(self):
+    """Checks batched multiclass NMS on a batch holding a single image.
+
+    The inputs are the separate-boxes fixture wrapped in a leading batch
+    dimension of 1; four detections are expected for the one image.
+    """
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]],
+                          [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0],
+                           [.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 999, 2, 1004],
+                        [0, 100, 1, 101]]]
+    exp_nms_scores = [[.95, .9, .85, .3]]
+    exp_nms_classes = [[0, 0, 1, 0]]
+
+    nms_dict = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms_dict)
+      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
+      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
+      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
+      # Array-aware comparison, matching the other batch tests; plain
+      # assertEqual only works here because the array has a single element.
+      self.assertAllClose(nms_output['num_detections'], [4])
+
+  def test_batch_multiclass_nms_with_batch_size_2(self):
+    """Checks batched multiclass NMS on a batch of two images.
+
+    Each image contributes four candidate rows.  Two detections are expected
+    for the first image and three for the second; the remaining slots of the
+    four-wide outputs are expected to be zero.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        dtype=tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+
+    expected_boxes = [[[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0]],
+                      [[0, 999, 2, 1004],
+                       [0, 10.1, 1, 11.1],
+                       [0, 100, 1, 101],
+                       [0, 0, 0, 0]]]
+    expected_scores = [[.95, .9, 0, 0],
+                       [.85, .5, .3, 0]]
+    expected_classes = [[0, 0, 0, 0],
+                        [1, 0, 0, 0]]
+    expected_num_detections = [2, 3]
+
+    detections = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(detections)
+      expectations = [('detection_boxes', expected_boxes),
+                      ('detection_scores', expected_scores),
+                      ('detection_classes', expected_classes),
+                      ('num_detections', expected_num_detections)]
+      for key, expected in expectations:
+        self.assertAllClose(nms_output[key], expected)
+
+  def test_batch_multiclass_nms_with_masks(self):
+    """Checks that batched multiclass NMS returns the masks of kept boxes.
+
+    `masks` holds one 2x2 mask per class per row for each of the two images.
+    The masks expected back are those of the kept (row, class) pairs, with
+    all-zero masks in the unused output slots.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        dtype=tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+                          [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+                          [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+                          [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+                         [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+                          [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+                          [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+                          [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+                        dtype=tf.float32)
+
+    expected_boxes = [[[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0]],
+                      [[0, 999, 2, 1004],
+                       [0, 10.1, 1, 11.1],
+                       [0, 100, 1, 101],
+                       [0, 0, 0, 0]]]
+    expected_scores = [[.95, .9, 0, 0],
+                       [.85, .5, .3, 0]]
+    expected_classes = [[0, 0, 0, 0],
+                        [1, 0, 0, 0]]
+    expected_num_detections = [2, 3]
+    expected_masks = [[[[6, 7], [8, 9]],
+                       [[0, 1], [2, 3]],
+                       [[0, 0], [0, 0]],
+                       [[0, 0], [0, 0]]],
+                      [[[13, 14], [15, 16]],
+                       [[8, 9], [10, 11]],
+                       [[10, 11], [12, 13]],
+                       [[0, 0], [0, 0]]]]
+
+    detections = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size,
+        masks=masks)
+    with self.test_session() as sess:
+      nms_output = sess.run(detections)
+      expectations = [('detection_boxes', expected_boxes),
+                      ('detection_scores', expected_scores),
+                      ('detection_classes', expected_classes),
+                      ('num_detections', expected_num_detections),
+                      ('detection_masks', expected_masks)]
+      for key, expected in expectations:
+        self.assertAllClose(nms_output[key], expected)
+
+  def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
+    """Checks batched multiclass NMS with masks and num_valid_boxes=[1, 1].
+
+    Inputs match the masks test, but `num_valid_boxes` is [1, 1].  The test
+    expects one detection per image, taken from the first row of each image,
+    together with that row's class-0 mask.
+    """
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        dtype=tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+                          [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+                          [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+                          [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+                         [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+                          [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+                          [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+                          [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+                        dtype=tf.float32)
+    num_valid_boxes = tf.constant([1, 1], dtype=tf.int32)
+
+    expected_boxes = [[[0, 0, 1, 1],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0]],
+                      [[0, 10.1, 1, 11.1],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0]]]
+    expected_scores = [[.9, 0, 0, 0],
+                       [.5, 0, 0, 0]]
+    expected_classes = [[0, 0, 0, 0],
+                        [0, 0, 0, 0]]
+    expected_num_detections = [1, 1]
+    expected_masks = [[[[0, 1], [2, 3]],
+                       [[0, 0], [0, 0]],
+                       [[0, 0], [0, 0]],
+                       [[0, 0], [0, 0]]],
+                      [[[8, 9], [10, 11]],
+                       [[0, 0], [0, 0]],
+                       [[0, 0], [0, 0]],
+                       [[0, 0], [0, 0]]]]
+
+    detections = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size,
+        num_valid_boxes=num_valid_boxes, masks=masks)
+    with self.test_session() as sess:
+      nms_output = sess.run(detections)
+      expectations = [('detection_boxes', expected_boxes),
+                      ('detection_scores', expected_scores),
+                      ('detection_classes', expected_classes),
+                      ('num_detections', expected_num_detections),
+                      ('detection_masks', expected_masks)]
+      for key, expected in expectations:
+        self.assertAllClose(nms_output[key], expected)
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/prefetcher.py
+++ b/object_detection/core/prefetcher.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Provides functions to prefetch tensors to feed into models."""
+import tensorflow as tf
+
+
+def prefetch(tensor_dict, capacity):
+  """Builds a padding FIFO queue that prefetches a dictionary of tensors.
+
+  An op enqueueing `tensor_dict` is wrapped in a queue runner and registered,
+  so the queue is filled asynchronously once queue runners are started.
+  Dequeuing from the returned queue evaluates to a tensor_dict, which keeps
+  preprocessed tensors readily available for consumers.  A scalar summary
+  reporting the fraction of the queue that is full is added as well.
+
+  Example input pipeline when you don't need batching:
+  ----------------------------------------------------
+  key, string_tensor = slim.parallel_reader.parallel_read(...)
+  tensor_dict = decoder.decode(string_tensor)
+  tensor_dict = preprocessor.preprocess(tensor_dict, ...)
+  prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
+  tensor_dict = prefetch_queue.dequeue()
+  outputs = Model(tensor_dict)
+  ...
+  ----------------------------------------------------
+
+  For input pipelines with batching, refer to core/batcher.py
+
+  Args:
+    tensor_dict: a dictionary of tensors to prefetch.
+    capacity: the size of the prefetch queue.
+
+  Returns:
+    a FIFO prefetcher queue
+  """
+  # Collect name, dtype and static shape together so the three lists stay
+  # aligned entry by entry.
+  names = []
+  dtypes = []
+  shapes = []
+  for name, tensor in tensor_dict.items():
+    names.append(name)
+    dtypes.append(tensor.dtype)
+    shapes.append(tensor.get_shape())
+
+  prefetch_queue = tf.PaddingFIFOQueue(
+      capacity, dtypes=dtypes, shapes=shapes, names=names,
+      name='prefetch_queue')
+
+  runner = tf.train.queue_runner.QueueRunner(
+      prefetch_queue, [prefetch_queue.enqueue(tensor_dict)])
+  tf.train.queue_runner.add_queue_runner(runner)
+
+  summary_name = 'queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
+                                                   capacity)
+  fraction_full = tf.to_float(prefetch_queue.size()) * (1. / capacity)
+  tf.summary.scalar(summary_name, fraction_full)
+  return prefetch_queue
--- a/object_detection/core/prefetcher_test.py
+++ b/object_detection/core/prefetcher_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.prefetcher."""
+import tensorflow as tf
+
+from object_detection.core import prefetcher
+
+slim = tf.contrib.slim
+
+
+class PrefetcherTest(tf.test.TestCase):
+  """Tests prefetcher.prefetch with fully and partially defined shapes."""
+
+  def _assert_batches_then_exhausted(self, sess, tensor_dict, num_batches,
+                                     batch_size, image_size):
+    """Dequeues num_batches results, checks shapes, then expects exhaustion.
+
+    Args:
+      sess: the session the queue runners are started in.
+      tensor_dict: the dequeued tensor dictionary to evaluate.
+      num_batches: how many successful dequeues are expected.
+      batch_size: expected leading dimension of 'image' and 'label'.
+      image_size: expected height and width of 'image'.
+    """
+    with slim.queues.QueueRunners(sess):
+      for _ in range(num_batches):
+        results = sess.run(tensor_dict)
+        self.assertEqual(results['image'].shape,
+                         (batch_size, image_size, image_size, 3))
+        self.assertEqual(results['label'].shape, (batch_size, 1))
+      # After num_batches dequeues one more run must fail with OutOfRange.
+      with self.assertRaises(tf.errors.OutOfRangeError):
+        sess.run(tensor_dict)
+
+  def test_prefetch_tensors_with_fully_defined_shapes(self):
+    """Static shapes must be preserved through the prefetch queue."""
+    with self.test_session() as sess:
+      batch_size = 10
+      image_size = 32
+      num_batches = 5
+      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+      # The counter is enqueued with the data and is limited to num_batches.
+      counter = examples.count_up_to(num_batches)
+      image = tf.random_normal([batch_size, image_size,
+                                image_size, 3],
+                               dtype=tf.float32,
+                               name='images')
+      label = tf.random_uniform([batch_size, 1], 0, 10,
+                                dtype=tf.int32, name='labels')
+
+      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+                                                        'image': image,
+                                                        'label': label},
+                                           capacity=100)
+      tensor_dict = prefetch_queue.dequeue()
+
+      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+                          [batch_size, image_size, image_size, 3])
+      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+                          [batch_size, 1])
+
+      # tf.initialize_all_variables is deprecated; use its replacement.
+      tf.global_variables_initializer().run()
+      self._assert_batches_then_exhausted(
+          sess, tensor_dict, num_batches, batch_size, image_size)
+
+  def test_prefetch_tensors_with_partially_defined_shapes(self):
+    """Unknown static dimensions must stay unknown after dequeueing."""
+    with self.test_session() as sess:
+      batch_size = 10
+      image_size = 32
+      num_batches = 5
+      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+      # The counter is enqueued with the data and is limited to num_batches.
+      counter = examples.count_up_to(num_batches)
+      # Variables are used as dimensions and the static shape is then set
+      # with None for those dimensions.
+      image = tf.random_normal([batch_size,
+                                tf.Variable(image_size),
+                                tf.Variable(image_size), 3],
+                               dtype=tf.float32,
+                               name='image')
+      image.set_shape([batch_size, None, None, 3])
+      label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
+                                10, dtype=tf.int32, name='label')
+      label.set_shape([batch_size, None])
+
+      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+                                                        'image': image,
+                                                        'label': label},
+                                           capacity=100)
+      tensor_dict = prefetch_queue.dequeue()
+
+      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+                          [batch_size, None, None, 3])
+      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+                          [batch_size, None])
+
+      # tf.initialize_all_variables is deprecated; use its replacement.
+      tf.global_variables_initializer().run()
+      self._assert_batches_then_exhausted(
+          sess, tensor_dict, num_batches, batch_size, image_size)
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()