Merge remote-tracking branch 'upstream/master'

44fa1d37 · Alex Lee · d3628a74 · 6e367f67 · 44fa1d37 · 44fa1d37
Commit 44fa1d37 authored Jun 29, 2017 by Alex Lee
20 changed files
--- a/object_detection/core/keypoint_ops_test.py
+++ b/object_detection/core/keypoint_ops_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.keypoint_ops."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import keypoint_ops
+
+
+class KeypointOpsTest(tf.test.TestCase):
+  """Tests for common keypoint operations."""
+
+  def test_scale(self):
+    keypoints = tf.constant([
+        [[0.0, 0.0], [100.0, 200.0]],
+        [[50.0, 120.0], [100.0, 140.0]]
+    ])
+    y_scale = tf.constant(1.0 / 100)
+    x_scale = tf.constant(1.0 / 200)
+
+    expected_keypoints = tf.constant([
+        [[0., 0.], [1.0, 1.0]],
+        [[0.5, 0.6], [1.0, 0.7]]
+    ])
+    output = keypoint_ops.scale(keypoints, y_scale, x_scale)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_clip_to_window(self):
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.25], [0.75, 0.75]]
+    ])
+    output = keypoint_ops.clip_to_window(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_prune_outside_window(self):
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
+                                      [[np.nan, np.nan], [np.nan, np.nan]]])
+    output = keypoint_ops.prune_outside_window(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_change_coordinate_frame(self):
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+    expected_keypoints = tf.constant([
+        [[0, 0.5], [1.0, 1.0]],
+        [[0.5, -0.5], [1.5, 1.5]]
+    ])
+    output = keypoint_ops.change_coordinate_frame(keypoints, window)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_normalized_coordinates(self):
+    keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+    output = keypoint_ops.to_normalized_coordinates(
+        keypoints, 40, 60)
+    expected_keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_normalized_coordinates_already_normalized(self):
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    output = keypoint_ops.to_normalized_coordinates(
+        keypoints, 40, 60)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(output)
+
+  def test_to_absolute_coordinates(self):
+    keypoints = tf.constant([
+        [[0.25, 0.5], [0.75, 0.75]],
+        [[0.5, 0.0], [1.0, 1.0]]
+    ])
+    output = keypoint_ops.to_absolute_coordinates(
+        keypoints, 40, 60)
+    expected_keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_to_absolute_coordinates_already_absolute(self):
+    keypoints = tf.constant([
+        [[10., 30.], [30., 45.]],
+        [[20., 0.], [40., 60.]]
+    ])
+    output = keypoint_ops.to_absolute_coordinates(
+        keypoints, 40, 60)
+
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(output)
+
+  def test_flip_horizontal(self):
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+    ])
+    flip_permutation = [0, 2, 1]
+
+    expected_keypoints = tf.constant([
+        [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
+        [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
+    ])
+    output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+
+# Run every test case in this module when the file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/losses.py
+++ b/object_detection/core/losses.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Classification and regression loss functions for object detection.
+
+Localization losses:
+ * WeightedL2LocalizationLoss
+ * WeightedSmoothL1LocalizationLoss
+ * WeightedIOULocalizationLoss
+
+Classification losses:
+ * WeightedSigmoidClassificationLoss
+ * WeightedSoftmaxClassificationLoss
+ * BootstrappedSigmoidClassificationLoss
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.utils import ops
+
+# Short alias for tf.contrib.slim.
+# NOTE(review): no code in this module appears to reference it.
+slim = tf.contrib.slim
+
+
+class Loss(object):
+  """Abstract base class for loss functions."""
+  __metaclass__ = ABCMeta
+
+  def __call__(self,
+               prediction_tensor,
+               target_tensor,
+               ignore_nan_targets=False,
+               scope=None,
+               **params):
+    """Call the loss function.
+
+    Args:
+      prediction_tensor: a tensor representing predicted quantities.
+      target_tensor: a tensor representing regression or classification targets.
+      ignore_nan_targets: whether to ignore nan targets in the loss computation.
+        E.g. can be used if the target tensor is missing groundtruth data that
+        shouldn't be factored into the loss.
+      scope: Op scope name. Defaults to 'Loss' if None.
+      **params: Additional keyword arguments for specific implementations of
+              the Loss.
+
+    Returns:
+      loss: a tensor representing the value of the loss function.
+    """
+    with tf.name_scope(scope, 'Loss',
+                       [prediction_tensor, target_tensor, params]) as scope:
+      if ignore_nan_targets:
+        target_tensor = tf.where(tf.is_nan(target_tensor),
+                                 prediction_tensor,
+                                 target_tensor)
+      return self._compute_loss(prediction_tensor, target_tensor, **params)
+
+  @abstractmethod
+  def _compute_loss(self, prediction_tensor, target_tensor, **params):
+    """Method to be overriden by implementations.
+
+    Args:
+      prediction_tensor: a tensor representing predicted quantities
+      target_tensor: a tensor representing regression or classification targets
+      **params: Additional keyword arguments for specific implementations of
+              the Loss.
+
+    Returns:
+      loss: a tensor representing the value of the loss function
+    """
+    pass
+
+
+class WeightedL2LocalizationLoss(Loss):
+  """L2 localization loss function with anchorwise output support.
+
+  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
+  """
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Outputs loss per anchor. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
+        weights, 2)
+    square_diff = 0.5 * tf.square(weighted_diff)
+    if self._anchorwise_output:
+      return tf.reduce_sum(square_diff, 2)
+    return tf.reduce_sum(square_diff)
+
+
+class WeightedSmoothL1LocalizationLoss(Loss):
+  """Smooth L1 localization loss function.
+
+  The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5
+  otherwise, where x is the difference between predictions and target.
+
+  See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
+  """
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Outputs loss per anchor. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the (encoded) predicted locations of objects.
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        code_size] representing the regression targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+    """
+    diff = prediction_tensor - target_tensor
+    abs_diff = tf.abs(diff)
+    abs_diff_lt_1 = tf.less(abs_diff, 1)
+    anchorwise_smooth_l1norm = tf.reduce_sum(
+        tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5),
+        2) * weights
+    if self._anchorwise_output:
+      return anchorwise_smooth_l1norm
+    return tf.reduce_sum(anchorwise_smooth_l1norm)
+
+
+class WeightedIOULocalizationLoss(Loss):
+  """IOU localization loss function.
+
+  Sums the IOU for corresponding pairs of predicted/groundtruth boxes
+  and for each pair assign a loss of 1 - IOU.  We then compute a weighted
+  sum over all pairs which is returned as the total loss.
+  """
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded predicted boxes
+      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+        representing the decoded target boxes
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+    """
+    predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
+    target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
+    per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
+                                                         target_boxes)
+    return tf.reduce_sum(tf.reshape(weights, [-1]) * per_anchor_iou_loss)
+
+
+class WeightedSigmoidClassificationLoss(Loss):
+  """Sigmoid cross entropy classification loss function."""
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Outputs loss per anchor. (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    weights = tf.expand_dims(weights, 2)
+    if class_indices is not None:
+      weights *= tf.reshape(
+          ops.indices_to_dense_vector(class_indices,
+                                      tf.shape(prediction_tensor)[2]),
+          [1, 1, -1])
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor))
+    if self._anchorwise_output:
+      return tf.reduce_sum(per_entry_cross_ent * weights, 2)
+    return tf.reduce_sum(per_entry_cross_ent * weights)
+
+
+class WeightedSoftmaxClassificationLoss(Loss):
+  """Softmax loss function."""
+
+  def __init__(self, anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Whether to output loss per anchor (default False)
+
+    """
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+    """
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.reshape(target_tensor, [-1, num_classes]),
+        logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+    if self._anchorwise_output:
+      return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+    return tf.reduce_sum(per_row_cross_ent * tf.reshape(weights, [-1]))
+
+
+class BootstrappedSigmoidClassificationLoss(Loss):
+  """Bootstrapped sigmoid cross entropy classification loss function.
+
+  This loss uses a convex combination of training labels and the current model's
+  predictions as training targets in the classification loss. The idea is that
+  as the model improves over time, its predictions can be trusted more and we
+  can use these predictions to mitigate the damage of noisy/incorrect labels,
+  because incorrect labels are likely to be eventually highly inconsistent with
+  other stimuli predicted to have the same label by the model.
+
+  In "soft" bootstrapping, we use all predicted class probabilities, whereas in
+  "hard" bootstrapping, we use the single class favored by the model.
+
+  See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
+  Reed et al. (ICLR 2015).
+  """
+
+  def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
+    """Constructor.
+
+    Args:
+      alpha: a float32 scalar tensor between 0 and 1 representing interpolation
+        weight
+      bootstrap_type: set to either 'hard' or 'soft' (default)
+      anchorwise_output: Outputs loss per anchor. (default False)
+
+    Raises:
+      ValueError: if bootstrap_type is not either 'hard' or 'soft'
+    """
+    if bootstrap_type != 'hard' and bootstrap_type != 'soft':
+      raise ValueError('Unrecognized bootstrap_type: must be one of '
+                       '\'hard\' or \'soft.\'')
+    self._alpha = alpha
+    self._bootstrap_type = bootstrap_type
+    self._anchorwise_output = anchorwise_output
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    if self._bootstrap_type == 'soft':
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
+    else:
+      bootstrap_target_tensor = self._alpha * target_tensor + (
+          1.0 - self._alpha) * tf.cast(
+              tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=bootstrap_target_tensor, logits=prediction_tensor))
+    if self._anchorwise_output:
+      return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
+    return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
+
+
+class HardExampleMiner(object):
+  """Hard example mining for regions in a list of images.
+
+  Implements hard example mining to select a subset of regions to be
+  back-propagated. For each image, selects the regions with highest losses,
+  subject to the condition that a newly selected region cannot have
+  an IOU > iou_threshold with any of the previously selected regions.
+  This can be achieved by re-using a greedy non-maximum suppression algorithm.
+  A constraint on the number of negatives mined per positive region can also be
+  enforced.
+
+  Reference papers: "Training Region-based Object Detectors with Online
+  Hard Example Mining" (CVPR 2016) by Srivastava et al., and
+  "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
+  """
+
+  def __init__(self,
+               num_hard_examples=64,
+               iou_threshold=0.7,
+               loss_type='both',
+               cls_loss_weight=0.05,
+               loc_loss_weight=0.06,
+               max_negatives_per_positive=None,
+               min_negatives_per_image=0):
+    """Constructor.
+
+    The hard example mining implemented by this class can replicate the behavior
+    in the two aforementioned papers (Srivastava et al., and Liu et al).
+    To replicate the A2 paper (Srivastava et al), num_hard_examples is set
+    to a fixed parameter (64 by default) and iou_threshold is set to .7 for
+    running non-max-suppression the predicted boxes prior to hard mining.
+    In order to replicate the SSD paper (Liu et al), num_hard_examples should
+    be set to None, max_negatives_per_positive should be 3 and iou_threshold
+    should be 1.0 (in order to effectively turn off NMS).
+
+    Args:
+      num_hard_examples: maximum number of hard examples to be
+        selected per image (prior to enforcing max negative to positive ratio
+        constraint).  If set to None, all examples obtained after NMS are
+        considered.
+      iou_threshold: minimum intersection over union for an example
+        to be discarded during NMS.
+      loss_type: use only classification losses ('cls', default),
+        localization losses ('loc') or both losses ('both').
+        In the last case, cls_loss_weight and loc_loss_weight are used to
+        compute weighted sum of the two losses.
+      cls_loss_weight: weight for classification loss.
+      loc_loss_weight: weight for location loss.
+      max_negatives_per_positive: maximum number of negatives to retain for
+        each positive anchor. By default, num_negatives_per_positive is None,
+        which means that we do not enforce a prespecified negative:positive
+        ratio.  Note also that num_negatives_per_positives can be a float
+        (and will be converted to be a float even if it is passed in otherwise).
+      min_negatives_per_image: minimum number of negative anchors to sample for
+        a given image. Setting this to a positive number allows sampling
+        negatives in an image without any positive anchors and thus not biased
+        towards at least one detection per image.
+    """
+    self._num_hard_examples = num_hard_examples
+    self._iou_threshold = iou_threshold
+    self._loss_type = loss_type
+    self._cls_loss_weight = cls_loss_weight
+    self._loc_loss_weight = loc_loss_weight
+    self._max_negatives_per_positive = max_negatives_per_positive
+    self._min_negatives_per_image = min_negatives_per_image
+    if self._max_negatives_per_positive is not None:
+      self._max_negatives_per_positive = float(self._max_negatives_per_positive)
+    self._num_positives_list = None
+    self._num_negatives_list = None
+
+  def __call__(self,
+               location_losses,
+               cls_losses,
+               decoded_boxlist_list,
+               match_list=None):
+    """Computes localization and classification losses after hard mining.
+
+    Args:
+      location_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise localization losses.
+      cls_losses: a float tensor of shape [num_images, num_anchors]
+        representing anchorwise classification losses.
+      decoded_boxlist_list: a list of decoded BoxList representing location
+        predictions for each image.
+      match_list: an optional list of matcher.Match objects encoding the match
+        between anchors and groundtruth boxes for each image of the batch,
+        with rows of the Match objects corresponding to groundtruth boxes
+        and columns corresponding to anchors.  Match objects in match_list are
+        used to reference which anchors are positive, negative or ignored.  If
+        self._max_negatives_per_positive exists, these are then used to enforce
+        a prespecified negative to positive ratio.
+
+    Returns:
+      mined_location_loss: a float scalar with sum of localization losses from
+        selected hard examples.
+      mined_cls_loss: a float scalar with sum of classification losses from
+        selected hard examples.
+    Raises:
+      ValueError: if location_losses, cls_losses and decoded_boxlist_list do
+        not have compatible shapes (i.e., they must correspond to the same
+        number of images).
+      ValueError: if match_list is specified but its length does not match
+        len(decoded_boxlist_list).
+    """
+    mined_location_losses = []
+    mined_cls_losses = []
+    location_losses = tf.unstack(location_losses)
+    cls_losses = tf.unstack(cls_losses)
+    num_images = len(decoded_boxlist_list)
+    if not match_list:
+      match_list = num_images * [None]
+    if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
+      raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
+                       'do not have compatible shapes.')
+    if not isinstance(match_list, list):
+      raise ValueError('match_list must be a list.')
+    if len(match_list) != len(decoded_boxlist_list):
+      raise ValueError('match_list must either be None or have '
+                       'length=len(decoded_boxlist_list).')
+    num_positives_list = []
+    num_negatives_list = []
+    for ind, detection_boxlist in enumerate(decoded_boxlist_list):
+      box_locations = detection_boxlist.get()
+      match = match_list[ind]
+      image_losses = cls_losses[ind]
+      if self._loss_type == 'loc':
+        image_losses = location_losses[ind]
+      elif self._loss_type == 'both':
+        image_losses *= self._cls_loss_weight
+        image_losses += location_losses[ind] * self._loc_loss_weight
+      if self._num_hard_examples is not None:
+        num_hard_examples = self._num_hard_examples
+      else:
+        num_hard_examples = detection_boxlist.num_boxes()
+      selected_indices = tf.image.non_max_suppression(
+          box_locations, image_losses, num_hard_examples, self._iou_threshold)
+      if self._max_negatives_per_positive is not None and match:
+        (selected_indices, num_positives,
+         num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
+             selected_indices, match, self._max_negatives_per_positive,
+             self._min_negatives_per_image)
+        num_positives_list.append(num_positives)
+        num_negatives_list.append(num_negatives)
+      mined_location_losses.append(
+          tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
+      mined_cls_losses.append(
+          tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
+    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
+    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
+    if match and self._max_negatives_per_positive:
+      self._num_positives_list = num_positives_list
+      self._num_negatives_list = num_negatives_list
+    return (location_loss, cls_loss)
+
+  def summarize(self):
+    """Summarize the number of positives and negatives after mining."""
+    if self._num_positives_list and self._num_negatives_list:
+      avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
+      avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
+      tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
+      tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
+
+  def _subsample_selection_to_desired_neg_pos_ratio(self,
+                                                    indices,
+                                                    match,
+                                                    max_negatives_per_positive,
+                                                    min_negatives_per_image=0):
+    """Subsample a collection of selected indices to a desired neg:pos ratio.
+
+    This function takes a subset of M indices (indexing into a large anchor
+    collection of N anchors where M<N) which are labeled as positive/negative
+    via a Match object (matched indices are positive, unmatched indices
+    are negative).  It returns a subset of the provided indices retaining all
+    positives as well as up to the first K negatives, where:
+      K=floor(num_negative_per_positive * num_positives).
+
+    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
+    with positives=[2, 5] and negatives=[4, 7, 9, 10] and
+    num_negatives_per_positive=1, then the returned subset of indices
+    is [2, 4, 5, 7].
+
+    Args:
+      indices: An integer tensor of shape [M] representing a collection
+        of selected anchor indices
+      match: A matcher.Match object encoding the match between anchors and
+        groundtruth boxes for a given image, with rows of the Match objects
+        corresponding to groundtruth boxes and columns corresponding to anchors.
+      max_negatives_per_positive: (float) maximum number of negatives for
+        each positive anchor.
+      min_negatives_per_image: minimum number of negative anchors for a given
+        image. Allow sampling negatives in image without any positive anchors.
+
+    Returns:
+      selected_indices: An integer tensor of shape [M'] representing a
+        collection of selected anchor indices with M' <= M.
+      num_positives: An integer tensor representing the number of positive
+        examples in selected set of indices.
+      num_negatives: An integer tensor representing the number of negative
+        examples in selected set of indices.
+    """
+    positives_indicator = tf.gather(match.matched_column_indicator(), indices)
+    negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
+    num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
+    max_negatives = tf.maximum(min_negatives_per_image,
+                               tf.to_int32(max_negatives_per_positive *
+                                           tf.to_float(num_positives)))
+    topk_negatives_indicator = tf.less_equal(
+        tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
+    subsampled_selection_indices = tf.where(
+        tf.logical_or(positives_indicator, topk_negatives_indicator))
+    num_negatives = tf.size(subsampled_selection_indices) - num_positives
+    return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
+            num_positives, num_negatives)
--- a/object_detection/core/losses_test.py
+++ b/object_detection/core/losses_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.losses."""
+import math
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import losses
+from object_detection.core import matcher
+
+
+class WeightedL2LocalizationLossTest(tf.test.TestCase):
+  """Tests for losses.WeightedL2LocalizationLoss."""
+
+  def testReturnsCorrectLoss(self):
+    """Checks the scalar loss when only half of the anchors have weight 1."""
+    batch_size = 3
+    num_anchors = 10
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
+                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
+    loss_op = losses.WeightedL2LocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    # 3 batch entries * 5 anchors with weight 1 * 4 code entries, each with a
+    # prediction/target difference of 1; the expected value is half of that.
+    expected_loss = (3 * 5 * 4) / 2.0
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+  def testReturnsCorrectAnchorwiseLoss(self):
+    """Checks the per-anchor output produced with anchorwise_output=True."""
+    batch_size = 3
+    num_anchors = 16
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=True)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    # One value per (batch entry, anchor) pair is expected.
+    expected_loss = np.ones((batch_size, num_anchors)) * 2
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+  def testReturnsCorrectLossSum(self):
+    """Checks that the unit-weight scalar loss equals tf.nn.l2_loss."""
+    batch_size = 3
+    num_anchors = 16
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=False)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      expected_loss_output = sess.run(expected_loss)
+      self.assertAllClose(loss_output, expected_loss_output)
+
+  def testReturnsCorrectNanLoss(self):
+    """Checks the loss when half of every target code is NaN."""
+    batch_size = 3
+    num_anchors = 10
+    code_size = 4
+    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
+    # Floor division keeps the shape entries integral: `code_size / 2` is the
+    # float 2.0 under Python 3, which is not a valid tensor dimension.
+    half_code_size = code_size // 2
+    target_tensor = tf.concat([
+        tf.zeros([batch_size, num_anchors, half_code_size]),
+        tf.ones([batch_size, num_anchors, half_code_size]) * np.nan
+    ], axis=2)
+    weights = tf.ones([batch_size, num_anchors])
+    loss_op = losses.WeightedL2LocalizationLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
+                   ignore_nan_targets=True)
+
+    # Presumably the NaN entries contribute nothing, leaving 3 * 10 * 2 unit
+    # differences; half of that is the same 30.0 as (3 * 5 * 4) / 2.0.
+    expected_loss = (3 * 5 * 4) / 2.0
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, expected_loss)
+
+
+class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
+  """Tests for losses.WeightedSmoothL1LocalizationLoss."""
+
+  def testReturnsCorrectLoss(self):
+    """Compares the weighted loss of a small batch against a fixed value."""
+    # Batch of 2, 3 anchors per entry, code size 4.
+    predictions = tf.constant([[[2.5, 0, .4, 0],
+                                [0, 0, 0, 0],
+                                [0, 2.5, 0, .4]],
+                               [[3.5, 0, 0, 0],
+                                [0, .4, 0, .9],
+                                [0, 0, 1.5, 0]]], tf.float32)
+    targets = tf.zeros([2, 3, 4])
+    anchor_weights = tf.constant([[2, 1, 1],
+                                  [0, 3, 0]], tf.float32)
+    smooth_l1 = losses.WeightedSmoothL1LocalizationLoss()
+    loss = smooth_l1(predictions, targets, weights=anchor_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), 7.695)
+
+
+class WeightedIOULocalizationLossTest(tf.test.TestCase):
+  """Tests for losses.WeightedIOULocalizationLoss."""
+
+  def testReturnsCorrectLoss(self):
+    """Checks the loss for two exact box matches and one far-away target."""
+    predicted_boxes = tf.constant([[[1.5, 0, 2.4, 1],
+                                    [0, 0, 1, 1],
+                                    [0, 0, .5, .25]]])
+    # The first two targets equal their predictions; the third is distant.
+    target_boxes = tf.constant([[[1.5, 0, 2.4, 1],
+                                 [0, 0, 1, 1],
+                                 [50, 50, 500.5, 100.25]]])
+    box_weights = [[1.0, .5, 2.0]]
+    iou_loss = losses.WeightedIOULocalizationLoss()
+    loss = iou_loss(predicted_boxes, target_boxes, weights=box_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), 2.0)
+
+
+class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
+  """Tests for losses.WeightedSigmoidClassificationLoss."""
+
+  def testReturnsCorrectLoss(self):
+    """Checks the scalar loss for a batch of 2 with 4 anchors, 3 classes."""
+    logits = tf.constant([[[-100, 100, -100],
+                           [100, -100, -100],
+                           [100, 0, -100],
+                           [-100, -100, 100]],
+                          [[-100, 0, 100],
+                           [-100, 100, -100],
+                           [100, 100, 100],
+                           [0, 0, -1]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [1, 1, 1],
+                           [1, 0, 0]]], tf.float32)
+    # The last anchor of the second batch entry carries zero weight.
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    sigmoid_loss = losses.WeightedSigmoidClassificationLoss()
+    loss = sigmoid_loss(logits, labels, weights=anchor_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), -2 * math.log(.5))
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    """Checks the per-anchor loss matrix for the same inputs."""
+    logits = tf.constant([[[-100, 100, -100],
+                           [100, -100, -100],
+                           [100, 0, -100],
+                           [-100, -100, 100]],
+                          [[-100, 0, 100],
+                           [-100, 100, -100],
+                           [100, 100, 100],
+                           [0, 0, -1]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [1, 1, 1],
+                           [1, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    sigmoid_loss = losses.WeightedSigmoidClassificationLoss(True)
+    loss = sigmoid_loss(logits, labels, weights=anchor_weights)
+
+    log_two = -math.log(.5)
+    expected = np.matrix([[0, 0, log_two, 0],
+                          [log_two, 0, 0, 0]])
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), expected)
+
+  def testReturnsCorrectLossWithClassIndices(self):
+    """Checks the per-anchor loss when class_indices omits the last class."""
+    logits = tf.constant([[[-100, 100, -100, 100],
+                           [100, -100, -100, -100],
+                           [100, 0, -100, 100],
+                           [-100, -100, 100, -100]],
+                          [[-100, 0, 100, 100],
+                           [-100, 100, -100, 100],
+                           [100, 100, 100, 100],
+                           [0, 0, -1, 100]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0, 0],
+                           [1, 0, 0, 1],
+                           [1, 0, 0, 0],
+                           [0, 0, 1, 1]],
+                          [[0, 0, 1, 0],
+                           [0, 1, 0, 0],
+                           [1, 1, 1, 0],
+                           [1, 0, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    # Only classes 0..2 are listed, so the fourth class is left out.
+    kept_classes = tf.constant([0, 1, 2], tf.int32)
+    sigmoid_loss = losses.WeightedSigmoidClassificationLoss(True)
+    loss = sigmoid_loss(logits, labels, weights=anchor_weights,
+                        class_indices=kept_classes)
+
+    log_two = -math.log(.5)
+    expected = np.matrix([[0, 0, log_two, 0],
+                          [log_two, 0, 0, 0]])
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), expected)
+
+
+class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
+  """Tests for losses.WeightedSoftmaxClassificationLoss."""
+
+  def testReturnsCorrectLoss(self):
+    """Checks the scalar loss for a batch of 2 with 4 anchors, 3 classes."""
+    logits = tf.constant([[[-100, 100, -100],
+                           [100, -100, -100],
+                           [0, 0, -100],
+                           [-100, -100, 100]],
+                          [[-100, 0, 0],
+                           [-100, 100, -100],
+                           [-100, 100, -100],
+                           [100, -100, -100]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [0, 1, 0],
+                           [1, 0, 0]]], tf.float32)
+    # One anchor has weight .5 and one has weight 0.
+    anchor_weights = tf.constant([[1, 1, .5, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    softmax_loss = losses.WeightedSoftmaxClassificationLoss()
+    loss = softmax_loss(logits, labels, weights=anchor_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), - 1.5 * math.log(.5))
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    """Checks the per-anchor loss matrix for the same inputs."""
+    logits = tf.constant([[[-100, 100, -100],
+                           [100, -100, -100],
+                           [0, 0, -100],
+                           [-100, -100, 100]],
+                          [[-100, 0, 0],
+                           [-100, 100, -100],
+                           [-100, 100, -100],
+                           [100, -100, -100]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [0, 1, 0],
+                           [1, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, .5, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    softmax_loss = losses.WeightedSoftmaxClassificationLoss(True)
+    loss = softmax_loss(logits, labels, weights=anchor_weights)
+
+    expected = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), expected)
+
+
+class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
+  """Tests for losses.BootstrappedSigmoidClassificationLoss."""
+
+  def testReturnsCorrectLossSoftBootstrapping(self):
+    """Checks the scalar loss with bootstrap_type='soft' and alpha .5."""
+    logits = tf.constant([[[-100, 100, 0],
+                           [100, -100, -100],
+                           [100, -100, -100],
+                           [-100, -100, 100]],
+                          [[-100, -100, 100],
+                           [-100, 100, -100],
+                           [100, 100, 100],
+                           [0, 0, -1]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [1, 1, 1],
+                           [1, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    bootstrap_alpha = tf.constant(.5, tf.float32)
+    bootstrapped_loss = losses.BootstrappedSigmoidClassificationLoss(
+        bootstrap_alpha, bootstrap_type='soft')
+    loss = bootstrapped_loss(logits, labels, weights=anchor_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), -math.log(.5))
+
+  def testReturnsCorrectLossHardBootstrapping(self):
+    """Checks the scalar loss with bootstrap_type='hard' and alpha .5."""
+    logits = tf.constant([[[-100, 100, 0],
+                           [100, -100, -100],
+                           [100, -100, -100],
+                           [-100, -100, 100]],
+                          [[-100, -100, 100],
+                           [-100, 100, -100],
+                           [100, 100, 100],
+                           [0, 0, -1]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [1, 1, 1],
+                           [1, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    bootstrap_alpha = tf.constant(.5, tf.float32)
+    bootstrapped_loss = losses.BootstrappedSigmoidClassificationLoss(
+        bootstrap_alpha, bootstrap_type='hard')
+    loss = bootstrapped_loss(logits, labels, weights=anchor_weights)
+
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), -math.log(.5))
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    """Checks the per-anchor loss matrix with hard bootstrapping."""
+    logits = tf.constant([[[-100, 100, -100],
+                           [100, -100, -100],
+                           [100, 0, -100],
+                           [-100, -100, 100]],
+                          [[-100, 0, 100],
+                           [-100, 100, -100],
+                           [100, 100, 100],
+                           [0, 0, -1]]], tf.float32)
+    labels = tf.constant([[[0, 1, 0],
+                           [1, 0, 0],
+                           [1, 0, 0],
+                           [0, 0, 1]],
+                          [[0, 0, 1],
+                           [0, 1, 0],
+                           [1, 1, 1],
+                           [1, 0, 0]]], tf.float32)
+    anchor_weights = tf.constant([[1, 1, 1, 1],
+                                  [1, 1, 1, 0]], tf.float32)
+    bootstrap_alpha = tf.constant(.5, tf.float32)
+    bootstrapped_loss = losses.BootstrappedSigmoidClassificationLoss(
+        bootstrap_alpha, bootstrap_type='hard', anchorwise_output=True)
+    loss = bootstrapped_loss(logits, labels, weights=anchor_weights)
+
+    log_two = -math.log(.5)
+    expected = np.matrix([[0, 0, log_two, 0],
+                          [log_two, 0, 0, 0]])
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(loss), expected)
+
+
+class HardExampleMinerTest(tf.test.TestCase):
+  """Tests for losses.HardExampleMiner."""
+
+  def testHardMiningWithSingleLossType(self):
+    """Mines one example per image using only the location loss."""
+    loc_losses = tf.constant([[100, 90, 80, 0],
+                              [0, 1, 2, 3]], tf.float32)
+    class_losses = tf.constant([[0, 10, 50, 110],
+                                [9, 6, 3, 0]], tf.float32)
+    # All four boxes are identical.
+    corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9]], tf.float32)
+    boxlists = [box_list.BoxList(corners), box_list.BoxList(corners)]
+    miner = losses.HardExampleMiner(num_hard_examples=1,
+                                    iou_threshold=0.0,
+                                    loss_type='loc',
+                                    cls_loss_weight=1,
+                                    loc_loss_weight=1)
+    (mined_loc_loss, mined_cls_loss) = miner(loc_losses, class_losses,
+                                             boxlists)
+    expected_loc = 100 + 3
+    expected_cls = 0 + 0
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(mined_loc_loss), expected_loc)
+      self.assertAllClose(sess.run(mined_cls_loss), expected_cls)
+
+  def testHardMiningWithBothLossType(self):
+    """Mines one example per image with loss_type='both'."""
+    loc_losses = tf.constant([[100, 90, 80, 0],
+                              [0, 1, 2, 3]], tf.float32)
+    class_losses = tf.constant([[0, 10, 50, 110],
+                                [9, 6, 3, 0]], tf.float32)
+    corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9]], tf.float32)
+    boxlists = [box_list.BoxList(corners), box_list.BoxList(corners)]
+    miner = losses.HardExampleMiner(num_hard_examples=1,
+                                    iou_threshold=0.0,
+                                    loss_type='both',
+                                    cls_loss_weight=1,
+                                    loc_loss_weight=1)
+    (mined_loc_loss, mined_cls_loss) = miner(loc_losses, class_losses,
+                                             boxlists)
+    expected_loc = 80 + 0
+    expected_cls = 50 + 9
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(mined_loc_loss), expected_loc)
+      self.assertAllClose(sess.run(mined_cls_loss), expected_cls)
+
+  def testHardMiningNMS(self):
+    """Mines two examples per image with iou_threshold=0.5 on 'cls' loss."""
+    loc_losses = tf.constant([[100, 90, 80, 0],
+                              [0, 1, 2, 3]], tf.float32)
+    class_losses = tf.constant([[0, 10, 50, 110],
+                                [9, 6, 3, 0]], tf.float32)
+    # The second box differs from the other three, which are identical.
+    corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
+                           [0.9, 0.9, 0.99, 0.99],
+                           [0.1, 0.1, 0.9, 0.9],
+                           [0.1, 0.1, 0.9, 0.9]], tf.float32)
+    boxlists = [box_list.BoxList(corners), box_list.BoxList(corners)]
+    miner = losses.HardExampleMiner(num_hard_examples=2,
+                                    iou_threshold=0.5,
+                                    loss_type='cls',
+                                    cls_loss_weight=1,
+                                    loc_loss_weight=1)
+    (mined_loc_loss, mined_cls_loss) = miner(loc_losses, class_losses,
+                                             boxlists)
+    expected_loc = 0 + 90 + 0 + 1
+    expected_cls = 110 + 10 + 9 + 6
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(mined_loc_loss), expected_loc)
+      self.assertAllClose(sess.run(mined_cls_loss), expected_cls)
+
+  def testEnforceNegativesPerPositiveRatio(self):
+    """Sweeps max_negatives_per_positive over a single 12-anchor image."""
+    loc_losses = tf.constant([[100, 90, 80, 0, 1, 2,
+                               3, 10, 20, 100, 20, 3]], tf.float32)
+    class_losses = tf.constant([[0, 0, 100, 0, 90, 70,
+                                 0, 60, 0, 17, 13, 0]], tf.float32)
+    corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.5, 0.1],
+                           [0.0, 0.0, 0.6, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.8, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 1.0, 0.1],
+                           [0.0, 0.0, 1.1, 0.1],
+                           [0.0, 0.0, 0.2, 0.1]], tf.float32)
+    # Four anchors have a non-negative match value; the rest are -1.
+    matches = [matcher.Match(
+        tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3]))]
+    boxlists = [box_list.BoxList(corners)]
+
+    # Rows: (max_negatives_per_positive, expected loc loss, expected cls loss).
+    cases = [
+        (0.0, 80 + 2, 100 + 70),
+        (0.5, 80 + 1 + 2, 100 + 90 + 70),
+        (1.0, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
+        (1.5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
+        (10, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
+    ]
+
+    for negatives_ratio, expected_loc, expected_cls in cases:
+      miner = losses.HardExampleMiner(
+          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
+          cls_loss_weight=1, loc_loss_weight=1,
+          max_negatives_per_positive=negatives_ratio)
+      (mined_loc_loss, mined_cls_loss) = miner(loc_losses, class_losses,
+                                               boxlists, matches)
+      miner.summarize()
+
+      with self.test_session() as sess:
+        self.assertAllClose(sess.run(mined_loc_loss), expected_loc)
+        self.assertAllClose(sess.run(mined_cls_loss), expected_cls)
+
+  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
+    """Sweeps min_negatives_per_image on an image whose matches are all -1."""
+    loc_losses = tf.constant([[100, 90, 80, 0, 1, 2,
+                               3, 10, 20, 100, 20, 3]], tf.float32)
+    class_losses = tf.constant([[0, 0, 100, 0, 90, 70,
+                                 0, 60, 0, 17, 13, 0]], tf.float32)
+    corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.5, 0.1],
+                           [0.0, 0.0, 0.6, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 0.8, 0.1],
+                           [0.0, 0.0, 0.2, 0.1],
+                           [0.0, 0.0, 1.0, 0.1],
+                           [0.0, 0.0, 1.1, 0.1],
+                           [0.0, 0.0, 0.2, 0.1]], tf.float32)
+    matches = [matcher.Match(tf.constant([-1] * 12))]
+    boxlists = [box_list.BoxList(corners)]
+
+    # Rows: (min_negatives_per_image, expected loc loss, expected cls loss).
+    cases = [
+        (0, 0, 0),
+        (1, 80, 100),
+        (2, 80 + 1, 100 + 90),
+        (4, 80 + 1 + 2 + 10, 100 + 90 + 70 + 60),
+        (5, 80 + 1 + 2 + 10 + 100, 100 + 90 + 70 + 60 + 17),
+        (6, 80 + 1 + 2 + 10 + 100 + 20, 100 + 90 + 70 + 60 + 17 + 13),
+    ]
+
+    for min_negatives, expected_loc, expected_cls in cases:
+      miner = losses.HardExampleMiner(
+          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
+          cls_loss_weight=1, loc_loss_weight=1,
+          max_negatives_per_positive=3,
+          min_negatives_per_image=min_negatives)
+      (mined_loc_loss, mined_cls_loss) = miner(loc_losses, class_losses,
+                                               boxlists, matches)
+      with self.test_session() as sess:
+        self.assertAllClose(sess.run(mined_loc_loss), expected_loc)
+        self.assertAllClose(sess.run(mined_cls_loss), expected_cls)
+
+
+# Run the test cases in this module when the file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/matcher.py
+++ b/object_detection/core/matcher.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor no_match.
+
+The ignore case is regularly encountered in object detection: when an anchor has
+a relatively small overlap with a ground-truth box, one neither wants to
+consider this box a positive example (match) nor a negative example (no match).
+
+The Match class is used to store the match results and it provides simple apis
+to query the results.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not an int32
+        tensor.
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    # Only int32 passes this check, so the message names exactly that dtype.
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 tensor')
+    self._match_results = match_results
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.matched_column_indicator()))
+
+  def matched_column_indicator(self):
+    """Returns boolean column indicator where True means the column matched.
+
+    Returns:
+      column_indicator: boolean vector which is True for all matched column
+      indices.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.unmatched_column_indicator()))
+
+  def unmatched_column_indicator(self):
+    """Returns boolean column indicator where True means column is unmatched.
+
+    Returns:
+      column_indicator: boolean vector which is True for all unmatched column
+      indices.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the column is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+      indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of ignored columns."""
+    return tf.size(self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indices().  For example if
+    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        tf.gather(self._match_results, self.matched_column_indices()))
+
+  def _reshape_and_cast(self, t):
+    """Flattens `t` to rank 1 and casts it to int32.
+
+    Args:
+      t: tensor to flatten, e.g. the output of tf.where.
+
+    Returns:
+      int32 tensor of rank 1 holding the elements of `t`.
+    """
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+
+class Matcher(object):
+  """Abstract base class that concrete matchers derive from.
+
+  Subclasses implement `_match`; callers use `match`, which wraps the raw
+  result in a Match object.
+  """
+  # NOTE: this is the Python 2 spelling of a metaclass declaration. Python 3
+  # does not treat a `__metaclass__` attribute as the metaclass, so the
+  # @abstractmethod marker below is only enforced under Python 2.
+  __metaclass__ = ABCMeta
+
+  def match(self, similarity_matrix, scope=None, **params):
+    """Matches row and column indices and wraps the result in a Match.
+
+    The matching itself is delegated to `_match`, which works from the
+    similarity matrix and any optional keyword arguments.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      scope: Op scope name. Defaults to 'Match' if None.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    with tf.name_scope(scope, 'Match', [similarity_matrix, params]):
+      raw_match_results = self._match(similarity_matrix, **params)
+      return Match(raw_match_results)
+
+  @abstractmethod
+  def _match(self, similarity_matrix, **params):
+    """Method to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+        that column i is matched to row match_results[i], match_results[i]=-1
+        means that the column is not matched. match_results[i]=-2 means that
+        the column is ignored (usually this happens when there is a very weak
+        match which one neither wants as positive nor negative example).
+    """
--- a/object_detection/core/matcher_test.py
+++ b/object_detection/core/matcher_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.matcher."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import matcher
+
+
+class AnchorMatcherTest(tf.test.TestCase):
+  """Tests for the accessors of matcher.Match.
+
+  Most tests share the match results [3, 1, -1, 0, -1, 5, -2], in which
+  columns 0, 1, 3 and 5 are matched (value >= 0), columns 2 and 4 are unmatched
+  (value -1) and column 6 is ignored (value -2).
+  """
+
+  def test_get_correct_matched_columnIndices(self):
+    """Matched column indices are the positions holding values >= 0."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [0, 1, 3, 5]
+    matched_column_indices = match.matched_column_indices()
+    self.assertEqual(matched_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      matched_column_indices = sess.run(matched_column_indices)
+      self.assertAllEqual(matched_column_indices, expected_column_indices)
+
+  def test_get_correct_counts(self):
+    """Counts of matched, unmatched and ignored columns are int32 scalars."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    exp_num_matched_columns = 4
+    exp_num_unmatched_columns = 2
+    exp_num_ignored_columns = 1
+    num_matched_columns = match.num_matched_columns()
+    num_unmatched_columns = match.num_unmatched_columns()
+    num_ignored_columns = match.num_ignored_columns()
+    self.assertEqual(num_matched_columns.dtype, tf.int32)
+    self.assertEqual(num_unmatched_columns.dtype, tf.int32)
+    self.assertEqual(num_ignored_columns.dtype, tf.int32)
+    with self.test_session() as sess:
+      (num_matched_columns_out, num_unmatched_columns_out,
+       num_ignored_columns_out) = sess.run(
+           [num_matched_columns, num_unmatched_columns, num_ignored_columns])
+      self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
+      self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
+      self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
+
+  def testGetCorrectUnmatchedColumnIndices(self):
+    """Unmatched column indices are the positions holding -1."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [2, 4]
+    unmatched_column_indices = match.unmatched_column_indices()
+    self.assertEqual(unmatched_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      unmatched_column_indices = sess.run(unmatched_column_indices)
+      self.assertAllEqual(unmatched_column_indices, expected_column_indices)
+
+  def testGetCorrectMatchedRowIndices(self):
+    """Matched row indices are the non-negative values, in column order."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_row_indices = [3, 1, 0, 5]
+    matched_row_indices = match.matched_row_indices()
+    self.assertEqual(matched_row_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      matched_row_inds = sess.run(matched_row_indices)
+      self.assertAllEqual(matched_row_inds, expected_row_indices)
+
+  def test_get_correct_ignored_column_indices(self):
+    """Ignored column indices are the positions holding -2."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [6]
+    ignored_column_indices = match.ignored_column_indices()
+    self.assertEqual(ignored_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      ignored_column_indices = sess.run(ignored_column_indices)
+      self.assertAllEqual(ignored_column_indices, expected_column_indices)
+
+  def test_get_correct_matched_column_indicator(self):
+    """The matched indicator is a bool mask over all columns."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [True, True, False, True, False, True, False]
+    matched_column_indicator = match.matched_column_indicator()
+    self.assertEqual(matched_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      matched_column_indicator = sess.run(matched_column_indicator)
+      self.assertAllEqual(matched_column_indicator, expected_column_indicator)
+
+  def test_get_correct_unmatched_column_indicator(self):
+    """The unmatched indicator is True only where the value is -1."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [False, False, True, False, True, False, False]
+    unmatched_column_indicator = match.unmatched_column_indicator()
+    self.assertEqual(unmatched_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      unmatched_column_indicator = sess.run(unmatched_column_indicator)
+      self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
+
+  def test_get_correct_ignored_column_indicator(self):
+    """The ignored indicator is True only where the value is -2."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indicator = [False, False, False, False, False, False, True]
+    ignored_column_indicator = match.ignored_column_indicator()
+    self.assertEqual(ignored_column_indicator.dtype, tf.bool)
+    with self.test_session() as sess:
+      ignored_column_indicator = sess.run(ignored_column_indicator)
+      self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
+
+  def test_get_correct_unmatched_ignored_column_indices(self):
+    """Unmatched-or-ignored indices cover both the -1 and -2 positions."""
+    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+    match = matcher.Match(match_results)
+    expected_column_indices = [2, 4, 6]
+    unmatched_ignored_column_indices = (match.
+                                        unmatched_or_ignored_column_indices())
+    self.assertEqual(unmatched_ignored_column_indices.dtype, tf.int32)
+    with self.test_session() as sess:
+      unmatched_ignored_column_indices = sess.run(
+          unmatched_ignored_column_indices)
+      self.assertAllEqual(unmatched_ignored_column_indices,
+                          expected_column_indices)
+
+  def test_all_columns_accounted_for(self):
+    """Every column is matched, unmatched or ignored, exactly once."""
+    # Note: deliberately setting to small number so not always
+    # all possibilities appear (matched, unmatched, ignored)
+    num_matches = 10
+    match_results = tf.random_uniform(
+        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
+    match = matcher.Match(match_results)
+    matched_column_indices = match.matched_column_indices()
+    unmatched_column_indices = match.unmatched_column_indices()
+    ignored_column_indices = match.ignored_column_indices()
+    with self.test_session() as sess:
+      # Fetch all three in one run so they describe the same random draw.
+      matched, unmatched, ignored = sess.run([
+          matched_column_indices, unmatched_column_indices,
+          ignored_column_indices
+      ])
+      all_indices = np.hstack((matched, unmatched, ignored))
+      all_indices_sorted = np.sort(all_indices)
+      self.assertAllEqual(all_indices_sorted,
+                          np.arange(num_matches, dtype=np.int32))
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/minibatch_sampler.py
+++ b/object_detection/core/minibatch_sampler.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+    subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled.
+
+Subclasses should implement the subsample function and can make use of the
+@staticmethod subsample_indicator.
+"""
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class MinibatchSampler(object):
+  """Abstract interface for samplers that pick a minibatch of examples."""
+  __metaclass__ = ABCMeta
+
+  def __init__(self):
+    """Constructs a minibatch sampler."""
+    pass
+
+  @abstractmethod
+  def subsample(self, indicator, batch_size, **params):
+    """Selects a subset of the entries flagged in indicator.
+
+    Args:
+      indicator: boolean tensor of shape [N] whose True entries can be sampled.
+      batch_size: desired batch size.
+      **params: additional keyword arguments for specific implementations of
+          the MinibatchSampler.
+
+    Returns:
+      sample_indicator: boolean tensor of shape [N] whose True entries have been
+      sampled. If sum(indicator) >= batch_size, sum(sample_indicator) =
+      batch_size.
+    """
+    pass
+
+  @staticmethod
+  def subsample_indicator(indicator, num_samples):
+    """Randomly keeps at most `num_samples` of the True entries of indicator.
+
+    If the indicator has M elements set to `True`, all but `num_samples` of
+    them (chosen by a random shuffle) are switched to `False`.  When
+    `num_samples` is at least M, every `True` element is kept, so the output
+    equals the input indicator.
+
+    Args:
+      indicator: a 1-dimensional boolean tensor indicating which elements
+        are allowed to be sampled and which are not.
+      num_samples: int32 scalar tensor
+
+    Returns:
+      a boolean tensor with the same shape as input (indicator) tensor
+    """
+    # Positions of the True entries, in random order, as a flat vector.
+    candidate_indices = tf.reshape(
+        tf.random_shuffle(tf.where(indicator)), [-1])
+
+    # Never ask for more entries than there are candidates.
+    num_to_keep = tf.minimum(tf.size(candidate_indices), num_samples)
+    kept_indices = tf.slice(
+        candidate_indices, [0], tf.reshape(num_to_keep, [1]))
+
+    dense_selection = ops.indices_to_dense_vector(
+        kept_indices, tf.shape(indicator)[0])
+
+    return tf.equal(dense_selection, 1)
--- a/object_detection/core/minibatch_sampler_test.py
+++ b/object_detection/core/minibatch_sampler_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.minibatch_sampler."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+
+
+class MinibatchSamplerTest(tf.test.TestCase):
+  """Tests for MinibatchSampler.subsample_indicator."""
+
+  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
+    """With 4 candidates and num_samples=3, exactly 3 are kept."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      # assertEqual, not assertTrue: assertTrue(x, 3) would treat 3 as the
+      # failure message and never check the count.
+      self.assertEqual(np.sum(samples_out), 3)
+      # Every sampled entry must have been a candidate in the input.
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
+    """Same as above, but the indicator is a placeholder without a shape."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.placeholder(tf.bool)
+    feed_dict = {indicator: np_indicator}
+
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 3)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples, feed_dict=feed_dict)
+      self.assertEqual(np.sum(samples_out), 3)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
+    """With 4 candidates and num_samples=5, all 4 candidates are kept."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 5)
+    with self.test_session() as sess:
+      samples_out = sess.run(samples)
+      self.assertEqual(np.sum(samples_out), 4)
+      self.assertAllEqual(samples_out,
+                          np.logical_and(samples_out, np_indicator))
+
+  def test_subsample_indicator_when_num_samples_is_zero(self):
+    """With num_samples=0 nothing is sampled."""
+    np_indicator = [True, False, True, False, True, True, False]
+    indicator = tf.constant(np_indicator)
+    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator, 0)
+    with self.test_session() as sess:
+      samples_none_out = sess.run(samples_none)
+      self.assertAllEqual(
+          np.zeros_like(samples_none_out, dtype=bool),
+          samples_none_out)
+
+  def test_subsample_indicator_when_indicator_all_false(self):
+    """An empty indicator yields an empty sample indicator."""
+    indicator_empty = tf.zeros([0], dtype=tf.bool)
+    samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
+        indicator_empty, 4)
+    with self.test_session() as sess:
+      samples_empty_out = sess.run(samples_empty)
+      self.assertEqual(0, samples_empty_out.size)
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/model.py
+++ b/object_detection/core/model.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Abstract detection model.
+
+This file defines a generic base class for detection models.  Programs that are
+designed to work with arbitrary detection models should only depend on this
+class.  We intend for the functions in this class to follow tensor-in/tensor-out
+design, thus all functions have tensors or lists/dictionaries holding tensors as
+inputs and outputs.
+
+Abstractly, detection models predict output tensors given input images
+which can be passed to a loss function at training time or passed to a
+postprocessing function at eval time.  The computation graphs at a high level
+consequently look as follows:
+
+Training time:
+inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
+
+Evaluation time:
+inputs (images tensor) -> preprocess -> predict -> postprocess
+ -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
+
+DetectionModels must thus implement four functions (1) preprocess, (2) predict,
+(3) postprocess and (4) loss.  DetectionModels should make no assumptions about
+the input size or aspect ratio --- they are responsible for doing any
+resize/reshaping necessary (see docstring for the preprocess function).
+Output classes are always integers in the range [0, num_classes).  Any mapping
+of these integers to semantic labels is to be handled outside of this class.
+
+By default, DetectionModels produce bounding box detections; however, we support
+a handful of auxiliary annotations associated with each bounding box, namely,
+instance masks and keypoints.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+from object_detection.core import standard_fields as fields
+
+
+class DetectionModel(object):
+  """Abstract base class for detection models."""
+  __metaclass__ = ABCMeta
+
+  def __init__(self, num_classes):
+    """Constructor.
+
+    Args:
+      num_classes: number of classes.  Note that num_classes *does not* include
+      background categories that might implicitly be predicted in various
+      implementations.
+    """
+    self._num_classes = num_classes
+    # Maps a groundtruth field name to its list of tensors; filled in by
+    # provide_groundtruth and read back through groundtruth_lists.
+    self._groundtruth_lists = {}
+
+  @property
+  def num_classes(self):
+    """Number of classes passed to the constructor."""
+    return self._num_classes
+
+  def groundtruth_lists(self, field):
+    """Access list of groundtruth tensors.
+
+    Args:
+      field: a string key, options are
+        fields.BoxListFields.{boxes,classes,masks,keypoints}
+
+    Returns:
+      a list of tensors holding groundtruth information (see also
+      provide_groundtruth function below), with one entry for each image in the
+      batch.
+    Raises:
+      RuntimeError: if the field has not been provided via provide_groundtruth.
+    """
+    if field not in self._groundtruth_lists:
+      # Interpolate the field name into the message; passing it as a second
+      # argument would leave the '%s' unformatted.
+      raise RuntimeError(
+          'Groundtruth tensor %s has not been provided' % field)
+    return self._groundtruth_lists[field]
+
+  @abstractmethod
+  def preprocess(self, inputs):
+    """Input preprocessing.
+
+    To be overridden by implementations.
+
+    This function is responsible for any scaling/shifting of input values that
+    is necessary prior to running the detector on an input image.
+    It is also responsible for any resizing that might be necessary as images
+    are assumed to arrive in arbitrary sizes.  While this function could
+    conceivably be part of the predict method (below), it is often convenient
+    to keep these separate --- for example, we may want to preprocess on one
+    device, place onto a queue, and let another device (e.g., the GPU) handle
+    prediction.
+
+    A few important notes about the preprocess function:
+    + We assume that this operation does not have any trainable variables nor
+    does it affect the groundtruth annotations in any way (thus data
+    augmentation operations such as random cropping should be performed
+    externally).
+    + There is no assumption that the batchsize in this function is the same as
+    the batch size in the predict function.  In fact, we recommend calling the
+    preprocess function prior to calling any batching operations (which should
+    happen outside of the model) and thus assuming that batch sizes are equal
+    to 1 in the preprocess function.
+    + There is also no explicit assumption that the output resolutions
+    must be fixed across inputs --- this is to support "fully convolutional"
+    settings in which input images can have different shapes/resolutions.
+
+    Args:
+      inputs: a [batch, height_in, width_in, channels] float32 tensor
+        representing a batch of images with values between 0 and 255.0.
+
+    Returns:
+      preprocessed_inputs: a [batch, height_out, width_out, channels] float32
+        tensor representing a batch of images.
+    """
+    pass
+
+  @abstractmethod
+  def predict(self, preprocessed_inputs):
+    """Predict prediction tensors from inputs tensor.
+
+    Outputs of this function can be passed to loss or postprocess functions.
+
+    Args:
+      preprocessed_inputs: a [batch, height, width, channels] float32 tensor
+        representing a batch of images.
+
+    Returns:
+      prediction_dict: a dictionary holding prediction tensors to be
+        passed to the Loss or Postprocess functions.
+    """
+    pass
+
+  @abstractmethod
+  def postprocess(self, prediction_dict, **params):
+    """Convert predicted output tensors to final detections.
+
+    Outputs adhere to the following conventions:
+    * Classes are integers in [0, num_classes); background classes are removed
+      and the first non-background class is mapped to 0.
+    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
+      format and normalized relative to the image window.
+    * `num_detections` is provided for settings where detections are padded to a
+      fixed number of boxes.
+    * We do not specifically assume any kind of probabilistic interpretation
+      of the scores --- the only important thing is their relative ordering.
+      Thus implementations of the postprocess function are free to output
+      logits, probabilities, calibrated probabilities, or anything else.
+
+    Args:
+      prediction_dict: a dictionary holding prediction tensors.
+      **params: Additional keyword arguments for specific implementations of
+        DetectionModel.
+
+    Returns:
+      detections: a dictionary containing the following fields
+        detection_boxes: [batch, max_detections, 4]
+        detection_scores: [batch, max_detections]
+        detection_classes: [batch, max_detections]
+        instance_masks: [batch, max_detections, image_height, image_width]
+          (optional)
+        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+        num_detections: [batch]
+    """
+    pass
+
+  @abstractmethod
+  def loss(self, prediction_dict):
+    """Compute scalar loss tensors with respect to provided groundtruth.
+
+    Calling this function requires that groundtruth tensors have been
+    provided via the provide_groundtruth function.
+
+    Args:
+      prediction_dict: a dictionary holding predicted tensors
+
+    Returns:
+      a dictionary mapping strings (loss names) to scalar tensors representing
+        loss values.
+    """
+    pass
+
+  def provide_groundtruth(self,
+                          groundtruth_boxes_list,
+                          groundtruth_classes_list,
+                          groundtruth_masks_list=None,
+                          groundtruth_keypoints_list=None):
+    """Provide groundtruth tensors.
+
+    Boxes and classes are always stored.  Masks and keypoints are stored only
+    when a non-empty list is passed; otherwise any value stored for them by an
+    earlier call is left untouched.
+
+    Args:
+      groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
+        [num_boxes, 4] containing coordinates of the groundtruth boxes.
+          Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
+          format and assumed to be normalized and clipped
+          relative to the image window with y_min <= y_max and x_min <= x_max.
+      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
+        tensors of shape [num_boxes, num_classes] containing the class targets
+        with the 0th index assumed to map to the first non-background class.
+      groundtruth_masks_list: a list of 3-D tf.float32 tensors of
+        shape [max_detections, height_in, width_in] containing instance
+        masks with values in {0, 1}.  If None, no masks are provided.
+        Mask resolution `height_in`x`width_in` must agree with the resolution
+        of the input image tensor provided to the `preprocess` function.
+      groundtruth_keypoints_list: a list of tf.float32 tensors of
+        shape [batch, max_detections, num_keypoints, 2] containing keypoints.
+        NOTE(review): the stated shape carries a leading batch dimension even
+        though this is a per-image list --- confirm the intended shape.
+        Keypoints are assumed to be provided in normalized coordinates and
+        missing keypoints should be encoded as NaN.  If None, no keypoints are
+        provided.
+    """
+    self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
+    self._groundtruth_lists[
+        fields.BoxListFields.classes] = groundtruth_classes_list
+    if groundtruth_masks_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.masks] = groundtruth_masks_list
+    if groundtruth_keypoints_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.keypoints] = groundtruth_keypoints_list
+
+  @abstractmethod
+  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
+    """Return callable for loading a foreign checkpoint into tensorflow graph.
+
+    Loads variables from a different tensorflow graph (typically feature
+    extractor variables). This enables the model to initialize based on weights
+    from another task. For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with exception of
+    the num_classes parameter.
+
+    Args:
+      checkpoint_path: path to checkpoint to restore.
+      from_detection_checkpoint: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+
+    Returns:
+      a callable which takes a tf.Session as input and loads a checkpoint when
+        run.
+    """
+    pass
--- a/object_detection/core/post_processing.py
+++ b/object_detection/core/post_processing.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Post-processing operations on detected boxes."""
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import standard_fields as fields
+
+
+def multiclass_non_max_suppression(boxes,
+                                   scores,
+                                   score_thresh,
+                                   iou_thresh,
+                                   max_size_per_class,
+                                   max_total_size=0,
+                                   clip_window=None,
+                                   change_coordinate_frame=False,
+                                   masks=None,
+                                   additional_fields=None,
+                                   scope=None):
+  """Multi-class version of non maximum suppression.
+
+  This op greedily selects a subset of detection bounding boxes, pruning
+  away boxes that have high IOU (intersection over union) overlap (> thresh)
+  with already selected boxes.  It operates independently for each class for
+  which scores are provided (via the scores field of the input box_list),
+  pruning boxes with score less than a provided threshold prior to
+  applying NMS.
+
+  Please note that this operation is performed on *all* classes, therefore any
+  background classes should be removed prior to calling this function.
+
+  Args:
+    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
+      number of classes or 1 depending on whether a separate box is predicted
+      per class.
+    scores: A [k, num_classes] float32 tensor containing the scores for each of
+      the k detections.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_size_per_class: maximum number of retained boxes per class.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip and normalize boxes to before performing
+      non-max suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window
+      is provided)
+    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
+      containing box masks. `q` can be either number of classes or 1 depending
+      on whether a separate mask is predicted per class.
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose first dimensions are all of size `k`. After non-maximum
+      suppression, all tensors corresponding to the selected boxes will be
+      added to resulting BoxList.
+    scope: name scope.
+
+  Returns:
+    a BoxList holding M boxes with a rank-1 scores field representing
+      corresponding scores for each box with scores sorted in decreasing order
+      and a rank-1 classes field representing a class label for each box.
+      If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
+      contain masks, keypoints, keypoint_heatmaps corresponding to boxes.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
+      a valid scores field.
+  """
+  if not 0 <= iou_thresh <= 1.0:
+    raise ValueError('iou_thresh must be between 0 and 1')
+  if scores.shape.ndims != 2:
+    raise ValueError('scores field must be of rank 2')
+  if scores.shape[1].value is None:
+    raise ValueError('scores must have statically defined second '
+                     'dimension')
+  if boxes.shape.ndims != 3:
+    raise ValueError('boxes must be of rank 3.')
+  if not (boxes.shape[1].value == scores.shape[1].value or
+          boxes.shape[1].value == 1):
+    raise ValueError('second dimension of boxes must be either 1 or equal '
+                     'to the second dimension of scores')
+  if boxes.shape[2].value != 4:
+    raise ValueError('last dimension of boxes must be of size 4.')
+  if change_coordinate_frame and clip_window is None:
+    raise ValueError('if change_coordinate_frame is True, then a clip_window'
+                     'must be specified.')
+
+  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
+    num_boxes = tf.shape(boxes)[0]
+    num_scores = tf.shape(scores)[0]
+    num_classes = scores.get_shape()[1]
+
+    length_assert = tf.Assert(
+        tf.equal(num_boxes, num_scores),
+        ['Incorrect scores field length: actual vs expected.',
+         num_scores, num_boxes])
+
+    selected_boxes_list = []
+    per_class_boxes_list = tf.unstack(boxes, axis=1)
+    if masks is not None:
+      per_class_masks_list = tf.unstack(masks, axis=1)
+    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
+                 else [0] * num_classes)
+    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
+      per_class_boxes = per_class_boxes_list[boxes_idx]
+      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
+      with tf.control_dependencies([length_assert]):
+        class_scores = tf.reshape(
+            tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
+      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
+                                         class_scores)
+      if masks is not None:
+        per_class_masks = per_class_masks_list[boxes_idx]
+        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
+                                           per_class_masks)
+      if additional_fields is not None:
+        for key, tensor in additional_fields.items():
+          boxlist_and_class_scores.add_field(key, tensor)
+      boxlist_filtered = box_list_ops.filter_greater_than(
+          boxlist_and_class_scores, score_thresh)
+      if clip_window is not None:
+        boxlist_filtered = box_list_ops.clip_to_window(
+            boxlist_filtered, clip_window)
+        if change_coordinate_frame:
+          boxlist_filtered = box_list_ops.change_coordinate_frame(
+              boxlist_filtered, clip_window)
+      max_selection_size = tf.minimum(max_size_per_class,
+                                      boxlist_filtered.num_boxes())
+      selected_indices = tf.image.non_max_suppression(
+          boxlist_filtered.get(),
+          boxlist_filtered.get_field(fields.BoxListFields.scores),
+          max_selection_size,
+          iou_threshold=iou_thresh)
+      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
+      nms_result.add_field(
+          fields.BoxListFields.classes, (tf.zeros_like(
+              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
+      selected_boxes_list.append(nms_result)
+    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
+    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
+                                              fields.BoxListFields.scores)
+    if max_total_size:
+      max_total_size = tf.minimum(max_total_size,
+                                  sorted_boxes.num_boxes())
+      sorted_boxes = box_list_ops.gather(sorted_boxes,
+                                         tf.range(max_total_size))
+    return sorted_boxes
+
+
+def batch_multiclass_non_max_suppression(boxes,
+                                         scores,
+                                         score_thresh,
+                                         iou_thresh,
+                                         max_size_per_class,
+                                         max_total_size=0,
+                                         clip_window=None,
+                                         change_coordinate_frame=False,
+                                         num_valid_boxes=None,
+                                         masks=None,
+                                         scope=None):
+  """Multi-class version of non maximum suppression that operates on a batch.
+
+  This op is similar to `multiclass_non_max_suppression` but operates on a batch
+  of boxes and scores. See documentation for `multiclass_non_max_suppression`
+  for details.
+
+  Args:
+    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
+      detections. If `q` is 1 then same boxes are used for all classes
+        otherwise, if `q` is equal to number of classes, class-specific boxes
+        are used.
+    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
+      the scores for each of the `num_anchors` detections.
+    score_thresh: scalar threshold for score (low scoring boxes are removed).
+    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+      with previously selected boxes are removed).
+    max_size_per_class: maximum number of retained boxes per class.
+    max_total_size: maximum number of boxes retained over all classes. By
+      default returns all boxes retained after capping boxes per class.
+    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+      representing the window to clip boxes to before performing non-max
+      suppression.
+    change_coordinate_frame: Whether to normalize coordinates after clipping
+      relative to clip_window (this can only be set to True if a clip_window
+      is provided)
+    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
+      [batch_size] representing the number of valid boxes to be considered
+        for each image in the batch.  This parameter allows for ignoring zero
+        paddings.
+    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
+      float32 tensor containing box masks. `q` can be either number of classes
+      or 1 depending on whether a separate mask is predicted per class.
+    scope: tf scope name.
+
+  Returns:
+    A dictionary containing the following entries:
+    'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
+      containing the non-max suppressed boxes.
+    'detection_scores': A [bath_size, max_detections] float32 tensor containing
+      the scores for the boxes.
+    'detection_classes': A [batch_size, max_detections] float32 tensor
+      containing the class for boxes.
+    'num_detections': A [batchsize] float32 tensor indicating the number of
+      valid detections per batch item. Only the top num_detections[i] entries in
+      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. the rest of the
+      entries are zero paddings.
+    'detection_masks': (optional) a
+      [batch_size, max_detections, mask_height, mask_width] float32 tensor
+      containing masks for each selected box.
+
+  Raises:
+    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
+      a valid scores field.
+  """
+  q = boxes.shape[2].value
+  num_classes = scores.shape[2].value
+  if q != 1 and q != num_classes:
+    raise ValueError('third dimension of boxes must be either 1 or equal '
+                     'to the third dimension of scores')
+
+  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
+    per_image_boxes_list = tf.unstack(boxes)
+    per_image_scores_list = tf.unstack(scores)
+    num_valid_boxes_list = len(per_image_boxes_list) * [None]
+    per_image_masks_list = len(per_image_boxes_list) * [None]
+    if num_valid_boxes is not None:
+      num_valid_boxes_list = tf.unstack(num_valid_boxes)
+    if masks is not None:
+      per_image_masks_list = tf.unstack(masks)
+
+    detection_boxes_list = []
+    detection_scores_list = []
+    detection_classes_list = []
+    num_detections_list = []
+    detection_masks_list = []
+    for (per_image_boxes, per_image_scores, per_image_masks, num_valid_boxes
+        ) in zip(per_image_boxes_list, per_image_scores_list,
+                 per_image_masks_list, num_valid_boxes_list):
+      if num_valid_boxes is not None:
+        per_image_boxes = tf.reshape(
+            tf.slice(per_image_boxes, 3*[0],
+                     tf.stack([num_valid_boxes, -1, -1])), [-1, q, 4])
+        per_image_scores = tf.reshape(
+            tf.slice(per_image_scores, [0, 0],
+                     tf.stack([num_valid_boxes, -1])), [-1, num_classes])
+        if masks is not None:
+          per_image_masks = tf.reshape(
+              tf.slice(per_image_masks, 4*[0],
+                       tf.stack([num_valid_boxes, -1, -1, -1])),
+              [-1, q, masks.shape[3].value, masks.shape[4].value])
+      nmsed_boxlist = multiclass_non_max_suppression(
+          per_image_boxes,
+          per_image_scores,
+          score_thresh,
+          iou_thresh,
+          max_size_per_class,
+          max_total_size,
+          masks=per_image_masks,
+          clip_window=clip_window,
+          change_coordinate_frame=change_coordinate_frame)
+      num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))
+      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
+                                                         max_total_size)
+      detection_boxes_list.append(padded_boxlist.get())
+      detection_scores_list.append(
+          padded_boxlist.get_field(fields.BoxListFields.scores))
+      detection_classes_list.append(
+          padded_boxlist.get_field(fields.BoxListFields.classes))
+      if masks is not None:
+        detection_masks_list.append(
+            padded_boxlist.get_field(fields.BoxListFields.masks))
+
+    nms_dict = {
+        'detection_boxes': tf.stack(detection_boxes_list),
+        'detection_scores': tf.stack(detection_scores_list),
+        'detection_classes': tf.stack(detection_classes_list),
+        'num_detections': tf.stack(num_detections_list)
+    }
+    if masks is not None:
+      nms_dict['detection_masks'] = tf.stack(detection_masks_list)
+    return nms_dict
--- a/object_detection/core/post_processing_test.py
+++ b/object_detection/core/post_processing_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow_models.object_detection.core.post_processing."""
+import numpy as np
+import tensorflow as tf
+from object_detection.core import post_processing
+from object_detection.core import standard_fields as fields
+
+
+class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
+
+  def test_with_invalid_scores_size(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]]], tf.float32)
+    scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
+    iou_thresh = .5
+    score_thresh = 0.6
+    max_output_size = 3
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      with self.assertRaisesWithPredicateMatch(
+          tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
+        sess.run(nms.get())
+
+  def test_multiclass_nms_select_with_shared_boxes(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 1000, 1, 1002],
+                       [0, 100, 1, 101]]
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    num_keypoints = 6
+    keypoints = tf.tile(
+        tf.reshape(tf.range(8), [8, 1, 1]),
+        [1, num_keypoints, 2])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 1000, 1, 1002],
+                       [0, 100, 1, 101]]
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+    exp_nms_keypoints_tensor = tf.tile(
+        tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
+        [1, num_keypoints, 2])
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        additional_fields={
+            fields.BoxListFields.keypoints: keypoints})
+
+    with self.test_session() as sess:
+      (nms_corners_output,
+       nms_scores_output,
+       nms_classes_output,
+       nms_keypoints,
+       exp_nms_keypoints) = sess.run([
+           nms.get(),
+           nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes),
+           nms.get_field(fields.BoxListFields.keypoints),
+           exp_nms_keypoints_tensor
+       ])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+      self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
+
+  def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+
+    num_boxes = tf.shape(boxes)[0]
+    heatmap_height = 5
+    heatmap_width = 5
+    num_keypoints = 17
+    keypoint_heatmaps = tf.ones(
+        [num_boxes, heatmap_height, heatmap_width, num_keypoints],
+        dtype=tf.float32)
+
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 1000, 1, 1002],
+                       [0, 100, 1, 101]]
+
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+    exp_nms_keypoint_heatmaps = np.ones(
+        (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        additional_fields={
+            fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
+
+    with self.test_session() as sess:
+      (nms_corners_output,
+       nms_scores_output,
+       nms_classes_output,
+       nms_keypoint_heatmaps) = sess.run(
+           [nms.get(),
+            nms.get_field(fields.BoxListFields.scores),
+            nms.get_field(fields.BoxListFields.classes),
+            nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
+
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+      self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
+
+  def test_multiclass_nms_with_additional_fields(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+
+    coarse_boxes_key = 'coarse_boxes'
+    coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
+                                [0.1, 0.2, 1.1, 1.2],
+                                [0.1, -0.2, 1.1, 1.0],
+                                [0.1, 10.1, 1.1, 11.1],
+                                [0.1, 10.2, 1.1, 11.2],
+                                [0.1, 100.1, 1.1, 101.1],
+                                [0.1, 1000.1, 1.1, 1002.1],
+                                [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
+
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = np.array([[0, 10, 1, 11],
+                                [0, 0, 1, 1],
+                                [0, 1000, 1, 1002],
+                                [0, 100, 1, 101]], dtype=np.float32)
+
+    exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
+                                       [0.1, 0.1, 1.1, 1.1],
+                                       [0.1, 1000.1, 1.1, 1002.1],
+                                       [0.1, 100.1, 1.1, 101.1]],
+                                      dtype=np.float32)
+
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        additional_fields={coarse_boxes_key: coarse_boxes})
+
+    with self.test_session() as sess:
+      (nms_corners_output,
+       nms_scores_output,
+       nms_classes_output,
+       nms_coarse_corners) = sess.run(
+           [nms.get(),
+            nms.get_field(fields.BoxListFields.scores),
+            nms.get_field(fields.BoxListFields.classes),
+            nms.get_field(coarse_boxes_key)])
+
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+      self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
+
+  def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    num_classes = 2
+    mask_height = 3
+    mask_width = 3
+    masks = tf.tile(
+        tf.reshape(tf.range(8), [8, 1, 1, 1]),
+        [1, num_classes, mask_height, mask_width])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 1000, 1, 1002],
+                       [0, 100, 1, 101]]
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+    exp_nms_masks_tensor = tf.tile(
+        tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
+        [1, mask_height, mask_width])
+
+    nms = post_processing.multiclass_non_max_suppression(boxes, scores,
+                                                         score_thresh,
+                                                         iou_thresh,
+                                                         max_output_size,
+                                                         masks=masks)
+    with self.test_session() as sess:
+      (nms_corners_output,
+       nms_scores_output,
+       nms_classes_output,
+       nms_masks,
+       exp_nms_masks) = sess.run([nms.get(),
+                                  nms.get_field(fields.BoxListFields.scores),
+                                  nms.get_field(fields.BoxListFields.classes),
+                                  nms.get_field(fields.BoxListFields.masks),
+                                  exp_nms_masks_tensor])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+      self.assertAllEqual(nms_masks, exp_nms_masks)
+
+  def test_multiclass_nms_select_with_clip_window(self):
+    boxes = tf.constant([[[0, 0, 10, 10]],
+                         [[1, 1, 11, 11]]], tf.float32)
+    scores = tf.constant([[.9], [.75]])
+    clip_window = tf.constant([5, 4, 8, 7], tf.float32)
+    score_thresh = 0.0
+    iou_thresh = 0.5
+    max_output_size = 100
+
+    exp_nms_corners = [[5, 4, 8, 7]]
+    exp_nms_scores = [.9]
+    exp_nms_classes = [0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        clip_window=clip_window)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
+    boxes = tf.constant([[[0, 0, 10, 10]],
+                         [[1, 1, 11, 11]]], tf.float32)
+    scores = tf.constant([[.9], [.75]])
+    clip_window = tf.constant([5, 4, 8, 7], tf.float32)
+    score_thresh = 0.0
+    iou_thresh = 0.5
+    max_output_size = 100
+
+    exp_nms_corners = [[0, 0, 1, 1]]
+    exp_nms_scores = [.9]
+    exp_nms_classes = [0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size,
+        clip_window=clip_window, change_coordinate_frame=True)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_multiclass_nms_select_with_per_class_cap(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_size_per_class = 2
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 1000, 1, 1002]]
+    exp_nms_scores = [.95, .9, .85]
+    exp_nms_classes = [0, 0, 1]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_size_per_class)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_multiclass_nms_select_with_total_cap(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_size_per_class = 4
+    max_total_size = 2
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1]]
+    exp_nms_scores = [.95, .9]
+    exp_nms_classes = [0, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_size_per_class,
+        max_total_size)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
+    boxes = tf.constant([[[0, 0, 1, 1]],
+                         [[0, 0.1, 1, 1.1]],
+                         [[0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002]],
+                         [[0, 1000, 1, 1002.1]]], tf.float32)
+    scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 3
+
+    exp_nms = [[0, 10, 1, 11],
+               [0, 0, 1, 1],
+               [0, 100, 1, 101]]
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms.get())
+      self.assertAllClose(nms_output, exp_nms)
+
+  def test_multiclass_nms_select_with_separate_boxes(self):
+    boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
+                         [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                         [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                         [[0, 10, 1, 11], [0, 10, 1, 11]],
+                         [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                         [[0, 100, 1, 101], [0, 100, 1, 101]],
+                         [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                         [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
+                        tf.float32)
+    scores = tf.constant([[.9, 0.01], [.75, 0.05],
+                          [.6, 0.01], [.95, 0],
+                          [.5, 0.01], [.3, 0.01],
+                          [.01, .85], [.01, .5]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[0, 10, 1, 11],
+                       [0, 0, 1, 1],
+                       [0, 999, 2, 1004],
+                       [0, 100, 1, 101]]
+    exp_nms_scores = [.95, .9, .85, .3]
+    exp_nms_classes = [0, 0, 1, 0]
+
+    nms = post_processing.multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh, max_output_size)
+    with self.test_session() as sess:
+      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+          [nms.get(), nms.get_field(fields.BoxListFields.scores),
+           nms.get_field(fields.BoxListFields.classes)])
+      self.assertAllClose(nms_corners_output, exp_nms_corners)
+      self.assertAllClose(nms_scores_output, exp_nms_scores)
+      self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+  def test_batch_multiclass_nms_with_batch_size_1(self):
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]],
+                          [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0],
+                           [.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    exp_nms_corners = [[[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 999, 2, 1004],
+                        [0, 100, 1, 101]]]
+    exp_nms_scores = [[.95, .9, .85, .3]]
+    exp_nms_classes = [[0, 0, 1, 0]]
+
+    nms_dict = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms_dict)
+      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
+      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
+      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
+      self.assertEqual(nms_output['num_detections'], [4])
+
+  def test_batch_multiclass_nms_with_batch_size_2(self):
+    """Checks batched multiclass NMS outputs for a batch of two images."""
+    # The boxes literal is nested as 2 x 4 x 2 x 4 and the scores literal as
+    # 2 x 4 x 2 (presumably [batch, num_boxes, num_classes, 4] and
+    # [batch, num_boxes, num_classes] -- confirm against post_processing).
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    # Each image's expectation has max_output_size (4) rows; the rows past the
+    # expected num_detections ([2, 3] below) are all zeros.
+    exp_nms_corners = [[[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 999, 2, 1004],
+                        [0, 10.1, 1, 11.1],
+                        [0, 100, 1, 101],
+                        [0, 0, 0, 0]]]
+    exp_nms_scores = [[.95, .9, 0, 0],
+                      [.85, .5, .3, 0]]
+    exp_nms_classes = [[0, 0, 0, 0],
+                       [1, 0, 0, 0]]
+
+    # The same max_output_size is used as both the per-class and total cap.
+    nms_dict = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms_dict)
+      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
+      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
+      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
+      self.assertAllClose(nms_output['num_detections'], [2, 3])
+
+  def test_batch_multiclass_nms_with_masks(self):
+    """Checks that batched multiclass NMS also returns the matching masks."""
+    # boxes and scores are the same fixtures as in the batch-size-2 test.
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    # One 2x2 mask per (image, box, class) entry; every mask holds distinct
+    # values so the expectation below identifies which mask was selected.
+    masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+                          [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+                          [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+                          [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+                         [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+                          [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+                          [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+                          [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+                        tf.float32)
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    # Each image's expectation has max_output_size (4) rows; the rows past the
+    # expected num_detections ([2, 3] below) are all zeros.
+    exp_nms_corners = [[[0, 10, 1, 11],
+                        [0, 0, 1, 1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 999, 2, 1004],
+                        [0, 10.1, 1, 11.1],
+                        [0, 100, 1, 101],
+                        [0, 0, 0, 0]]]
+    exp_nms_scores = [[.95, .9, 0, 0],
+                      [.85, .5, .3, 0]]
+    exp_nms_classes = [[0, 0, 0, 0],
+                       [1, 0, 0, 0]]
+    # Expected masks follow the same row order as exp_nms_corners and are
+    # zero-filled wherever the corresponding box row is zero-filled.
+    exp_nms_masks = [[[[6, 7], [8, 9]],
+                      [[0, 1], [2, 3]],
+                      [[0, 0], [0, 0]],
+                      [[0, 0], [0, 0]]],
+                     [[[13, 14], [15, 16]],
+                      [[8, 9], [10, 11]],
+                      [[10, 11], [12, 13]],
+                      [[0, 0], [0, 0]]]]
+
+    nms_dict = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size,
+        masks=masks)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms_dict)
+      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
+      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
+      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
+      self.assertAllClose(nms_output['num_detections'], [2, 3])
+      self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
+
+  def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
+    """Checks batched multiclass NMS when num_valid_boxes is supplied."""
+    # boxes, scores and masks are the same fixtures as in the masks test.
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+                          [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+                          [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+                          [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+                         [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+                          [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+                          [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+                          [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+                        tf.float32)
+    # One entry per image. The expectations below contain exactly one
+    # detection per image, and it matches the first box of that image.
+    num_valid_boxes = tf.constant([1, 1], tf.int32)
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+
+    # Outputs still have max_output_size (4) rows per image; every row after
+    # the single expected detection is zero-filled.
+    exp_nms_corners = [[[0, 0, 1, 1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 10.1, 1, 11.1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]]]
+    exp_nms_scores = [[.9, 0, 0, 0],
+                      [.5, 0, 0, 0]]
+    exp_nms_classes = [[0, 0, 0, 0],
+                       [0, 0, 0, 0]]
+    exp_nms_masks = [[[[0, 1], [2, 3]],
+                      [[0, 0], [0, 0]],
+                      [[0, 0], [0, 0]],
+                      [[0, 0], [0, 0]]],
+                     [[[8, 9], [10, 11]],
+                      [[0, 0], [0, 0]],
+                      [[0, 0], [0, 0]],
+                      [[0, 0], [0, 0]]]]
+
+    nms_dict = post_processing.batch_multiclass_non_max_suppression(
+        boxes, scores, score_thresh, iou_thresh,
+        max_size_per_class=max_output_size, max_total_size=max_output_size,
+        num_valid_boxes=num_valid_boxes, masks=masks)
+    with self.test_session() as sess:
+      nms_output = sess.run(nms_dict)
+      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
+      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
+      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
+      self.assertAllClose(nms_output['num_detections'], [1, 1])
+      self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/prefetcher.py
+++ b/object_detection/core/prefetcher.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Provides functions to prefetch tensors to feed into models."""
+import tensorflow as tf
+
+
+def prefetch(tensor_dict, capacity):
+  """Builds a queue that asynchronously prefetches `tensor_dict`.
+
+  A padding FIFO queue is created with one component per entry of
+  `tensor_dict`, a queue runner is registered to keep enqueueing the
+  tensor_dict, and a scalar summary tracking how full the queue is gets added.
+  Call `dequeue()` on the returned queue to obtain a tensor_dict whose values
+  have already been computed, so consumers do not wait on preprocessing.
+
+  Example input pipeline when you don't need batching:
+  ----------------------------------------------------
+  key, string_tensor = slim.parallel_reader.parallel_read(...)
+  tensor_dict = decoder.decode(string_tensor)
+  tensor_dict = preprocessor.preprocess(tensor_dict, ...)
+  prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
+  tensor_dict = prefetch_queue.dequeue()
+  outputs = Model(tensor_dict)
+  ...
+  ----------------------------------------------------
+
+  For input pipelines with batching, refer to core/batcher.py
+
+  Args:
+    tensor_dict: a dictionary of tensors to prefetch.
+    capacity: the size of the prefetch queue.
+
+  Returns:
+    The prefetch queue (a tf.PaddingFIFOQueue); use its dequeue() method to
+    read prefetched tensor_dicts.
+  """
+  field_names = list(tensor_dict.keys())
+  field_tensors = list(tensor_dict.values())
+  queue = tf.PaddingFIFOQueue(
+      capacity,
+      dtypes=[tensor.dtype for tensor in field_tensors],
+      shapes=[tensor.get_shape() for tensor in field_tensors],
+      names=field_names,
+      name='prefetch_queue')
+
+  # Register a runner so the queue is filled by background threads once queue
+  # runners are started.
+  fill_op = queue.enqueue(tensor_dict)
+  runner = tf.train.queue_runner.QueueRunner(queue, [fill_op])
+  tf.train.queue_runner.add_queue_runner(runner)
+
+  # Report the fill level as a fraction of capacity.
+  fraction_full = tf.to_float(queue.size()) * (1. / capacity)
+  summary_tag = 'queue/%s/fraction_of_%d_full' % (queue.name, capacity)
+  tf.summary.scalar(summary_tag, fraction_full)
+  return queue
--- a/object_detection/core/prefetcher_test.py
+++ b/object_detection/core/prefetcher_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.prefetcher."""
+import tensorflow as tf
+
+from object_detection.core import prefetcher
+
+slim = tf.contrib.slim
+
+
+class PrefetcherTest(tf.test.TestCase):
+  """Tests for prefetcher.prefetch with static and partially static shapes."""
+
+  def test_prefetch_tensors_with_fully_defined_shapes(self):
+    """Dequeued tensors keep fully defined static shapes and values flow."""
+    with self.test_session() as sess:
+      batch_size = 10
+      image_size = 32
+      num_batches = 5
+      # count_up_to raises OutOfRangeError after num_batches increments, which
+      # bounds how many elements the queue runner can enqueue.
+      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+      counter = examples.count_up_to(num_batches)
+      image = tf.random_normal([batch_size, image_size,
+                                image_size, 3],
+                               dtype=tf.float32,
+                               name='images')
+      label = tf.random_uniform([batch_size, 1], 0, 10,
+                                dtype=tf.int32, name='labels')
+
+      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+                                                        'image': image,
+                                                        'label': label},
+                                           capacity=100)
+      tensor_dict = prefetch_queue.dequeue()
+
+      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+                          [batch_size, image_size, image_size, 3])
+      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+                          [batch_size, 1])
+
+      # tf.initialize_all_variables is deprecated in favor of
+      # tf.global_variables_initializer.
+      tf.global_variables_initializer().run()
+      with slim.queues.QueueRunners(sess):
+        for _ in range(num_batches):
+          results = sess.run(tensor_dict)
+          self.assertEqual(results['image'].shape,
+                           (batch_size, image_size, image_size, 3))
+          self.assertEqual(results['label'].shape, (batch_size, 1))
+        # Once the counter is exhausted the queue closes and dequeue fails.
+        with self.assertRaises(tf.errors.OutOfRangeError):
+          sess.run(tensor_dict)
+
+  def test_prefetch_tensors_with_partially_defined_shapes(self):
+    """Unknown static dimensions survive the queue; runtime shapes are real."""
+    with self.test_session() as sess:
+      batch_size = 10
+      image_size = 32
+      num_batches = 5
+      # count_up_to raises OutOfRangeError after num_batches increments, which
+      # bounds how many elements the queue runner can enqueue.
+      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+      counter = examples.count_up_to(num_batches)
+      # Sizes come from variables and the static shape is then relaxed with
+      # set_shape, so some dimensions are only known at run time.
+      image = tf.random_normal([batch_size,
+                                tf.Variable(image_size),
+                                tf.Variable(image_size), 3],
+                               dtype=tf.float32,
+                               name='image')
+      image.set_shape([batch_size, None, None, 3])
+      label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
+                                10, dtype=tf.int32, name='label')
+      label.set_shape([batch_size, None])
+
+      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+                                                        'image': image,
+                                                        'label': label},
+                                           capacity=100)
+      tensor_dict = prefetch_queue.dequeue()
+
+      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+                          [batch_size, None, None, 3])
+      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+                          [batch_size, None])
+
+      # tf.initialize_all_variables is deprecated in favor of
+      # tf.global_variables_initializer.
+      tf.global_variables_initializer().run()
+      with slim.queues.QueueRunners(sess):
+        for _ in range(num_batches):
+          results = sess.run(tensor_dict)
+          self.assertEqual(results['image'].shape,
+                           (batch_size, image_size, image_size, 3))
+          self.assertEqual(results['label'].shape, (batch_size, 1))
+        # Once the counter is exhausted the queue closes and dequeue fails.
+        with self.assertRaises(tf.errors.OutOfRangeError):
+          sess.run(tensor_dict)
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/preprocessor.py
+++ b/object_detection/core/preprocessor.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast, hue and
+                   randomly jittering the bounding boxes.
+
+The preprocess function receives a tensor_dict which is a dictionary that maps
+different field names to their tensors. For example,
+tensor_dict[fields.InputDataFields.image] holds the image tensor.
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates meaning
+their coordinate values range in [0, 1]
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import sys
+import tensorflow as tf
+
+from tensorflow.python.ops import control_flow_ops
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
+from object_detection.core import standard_fields as fields
+
+
+def _apply_with_random_selector(x, func, num_cases):
+  """Applies func(x, sel) for a selector sel drawn at graph run time.
+
+  One call to func is built for every possible selector value, but the real
+  input is routed to only the branch whose case equals the sampled selector;
+  the branch outputs are then merged back into a single tensor.
+
+  Args:
+    x: input Tensor.
+    func: Python function to apply.
+    num_cases: Python int32, number of cases to sample sel from.
+
+  Returns:
+    The result of func(x, sel), where func receives the value of the
+    selector as a python integer, but sel is sampled dynamically.
+  """
+  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
+  branch_outputs = []
+  for case in range(num_cases):
+    # Index 1 of switch's result is taken as this case's routed input.
+    routed_x = control_flow_ops.switch(x, tf.equal(selector, case))[1]
+    branch_outputs.append(func(routed_x, case))
+  return control_flow_ops.merge(branch_outputs)[0]
+
+
+def _apply_with_random_selector_tuples(x, func, num_cases):
+  """Tuple variant of the random selector: applies func(x, sel) to a tuple.
+
+  For every case, each tensor of `x` is routed through a switch keyed on
+  whether the sampled selector equals that case, func is applied to the routed
+  tuple, and finally the per-case results are merged position by position.
+
+  Args:
+    x: A tuple of input tensors.
+    func: Python function to apply.
+    num_cases: Python int32, number of cases to sample sel from.
+
+  Returns:
+    The result of func(x, sel), where func receives the value of the
+    selector as a python integer, but sel is sampled dynamically.
+  """
+  arity = len(x)
+  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
+
+  # per_case_outputs[case] is whatever func returned for that case.
+  per_case_outputs = []
+  for case in range(num_cases):
+    routed = tuple(
+        control_flow_ops.switch(tensor, tf.equal(selector, case))[1]
+        for tensor in x)
+    per_case_outputs.append(func(routed, case))
+
+  # Merge position by position; only the first len(x) outputs are used.
+  merged = []
+  for position in range(arity):
+    candidates = [output[position] for output in per_case_outputs]
+    merged.append(control_flow_ops.merge(candidates)[0])
+  return tuple(merged)
+
+
+def _random_integer(minval, maxval, seed):
+  """Draws a scalar int32 tensor uniformly from the given range.
+
+  Args:
+    minval: minimum value of the random tensor.
+    maxval: maximum value of the random tensor.
+    seed: random seed.
+
+  Returns:
+    A random 0-D int32 tensor produced by tf.random_uniform with the given
+    minval and maxval.
+  """
+  scalar_shape = []
+  return tf.random_uniform(
+      scalar_shape, minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
+
+
+def normalize_image(image, original_minval, original_maxval, target_minval,
+                    target_maxval):
+  """Linearly rescales pixel values from one range to another.
+
+  Maps values from the [original_minval, original_maxval] range onto the
+  [target_minval, target_maxval] range.
+
+  Args:
+    image: rank 3 float32 tensor containing 1
+           image -> [height, width, channels].
+    original_minval: current image minimum value.
+    original_maxval: current image maximum value.
+    target_minval: target image minimum value.
+    target_maxval: target image maximum value.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('NormalizeImage', values=[image]):
+    source_low = float(original_minval)
+    source_high = float(original_maxval)
+    dest_low = float(target_minval)
+    dest_high = float(target_maxval)
+    # Shift to a zero-based range, stretch by the ratio of the two range
+    # widths, then shift to the target minimum.
+    shifted = tf.subtract(tf.to_float(image), source_low)
+    stretched = tf.multiply(
+        shifted, (dest_high - dest_low) / (source_high - source_low))
+    return tf.add(stretched, dest_low)
+
+
+def flip_boxes(boxes):
+  """Mirrors boxes horizontally within the unit square.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  # After mirroring about x = 0.5 the old right edge becomes the new left
+  # edge and vice versa; the y coordinates are untouched.
+  mirrored_columns = [
+      ymin,
+      tf.subtract(1.0, xmax),
+      ymax,
+      tf.subtract(1.0, xmin),
+  ]
+  return tf.concat(mirrored_columns, 1)
+
+
+def retain_boxes_above_threshold(
+    boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
+  """Keeps only the boxes whose label score exceeds a threshold.
+
+  A box with a missing label score (represented by NaN) is kept as well.
+  Boxes that do not pass the threshold are absent from the returned tensors.
+
+  Args:
+    boxes: float32 tensor of shape [num_instance, 4] representing boxes
+      location in normalized coordinates.
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    label_scores: float32 tensor of shape [num_instance] representing the
+      score for each box.
+    masks: (optional) rank 3 float32 tensor with shape
+      [num_instances, height, width] containing instance masks. The masks are of
+      the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+    threshold: scalar python float.
+
+  Returns:
+    A list containing, in order:
+
+    retained_boxes: [num_retained_instance, 4]
+    retained_labels: [num_retained_instance]
+    retained_label_scores: [num_retained_instance]
+
+    followed by these entries for whichever of masks and keypoints is not
+    None:
+
+    retained_masks: [num_retained_instance, height, width]
+    retained_keypoints: [num_retained_instance, num_keypoints, 2]
+  """
+  with tf.name_scope('RetainBoxesAboveThreshold',
+                     values=[boxes, labels, label_scores]):
+    keep_mask = tf.logical_or(
+        label_scores > threshold, tf.is_nan(label_scores))
+    keep_indices = tf.squeeze(tf.where(keep_mask), axis=1)
+
+    # Every per-instance tensor is filtered with the same indices.
+    per_instance_tensors = [boxes, labels, label_scores]
+    if masks is not None:
+      per_instance_tensors.append(masks)
+    if keypoints is not None:
+      per_instance_tensors.append(keypoints)
+
+    return [tf.gather(tensor, keep_indices)
+            for tensor in per_instance_tensors]
+
+
+def _flip_masks(masks):
+  """Mirrors instance masks along their last (width) axis.
+
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  # A negative step on the third axis reverses the column order.
+  mirrored = masks[:, :, ::-1]
+  return mirrored
+
+
+def random_horizontal_flip(
+    image,
+    boxes=None,
+    masks=None,
+    keypoints=None,
+    keypoint_flip_permutation=None,
+    seed=None):
+  """Randomly decides whether to mirror the image and detections or not.
+
+  A single random draw decides whether to flip, and that one decision is
+  applied to the image and to every annotation tensor that is provided. The
+  flip happens when the draw is greater than 0.5; when `boxes` is provided,
+  the flip additionally requires `boxes` to be non-empty.
+
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
+                               permutation.
+    seed: random seed
+
+  Returns:
+    A tuple whose first element is:
+
+    image: image which is the same shape as input image.
+
+    followed, in this order, by one element for each of boxes, masks and
+    keypoints that was provided (not None):
+
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_left_right(image)
+    return image_flipped
+
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoints_flip_permutation is not provided')
+
+  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
+    result = []
+    # random variable defining whether to do flip or not
+    flip_draw = tf.random_uniform([], seed=seed)
+    if boxes is None:
+      # `boxes` is optional: without it there is nothing to measure, so the
+      # decision depends on the random draw alone (tf.size(None) would fail).
+      do_a_flip_random = tf.greater(flip_draw, 0.5)
+    else:
+      # flip only if there are bounding boxes in image!
+      do_a_flip_random = tf.logical_and(
+          tf.greater(tf.size(boxes), 0), tf.greater(flip_draw, 0.5))
+
+    # flip image
+    image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+    result.append(image)
+
+    # flip boxes
+    if boxes is not None:
+      boxes = tf.cond(
+          do_a_flip_random, lambda: flip_boxes(boxes), lambda: boxes)
+      result.append(boxes)
+
+    # flip masks
+    if masks is not None:
+      masks = tf.cond(
+          do_a_flip_random, lambda: _flip_masks(masks), lambda: masks)
+      result.append(masks)
+
+    # flip keypoints
+    if keypoints is not None and keypoint_flip_permutation is not None:
+      permutation = keypoint_flip_permutation
+      # 0.5 is passed as flip_horizontal's second argument (presumably the
+      # normalized x coordinate to mirror about -- confirm in keypoint_ops).
+      keypoints = tf.cond(
+          do_a_flip_random,
+          lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
+          lambda: keypoints)
+      result.append(keypoints)
+
+    return tuple(result)
+
+
+def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
+  """Multiplies every value of the image by its own random factor.
+
+     Each value in the image tensor gets an independent random factor drawn
+     between minval and maxval; the scaled values are then clipped to
+     [0, 1].
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    minval: lower ratio of scaling pixel values.
+    maxval: upper ratio of scaling pixel values.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomPixelValueScale', values=[image]):
+    # One factor per tensor element, hence the shape of the image itself.
+    per_value_gain = tf.random_uniform(
+        tf.shape(image),
+        minval=minval,
+        maxval=maxval,
+        dtype=tf.float32,
+        seed=seed)
+    scaled = tf.clip_by_value(tf.multiply(image, per_value_gain), 0.0, 1.0)
+
+  return scaled
+
+
+def random_image_scale(image,
+                       masks=None,
+                       min_scale_ratio=0.5,
+                       max_scale_ratio=2.0,
+                       seed=None):
+  """Scales the image size.
+
+  One random scale factor is drawn between min_scale_ratio and
+  max_scale_ratio and applied to both the height and the width of the image,
+  so the aspect ratio is kept up to integer truncation. When masks are given
+  they are resized to the same new size with nearest-neighbor sampling.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
+    masks: (optional) rank 3 float32 tensor containing masks with
+      size [height, width, num_masks]. The value is set to None if there are no
+      masks.
+    min_scale_ratio: minimum scaling ratio.
+    max_scale_ratio: maximum scaling ratio.
+    seed: random seed.
+
+  Returns:
+    A tuple containing:
+
+    image: image which is the same rank as input image.
+    masks: If masks is not none, resized masks which are the same rank as input
+      masks will be returned.
+  """
+  with tf.name_scope('RandomImageScale', values=[image]):
+    result = []
+    image_shape = tf.shape(image)
+    image_height = image_shape[0]
+    image_width = image_shape[1]
+    size_coef = tf.random_uniform([],
+                                  minval=min_scale_ratio,
+                                  maxval=max_scale_ratio,
+                                  dtype=tf.float32, seed=seed)
+    image_newysize = tf.to_int32(
+        tf.multiply(tf.to_float(image_height), size_coef))
+    image_newxsize = tf.to_int32(
+        tf.multiply(tf.to_float(image_width), size_coef))
+    image = tf.image.resize_images(
+        image, [image_newysize, image_newxsize], align_corners=True)
+    result.append(image)
+    # Compare against None explicitly: the truth value of a tf.Tensor cannot
+    # be taken in graph mode, so `if masks:` raises for any provided tensor.
+    if masks is not None:
+      # NOTE(review): tf.image.resize_nearest_neighbor is documented as taking
+      # a 4-D tensor while the docstring above describes rank 3 masks --
+      # confirm the mask layout that callers actually pass.
+      masks = tf.image.resize_nearest_neighbor(
+          masks, [image_newysize, image_newxsize], align_corners=True)
+      result.append(masks)
+    return tuple(result)
+
+
+def random_rgb_to_gray(image, probability=0.1, seed=None):
+  """Converts the image to grayscale with the given probability.
+
+  The grayscale result is expanded back to RGB channels, so the returned
+  image has the same shape whether or not it was converted.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    probability: the probability of returning a grayscale image.
+            The probability should be a number between [0, 1].
+    seed: random seed.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  def _image_to_gray(image):
+    # Collapse to one channel, then replicate it back to three.
+    return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
+
+  with tf.name_scope('RandomRGBtoGray', values=[image]):
+    # random variable defining whether to convert to grayscale or not
+    gray_draw = tf.random_uniform([], seed=seed)
+    # The color image is kept when the draw exceeds `probability`.
+    keep_color = tf.greater(gray_draw, probability)
+    maybe_gray = tf.cond(
+        keep_color, lambda: image, lambda: _image_to_gray(image))
+
+  return maybe_gray
+
+
+def random_adjust_brightness(image, max_delta=0.2):
+  """Applies a random brightness change and clips the result to [0, 1].
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_delta: how much to change the brightness. A value between [0, 1).
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustBrightness', values=[image]):
+    brightened = tf.image.random_brightness(image, max_delta=max_delta)
+    # The adjustment can push values outside [0, 1]; clip them back.
+    return tf.clip_by_value(brightened, 0.0, 1.0)
+
+
+def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
+  """Applies a random contrast change and clips the result to [0, 1].
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    min_delta: see max_delta.
+    max_delta: how much to change the contrast. Contrast will change with a
+               value between min_delta and max_delta. This value will be
+               multiplied to the current contrast of the image.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustContrast', values=[image]):
+    recontrasted = tf.image.random_contrast(
+        image, lower=min_delta, upper=max_delta)
+    # The adjustment can push values outside [0, 1]; clip them back.
+    return tf.clip_by_value(recontrasted, 0.0, 1.0)
+
+
+def random_adjust_hue(image, max_delta=0.02):
+  """Applies a random hue change and clips the result to [0, 1].
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_delta: change hue randomly with a value between 0 and max_delta.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustHue', values=[image]):
+    rehued = tf.image.random_hue(image, max_delta=max_delta)
+    # Keep the output inside the valid [0, 1] pixel range.
+    return tf.clip_by_value(rehued, 0.0, 1.0)
+
+
+def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
+  """Scales the image saturation by a random factor and clamps to [0, 1].
+
+  Args:
+    image: rank 3 float32 tensor holding a single image ->
+           [height, width, channels], with pixel values in [0, 1].
+    min_delta: lower bound of the random saturation factor.
+    max_delta: upper bound of the random saturation factor. The factor drawn
+               between min_delta and max_delta multiplies the current
+               saturation of the image.
+
+  Returns:
+    image: the adjusted image, same shape as the input image.
+  """
+  with tf.name_scope('RandomAdjustSaturation', values=[image]):
+    # min_delta / max_delta are the lower / upper arguments of the TF op.
+    resaturated = tf.image.random_saturation(image, min_delta, max_delta)
+    return tf.clip_by_value(
+        resaturated, clip_value_min=0.0, clip_value_max=1.0)
+
+
+def random_distort_color(image, color_ordering=0):
+  """Chains four random color distortions and clamps the result to [0, 1].
+
+  Brightness, saturation, hue and contrast are each perturbed once. Brightness
+  always comes first; color_ordering selects the order of the other three:
+    0: brightness, saturation, hue, contrast
+    1: brightness, contrast, saturation, hue
+
+  Args:
+    image: rank 3 float32 tensor holding a single image ->
+           [height, width, channels], with pixel values in [0, 1].
+    color_ordering: Python int, a type of distortion (valid values: 0, 1).
+
+  Returns:
+    image: the distorted image, same shape as the input image.
+
+  Raises:
+    ValueError: if color_ordering is not in {0, 1}.
+  """
+  with tf.name_scope('RandomDistortColor', values=[image]):
+    if color_ordering == 0:
+      sequence = ('brightness', 'saturation', 'hue', 'contrast')
+    elif color_ordering == 1:
+      sequence = ('brightness', 'contrast', 'saturation', 'hue')
+    else:
+      raise ValueError('color_ordering must be in {0, 1}')
+
+    distortions = {
+        'brightness': lambda img: tf.image.random_brightness(
+            img, max_delta=32. / 255.),
+        'saturation': lambda img: tf.image.random_saturation(
+            img, lower=0.5, upper=1.5),
+        'hue': lambda img: tf.image.random_hue(img, max_delta=0.2),
+        'contrast': lambda img: tf.image.random_contrast(
+            img, lower=0.5, upper=1.5),
+    }
+    for step in sequence:
+      image = distortions[step](image)
+
+    # None of the random_* ops guarantees an in-range result, so clamp once
+    # at the end.
+    return tf.clip_by_value(image, 0.0, 1.0)
+
+
+def random_jitter_boxes(boxes, ratio=0.05, seed=None):
+  """Perturbs every box coordinate by a random fraction of the box size.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    ratio: maximum jitter of each coordinate, as a fraction of the box height
+           (for ymin, ymax) or box width (for xmin, xmax). For example if the
+           width is 100 pixels and ratio is 0.05, the corners can jitter up
+           to 5 pixels in the x direction.
+    seed: random seed.
+
+  Returns:
+    boxes: jittered boxes, clipped to [0, 1], same shape as input boxes.
+  """
+  def _jitter_one(box):
+    """Jitters a single box of shape [1, 1, 4] using the outer ratio/seed."""
+    noise = tf.random_uniform(
+        [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
+    width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
+    height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
+    # y coordinates scale with the height, x coordinates with the width.
+    extents = tf.stack([height, width, height, width])
+    offsets = tf.multiply(extents, noise)
+    moved = tf.add(box, offsets)
+    return tf.clip_by_value(moved, 0.0, 1.0)
+
+  with tf.name_scope('RandomJitterBoxes', values=[boxes]):
+    original_shape = tf.shape(boxes)
+    # Lift [N, 4] to [N, 1, 1, 4] so that map_fn hands out one [1, 1, 4] box
+    # at a time.
+    per_box = tf.expand_dims(boxes, 1)
+    per_box = tf.expand_dims(per_box, 2)
+
+    jittered = tf.map_fn(_jitter_one, per_box, dtype=tf.float32)
+
+    return tf.reshape(jittered, original_shape)
+
+
+def _strict_random_crop_image(image,
+                              boxes,
+                              labels,
+                              masks=None,
+                              keypoints=None,
+                              min_object_covered=1.0,
+                              aspect_ratio_range=(0.75, 1.33),
+                              area_range=(0.1, 1.0),
+                              overlap_thresh=0.3):
+  """Performs random crop.
+
+  Samples a crop window with tf.image.sample_distorted_bounding_box (at most
+  100 attempts), slices the image to that window, prunes the boxes against the
+  window and re-expresses the surviving boxes (and optionally masks and
+  keypoints) in the coordinate frame of the crop.
+
+  Note: boxes will be clipped to the crop. Keypoint coordinates that are
+  outside the crop will be set to NaN, which is consistent with the original
+  keypoint encoding for non-existing keypoints. This function always crops
+  the image and is supposed to be used by `random_crop_image` function which
+  sometimes returns image unchanged.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes with shape
+           [num_instances, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+
+  Returns:
+    A tuple (image, boxes, labels), extended with masks and/or keypoints (in
+    that order) when the corresponding argument is not None:
+
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope('RandomCropImage', values=[image, boxes]):
+    image_shape = tf.shape(image)
+
+    # boxes are [N, 4]. Lets first make them [N, 1, 4].
+    # They are clipped to [0, 1] only for sampling the window; the unclipped
+    # `boxes` are what gets pruned and transformed below.
+    boxes_expanded = tf.expand_dims(
+        tf.clip_by_value(
+            boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
+
+    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
+        image_shape,
+        bounding_boxes=boxes_expanded,
+        min_object_covered=min_object_covered,
+        aspect_ratio_range=aspect_ratio_range,
+        area_range=area_range,
+        max_attempts=100,
+        use_image_if_no_bounding_boxes=True)
+
+    # begin / size are directly usable with tf.slice on the image; im_box is
+    # the same window as a normalized box.
+    im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
+
+    new_image = tf.slice(image, im_box_begin, im_box_size)
+    # Height and width of the crop are only known at run time; the channel
+    # count is carried over from the input's static shape.
+    new_image.set_shape([None, None, image.get_shape()[2]])
+
+    # [1, 4]
+    im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
+    # [4]
+    im_box_rank1 = tf.squeeze(im_box)
+
+    boxlist = box_list.BoxList(boxes)
+    boxlist.add_field('labels', labels)
+
+    im_boxlist = box_list.BoxList(im_box_rank2)
+
+    # remove boxes that are outside cropped image
+    boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
+        boxlist, im_box_rank1)
+
+    # remove the remaining boxes whose overlap with the cropped image is
+    # below overlap_thresh; keep_ids index into the already-pruned boxlist
+    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+        boxlist, im_boxlist, overlap_thresh)
+
+    # change the coordinate of the remaining boxes
+    new_labels = overlapping_boxlist.get_field('labels')
+    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+                                                       im_box_rank1)
+    new_boxes = new_boxlist.get()
+    # boxes that stick out of the crop are clipped to its border
+    new_boxes = tf.clip_by_value(
+        new_boxes, clip_value_min=0.0, clip_value_max=1.0)
+
+    result = [new_image, new_boxes, new_labels]
+
+    if masks is not None:
+      # Apply the two pruning steps in the same order as for the boxes so
+      # that masks stay aligned with new_boxes.
+      masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
+      masks_of_boxes_completely_inside_window = tf.gather(
+          masks_of_boxes_inside_window, keep_ids)
+      # The image slice spec is ordered [y, x, channel] while masks are
+      # [instance, y, x]: the channel entry of the spec is reused for the
+      # instance dimension (presumably begin 0 / size -1, i.e. keep all --
+      # confirm against the tf.image.sample_distorted_bounding_box docs).
+      masks_box_begin = [im_box_begin[2], im_box_begin[0], im_box_begin[1]]
+      masks_box_size = [im_box_size[2], im_box_size[0], im_box_size[1]]
+      new_masks = tf.slice(
+          masks_of_boxes_completely_inside_window,
+          masks_box_begin, masks_box_size)
+      result.append(new_masks)
+
+    if keypoints is not None:
+      # Same two-step selection as for boxes and masks.
+      keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
+      keypoints_of_boxes_completely_inside_window = tf.gather(
+          keypoints_of_boxes_inside_window, keep_ids)
+      new_keypoints = keypoint_ops.change_coordinate_frame(
+          keypoints_of_boxes_completely_inside_window, im_box_rank1)
+      # In the new frame the crop is the unit square; keypoints outside it
+      # are pruned.
+      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+                                                        [0.0, 0.0, 1.0, 1.0])
+      result.append(new_keypoints)
+
+    return tuple(result)
+
+
+def random_crop_image(image,
+                      boxes,
+                      labels,
+                      masks=None,
+                      keypoints=None,
+                      min_object_covered=1.0,
+                      aspect_ratio_range=(0.75, 1.33),
+                      area_range=(0.1, 1.0),
+                      overlap_thresh=0.3,
+                      random_coef=0.0,
+                      seed=None):
+  """Crops a random sub-image, or keeps the input with chance random_coef.
+
+  The crop window is sampled subject to the constraints described in the Args
+  section; sampling is retried until the constraints hold. If within 100
+  trials no valid crop is found, the original image is returned. Both input
+  boxes and returned boxes are in normalized form (e.g., lie in the unit
+  square [0, 1]).
+
+  Note: boxes will be clipped to the crop. Keypoint coordinates that are
+  outside the crop will be set to NaN, which is consistent with the original
+  keypoint encoding for non-existing keypoints.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes with shape
+           [num_instances, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: Python float giving the chance of getting the original
+                 image. If random_coef is 0, we will always get the cropped
+                 image, and if it is 1.0, we will always get the original
+                 image.
+    seed: random seed for the crop / no-crop decision.
+
+  Returns:
+    A tuple (image, boxes, labels), extended with masks and/or keypoints (in
+    that order) when the corresponding argument is not None:
+
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form.
+    labels: new labels.
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  crop_options = dict(
+      masks=masks,
+      keypoints=keypoints,
+      min_object_covered=min_object_covered,
+      aspect_ratio_range=aspect_ratio_range,
+      area_range=area_range,
+      overlap_thresh=overlap_thresh)
+
+  def _always_crop():
+    return _strict_random_crop_image(image, boxes, labels, **crop_options)
+
+  # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
+  # A coefficient that is effectively zero means "always crop", so the graph
+  # needs no conditional at all.
+  if random_coef < sys.float_info.min:
+    return _always_crop()
+
+  uniform_draw = tf.random_uniform([], seed=seed)
+  should_crop = tf.greater(uniform_draw, random_coef)
+
+  # The pass-through branch must mirror the structure returned by the crop.
+  untouched = (image, boxes, labels)
+  untouched += tuple(
+      extra for extra in (masks, keypoints) if extra is not None)
+
+  return tf.cond(should_crop, _always_crop, lambda: untouched)
+
+
+def random_pad_image(image,
+                     boxes,
+                     min_image_size=None,
+                     max_image_size=None,
+                     pad_color=None,
+                     seed=None):
+  """Randomly pads the image.
+
+  This function randomly pads the image with pad_color (by default the average
+  color of the input image). The final size of the
+  padded image will be between min_image_size and max_image_size.
+  if min_image_size is smaller than the input image size, min_image_size will
+  be set to the input image size. The same for max_image_size. The input image
+  will be located at a uniformly random location inside the padded image.
+  The relative location of the boxes to the original image will remain the same.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    min_image_size: a tensor of size [min_height, min_width], type tf.int32.
+                    If passed as None, will be set to image size
+                    [height, width].
+    max_image_size: a tensor of size [max_height, max_width], type tf.int32.
+                    If passed as None, will be set to twice the
+                    image [height * 2, width * 2].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the input
+               image.
+    seed: random seed.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form (relative to the padded image).
+  """
+  if pad_color is None:
+    # Per-channel mean over the height and width axes.
+    pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])
+
+  image_shape = tf.shape(image)
+  image_height = image_shape[0]
+  image_width = image_shape[1]
+
+  # Both size limits are raised to at least the input size: padding can only
+  # grow the image.
+  if max_image_size is None:
+    max_image_size = tf.stack([image_height * 2, image_width * 2])
+  max_image_size = tf.maximum(max_image_size,
+                              tf.stack([image_height, image_width]))
+
+  if min_image_size is None:
+    min_image_size = tf.stack([image_height, image_width])
+  min_image_size = tf.maximum(min_image_size,
+                              tf.stack([image_height, image_width]))
+
+  # Only draw a random target size when the range is non-empty; otherwise
+  # fall back to the maximum.
+  target_height = tf.cond(
+      max_image_size[0] > min_image_size[0],
+      lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
+      lambda: max_image_size[0])
+
+  target_width = tf.cond(
+      max_image_size[1] > min_image_size[1],
+      lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
+      lambda: max_image_size[1])
+
+  # Position of the original image inside the padded canvas; 0 when there is
+  # no slack along that axis.
+  offset_height = tf.cond(
+      target_height > image_height,
+      lambda: _random_integer(0, target_height - image_height, seed),
+      lambda: tf.constant(0, dtype=tf.int32))
+
+  offset_width = tf.cond(
+      target_width > image_width,
+      lambda: _random_integer(0, target_width - image_width, seed),
+      lambda: tf.constant(0, dtype=tf.int32))
+
+  new_image = tf.image.pad_to_bounding_box(
+      image, offset_height=offset_height, offset_width=offset_width,
+      target_height=target_height, target_width=target_width)
+
+  # Setting color of the padded pixels
+  # Padding an all-ones image the same way gives 1 over the original image
+  # and 0 over the padding; inverting it selects exactly the padded pixels,
+  # which then receive pad_color.
+  image_ones = tf.ones_like(image)
+  image_ones_padded = tf.image.pad_to_bounding_box(
+      image_ones, offset_height=offset_height, offset_width=offset_width,
+      target_height=target_height, target_width=target_width)
+  image_color_paded = (1.0 - image_ones_padded) * pad_color
+  new_image += image_color_paded
+
+  # setting boxes
+  # new_window is the padded canvas expressed in the normalized coordinates
+  # of the original image, so changing to that frame re-normalizes the boxes
+  # with respect to the padded image.
+  new_window = tf.to_float(
+      tf.stack([
+          -offset_height, -offset_width, target_height - offset_height,
+          target_width - offset_width
+      ]))
+  new_window /= tf.to_float(
+      tf.stack([image_height, image_width, image_height, image_width]))
+  boxlist = box_list.BoxList(boxes)
+  new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+  new_boxes = new_boxlist.get()
+
+  return new_image, new_boxes
+
+
+def random_crop_pad_image(image,
+                          boxes,
+                          labels,
+                          min_object_covered=1.0,
+                          aspect_ratio_range=(0.75, 1.33),
+                          area_range=(0.1, 1.0),
+                          overlap_thresh=0.3,
+                          random_coef=0.0,
+                          min_padded_size_ratio=None,
+                          max_padded_size_ratio=None,
+                          pad_color=None,
+                          seed=None):
+  """Randomly crops the image, then randomly pads the crop.
+
+  The image is first passed through random_crop_image and the result through
+  random_pad_image. The size limits of the padded output are
+  min_padded_size_ratio * [height, width] and
+  max_padded_size_ratio * [height, width], where height and width are those
+  of the ORIGINAL image rather than of the crop. That is why the same effect
+  cannot easily be obtained by applying RandomCropImage followed by
+  RandomPadImage independently.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [0.0, 0.0].
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [2.0, 2.0].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the randomly
+               cropped image.
+    seed: random seed, shared by the crop and the pad step.
+
+  Returns:
+    padded_image: padded image.
+    padded_boxes: boxes which is the same rank as input boxes. Boxes are in
+                  normalized form.
+    cropped_labels: cropped labels.
+  """
+  original_shape = tf.shape(image)
+  original_height = original_shape[0]
+  original_width = original_shape[1]
+  if min_padded_size_ratio is None:
+    min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
+  if max_padded_size_ratio is None:
+    max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)
+
+  cropped_image, cropped_boxes, cropped_labels = random_crop_image(
+      image=image,
+      boxes=boxes,
+      labels=labels,
+      min_object_covered=min_object_covered,
+      aspect_ratio_range=aspect_ratio_range,
+      area_range=area_range,
+      overlap_thresh=overlap_thresh,
+      random_coef=random_coef,
+      seed=seed)
+
+  def _scaled_original_size(ratio):
+    """Returns int32 [height, width] of the original image times ratio."""
+    original_size = tf.to_float(tf.stack([original_height, original_width]))
+    return tf.to_int32(original_size * ratio)
+
+  padded_image, padded_boxes = random_pad_image(
+      cropped_image,
+      cropped_boxes,
+      min_image_size=_scaled_original_size(min_padded_size_ratio),
+      max_image_size=_scaled_original_size(max_padded_size_ratio),
+      pad_color=pad_color,
+      seed=seed)
+
+  return padded_image, padded_boxes, cropped_labels
+
+
+def random_crop_to_aspect_ratio(image,
+                                boxes,
+                                labels,
+                                masks=None,
+                                keypoints=None,
+                                aspect_ratio=1.0,
+                                overlap_thresh=0.3,
+                                seed=None):
+  """Randomly crops an image to the specified aspect ratio.
+
+  Randomly crops a portion of the image such that the crop is of the
+  specified aspect ratio (width / height), and the crop is as large as
+  possible. If the specified
+  aspect ratio is larger than the aspect ratio of the image, this op will
+  randomly remove rows from the top and bottom of the image. If the specified
+  aspect ratio is less than the aspect ratio of the image, this op will randomly
+  remove cols from the left and right of the image. If the specified aspect
+  ratio is the same as the aspect ratio of the image, this op will return the
+  image.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    aspect_ratio: the aspect ratio (width / height) of cropped image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    seed: random seed.
+
+  Returns:
+    A tuple (image, boxes, labels), extended with masks and/or keypoints (in
+    that order) when the corresponding argument is not None:
+
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: If image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('RandomCropToAspectRatio', values=[image]):
+    image_shape = tf.shape(image)
+    orig_height = image_shape[0]
+    orig_width = image_shape[1]
+    orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
+    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+    # Image is narrower than requested: keep the full width and shrink the
+    # height to round(orig_width / new_aspect_ratio).
+    def target_height_fn():
+      return tf.to_int32(
+          tf.round(
+              tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))
+    target_height = tf.cond(
+        orig_aspect_ratio >= new_aspect_ratio,
+        lambda: orig_height,
+        target_height_fn)
+    # Image is wider than requested: keep the full height and shrink the
+    # width to round(orig_height * new_aspect_ratio).
+    def target_width_fn():
+      return tf.to_int32(
+          tf.round(
+              tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))
+    target_width = tf.cond(
+        orig_aspect_ratio <= new_aspect_ratio,
+        lambda: orig_width,
+        target_width_fn)
+
+    # At most one of the two dimensions is shrunk, so the random range of the
+    # other offset collapses: the offsets are drawn with bounds
+    # (0, orig_height - target_height + 1) and
+    # (0, orig_width - target_width + 1) respectively.
+    # NOTE(review): the `+ 1` suggests _random_integer treats its upper bound
+    # as exclusive -- confirm against its definition.
+    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
+    offset_width = _random_integer(0, orig_width - target_width + 1, seed)
+    new_image = tf.image.crop_to_bounding_box(
+        image, offset_height, offset_width, target_height, target_width)
+
+    # The crop window as a normalized [ymin, xmin, ymax, xmax] box in the
+    # frame of the original image.
+    im_box = tf.stack([
+        tf.to_float(offset_height) / tf.to_float(orig_height),
+        tf.to_float(offset_width) / tf.to_float(orig_width),
+        tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
+        tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
+    ])
+
+    boxlist = box_list.BoxList(boxes)
+    boxlist.add_field('labels', labels)
+
+    im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
+
+    # remove boxes whose overlap with the image is less than overlap_thresh
+    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+        boxlist, im_boxlist, overlap_thresh)
+
+    # change the coordinate of the remaining boxes
+    new_labels = overlapping_boxlist.get_field('labels')
+    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+                                                       im_box)
+    # boxes that stick out of the crop are clipped to its border
+    new_boxlist = box_list_ops.clip_to_window(new_boxlist,
+                                              tf.constant(
+                                                  [0.0, 0.0, 1.0, 1.0],
+                                                  tf.float32))
+    new_boxes = new_boxlist.get()
+
+    result = [new_image, new_boxes, new_labels]
+
+    if masks is not None:
+      # Keep the masks of the surviving boxes and cut them to the same pixel
+      # window as the image; -1 keeps every remaining instance.
+      masks_inside_window = tf.gather(masks, keep_ids)
+      masks_box_begin = tf.stack([0, offset_height, offset_width])
+      masks_box_size = tf.stack([-1, target_height, target_width])
+      new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
+      result.append(new_masks)
+
+    if keypoints is not None:
+      keypoints_inside_window = tf.gather(keypoints, keep_ids)
+      new_keypoints = keypoint_ops.change_coordinate_frame(
+          keypoints_inside_window, im_box)
+      # In the new frame the crop is the unit square; keypoints outside it
+      # are pruned.
+      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+                                                        [0.0, 0.0, 1.0, 1.0])
+      result.append(new_keypoints)
+
+    return tuple(result)
+
+
+def random_black_patches(image,
+                         max_black_patches=10,
+                         probability=0.5,
+                         size_to_image_ratio=0.1,
+                         random_seed=None):
+  """Blacks out a random number of square patches of the image.
+
+  Makes max_black_patches independent attempts. Each attempt, with chance
+  `probability`, sets one randomly placed square patch to zero. All patches
+  share the same size, given by the size_to_image_ratio parameter.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_black_patches: number of times that the function tries to add a
+                       black box to the image.
+    probability: at each try, what is the chance of adding a box.
+    size_to_image_ratio: Determines the ratio of the size of the black patches
+                         to the size of the image.
+                         box_size = size_to_image_ratio *
+                                    min(image_width, image_height)
+    random_seed: random seed.
+
+  Returns:
+    image: the input image with zero or more square regions set to 0.
+  """
+  def _black_out_one_patch(canvas):
+    """Returns canvas with one randomly placed square patch set to zero."""
+    canvas_shape = tf.shape(canvas)
+    canvas_height = canvas_shape[0]
+    canvas_width = canvas_shape[1]
+    patch_side = tf.to_int32(
+        tf.multiply(
+            tf.minimum(tf.to_float(canvas_height), tf.to_float(canvas_width)),
+            size_to_image_ratio))
+    # The top-left corner is drawn in normalized coordinates, leaving room
+    # for the patch itself.
+    relative_top = tf.random_uniform(
+        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
+    relative_left = tf.random_uniform(
+        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
+    top = tf.to_int32(relative_top * tf.to_float(canvas_height))
+    left = tf.to_int32(relative_left * tf.to_float(canvas_width))
+    # The patch is built with 3 channels.
+    patch = tf.ones([patch_side, patch_side, 3], dtype=tf.float32)
+    # 0 inside the patch, 1 everywhere else.
+    keep_mask = 1.0 - tf.image.pad_to_bounding_box(patch, top, left,
+                                                   canvas_height, canvas_width)
+    return tf.multiply(canvas, keep_mask)
+
+  with tf.name_scope('RandomBlackPatchInImage', values=[image]):
+    for _ in range(max_black_patches):
+      draw = tf.random_uniform([], minval=0.0, maxval=1.0,
+                               dtype=tf.float32, seed=random_seed)
+      skip_patch = tf.greater(draw, probability)
+      image = tf.cond(
+          skip_patch, lambda: image,
+          lambda: _black_out_one_patch(image))
+
+    return image
+
+
+def image_to_float(image):
+  """Casts image pixel values to float. Used in Faster R-CNN.
+
+  Args:
+    image: input image tensor, e.g. in tf.uint8 or some other format.
+
+  Returns:
+    image: image in tf.float32 format.
+  """
+  with tf.name_scope('ImageToFloat', values=[image]):
+    return tf.to_float(image)
+
+
+def random_resize_method(image, target_size):
+  """Resizes the image to target size with a randomly chosen resize method.
+
+  The method is picked among 4 cases by _apply_with_random_selector and
+  handed to tf.image.resize_images.
+
+  Args:
+    image: a rank 3 tensor.
+    target_size: a list of [target_height, target_width]
+
+  Returns:
+    resized image.
+  """
+
+  def _resize_with(tensor, method):
+    return tf.image.resize_images(tensor, target_size, method)
+
+  return _apply_with_random_selector(image, _resize_with, num_cases=4)
+
+
+def resize_to_range(image,
+                    masks=None,
+                    min_dimension=None,
+                    max_dimension=None,
+                    align_corners=False):
+  """Resizes an image so its dimensions are within the provided value.
+
+  The output size can be described by two cases:
+  1. If the image can be rescaled so its minimum dimension is equal to the
+     provided value without the other dimension exceeding max_dimension,
+     then do so.
+  2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks.
+    min_dimension: (optional) (scalar) desired size of the smaller image
+                   dimension.
+    max_dimension: (optional) (scalar) maximum allowed size
+                   of the larger image dimension.
+    align_corners: bool. If true, exactly align all 4 corners of the input
+                   and output. Defaults to False.
+
+  Returns:
+    A 3D tensor of shape [new_height, new_width, channels],
+    where the image has been resized (with bilinear interpolation) so that
+    min(new_height, new_width) == min_dimension or
+    max(new_height, new_width) == max_dimension.
+
+    If masks is not None, also outputs masks:
+    A 3D tensor of shape [num_instances, new_height, new_width]
+
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+    image_shape = tf.shape(image)
+    orig_height = tf.to_float(image_shape[0])
+    orig_width = tf.to_float(image_shape[1])
+    orig_min_dim = tf.minimum(orig_height, orig_width)
+
+    # Calculates the larger of the possible sizes
+    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+    large_scale_factor = min_dimension / orig_min_dim
+    # Scaling orig_(height|width) by large_scale_factor will make the smaller
+    # dimension equal to min_dimension, save for floating point rounding errors.
+    # For reasonably-sized images, taking the nearest integer will reliably
+    # eliminate this error.
+    large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
+    large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
+    large_size = tf.stack([large_height, large_width])
+
+    if max_dimension:
+      # Calculates the smaller of the possible sizes, use that if the larger
+      # is too big.
+      orig_max_dim = tf.maximum(orig_height, orig_width)
+      max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+      small_scale_factor = max_dimension / orig_max_dim
+      # Scaling orig_(height|width) by small_scale_factor will make the larger
+      # dimension equal to max_dimension, save for floating point rounding
+      # errors. For reasonably-sized images, taking the nearest integer will
+      # reliably eliminate this error.
+      small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
+      small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
+      small_size = tf.stack([small_height, small_width])
+
+      new_size = tf.cond(
+          tf.to_float(tf.reduce_max(large_size)) > max_dimension,
+          lambda: small_size, lambda: large_size)
+    else:
+      new_size = large_size
+
+    new_image = tf.image.resize_images(image, new_size,
+                                       align_corners=align_corners)
+
+    result = new_image
+    if masks is not None:
+      num_instances = tf.shape(masks)[0]
+
+      def resize_masks_branch():
+        new_masks = tf.expand_dims(masks, 3)
+        new_masks = tf.image.resize_nearest_neighbor(
+            new_masks, new_size, align_corners=align_corners)
+        new_masks = tf.squeeze(new_masks, axis=3)
+        return new_masks
+
+      def reshape_masks_branch():
+        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
+        return new_masks
+
+      masks = tf.cond(num_instances > 0,
+                      resize_masks_branch,
+                      reshape_masks_branch)
+      result = [new_image, masks]
+
+    return result
+
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+  """Scales boxes from normalized to pixel coordinates.
+
+  Args:
+    image: A 3D float32 tensor of shape [height, width, channels].
+    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+      boxes in normalized coordinates. Each row is of the form
+      [ymin, xmin, ymax, xmax].
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+
+  Returns:
+    image: unchanged input image.
+    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+      bounding boxes in pixel coordinates.
+    scaled_keypoints: a 3D float32 tensor with shape
+      [num_instances, num_keypoints, 2] containing the keypoints in pixel
+      coordinates.
+  """
+  boxlist = box_list.BoxList(boxes)
+  image_height = tf.shape(image)[0]
+  image_width = tf.shape(image)[1]
+  scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
+  result = [image, scaled_boxes]
+  if keypoints is not None:
+    scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
+    result.append(scaled_keypoints)
+  return tuple(result)
+
+
+# pylint: disable=g-doc-return-or-yield
+def resize_image(image,
+                 masks=None,
+                 new_height=600,
+                 new_width=1024,
+                 method=tf.image.ResizeMethod.BILINEAR,
+                 align_corners=False):
+  """See `tf.image.resize_images` for detailed doc."""
+  with tf.name_scope(
+      'ResizeImage',
+      values=[image, new_height, new_width, method, align_corners]):
+    new_image = tf.image.resize_images(image, [new_height, new_width],
+                                       method=method,
+                                       align_corners=align_corners)
+    result = new_image
+    if masks is not None:
+      num_instances = tf.shape(masks)[0]
+      new_size = tf.constant([new_height, new_width], dtype=tf.int32)
+      def resize_masks_branch():
+        new_masks = tf.expand_dims(masks, 3)
+        new_masks = tf.image.resize_nearest_neighbor(
+            new_masks, new_size, align_corners=align_corners)
+        new_masks = tf.squeeze(new_masks, axis=3)
+        return new_masks
+
+      def reshape_masks_branch():
+        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
+        return new_masks
+
+      masks = tf.cond(num_instances > 0,
+                      resize_masks_branch,
+                      reshape_masks_branch)
+      result = [new_image, masks]
+
+    return result
+
+
+def subtract_channel_mean(image, means=None):
+  """Normalizes an image by subtracting a mean from each channel.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    means: float list containing a mean for each channel
+  Returns:
+    normalized_images: a tensor of shape [height, width, channels]
+  Raises:
+    ValueError: if images is not a 4D tensor or if the number of means is not
+      equal to the number of channels.
+  """
+  with tf.name_scope('SubtractChannelMean', values=[image, means]):
+    if len(image.get_shape()) != 3:
+      raise ValueError('Input must be of size [height, width, channels]')
+    if len(means) != image.get_shape()[-1]:
+      raise ValueError('len(means) must match the number of channels')
+    return image - [[means]]
+
+
+def one_hot_encoding(labels, num_classes=None):
+  """One-hot encodes the multiclass labels.
+
+  Example usage:
+    labels = tf.constant([1, 4], dtype=tf.int32)
+    one_hot = OneHotEncoding(labels, num_classes=5)
+    one_hot.eval()    # evaluates to [0, 1, 0, 0, 1]
+
+  Args:
+    labels: A tensor of shape [None] corresponding to the labels.
+    num_classes: Number of classes in the dataset.
+  Returns:
+    onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
+      encoding of the labels.
+  Raises:
+    ValueError: if num_classes is not specified.
+  """
+  with tf.name_scope('OneHotEncoding', values=[labels]):
+    if num_classes is None:
+      raise ValueError('num_classes must be specified')
+
+    labels = tf.one_hot(labels, num_classes, 1, 0)
+    return tf.reduce_max(labels, 0)
+
+
+def rgb_to_gray(image):
+  """Converts a 3 channel RGB image to a 1 channel grayscale image.
+
+  Args:
+    image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
+           with pixel values varying between [0, 1].
+
+  Returns:
+    image: A single channel grayscale image -> [image, height, 1].
+  """
+  return tf.image.rgb_to_grayscale(image)
+
+
+def ssd_random_crop(image,
+                    boxes,
+                    labels,
+                    masks=None,
+                    keypoints=None,
+                    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                    aspect_ratio_range=((0.5, 2.0),) * 7,
+                    area_range=((0.1, 1.0),) * 7,
+                    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                    random_coef=(0.15,) * 7,
+                    seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  def random_crop_selector(selected_result, index):
+    """Applies random_crop_image to selected result.
+
+    Args:
+      selected_result: A tuple containing image, boxes, labels, keypoints (if
+                       not None), and masks (if not None).
+      index: The index that was randomly selected.
+
+    Returns: A tuple containing image, boxes, labels, keypoints (if not None),
+             and masks (if not None).
+    """
+    i = 3
+    image, boxes, labels = selected_result[:i]
+    selected_masks = None
+    selected_keypoints = None
+    if masks is not None:
+      selected_masks = selected_result[i]
+      i += 1
+    if keypoints is not None:
+      selected_keypoints = selected_result[i]
+
+    return random_crop_image(
+        image=image,
+        boxes=boxes,
+        labels=labels,
+        masks=selected_masks,
+        keypoints=selected_keypoints,
+        min_object_covered=min_object_covered[index],
+        aspect_ratio_range=aspect_ratio_range[index],
+        area_range=area_range[index],
+        overlap_thresh=overlap_thresh[index],
+        random_coef=random_coef[index],
+        seed=seed)
+
+  result = _apply_with_random_selector_tuples(
+      tuple(
+          t for t in (image, boxes, labels, masks, keypoints) if t is not None),
+      random_crop_selector,
+      num_cases=len(min_object_covered))
+  return result
+
+
+def ssd_random_crop_pad(image,
+                        boxes,
+                        labels,
+                        min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                        aspect_ratio_range=((0.5, 2.0),) * 6,
+                        area_range=((0.1, 1.0),) * 6,
+                        overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                        random_coef=(0.15,) * 6,
+                        min_padded_size_ratio=(None,) * 6,
+                        max_padded_size_ratio=(None,) * 6,
+                        pad_color=(None,) * 6,
+                        seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [0.0, 0.0].
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [2.0, 2.0].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the randomly
+               cropped image.
+    seed: random seed.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form.
+    new_labels: new labels.
+  """
+  def random_crop_pad_selector(image_boxes_labels, index):
+    image, boxes, labels = image_boxes_labels
+
+    return random_crop_pad_image(
+        image,
+        boxes,
+        labels,
+        min_object_covered=min_object_covered[index],
+        aspect_ratio_range=aspect_ratio_range[index],
+        area_range=area_range[index],
+        overlap_thresh=overlap_thresh[index],
+        random_coef=random_coef[index],
+        min_padded_size_ratio=min_padded_size_ratio[index],
+        max_padded_size_ratio=max_padded_size_ratio[index],
+        pad_color=pad_color[index],
+        seed=seed)
+
+  new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
+      (image, boxes, labels),
+      random_crop_pad_selector,
+      num_cases=len(min_object_covered))
+  return new_image, new_boxes, new_labels
+
+
+def ssd_random_crop_fixed_aspect_ratio(
+    image,
+    boxes,
+    labels,
+    masks=None,
+    keypoints=None,
+    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    aspect_ratio=1.0,
+    area_range=((0.1, 1.0),) * 7,
+    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    random_coef=(0.15,) * 7,
+    seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  The only difference is that the aspect ratio of the crops are fixed.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio: aspect ratio of the cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  """
+  aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
+
+  crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
+                                min_object_covered, aspect_ratio_range,
+                                area_range, overlap_thresh, random_coef, seed)
+  i = 3
+  new_image, new_boxes, new_labels = crop_result[:i]
+  new_masks = None
+  new_keypoints = None
+  if masks is not None:
+    new_masks = crop_result[i]
+    i += 1
+  if keypoints is not None:
+    new_keypoints = crop_result[i]
+  result = random_crop_to_aspect_ratio(
+      new_image,
+      new_boxes,
+      new_labels,
+      new_masks,
+      new_keypoints,
+      aspect_ratio=aspect_ratio,
+      seed=seed)
+
+  return result
+
+
+def get_default_func_arg_map(include_instance_masks=False,
+                             include_keypoints=False):
+  """Returns the default mapping from a preprocessor function to its args.
+
+  Args:
+    include_instance_masks: If True, preprocessing functions will modify the
+      instance masks, too.
+    include_keypoints: If True, preprocessing functions will modify the
+      keypoints, too.
+
+  Returns:
+    A map from preprocessing functions to the arguments they receive.
+  """
+  groundtruth_instance_masks = None
+  if include_instance_masks:
+    groundtruth_instance_masks = (
+        fields.InputDataFields.groundtruth_instance_masks)
+
+  groundtruth_keypoints = None
+  if include_keypoints:
+    groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
+
+  prep_func_arg_map = {
+      normalize_image: (fields.InputDataFields.image,),
+      random_horizontal_flip: (fields.InputDataFields.image,
+                               fields.InputDataFields.groundtruth_boxes,
+                               groundtruth_instance_masks,
+                               groundtruth_keypoints,),
+      random_pixel_value_scale: (fields.InputDataFields.image,),
+      random_image_scale: (fields.InputDataFields.image,
+                           groundtruth_instance_masks,),
+      random_rgb_to_gray: (fields.InputDataFields.image,),
+      random_adjust_brightness: (fields.InputDataFields.image,),
+      random_adjust_contrast: (fields.InputDataFields.image,),
+      random_adjust_hue: (fields.InputDataFields.image,),
+      random_adjust_saturation: (fields.InputDataFields.image,),
+      random_distort_color: (fields.InputDataFields.image,),
+      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
+      random_crop_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,
+                          fields.InputDataFields.groundtruth_classes,
+                          groundtruth_instance_masks,
+                          groundtruth_keypoints,),
+      random_pad_image: (fields.InputDataFields.image,
+                         fields.InputDataFields.groundtruth_boxes),
+      random_crop_pad_image: (fields.InputDataFields.image,
+                              fields.InputDataFields.groundtruth_boxes,
+                              fields.InputDataFields.groundtruth_classes),
+      random_crop_to_aspect_ratio: (fields.InputDataFields.image,
+                                    fields.InputDataFields.groundtruth_boxes,
+                                    fields.InputDataFields.groundtruth_classes,
+                                    groundtruth_instance_masks,
+                                    groundtruth_keypoints,),
+      random_black_patches: (fields.InputDataFields.image,),
+      retain_boxes_above_threshold: (
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          fields.InputDataFields.groundtruth_label_scores,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      image_to_float: (fields.InputDataFields.image,),
+      random_resize_method: (fields.InputDataFields.image,),
+      resize_to_range: (fields.InputDataFields.image,
+                        groundtruth_instance_masks,),
+      scale_boxes_to_pixel_coordinates: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          groundtruth_keypoints,),
+      flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
+      resize_image: (fields.InputDataFields.image,
+                     groundtruth_instance_masks,),
+      subtract_channel_mean: (fields.InputDataFields.image,),
+      one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
+      rgb_to_gray: (fields.InputDataFields.image,),
+      ssd_random_crop: (fields.InputDataFields.image,
+                        fields.InputDataFields.groundtruth_boxes,
+                        fields.InputDataFields.groundtruth_classes,
+                        groundtruth_instance_masks,
+                        groundtruth_keypoints,),
+      ssd_random_crop_pad: (fields.InputDataFields.image,
+                            fields.InputDataFields.groundtruth_boxes,
+                            fields.InputDataFields.groundtruth_classes),
+      ssd_random_crop_fixed_aspect_ratio: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+  }
+
+  return prep_func_arg_map
+
+
+def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
+  """Preprocess images and bounding boxes.
+
+  Various types of preprocessing (to be implemented) based on the
+  preprocess_options dictionary e.g. "crop image" (affects image and possibly
+  boxes), "white balance image" (affects only image), etc. If self._options
+  is None, no preprocessing is done.
+
+  Args:
+    tensor_dict: dictionary that contains images, boxes, and can contain other
+                 things as well.
+                 images-> rank 4 float32 tensor contains
+                          1 image -> [1, height, width, 3].
+                          with pixel values varying between [0, 1]
+                 boxes-> rank 2 float32 tensor containing
+                         the bounding boxes -> [N, 4].
+                         Boxes are in normalized form meaning
+                         their coordinates vary between [0, 1].
+                         Each row is in the form
+                         of [ymin, xmin, ymax, xmax].
+    preprocess_options: It is a list of tuples, where each tuple contains a
+                        function and a dictionary that contains arguments and
+                        their values.
+    func_arg_map: mapping from preprocessing functions to arguments that they
+                  expect to receive and return.
+
+  Returns:
+    tensor_dict: which contains the preprocessed images, bounding boxes, etc.
+
+  Raises:
+    ValueError: (a) If the functions passed to Preprocess
+                    are not in func_arg_map.
+                (b) If the arguments that a function needs
+                    do not exist in tensor_dict.
+                (c) If image in tensor_dict is not rank 4
+  """
+  if func_arg_map is None:
+    func_arg_map = get_default_func_arg_map()
+
+  # changes the images to image (rank 4 to rank 3) since the functions
+  # receive rank 3 tensor for image
+  if fields.InputDataFields.image in tensor_dict:
+    images = tensor_dict[fields.InputDataFields.image]
+    if len(images.get_shape()) != 4:
+      raise ValueError('images in tensor_dict should be rank 4')
+    image = tf.squeeze(images, squeeze_dims=[0])
+    tensor_dict[fields.InputDataFields.image] = image
+
+  # Preprocess inputs based on preprocess_options
+  for option in preprocess_options:
+    func, params = option
+    if func not in func_arg_map:
+      raise ValueError('The function %s does not exist in func_arg_map' %
+                       (func.__name__))
+    arg_names = func_arg_map[func]
+    for a in arg_names:
+      if a is not None and a not in tensor_dict:
+        raise ValueError('The function %s requires argument %s' %
+                         (func.__name__, a))
+
+    def get_arg(key):
+      return tensor_dict[key] if key is not None else None
+    args = [get_arg(a) for a in arg_names]
+    results = func(*args, **params)
+    if not isinstance(results, (list, tuple)):
+      results = (results,)
+    # Removes None args since the return values will not contain those.
+    arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
+    for res, arg_name in zip(results, arg_names):
+      tensor_dict[arg_name] = res
+
+  # changes the image to images (rank 3 to rank 4) to be compatible to what
+  # we received in the first place
+  if fields.InputDataFields.image in tensor_dict:
+    image = tensor_dict[fields.InputDataFields.image]
+    images = tf.expand_dims(image, 0)
+    tensor_dict[fields.InputDataFields.image] = images
+
+  return tensor_dict
--- a/object_detection/core/preprocessor_test.py
+++ b/object_detection/core/preprocessor_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.preprocessor."""
+
+import numpy as np
+import six
+
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.core import standard_fields as fields
+
+if six.PY2:
+  import mock # pylint: disable=g-import-not-at-top
+else:
+  from unittest import mock # pylint: disable=g-import-not-at-top
+
+
+class PreprocessorTest(tf.test.TestCase):
+
+  def createColorfulTestImage(self):
+    ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
+    ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
+    ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
+    imr = tf.concat([ch255, ch0, ch0], 3)
+    img = tf.concat([ch255, ch255, ch0], 3)
+    imb = tf.concat([ch255, ch0, ch255], 3)
+    imw = tf.concat([ch128, ch128, ch128], 3)
+    imu = tf.concat([imr, img], 2)
+    imd = tf.concat([imb, imw], 2)
+    im = tf.concat([imu, imd], 1)
+    return im
+
+  def createTestImages(self):
+    images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
+                             [0, 128, 128, 128], [192, 192, 128, 128]]],
+                           dtype=tf.uint8)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
+                             [0, 128, 192, 192], [192, 192, 128, 192]]],
+                           dtype=tf.uint8)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
+                             [0, 128, 128, 0], [192, 192, 192, 128]]],
+                           dtype=tf.uint8)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def createTestBoxes(self):
+    boxes = tf.constant(
+        [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
+    return boxes
+
+  def createTestLabelScores(self):
+    return tf.constant([1.0, 0.5], dtype=tf.float32)
+
+  def createTestLabelScoresWithMissingScore(self):
+    return tf.constant([0.5, np.nan], dtype=tf.float32)
+
+  def createTestMasks(self):
+    mask = np.array([
+        [[255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0]],
+        [[255.0, 255.0, 0.0],
+         [255.0, 255.0, 0.0],
+         [255.0, 255.0, 0.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def createTestKeypoints(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createTestKeypointsInsideCrop(self):
+    keypoints = np.array([
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createTestKeypointsOutsideCrop(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createKeypointFlipPermutation(self):
+    return np.array([0, 2, 1], dtype=np.int32)
+
+  def createTestLabels(self):
+    labels = tf.constant([1, 2], dtype=tf.int32)
+    return labels
+
+  def createTestBoxesOutOfImage(self):
+    boxes = tf.constant(
+        [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
+    return boxes
+
+  def expectedImagesAfterNormalization(self):
+    images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
+                             [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
+                             [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
+                             [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedMaxImageAfterColorScale(self):
+    images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+                             [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+                             [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
+                             [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedMinImageAfterColorScale(self):
+    images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+                             [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+                             [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
+                             [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedImagesAfterMirroring(self):
+    images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
+                             [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
+                             [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
+                             [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedBoxesAfterMirroring(self):
+    boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
+                        dtype=tf.float32)
+    return boxes
+
+  def expectedBoxesAfterXY(self):
+    boxes = tf.constant([[0.25, 0.0, 1.0, 0.75], [0.5, 0.25, 1, 0.75]],
+                        dtype=tf.float32)
+    return boxes
+
+  def expectedMasksAfterMirroring(self):
+    mask = np.array([
+        [[0.0, 0.0, 255.0],
+         [0.0, 0.0, 255.0],
+         [0.0, 0.0, 255.0]],
+        [[0.0, 255.0, 255.0],
+         [0.0, 255.0, 255.0],
+         [0.0, 255.0, 255.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def expectedLabelScoresAfterThresholding(self):
+    return tf.constant([1.0], dtype=tf.float32)
+
+  def expectedBoxesAfterThresholding(self):
+    return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
+
+  def expectedLabelsAfterThresholding(self):
+    return tf.constant([1], dtype=tf.float32)
+
+  def expectedMasksAfterThresholding(self):
+    mask = np.array([
+        [[255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def expectedKeypointsAfterThresholding(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def expectedLabelScoresAfterThresholdingWithMissingScore(self):
+    return tf.constant([np.nan], dtype=tf.float32)
+
+  def expectedBoxesAfterThresholdingWithMissingScore(self):
+    return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
+
+  def expectedLabelsAfterThresholdingWithMissingScore(self):
+    return tf.constant([2], dtype=tf.float32)
+
+  def testNormalizeImage(self):
+    preprocess_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 256,
+        'target_minval': -1,
+        'target_maxval': 1
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images = tensor_dict[fields.InputDataFields.image]
+    images_expected = self.expectedImagesAfterNormalization()
+
+    with self.test_session() as sess:
+      (images_, images_expected_) = sess.run(
+          [images, images_expected])
+      images_shape_ = images_.shape
+      images_expected_shape_ = images_expected_.shape
+      expected_shape = [1, 4, 4, 3]
+      self.assertAllEqual(images_expected_shape_, images_shape_)
+      self.assertAllEqual(images_shape_, expected_shape)
+      self.assertAllClose(images_, images_expected_)
+
+  def testRetainBoxesAboveThreshold(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    (retained_boxes, retained_labels,
+     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
+         boxes, labels, label_scores, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_, retained_label_scores_,
+       expected_retained_boxes_, expected_retained_labels_,
+       expected_retained_label_scores_) = sess.run([
+           retained_boxes, retained_labels, retained_label_scores,
+           self.expectedBoxesAfterThresholding(),
+           self.expectedLabelsAfterThresholding(),
+           self.expectedLabelScoresAfterThresholding()])
+      self.assertAllClose(
+          retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(
+          retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_label_scores_, expected_retained_label_scores_)
+
+  def testRetainBoxesAboveThresholdWithMasks(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    masks = self.createTestMasks()
+    _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
+        boxes, labels, label_scores, masks, threshold=0.6)
+    with self.test_session() as sess:
+      retained_masks_, expected_retained_masks_ = sess.run([
+          retained_masks, self.expectedMasksAfterThresholding()])
+
+      self.assertAllClose(
+          retained_masks_, expected_retained_masks_)
+
+  def testRetainBoxesAboveThresholdWithKeypoints(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    keypoints = self.createTestKeypoints()
+    (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
+        boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_keypoints_,
+       expected_retained_keypoints_) = sess.run([
+           retained_keypoints,
+           self.expectedKeypointsAfterThresholding()])
+
+      self.assertAllClose(
+          retained_keypoints_, expected_retained_keypoints_)
+
+  def testRetainBoxesAboveThresholdWithMissingScore(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScoresWithMissingScore()
+    (retained_boxes, retained_labels,
+     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
+         boxes, labels, label_scores, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_, retained_label_scores_,
+       expected_retained_boxes_, expected_retained_labels_,
+       expected_retained_label_scores_) = sess.run([
+           retained_boxes, retained_labels, retained_label_scores,
+           self.expectedBoxesAfterThresholdingWithMissingScore(),
+           self.expectedLabelsAfterThresholdingWithMissingScore(),
+           self.expectedLabelScoresAfterThresholdingWithMissingScore()])
+      self.assertAllClose(
+          retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(
+          retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_label_scores_, expected_retained_label_scores_)
+
+  def testRandomFlipBoxes(self):
+    boxes = self.createTestBoxes()
+
+    # Case where the boxes are flipped.
+    boxes_expected1 = self.expectedBoxesAfterMirroring()
+
+    # Case where the boxes are not flipped.
+    boxes_expected2 = boxes
+
+    # After elementwise multiplication, the result should be all-zero since one
+    # of them is all-zero.
+    boxes_diff = tf.multiply(
+        tf.squared_difference(boxes, boxes_expected1),
+        tf.squared_difference(boxes, boxes_expected2))
+    expected_result = tf.zeros_like(boxes_diff)
+
+    with self.test_session() as sess:
+      (boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
+      self.assertAllEqual(boxes_diff, expected_result)
+
+  def testFlipMasks(self):
+    test_mask = self.createTestMasks()
+    flipped_mask = preprocessor._flip_masks(test_mask)
+    expected_mask = self.expectedMasksAfterMirroring()
+    with self.test_session() as sess:
+      flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
+      self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
+
+  def testRandomHorizontalFlip(self):
+    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+    images = self.expectedImagesAfterNormalization()
+    boxes = self.createTestBoxes()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes}
+    images_expected1 = self.expectedImagesAfterMirroring()
+    boxes_expected1 = self.expectedBoxesAfterMirroring()
+    images_expected2 = images
+    boxes_expected2 = boxes
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images = tensor_dict[fields.InputDataFields.image]
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+    boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
+    boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
+    boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
+    boxes_diff_expected = tf.zeros_like(boxes_diff)
+
+    images_diff1 = tf.squared_difference(images, images_expected1)
+    images_diff2 = tf.squared_difference(images, images_expected2)
+    images_diff = tf.multiply(images_diff1, images_diff2)
+    images_diff_expected = tf.zeros_like(images_diff)
+
+    with self.test_session() as sess:
+      (images_diff_, images_diff_expected_, boxes_diff_,
+       boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
+                                         boxes_diff, boxes_diff_expected])
+      self.assertAllClose(boxes_diff_, boxes_diff_expected_)
+      self.assertAllClose(images_diff_, images_diff_expected_)
+
+  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
+    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+    image_height = 3
+    image_width = 3
+    images = tf.random_uniform([1, image_height, image_width, 3])
+    boxes = self.createTestBoxes()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    keypoint_flip_permutation = self.createKeypointFlipPermutation()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    preprocess_options = [
+        (preprocessor.random_horizontal_flip,
+         {'keypoint_flip_permutation': keypoint_flip_permutation})]
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+    keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+    with self.test_session() as sess:
+      boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+      self.assertTrue(boxes is not None)
+      self.assertTrue(masks is not None)
+      self.assertTrue(keypoints is not None)
+
+  def testRandomPixelValueScale(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_min = tf.to_float(images) * 0.9 / 255.0
+    images_max = tf.to_float(images) * 1.1 / 255.0
+    images = tensor_dict[fields.InputDataFields.image]
+    values_greater = tf.greater_equal(images, images_min)
+    values_less = tf.less_equal(images, images_max)
+    values_true = tf.fill([1, 4, 4, 3], True)
+    with self.test_session() as sess:
+      (values_greater_, values_less_, values_true_) = sess.run(
+          [values_greater, values_less, values_true])
+      self.assertAllClose(values_greater_, values_true_)
+      self.assertAllClose(values_less_, values_true_)
+
+  def testRandomImageScale(self):
+    preprocess_options = [(preprocessor.random_image_scale, {})]
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images_scaled = tensor_dict[fields.InputDataFields.image]
+    images_original_shape = tf.shape(images_original)
+    images_scaled_shape = tf.shape(images_scaled)
+    with self.test_session() as sess:
+      (images_original_shape_, images_scaled_shape_) = sess.run(
+          [images_original_shape, images_scaled_shape])
+      self.assertTrue(
+          images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
+      self.assertTrue(
+          images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
+      self.assertTrue(
+          images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
+      self.assertTrue(
+          images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
+
+  def testRandomRGBtoGray(self):
+    preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images_gray = tensor_dict[fields.InputDataFields.image]
+    images_gray_r, images_gray_g, images_gray_b = tf.split(
+        value=images_gray, num_or_size_splits=3, axis=3)
+    images_r, images_g, images_b = tf.split(
+        value=images_original, num_or_size_splits=3, axis=3)
+    images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
+                                           tf.to_float(images_gray_r))
+    images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
+                                           tf.to_float(images_gray_g))
+    images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
+    images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
+                                           tf.to_float(images_gray_g))
+    images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
+                                           tf.to_float(images_gray_b))
+    images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
+    images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
+                                           tf.to_float(images_gray_b))
+    images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
+                                           tf.to_float(images_gray_r))
+    images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
+    image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
+    with self.test_session() as sess:
+      (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
+          [images_r_diff, images_g_diff, images_b_diff, image_zero1])
+      self.assertAllClose(images_r_diff_, image_zero1_)
+      self.assertAllClose(images_g_diff_, image_zero1_)
+      self.assertAllClose(images_b_diff_, image_zero1_)
+
+  def testRandomAdjustBrightness(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_bright = tensor_dict[fields.InputDataFields.image]
+    image_original_shape = tf.shape(images_original)
+    image_bright_shape = tf.shape(images_bright)
+    with self.test_session() as sess:
+      (image_original_shape_, image_bright_shape_) = sess.run(
+          [image_original_shape, image_bright_shape])
+      self.assertAllEqual(image_original_shape_, image_bright_shape_)
+
+  def testRandomAdjustContrast(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_contrast = tensor_dict[fields.InputDataFields.image]
+    image_original_shape = tf.shape(images_original)
+    image_contrast_shape = tf.shape(images_contrast)
+    with self.test_session() as sess:
+      (image_original_shape_, image_contrast_shape_) = sess.run(
+          [image_original_shape, image_contrast_shape])
+      self.assertAllEqual(image_original_shape_, image_contrast_shape_)
+
+  def testRandomAdjustHue(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_adjust_hue, {}))
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_hue = tensor_dict[fields.InputDataFields.image]
+    image_original_shape = tf.shape(images_original)
+    image_hue_shape = tf.shape(images_hue)
+    with self.test_session() as sess:
+      (image_original_shape_, image_hue_shape_) = sess.run(
+          [image_original_shape, image_hue_shape])
+      self.assertAllEqual(image_original_shape_, image_hue_shape_)
+
+  def testRandomDistortColor(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_distort_color, {}))
+    images_original = self.createTestImages()
+    images_original_shape = tf.shape(images_original)
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images_distorted_color = tensor_dict[fields.InputDataFields.image]
+    images_distorted_color_shape = tf.shape(images_distorted_color)
+    with self.test_session() as sess:
+      (images_original_shape_, images_distorted_color_shape_) = sess.run(
+          [images_original_shape, images_distorted_color_shape])
+      self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
+
+  def testRandomJitterBoxes(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
+    boxes = self.createTestBoxes()
+    boxes_shape = tf.shape(boxes)
+    tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+    distorted_boxes_shape = tf.shape(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_shape_, distorted_boxes_shape_) = sess.run(
+          [boxes_shape, distorted_boxes_shape])
+      self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
+
+  def testRandomCropImage(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_crop_image, {}))
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    self.assertEqual(3, distorted_images.get_shape()[3])
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run([
+           boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+       ])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testRandomCropImageGrayscale(self):
+    preprocessing_options = [(preprocessor.rgb_to_gray, {}),
+                             (preprocessor.normalize_image, {
+                                 'original_minval': 0,
+                                 'original_maxval': 255,
+                                 'target_minval': 0,
+                                 'target_maxval': 1,
+                             }),
+                             (preprocessor.random_crop_image, {})]
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    self.assertEqual(1, distorted_images.get_shape()[3])
+
+    with self.test_session() as sess:
+      session_results = sess.run([
+          boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+      ])
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = session_results
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testRandomCropImageWithBoxOutOfImage(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_crop_image, {}))
+    images = self.createTestImages()
+    boxes = self.createTestBoxesOutOfImage()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testRandomCropImageWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_image, {
+        'random_coef': 1.0
+    })]
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    distorted_labels = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    boxes_shape = tf.shape(boxes)
+    distorted_boxes_shape = tf.shape(distorted_boxes)
+    images_shape = tf.shape(images)
+    distorted_images_shape = tf.shape(distorted_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, distorted_boxes_shape_, images_shape_,
+       distorted_images_shape_, images_, distorted_images_,
+       boxes_, distorted_boxes_, labels_, distorted_labels_) = sess.run(
+           [boxes_shape, distorted_boxes_shape, images_shape,
+            distorted_images_shape, images, distorted_images,
+            boxes, distorted_boxes, labels, distorted_labels])
+      self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
+      self.assertAllEqual(images_shape_, distorted_images_shape_)
+      self.assertAllClose(images_, distorted_images_)
+      self.assertAllClose(boxes_, distorted_boxes_)
+      self.assertAllEqual(labels_, distorted_labels_)
+
+  def testRandomCropWithMockSampleDistortedBoundingBox(self):
+    """Checks random_crop_image boxes and labels for a fixed, mocked crop.
+
+    tf.image.sample_distorted_bounding_box is patched to return constant
+    tensors, so the cropped boxes and labels can be compared against fixed
+    expected values. The expected output holds two of the three input boxes;
+    the box labelled 1 is absent.
+    """
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createColorfulTestImage()
+    boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
+                         [0.2, 0.4, 0.75, 0.75],
+                         [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
+    labels = tf.constant([1, 7, 11], dtype=tf.int32)
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+    # The crop graph must be built while the patch is active so that it picks
+    # up the mocked sampler.
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
+      # Mocked return triple; per the tf.image.sample_distorted_bounding_box
+      # API these are presumably (begin, size, bboxes) -- verify.
+      mock_sample_distorted_bounding_box.return_value = (tf.constant(
+          [6, 143, 0], dtype=tf.int32), tf.constant(
+              [190, 237, -1], dtype=tf.int32), tf.constant(
+                  [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+
+      distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                      preprocessing_options)
+
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
+                                    [0.28421, 0.0, 0.38947365, 0.57805908]],
+                                   dtype=tf.float32)
+      expected_labels = tf.constant([7, 11], dtype=tf.int32)
+
+      with self.test_session() as sess:
+        (distorted_boxes_, distorted_labels_,
+         expected_boxes_, expected_labels_) = sess.run(
+             [distorted_boxes, distorted_labels,
+              expected_boxes, expected_labels])
+        self.assertAllClose(distorted_boxes_, expected_boxes_)
+        self.assertAllEqual(distorted_labels_, expected_labels_)
+
+  def testStrictRandomCropImageWithMasks(self):
+    image = self.createColorfulTestImage()[0]
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      (new_image, new_boxes, new_labels,
+       new_masks) = preprocessor._strict_random_crop_image(
+           image, boxes, labels, masks=masks)
+      with self.test_session() as sess:
+        new_image, new_boxes, new_labels, new_masks = sess.run([
+            new_image, new_boxes, new_labels, new_masks])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(new_image.shape, [190, 237, 3])
+        self.assertAllEqual(new_masks.shape, [2, 190, 237])
+        self.assertAllClose(
+            new_boxes.flatten(), expected_boxes.flatten())
+
+  def testStrictRandomCropImageWithKeypoints(self):
+    image = self.createColorfulTestImage()[0]
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      (new_image, new_boxes, new_labels,
+       new_keypoints) = preprocessor._strict_random_crop_image(
+           image, boxes, labels, keypoints=keypoints)
+      with self.test_session() as sess:
+        new_image, new_boxes, new_labels, new_keypoints = sess.run([
+            new_image, new_boxes, new_labels, new_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        expected_keypoints = np.array([
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]]
+        ], dtype=np.float32)
+        self.assertAllEqual(new_image.shape, [190, 237, 3])
+        self.assertAllClose(
+            new_boxes.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            new_keypoints.flatten(), expected_keypoints.flatten())
+
+  def testRunRandomCropImageWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_masks = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_masks_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_masks])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+
+  def testRunRandomCropImageWithKeypointsInsideCrop(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypointsInsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        expected_keypoints = np.array([
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]],
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]]
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+  def testRunRandomCropImageWithKeypointsOutsideCrop(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypointsOutsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        expected_keypoints = np.array([
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+  def testRunRetainBoxesAboveThreshold(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores
+    }
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options)
+    retained_boxes = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    retained_labels = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_classes]
+    retained_label_scores = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_label_scores]
+
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_,
+       retained_label_scores_, expected_retained_boxes_,
+       expected_retained_labels_, expected_retained_label_scores_) = sess.run(
+           [retained_boxes, retained_labels, retained_label_scores,
+            self.expectedBoxesAfterThresholding(),
+            self.expectedLabelsAfterThresholding(),
+            self.expectedLabelScoresAfterThresholding()])
+
+      self.assertAllClose(retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_label_scores_, expected_retained_label_scores_)
+
+  def testRunRetainBoxesAboveThresholdWithMasks(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    masks = self.createTestMasks()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    retained_masks = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_instance_masks]
+
+    with self.test_session() as sess:
+      (retained_masks_, expected_masks_) = sess.run(
+          [retained_masks,
+           self.expectedMasksAfterThresholding()])
+      self.assertAllClose(retained_masks_, expected_masks_)
+
+  def testRunRetainBoxesAboveThresholdWithKeypoints(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    retained_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    retained_keypoints = retained_tensor_dict[
+        fields.InputDataFields.groundtruth_keypoints]
+
+    with self.test_session() as sess:
+      (retained_keypoints_, expected_keypoints_) = sess.run(
+          [retained_keypoints,
+           self.expectedKeypointsAfterThresholding()])
+      self.assertAllClose(retained_keypoints_, expected_keypoints_)
+
+  def testRunRandomCropToAspectRatioWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_masks = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_masks_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels, distorted_masks
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
+
+  def testRunRandomCropToAspectRatioWithKeypoints(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels,
+             distorted_keypoints
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        expected_keypoints = np.array(
+            [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllClose(distorted_keypoints_.flatten(),
+                            expected_keypoints.flatten())
+
+  def testRandomPadImage(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_pad_image, {})]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+  def testRandomCropPadImageWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_pad_image, {
+        'random_coef': 1.0
+    })]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+  def testRandomCropToAspectRatio(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
+        'aspect_ratio': 2.0
+    })]
+    cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+
+    cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
+    cropped_boxes = cropped_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    cropped_boxes_shape = tf.shape(cropped_boxes)
+    images_shape = tf.shape(images)
+    cropped_images_shape = tf.shape(cropped_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, cropped_boxes_shape_, images_shape_,
+       cropped_images_shape_) = sess.run([
+           boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
+       ])
+      self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
+      self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
+      self.assertEqual(images_shape_[2], cropped_images_shape_[2])
+
+  def testRandomBlackPatches(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_black_patches, {
+        'size_to_image_ratio': 0.5
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    blacked_images_shape = tf.shape(blacked_images)
+
+    with self.test_session() as sess:
+      (images_shape_, blacked_images_shape_) = sess.run(
+          [images_shape, blacked_images_shape])
+      self.assertAllEqual(images_shape_, blacked_images_shape_)
+
+  def testRandomResizeMethod(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_resize_method, {
+        'target_size': (75, 150)
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    resized_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    resized_images = resized_tensor_dict[fields.InputDataFields.image]
+    resized_images_shape = tf.shape(resized_images)
+    expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
+
+    with self.test_session() as sess:
+      (expected_images_shape_, resized_images_shape_) = sess.run(
+          [expected_images_shape, resized_images_shape])
+      self.assertAllEqual(expected_images_shape_,
+                          resized_images_shape_)
+
+  def testResizeToRange(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape)
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToRangeWithMasks(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRangeWithNoInstanceMask(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeImageWithMasks(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeImageWithNoInstanceMask(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRange4DImageTensor(self):
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      preprocessor.resize_to_range(image, 500, 600)
+
+  def testResizeToRangeSameMinMax(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[312, 312, 3], [299, 299, 3]]
+    min_dim = 320
+    max_dim = 320
+    expected_shape_list = [[320, 320, 3], [320, 320, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape)
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testScaleBoxesToPixelCoordinates(self):
+    """Tests box scaling, checking scaled values."""
+    in_shape = [60, 40, 3]
+    in_boxes = [[0.1, 0.2, 0.4, 0.6],
+                [0.5, 0.3, 0.9, 0.7]]
+
+    expected_boxes = [[6., 8., 24., 24.],
+                      [30., 12., 54., 28.]]
+
+    in_image = tf.random_uniform(in_shape)
+    in_boxes = tf.constant(in_boxes)
+    _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
+        in_image, boxes=in_boxes)
+    with self.test_session() as sess:
+      out_boxes = sess.run(out_boxes)
+      self.assertAllClose(out_boxes, expected_boxes)
+
+  def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
+    """Tests box and keypoint scaling, checking scaled values."""
+    in_shape = [60, 40, 3]
+    in_boxes = self.createTestBoxes()
+    in_keypoints = self.createTestKeypoints()
+
+    expected_boxes = [[0., 10., 45., 40.],
+                      [15., 20., 45., 40.]]
+    expected_keypoints = [
+        [[6., 4.], [12., 8.], [18., 12.]],
+        [[24., 16.], [30., 20.], [36., 24.]],
+    ]
+
+    in_image = tf.random_uniform(in_shape)
+    _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
+        in_image, boxes=in_boxes, keypoints=in_keypoints)
+    with self.test_session() as sess:
+      out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
+      self.assertAllClose(out_boxes_, expected_boxes)
+      self.assertAllClose(out_keypoints_, expected_keypoints)
+
+  def testSubtractChannelMean(self):
+    """Tests whether channel means have been subtracted."""
+    with self.test_session():
+      image = tf.zeros((240, 320, 3))
+      means = [1, 2, 3]
+      actual = preprocessor.subtract_channel_mean(image, means=means)
+      actual = actual.eval()
+
+      self.assertTrue((actual[:, :, 0] == -1).all())
+      self.assertTrue((actual[:, :, 1] == -2).all())
+      self.assertTrue((actual[:, :, 2] == -3).all())
+
+  def testOneHotEncoding(self):
+    """Tests one hot encoding of multiclass labels."""
+    with self.test_session():
+      labels = tf.constant([1, 4, 2], dtype=tf.int32)
+      one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
+      one_hot = one_hot.eval()
+
+      self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
+
+  def testSSDRandomCrop(self):
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop, {})]
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropPad(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_pad, {})]
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run([
+           boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+       ])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropFixedAspectRatio(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints,
+    }
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+# Run this module's tests when the file is executed directly.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/region_similarity_calculator.py
+++ b/object_detection/core/region_similarity_calculator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compute a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list_ops
+
+
+class RegionSimilarityCalculator(object):
+  """Abstract base class for region similarity calculator."""
+  __metaclass__ = ABCMeta
+
+  def compare(self, boxlist1, boxlist2, scope=None):
+    """Computes matrix of pairwise similarity between BoxLists.
+
+    This op (to be overriden) computes a measure of pairwise similarity between
+    the boxes in the given BoxLists. Higher values indicate more similarity.
+
+    Note that this method simply measures similarity and does not explicitly
+    perform a matching.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+      scope: Op scope name. Defaults to 'Compare' if None.
+
+    Returns:
+      a (float32) tensor of shape [N, M] with pairwise similarity score.
+    """
+    with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
+      return self._compare(boxlist1, boxlist2)
+
+  @abstractmethod
+  def _compare(self, boxlist1, boxlist2):
+    pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+  This class computes pairwise similarity between two BoxLists based on IOU.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    return box_list_ops.iou(boxlist1, boxlist2)
+
+
+class NegSqDistSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on the squared distance metric.
+
+  This class computes pairwise similarity between two BoxLists based on the
+  negative squared distance metric.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute matrix of (negated) sq distances.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing negated pairwise squared distance.
+    """
+    return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
+
+
+class IoaSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Area (IOA) metric.
+
+  This class computes pairwise similarity between two BoxLists based on their
+  pairwise intersections divided by the areas of second BoxLists.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOA similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise IOA scores.
+    """
+    return box_list_ops.ioa(boxlist1, boxlist2)
--- a/object_detection/core/region_similarity_calculator_test.py
+++ b/object_detection/core/region_similarity_calculator_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for region_similarity_calculator."""
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+
+
+class RegionSimilarityCalculatorTest(tf.test.TestCase):
+
+  def test_get_correct_pairwise_similarity_based_on_iou(self):
+    corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+                            [0.0, 0.0, 20.0, 20.0]])
+    exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
+    boxes1 = box_list.BoxList(corners1)
+    boxes2 = box_list.BoxList(corners2)
+    iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
+    iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
+    with self.test_session() as sess:
+      iou_output = sess.run(iou_similarity)
+      self.assertAllClose(iou_output, exp_output)
+
+  def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
+    corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
+                            [1.0, 1.0, 0.0, 2.0]])
+    corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
+                            [-4.0, 0.0, 0.0, 3.0],
+                            [0.0, 0.0, 0.0, 0.0]])
+    exp_output = [[-26, -25, 0], [-18, -27, -6]]
+    boxes1 = box_list.BoxList(corners1)
+    boxes2 = box_list.BoxList(corners2)
+    dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
+    with self.test_session() as sess:
+      dist_output = sess.run(dist_similarity)
+      self.assertAllClose(dist_output, exp_output)
+
+  def test_get_correct_pairwise_similarity_based_on_ioa(self):
+    corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+    corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+                            [0.0, 0.0, 20.0, 20.0]])
+    exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
+                    [1.0 / 12.0, 0.0, 5.0 / 400.0]]
+    exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
+                    [0, 0],
+                    [6.0 / 6.0, 5.0 / 5.0]]
+    boxes1 = box_list.BoxList(corners1)
+    boxes2 = box_list.BoxList(corners2)
+    ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
+    ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
+    ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
+    with self.test_session() as sess:
+      iou_output_1, iou_output_2 = sess.run(
+          [ioa_similarity_1, ioa_similarity_2])
+      self.assertAllClose(iou_output_1, exp_output_1)
+      self.assertAllClose(iou_output_2, exp_output_2)
+
+
+# Run this module's tests when the file is executed directly.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/standard_fields.py
+++ b/object_detection/core/standard_fields.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Contains classes specifying naming conventions used for object detection.
+
+
+Specifies:
+  InputDataFields: standard fields used by reader/preprocessor/batcher.
+  BoxListFields: standard fields used by BoxList.
+  TfExampleFields: standard fields for tf-example data format (go/tf-example).
+"""
+
+
+class InputDataFields(object):
+  """Names for the input tensors.
+
+  Holds the standard data field names to use for identifying input tensors. This
+  should be used by the decoder to identify keys for the returned tensor_dict
+  containing input tensors. And it should be used by the model to identify the
+  tensors it needs.
+
+  Every attribute is a plain string whose value equals the attribute's name.
+
+  Attributes:
+    image: image.
+    original_image: image in the original input size.
+    key: unique key corresponding to image.
+    source_id: source of the original image.
+    filename: original filename of the dataset (without common path).
+    groundtruth_image_classes: image-level class labels.
+    groundtruth_boxes: coordinates of the ground truth boxes in the image.
+    groundtruth_classes: box-level class labels.
+    groundtruth_label_types: box-level label types (e.g. explicit negative).
+    groundtruth_is_crowd: whether the groundtruth is a single object or a
+      crowd.
+    groundtruth_area: area of a groundtruth segment.
+    groundtruth_difficult: whether the groundtruth is a `difficult` object.
+    proposal_boxes: coordinates of object proposal boxes.
+    proposal_objectness: objectness score of each proposal.
+    groundtruth_instance_masks: ground truth instance masks.
+    groundtruth_instance_classes: instance mask-level class labels.
+    groundtruth_keypoints: ground truth keypoints.
+    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
+    groundtruth_label_scores: groundtruth label scores.
+  """
+  image = 'image'
+  original_image = 'original_image'
+  key = 'key'
+  source_id = 'source_id'
+  filename = 'filename'
+  groundtruth_image_classes = 'groundtruth_image_classes'
+  groundtruth_boxes = 'groundtruth_boxes'
+  groundtruth_classes = 'groundtruth_classes'
+  groundtruth_label_types = 'groundtruth_label_types'
+  groundtruth_is_crowd = 'groundtruth_is_crowd'
+  groundtruth_area = 'groundtruth_area'
+  groundtruth_difficult = 'groundtruth_difficult'
+  proposal_boxes = 'proposal_boxes'
+  proposal_objectness = 'proposal_objectness'
+  groundtruth_instance_masks = 'groundtruth_instance_masks'
+  groundtruth_instance_classes = 'groundtruth_instance_classes'
+  groundtruth_keypoints = 'groundtruth_keypoints'
+  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
+  groundtruth_label_scores = 'groundtruth_label_scores'
+
+
+class BoxListFields(object):
+  """Naming conventions for BoxLists.
+
+  Every attribute is a plain string whose value equals the attribute's name.
+
+  Attributes:
+    boxes: bounding box coordinates.
+    classes: classes per bounding box.
+    scores: scores per bounding box.
+    weights: sample weights per bounding box.
+    objectness: objectness score per bounding box.
+    masks: masks per bounding box.
+    keypoints: keypoints per bounding box.
+    keypoint_heatmaps: keypoint heatmaps per bounding box.
+  """
+  boxes = 'boxes'
+  classes = 'classes'
+  scores = 'scores'
+  weights = 'weights'
+  objectness = 'objectness'
+  masks = 'masks'
+  keypoints = 'keypoints'
+  keypoint_heatmaps = 'keypoint_heatmaps'
+
+
+class TfExampleFields(object):
+  """TF-example proto feature names for object detection.
+
+  Holds the standard feature names to load from an Example proto for object
+  detection. Each attribute's value is a slash-separated feature key.
+
+  Attributes:
+    image_encoded: JPEG encoded string
+    image_format: image format, e.g. "JPEG"
+    filename: filename
+    channels: number of channels of image
+    colorspace: colorspace, e.g. "RGB"
+    height: height of image in pixels, e.g. 462
+    width: width of image in pixels, e.g. 581
+    source_id: original source of the image
+    object_class_text: labels in text format, e.g. ["person", "cat"]
+    object_class_label: labels in numbers, e.g. [16, 8]
+    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
+    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
+    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
+    object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
+    object_view: viewpoint of object, e.g. ["frontal", "left"]
+    object_truncated: is object truncated, e.g. [true, false]
+    object_occluded: is object occluded, e.g. [true, false]
+    object_difficult: is object difficult, e.g. [true, false]
+    object_is_crowd: is the object a single object or a crowd
+    object_segment_area: the area of the segment.
+    instance_masks: instance segmentation masks.
+    instance_classes: Classes for each instance segmentation mask.
+  """
+  image_encoded = 'image/encoded'
+  image_format = 'image/format'  # not named `format`: that is a Python builtin
+  filename = 'image/filename'
+  channels = 'image/channels'
+  colorspace = 'image/colorspace'
+  height = 'image/height'
+  width = 'image/width'
+  source_id = 'image/source_id'
+  object_class_text = 'image/object/class/text'
+  object_class_label = 'image/object/class/label'
+  object_bbox_ymin = 'image/object/bbox/ymin'
+  object_bbox_xmin = 'image/object/bbox/xmin'
+  object_bbox_ymax = 'image/object/bbox/ymax'
+  object_bbox_xmax = 'image/object/bbox/xmax'
+  object_view = 'image/object/view'
+  object_truncated = 'image/object/truncated'
+  object_occluded = 'image/object/occluded'
+  object_difficult = 'image/object/difficult'
+  object_is_crowd = 'image/object/is_crowd'
+  object_segment_area = 'image/object/segment/area'
+  instance_masks = 'image/segmentation/object'
+  instance_classes = 'image/segmentation/object/class'
--- a/object_detection/core/target_assigner.py
+++ b/object_detection/core/target_assigner.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+  provided RegionSimilarity Calculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow as tf
+
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_coder as bcoder
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import matcher as mat
+from object_detection.core import region_similarity_calculator as sim_calc
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+
+
+class TargetAssigner(object):
+  """Target assigner to compute classification and regression targets."""
+
+  def __init__(self, similarity_calc, matcher, box_coder,
+               positive_class_weight=1.0, negative_class_weight=1.0,
+               unmatched_cls_target=None):
+    """Construct Multibox Target Assigner.
+
+    Args:
+      similarity_calc: a RegionSimilarityCalculator
+      matcher: an object_detection.core.Matcher used to match groundtruth to
+        anchors.
+      box_coder: an object_detection.core.BoxCoder used to encode matching
+        groundtruth boxes with respect to anchors.
+      positive_class_weight: classification weight to be associated to positive
+        anchors (default: 1.0)
+      negative_class_weight: classification weight to be associated to negative
+        anchors (default: 1.0)
+      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+        which is consistent with the classification target for each
+        anchor (and can be empty for scalar targets).  This shape must thus be
+        compatible with the groundtruth labels that are passed to the "assign"
+        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+        If set to None, unmatched_cls_target is set to be [0] for each anchor.
+
+    Raises:
+      ValueError: if similarity_calc is not a RegionSimilarityCalculator or
+        if matcher is not a Matcher or if box_coder is not a BoxCoder
+    """
+    if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
+      raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
+    if not isinstance(matcher, mat.Matcher):
+      raise ValueError('matcher must be a Matcher')
+    if not isinstance(box_coder, bcoder.BoxCoder):
+      raise ValueError('box_coder must be a BoxCoder')
+    self._similarity_calc = similarity_calc
+    self._matcher = matcher
+    self._box_coder = box_coder
+    self._positive_class_weight = positive_class_weight
+    self._negative_class_weight = negative_class_weight
+    if unmatched_cls_target is None:
+      self._unmatched_cls_target = tf.constant([0], tf.float32)
+    else:
+      self._unmatched_cls_target = unmatched_cls_target
+
+  @property
+  def box_coder(self):
+    """Returns the box coder stored on this assigner."""
+    return self._box_coder
+
+  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
+             **params):
+    """Assign classification and regression targets to each anchor.
+
+    For a given set of anchors and groundtruth detections, match anchors
+    to groundtruth_boxes and assign classification and regression targets to
+    each anchor as well as weights based on the resulting match (specifying,
+    e.g., which anchors should not contribute to training loss).
+
+    Anchors that are not matched to anything are given a classification target
+    of self._unmatched_cls_target which can be specified via the constructor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth boxes
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
+        to None, groundtruth_labels assumes a binary problem where all
+        ground_truth boxes get a positive label (of 1).
+      **params: Additional keyword arguments for specific implementations of
+              the Matcher.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+      cls_weights: a float32 tensor with shape [num_anchors]
+      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+      reg_weights: a float32 tensor with shape [num_anchors]
+      match: a matcher.Match object encoding the match between anchors and
+        groundtruth boxes, with rows corresponding to groundtruth boxes
+        and columns corresponding to anchors.
+
+    Raises:
+      ValueError: if anchors or groundtruth_boxes are not of type
+        box_list.BoxList
+    """
+    if not isinstance(anchors, box_list.BoxList):
+      raise ValueError('anchors must be an BoxList')
+    if not isinstance(groundtruth_boxes, box_list.BoxList):
+      raise ValueError('groundtruth_boxes must be an BoxList')
+
+    if groundtruth_labels is None:
+      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
+                                                  0))
+      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
+    shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
+                                   tf.shape(self._unmatched_cls_target))
+
+    with tf.control_dependencies([shape_assert]):
+      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
+                                                           anchors)
+      match = self._matcher.match(match_quality_matrix, **params)
+      reg_targets = self._create_regression_targets(anchors,
+                                                    groundtruth_boxes,
+                                                    match)
+      cls_targets = self._create_classification_targets(groundtruth_labels,
+                                                        match)
+      reg_weights = self._create_regression_weights(match)
+      cls_weights = self._create_classification_weights(
+          match, self._positive_class_weight, self._negative_class_weight)
+
+      num_anchors = anchors.num_boxes_static()
+      if num_anchors is not None:
+        reg_targets = self._reset_target_shape(reg_targets, num_anchors)
+        cls_targets = self._reset_target_shape(cls_targets, num_anchors)
+        reg_weights = self._reset_target_shape(reg_weights, num_anchors)
+        cls_weights = self._reset_target_shape(cls_weights, num_anchors)
+
+    return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+  def _reset_target_shape(self, target, num_anchors):
+    """Overwrites the leading static dimension of a target tensor.
+
+    Args:
+      target: the target tensor; its static shape is updated via set_shape.
+      num_anchors: the number of anchors, written into the first entry of the
+        target's static shape.
+
+    Returns:
+      The same target object, carrying the updated static shape.
+    """
+    static_dims = target.get_shape().as_list()
+    # Only the first dimension changes; trailing dimensions are kept as-is.
+    static_dims[0] = num_anchors
+    target.set_shape(static_dims)
+    return target
+
+  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+    """Builds one regression target row per anchor.
+
+    Matched anchors receive the result of self._box_coder.encode applied to
+    their matched groundtruth box and the anchor itself; unmatched or ignored
+    anchors receive a copy of self._default_regression_target().
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth_boxes
+      match: a matcher.Match object
+
+    Returns:
+      reg_targets: a float32 tensor with shape [N, box_code_dimension]
+    """
+    matched_cols = match.matched_column_indices()
+    unmatched_cols = match.unmatched_or_ignored_column_indices()
+    matched_rows = match.matched_row_indices()
+
+    # Pair every matched anchor with the groundtruth box it was matched to.
+    anchors_with_match = box_list_ops.gather(anchors, matched_cols)
+    gt_for_match = box_list_ops.gather(groundtruth_boxes, matched_rows)
+    encoded_targets = self._box_coder.encode(gt_for_match, anchors_with_match)
+
+    # Repeat the single default row once per unmatched/ignored anchor.
+    default_row = self._default_regression_target()
+    tile_multiples = tf.stack([tf.size(unmatched_cols), 1])
+    filler_targets = tf.tile(default_row, tile_multiples)
+
+    # Interleave both groups back into anchor order.
+    # TODO: summarize the number of matches on average.
+    return tf.dynamic_stitch(
+        [matched_cols, unmatched_cols],
+        [encoded_targets, filler_targets])
+
+  def _default_regression_target(self):
+    """Returns the all-zero regression target used for non-matched anchors.
+
+    The value itself should not matter in this implementation: any anchor that
+    regresses to the default target is given a regression weight of zero.
+
+    Returns:
+      default_target: a float32 tensor with shape [1, box_code_dimension]
+    """
+    zero_row = [0] * self._box_coder.code_size
+    return tf.constant([zero_row], tf.float32)
+
+  def _create_classification_targets(self, groundtruth_labels, match):
+    """Builds one classification target per anchor.
+
+    Each matched anchor is assigned the groundtruth label of the box it was
+    matched to (as given by match).  Every unmatched or ignored anchor is
+    assigned self._unmatched_cls_target.
+
+    Args:
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar labels).
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+    """
+    matched_cols = match.matched_column_indices()
+    unmatched_cols = match.unmatched_or_ignored_column_indices()
+    matched_rows = match.matched_row_indices()
+    matched_labels = tf.gather(groundtruth_labels, matched_rows)
+
+    # Tile the unmatched target along a new leading axis only: one copy per
+    # unmatched/ignored anchor and a multiple of 1 for every label dimension.
+    label_dim_multiples = [1] * self._unmatched_cls_target.shape.ndims
+    unmatched_template = tf.expand_dims(self._unmatched_cls_target, 0)
+    tile_multiples = tf.stack([tf.size(unmatched_cols)] + label_dim_multiples)
+    filler_labels = tf.tile(unmatched_template, tile_multiples)
+
+    # Interleave both groups back into anchor order.
+    return tf.dynamic_stitch(
+        [matched_cols, unmatched_cols],
+        [matched_labels, filler_labels])
+
+  def _create_regression_weights(self, match):
+    """Computes the per-anchor regression weight.
+
+    The weight is match.matched_column_indicator() cast to float32, so only
+    matched (positive) anchors contribute to the regression loss.
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      reg_weights: a float32 tensor with shape [num_anchors] representing
+        regression weights
+    """
+    return tf.cast(match.matched_column_indicator(), tf.float32)
+
+  def _create_classification_weights(self,
+                                     match,
+                                     positive_class_weight=1.0,
+                                     negative_class_weight=1.0):
+    """Computes the per-anchor classification weight.
+
+    Matched (positive) anchors get positive_class_weight, unmatched (negative)
+    anchors get negative_class_weight, and ignored anchors get zero because
+    they belong to neither group.  Both weights default to 1.0; they can be
+    adjusted to counter class imbalance.
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      positive_class_weight: weight to be associated to positive anchors
+      negative_class_weight: weight to be associated to negative anchors
+
+    Returns:
+      cls_weights: a float32 tensor with shape [num_anchors] representing
+        classification weights.
+    """
+    is_matched = tf.cast(match.matched_column_indicator(), tf.float32)
+    is_ignored = tf.cast(match.ignored_column_indicator(), tf.float32)
+    # Whatever is neither matched nor ignored counts as unmatched.
+    is_unmatched = 1.0 - is_matched - is_ignored
+    positive_part = positive_class_weight * is_matched
+    negative_part = negative_class_weight * is_unmatched
+    return positive_part + negative_part
+
+  def get_box_coder(self):
+    """Returns the BoxCoder used by this TargetAssigner.
+
+    Returns:
+      BoxCoder: the object stored in self._box_coder.
+    """
+    box_coder = self._box_coder
+    return box_coder
+
+
+# TODO: This function pulls all of the implementation dependencies into core.
+# Therefore it's best to have this factory function outside of core.
+def create_target_assigner(reference, stage=None,
+                           positive_class_weight=1.0,
+                           negative_class_weight=1.0,
+                           unmatched_cls_target=None):
+  """Builds a TargetAssigner from one of the predefined configurations.
+
+  Recognized configurations are ('Multibox', 'proposal'),
+  ('FasterRCNN', 'proposal'), ('FasterRCNN', 'detection') and 'FastRCNN'
+  (for which stage is not inspected).
+
+  Args:
+    reference: string referencing the type of TargetAssigner.
+    stage: string denoting stage: {proposal, detection}.
+    positive_class_weight: classification weight to be associated to positive
+      anchors (default: 1.0)
+    negative_class_weight: classification weight to be associated to negative
+      anchors (default: 1.0)
+    unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+      which is consistent with the classification target for each
+      anchor (and can be empty for scalar targets).  This shape must thus be
+      compatible with the groundtruth labels that are passed to the Assign
+      function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+      If set to None, unmatched_cls_target is set to be 0 for each anchor.
+
+  Returns:
+    TargetAssigner: desired target assigner.
+
+  Raises:
+    ValueError: if combination reference+stage is invalid.
+  """
+  is_proposal = stage == 'proposal'
+  is_detection = stage == 'detection'
+
+  # Each branch builds (similarity calculator, matcher, box coder), in that
+  # order.
+  if reference == 'Multibox' and is_proposal:
+    components = (
+        sim_calc.NegSqDistSimilarity(),
+        bipartite_matcher.GreedyBipartiteMatcher(),
+        mean_stddev_box_coder.MeanStddevBoxCoder())
+
+  elif reference == 'FasterRCNN' and is_proposal:
+    components = (
+        sim_calc.IouSimilarity(),
+        argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
+                                     unmatched_threshold=0.3,
+                                     force_match_for_each_row=True),
+        faster_rcnn_box_coder.FasterRcnnBoxCoder(
+            scale_factors=[10.0, 10.0, 5.0, 5.0]))
+
+  elif reference == 'FasterRCNN' and is_detection:
+    components = (
+        sim_calc.IouSimilarity(),
+        # Uses all proposals with IOU < 0.5 as candidate negatives.
+        argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                     negatives_lower_than_unmatched=True),
+        faster_rcnn_box_coder.FasterRcnnBoxCoder(
+            scale_factors=[10.0, 10.0, 5.0, 5.0]))
+
+  elif reference == 'FastRCNN':
+    components = (
+        sim_calc.IouSimilarity(),
+        argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                     unmatched_threshold=0.1,
+                                     force_match_for_each_row=False,
+                                     negatives_lower_than_unmatched=False),
+        faster_rcnn_box_coder.FasterRcnnBoxCoder())
+
+  else:
+    raise ValueError('No valid combination of reference and stage.')
+
+  similarity_calc, matcher, box_coder = components
+  return TargetAssigner(similarity_calc, matcher, box_coder,
+                        positive_class_weight=positive_class_weight,
+                        negative_class_weight=negative_class_weight,
+                        unmatched_cls_target=unmatched_cls_target)
+
+
+def batch_assign_targets(target_assigner,
+                         anchors_batch,
+                         gt_box_batch,
+                         gt_class_targets_batch):
+  """Runs target assignment for every image of a batch and stacks the results.
+
+  Args:
+    target_assigner: a target assigner.
+    anchors_batch: BoxList representing N box anchors or list of BoxList objects
+      with length batch_size representing anchor sets.
+    gt_box_batch: a list of BoxList objects with length batch_size
+      representing groundtruth boxes for each image in the batch
+    gt_class_targets_batch: a list of tensors with length batch_size, where
+      each tensor has shape [num_gt_boxes_i, classification_target_size] and
+      num_gt_boxes_i is the number of boxes in the ith boxlist of
+      gt_box_batch.
+
+  Returns:
+    batch_cls_targets: a tensor with shape [batch_size, num_anchors,
+      num_classes],
+    batch_cls_weights: a tensor with shape [batch_size, num_anchors],
+    batch_reg_targets: a tensor with shape [batch_size, num_anchors,
+      box_code_dimension]
+    batch_reg_weights: a tensor with shape [batch_size, num_anchors],
+    match_list: a list of matcher.Match objects encoding the match between
+      anchors and groundtruth boxes for each image of the batch,
+      with rows of the Match objects corresponding to groundtruth boxes
+      and columns corresponding to anchors.
+  Raises:
+    ValueError: if anchors_batch is neither a BoxList nor a list of BoxLists,
+      or if the input list lengths are inconsistent, i.e., unless
+      batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
+        and batch_size == len(anchors_batch) (a single BoxList is replicated
+        to batch_size automatically).
+  """
+  # A single anchor set is shared by every image of the batch.
+  if not isinstance(anchors_batch, list):
+    anchors_batch = [anchors_batch] * len(gt_box_batch)
+
+  for candidate in anchors_batch:
+    if not isinstance(candidate, box_list.BoxList):
+      raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
+
+  num_anchor_sets = len(anchors_batch)
+  num_gt_box_lists = len(gt_box_batch)
+  if (num_anchor_sets != num_gt_box_lists
+      or num_gt_box_lists != len(gt_class_targets_batch)):
+    raise ValueError('batch size incompatible with lengths of anchors_batch, '
+                     'gt_box_batch and gt_class_targets_batch.')
+
+  per_image_outputs = []
+  for anchors, gt_boxes, gt_class_targets in zip(
+      anchors_batch, gt_box_batch, gt_class_targets_batch):
+    cls_targets, cls_weights, reg_targets, reg_weights, match = (
+        target_assigner.assign(anchors, gt_boxes, gt_class_targets))
+    per_image_outputs.append(
+        (cls_targets, cls_weights, reg_targets, reg_weights, match))
+
+  # Regroup the per-image tuples into one list per output kind.
+  cls_targets_list = [outputs[0] for outputs in per_image_outputs]
+  cls_weights_list = [outputs[1] for outputs in per_image_outputs]
+  reg_targets_list = [outputs[2] for outputs in per_image_outputs]
+  reg_weights_list = [outputs[3] for outputs in per_image_outputs]
+  match_list = [outputs[4] for outputs in per_image_outputs]
+
+  return (tf.stack(cls_targets_list), tf.stack(cls_weights_list),
+          tf.stack(reg_targets_list), tf.stack(reg_weights_list), match_list)
--- a/object_detection/core/target_assigner_test.py
+++ b/object_detection/core/target_assigner_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.target_assigner."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+from object_detection.core import target_assigner as targetassigner
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+
+
+class TargetAssignerTest(tf.test.TestCase):
+  """Tests for TargetAssigner.assign with various matchers and label shapes."""
+
+  def _assert_assign_outputs(self, result, exp_cls_targets, exp_cls_weights,
+                             exp_reg_targets, exp_reg_weights,
+                             exp_matching_anchors):
+    """Evaluates the output of `assign` and compares it to expected values.
+
+    Args:
+      result: the tuple (cls_targets, cls_weights, reg_targets, reg_weights,
+        match) returned by TargetAssigner.assign.
+      exp_cls_targets: expected values of cls_targets.
+      exp_cls_weights: expected values of cls_weights.
+      exp_reg_targets: expected values of reg_targets.
+      exp_reg_weights: expected values of reg_weights.
+      exp_matching_anchors: expected values of match.matched_column_indices().
+    """
+    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out,
+       reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
+           [cls_targets, cls_weights, reg_targets, reg_weights,
+            match.matched_column_indices()])
+
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
+      # assertEqual rather than the deprecated assertEquals alias, which is no
+      # longer available in recent Python versions.
+      self.assertEqual(cls_targets_out.dtype, np.float32)
+      self.assertEqual(cls_weights_out.dtype, np.float32)
+      self.assertEqual(reg_targets_out.dtype, np.float32)
+      self.assertEqual(reg_weights_out.dtype, np.float32)
+      self.assertEqual(matching_anchors_out.dtype, np.int32)
+
+  def test_assign_agnostic(self):
+    """Checks assignment when no groundtruth labels are passed."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0]])
+    prior_stddevs = tf.constant(3 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+    exp_cls_targets = [[1], [1], [0]]
+    exp_cls_weights = [1, 1, 1]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, -1, 1],
+                       [0, 0, 0, 0]]
+    exp_reg_weights = [1, 1, 0]
+    exp_matching_anchors = [0, 1]
+
+    result = target_assigner.assign(priors, boxes, num_valid_rows=2)
+    self._assert_assign_outputs(
+        result, exp_cls_targets, exp_cls_weights, exp_reg_targets,
+        exp_reg_weights, exp_matching_anchors)
+
+  def test_assign_with_ignored_matches(self):
+    """Checks that an ignored anchor gets a classification weight of zero."""
+    # Note: test is very similar to above. The third box matched with an IOU
+    # of 0.35, which is between the matched and unmatched threshold. This means
+    # that, like above, the expected classification targets are [1, 1, 0].
+    # Unlike above, the third target is ignored and therefore expected
+    # classification weights are [1, 1, 0].
+    similarity_calc = region_similarity_calculator.IouSimilarity()
+    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                           unmatched_threshold=0.3)
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0.0, 0.5, .9, 1.0]])
+    prior_stddevs = tf.constant(3 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+    exp_cls_targets = [[1], [1], [0]]
+    exp_cls_weights = [1, 1, 0]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, -1, 1],
+                       [0, 0, 0, 0]]
+    exp_reg_weights = [1, 1, 0]
+    exp_matching_anchors = [0, 1]
+
+    result = target_assigner.assign(priors, boxes)
+    self._assert_assign_outputs(
+        result, exp_cls_targets, exp_cls_weights, exp_reg_targets,
+        exp_reg_weights, exp_matching_anchors)
+
+  def test_assign_multiclass(self):
+    """Checks assignment with one-hot groundtruth labels."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+
+    exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
+                       [0, 0, 0, 0, 0, 1, 0],
+                       [1, 0, 0, 0, 0, 0, 0],
+                       [0, 0, 0, 1, 0, 0, 0]]
+    exp_cls_weights = [1, 1, 1, 1]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, -1, 1],
+                       [0, 0, 0, 0],
+                       [0, 0, -.5, .2]]
+    exp_reg_weights = [1, 1, 0, 1]
+    exp_matching_anchors = [0, 1, 3]
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    self._assert_assign_outputs(
+        result, exp_cls_targets, exp_cls_weights, exp_reg_targets,
+        exp_reg_weights, exp_matching_anchors)
+
+  def test_assign_multiclass_unequal_class_weights(self):
+    """Checks that negative_class_weight is applied to unmatched anchors."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        positive_class_weight=1.0, negative_class_weight=0.5,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+
+    exp_cls_weights = [1, 1, .5, 1]
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    (_, cls_weights, _, _, _) = result
+    with self.test_session() as sess:
+      cls_weights_out = sess.run(cls_weights)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+
+  def test_assign_multidimensional_class_targets(self):
+    """Checks assignment when each label is itself a 2x2 tensor."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
+                                      [[1, 0], [0, 1]],
+                                      [[0, 1], [1, .5]]], tf.float32)
+
+    exp_cls_targets = [[[0, 1], [1, 0]],
+                       [[1, 0], [0, 1]],
+                       [[0, 0], [0, 0]],
+                       [[0, 1], [1, .5]]]
+    exp_cls_weights = [1, 1, 1, 1]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, -1, 1],
+                       [0, 0, 0, 0],
+                       [0, 0, -.5, .2]]
+    exp_reg_weights = [1, 1, 0, 1]
+    exp_matching_anchors = [0, 1, 3]
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    self._assert_assign_outputs(
+        result, exp_cls_targets, exp_cls_weights, exp_reg_targets,
+        exp_reg_weights, exp_matching_anchors)
+
+  def test_assign_empty_groundtruth(self):
+    """Checks assignment when there are zero groundtruth boxes."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    # Slice zero rows out of one-row constants to obtain empty [0, 4] boxes
+    # and empty [0, 3] labels.
+    box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
+    box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
+    boxes = box_list.BoxList(box_corners)
+
+    groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
+    groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
+
+    exp_cls_targets = [[0, 0, 0],
+                       [0, 0, 0],
+                       [0, 0, 0],
+                       [0, 0, 0]]
+    exp_cls_weights = [1, 1, 1, 1]
+    exp_reg_targets = [[0, 0, 0, 0],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0],
+                       [0, 0, 0, 0]]
+    exp_reg_weights = [0, 0, 0, 0]
+    exp_matching_anchors = []
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels)
+    self._assert_assign_outputs(
+        result, exp_cls_targets, exp_cls_weights, exp_reg_targets,
+        exp_reg_weights, exp_matching_anchors)
+
+  def test_raises_error_on_invalid_groundtruth_labels(self):
+    """Checks that a label subshape mismatch raises ValueError."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
+    prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    # Labels have subshape [2, 2] while unmatched_cls_target has shape [3, 2].
+    groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
+
+    with self.assertRaises(ValueError):
+      target_assigner.assign(priors, boxes, groundtruth_labels,
+                             num_valid_rows=3)
+
+
+class BatchTargetAssignerTest(tf.test.TestCase):
+
+  def _get_agnostic_target_assigner(self):
+    """Returns a TargetAssigner with no explicit unmatched class target.
+
+    It uses negative squared distance similarity, a greedy bipartite matcher
+    and a mean/stddev box coder, with both class weights set to 1.0.
+    """
+    return targetassigner.TargetAssigner(
+        region_similarity_calculator.NegSqDistSimilarity(),
+        bipartite_matcher.GreedyBipartiteMatcher(),
+        mean_stddev_box_coder.MeanStddevBoxCoder(),
+        positive_class_weight=1.0,
+        negative_class_weight=1.0,
+        unmatched_cls_target=None)
+
+  def _get_multi_class_target_assigner(self, num_classes):
+    """Returns a TargetAssigner whose unmatched target is [1, 0, ..., 0].
+
+    Args:
+      num_classes: number of zeros that follow the leading 1 in the unmatched
+        class target, which therefore has length num_classes + 1.
+    """
+    calc = region_similarity_calculator.NegSqDistSimilarity()
+    greedy_matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    background_target = tf.constant([1] + [0] * num_classes, tf.float32)
+    return targetassigner.TargetAssigner(
+        calc, greedy_matcher, coder,
+        positive_class_weight=1.0,
+        negative_class_weight=1.0,
+        unmatched_cls_target=background_target)
+
+  def _get_multi_dimensional_target_assigner(self, target_dimensions):
+    """Returns a TargetAssigner whose unmatched target is an all-zero tensor.
+
+    Args:
+      target_dimensions: shape passed to np.zeros to build the unmatched class
+        target.
+    """
+    calc = region_similarity_calculator.NegSqDistSimilarity()
+    greedy_matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    zero_target = tf.constant(np.zeros(target_dimensions), tf.float32)
+    return targetassigner.TargetAssigner(
+        calc, greedy_matcher, coder,
+        positive_class_weight=1.0,
+        negative_class_weight=1.0,
+        unmatched_cls_target=zero_target)
+
+  def test_batch_assign_targets(self):
+    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
+    box_list2 = box_list.BoxList(tf.constant(
+        [[0, 0.25123152, 1, 1],
+         [0.015789, 0.0985, 0.55789, 0.3842]]
+    ))
+
+    gt_box_batch = [box_list1, box_list2]
+    gt_class_targets = [None, None]
+
+    prior_means = tf.constant([[0, 0, .25, .25],
+                               [0, .25, 1, 1],
+                               [0, .1, .5, .5],
+                               [.75, .75, 1, 1]])
+    prior_stddevs = tf.constant([[.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0,],
+                        [0, 0, 0, 0,],],
+                       [[0, 0, 0, 0,],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+    exp_cls_targets = [[[1], [0], [0], [0]],
+                       [[0], [1], [1], [0]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+    exp_match_0 = [0]
+    exp_match_1 = [1, 2]
+
+    agnostic_target_assigner = self._get_agnostic_target_assigner()
+    (cls_targets, cls_weights, reg_targets, reg_weights,
+     match_list) = targetassigner.batch_assign_targets(
+         agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
+    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
+       match_out_0, match_out_1) = sess.run([
+           cls_targets, cls_weights, reg_targets, reg_weights] + [
+               match.matched_column_indices() for match in match_list])
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(match_out_0, exp_match_0)
+      self.assertAllClose(match_out_1, exp_match_1)
+
+  def test_batch_assign_multiclass_targets(self):
+    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
+
+    box_list2 = box_list.BoxList(tf.constant(
+        [[0, 0.25123152, 1, 1],
+         [0.015789, 0.0985, 0.55789, 0.3842]]
+    ))
+
+    gt_box_batch = [box_list1, box_list2]
+
+    class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
+    class_targets2 = tf.constant([[0, 0, 0, 1],
+                                  [0, 0, 1, 0]], tf.float32)
+
+    gt_class_targets = [class_targets1, class_targets2]
+
+    prior_means = tf.constant([[0, 0, .25, .25],
+                               [0, .25, 1, 1],
+                               [0, .1, .5, .5],
+                               [.75, .75, 1, 1]])
+    prior_stddevs = tf.constant([[.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+    exp_cls_targets = [[[0, 1, 0, 0],
+                        [1, 0, 0, 0],
+                        [1, 0, 0, 0],
+                        [1, 0, 0, 0]],
+                       [[1, 0, 0, 0],
+                        [0, 0, 0, 1],
+                        [0, 0, 1, 0],
+                        [1, 0, 0, 0]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+    exp_match_0 = [0]
+    exp_match_1 = [1, 2]
+
+    multiclass_target_assigner = self._get_multi_class_target_assigner(
+        num_classes=3)
+
+    (cls_targets, cls_weights, reg_targets, reg_weights,
+     match_list) = targetassigner.batch_assign_targets(
+         multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
+    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
+       match_out_0, match_out_1) = sess.run([
+           cls_targets, cls_weights, reg_targets, reg_weights] + [
+               match.matched_column_indices() for match in match_list])
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(match_out_0, exp_match_0)
+      self.assertAllClose(match_out_1, exp_match_1)
+
+  def test_batch_assign_multidimensional_targets(self):
+    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
+
+    box_list2 = box_list.BoxList(tf.constant(
+        [[0, 0.25123152, 1, 1],
+         [0.015789, 0.0985, 0.55789, 0.3842]]
+    ))
+
+    gt_box_batch = [box_list1, box_list2]
+    class_targets1 = tf.constant([[[0, 1, 1],
+                                   [1, 1, 0]]], tf.float32)
+    class_targets2 = tf.constant([[[0, 1, 1],
+                                   [1, 1, 0]],
+                                  [[0, 0, 1],
+                                   [0, 0, 1]]], tf.float32)
+
+    gt_class_targets = [class_targets1, class_targets2]
+
+    prior_means = tf.constant([[0, 0, .25, .25],
+                               [0, .25, 1, 1],
+                               [0, .1, .5, .5],
+                               [.75, .75, 1, 1]])
+    prior_stddevs = tf.constant([[.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1],
+                                 [.1, .1, .1, .1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+
+    exp_cls_targets = [[[[0., 1., 1.],
+                         [1., 1., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]]],
+                       [[[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 1., 1.],
+                         [1., 1., 0.]],
+                        [[0., 0., 1.],
+                         [0., 0., 1.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+    exp_match_0 = [0]
+    exp_match_1 = [1, 2]
+
+    multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
+        target_dimensions=(2, 3))
+
+    (cls_targets, cls_weights, reg_targets, reg_weights,
+     match_list) = targetassigner.batch_assign_targets(
+         multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
+    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
+       match_out_0, match_out_1) = sess.run([
+           cls_targets, cls_weights, reg_targets, reg_weights] + [
+               match.matched_column_indices() for match in match_list])
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(match_out_0, exp_match_0)
+      self.assertAllClose(match_out_1, exp_match_1)
+
+  def test_batch_assign_empty_groundtruth(self):
+    box_coords_expanded = tf.zeros((1, 4), tf.float32)
+    box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
+    box_list1 = box_list.BoxList(box_coords)
+    gt_box_batch = [box_list1]
+
+    prior_means = tf.constant([[0, 0, .25, .25],
+                               [0, .25, 1, 1]])
+    prior_stddevs = tf.constant([[.1, .1, .1, .1],
+                                 [.1, .1, .1, .1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    exp_reg_targets = [[[0, 0, 0, 0],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1]]
+    exp_cls_targets = [[[1, 0, 0, 0],
+                        [1, 0, 0, 0]]]
+    exp_reg_weights = [[0, 0]]
+    exp_match_0 = []
+
+    num_classes = 3
+    pad = 1
+    gt_class_targets = tf.zeros((0, num_classes + pad))
+    gt_class_targets_batch = [gt_class_targets]
+
+    multiclass_target_assigner = self._get_multi_class_target_assigner(
+        num_classes=3)
+
+    (cls_targets, cls_weights, reg_targets, reg_weights,
+     match_list) = targetassigner.batch_assign_targets(
+         multiclass_target_assigner, priors,
+         gt_box_batch, gt_class_targets_batch)
+    self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
+       match_out_0) = sess.run([
+           cls_targets, cls_weights, reg_targets, reg_weights] + [
+               match.matched_column_indices() for match in match_list])
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(match_out_0, exp_match_0)
+
+
+class CreateTargetAssignerTest(tf.test.TestCase):
+
+  def test_create_target_assigner(self):
+    """Tests that named constructor gives working target assigners.
+
+    TODO: Make this test more general.
+    """
+    corners = [[0.0, 0.0, 1.0, 1.0]]
+    groundtruth = box_list.BoxList(tf.constant(corners))
+
+    priors = box_list.BoxList(tf.constant(corners))
+    prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
+    priors.add_field('stddev', prior_stddevs)
+    multibox_ta = (targetassigner
+                   .create_target_assigner('Multibox', stage='proposal'))
+    multibox_ta.assign(priors, groundtruth)
+    # No tests on output, as that may vary arbitrarily as new target assigners
+    # are added. As long as it is constructed correctly and runs without errors,
+    # tests on the individual assigners cover correctness of the assignments.
+
+    anchors = box_list.BoxList(tf.constant(corners))
+    faster_rcnn_proposals_ta = (targetassigner
+                                .create_target_assigner('FasterRCNN',
+                                                        stage='proposal'))
+    faster_rcnn_proposals_ta.assign(anchors, groundtruth)
+
+    fast_rcnn_ta = (targetassigner
+                    .create_target_assigner('FastRCNN'))
+    fast_rcnn_ta.assign(anchors, groundtruth)
+
+    faster_rcnn_detection_ta = (targetassigner
+                                .create_target_assigner('FasterRCNN',
+                                                        stage='detection'))
+    faster_rcnn_detection_ta.assign(anchors, groundtruth)
+
+    with self.assertRaises(ValueError):
+      targetassigner.create_target_assigner('InvalidDetector',
+                                            stage='invalid_stage')
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/create_pascal_tf_record.py
+++ b/object_detection/create_pascal_tf_record.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert raw PASCAL dataset to TFRecord for object_detection.
+
+Example usage:
+    ./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
+        --year=VOC2012 \
+        --output_path=/home/user/pascal.record
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import io
+import logging
+import os
+
+from lxml import etree
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+
+# Command-line flags selecting which PASCAL VOC data is read and where the
+# resulting TFRecord is written.
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
+flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
+                    'merged set.')
+flags.DEFINE_string('annotations_dir', 'Annotations',
+                    '(Relative) path to annotations directory.')
+flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
+flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
+flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
+                    'Path to label map proto')
+flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
+                     'difficult instances')
+FLAGS = flags.FLAGS
+
+# Accepted values for --set and --year; main() raises ValueError for anything
+# else. A --year of 'merged' makes main() read both VOC2007 and VOC2012.
+SETS = ['train', 'val', 'trainval', 'test']
+YEARS = ['VOC2007', 'VOC2012', 'merged']
+
+
+def dict_to_tf_example(data,
+                       dataset_directory,
+                       label_map_dict,
+                       ignore_difficult_instances=False,
+                       image_subdirectory='JPEGImages'):
+  """Convert XML derived dict to tf.Example proto.
+
+  Notice that this function normalizes the bounding box coordinates provided
+  by the raw data.
+
+  Args:
+    data: dict holding PASCAL XML fields for a single image (obtained by
+      running dataset_util.recursive_parse_xml_to_dict)
+    dataset_directory: Path to root directory holding PASCAL dataset
+    label_map_dict: A map from string label names to integers ids.
+    ignore_difficult_instances: Whether to skip difficult instances in the
+      dataset  (default: False).
+    image_subdirectory: String specifying subdirectory within the
+      PASCAL dataset directory holding the actual image data.
+
+  Returns:
+    example: The converted tf.Example.
+
+  Raises:
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+  """
+  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
+  full_path = os.path.join(dataset_directory, img_path)
+  with tf.gfile.GFile(full_path, 'rb') as fid:
+    encoded_jpg = fid.read()
+  encoded_jpg_io = io.BytesIO(encoded_jpg)
+  image = PIL.Image.open(encoded_jpg_io)
+  if image.format != 'JPEG':
+    raise ValueError('Image format not JPEG')
+  key = hashlib.sha256(encoded_jpg).hexdigest()
+
+  width = int(data['size']['width'])
+  height = int(data['size']['height'])
+
+  xmin = []
+  ymin = []
+  xmax = []
+  ymax = []
+  classes = []
+  classes_text = []
+  truncated = []
+  poses = []
+  difficult_obj = []
+  for obj in data['object']:
+    difficult = bool(int(obj['difficult']))
+    if ignore_difficult_instances and difficult:
+      continue
+
+    difficult_obj.append(int(difficult))
+
+    xmin.append(float(obj['bndbox']['xmin']) / width)
+    ymin.append(float(obj['bndbox']['ymin']) / height)
+    xmax.append(float(obj['bndbox']['xmax']) / width)
+    ymax.append(float(obj['bndbox']['ymax']) / height)
+    classes_text.append(obj['name'].encode('utf8'))
+    classes.append(label_map_dict[obj['name']])
+    truncated.append(int(obj['truncated']))
+    poses.append(obj['pose'].encode('utf8'))
+
+  example = tf.train.Example(features=tf.train.Features(feature={
+      'image/height': dataset_util.int64_feature(height),
+      'image/width': dataset_util.int64_feature(width),
+      'image/filename': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/source_id': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
+      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
+      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
+      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
+      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+      'image/object/class/label': dataset_util.int64_list_feature(classes),
+      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+      'image/object/truncated': dataset_util.int64_list_feature(truncated),
+      'image/object/view': dataset_util.bytes_list_feature(poses),
+  }))
+  return example
+
+
+def main(_):
+  if FLAGS.set not in SETS:
+    raise ValueError('set must be in : {}'.format(SETS))
+  if FLAGS.year not in YEARS:
+    raise ValueError('year must be in : {}'.format(YEARS))
+
+  data_dir = FLAGS.data_dir
+  years = ['VOC2007', 'VOC2012']
+  if FLAGS.year != 'merged':
+    years = [FLAGS.year]
+
+  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
+
+  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
+
+  for year in years:
+    logging.info('Reading from PASCAL %s dataset.', year)
+    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
+                                 'aeroplane_' + FLAGS.set + '.txt')
+    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
+    examples_list = dataset_util.read_examples_list(examples_path)
+    for idx, example in enumerate(examples_list):
+      if idx % 100 == 0:
+        logging.info('On image %d of %d', idx, len(examples_list))
+      path = os.path.join(annotations_dir, example + '.xml')
+      with tf.gfile.GFile(path, 'r') as fid:
+        xml_str = fid.read()
+      xml = etree.fromstring(xml_str)
+      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
+                                      FLAGS.ignore_difficult_instances)
+      writer.write(tf_example.SerializeToString())
+
+  writer.close()
+
+
+# Script entry point: tf.app.run() parses the flags and then calls main().
+if __name__ == '__main__':
+  tf.app.run()