Updates changes in object_detection/core directory.

e7de233b · Vivek Rathod · edcd29f2 · e7de233b · e7de233b · e7de233b
Commit e7de233b authored Oct 27, 2017 by Vivek Rathod
14 changed files
--- a/research/object_detection/core/BUILD
+++ b/research/object_detection/core/BUILD
@@ -264,6 +264,11 @@ py_library(
    srcs = ["data_decoder.py"],
 )
+py_library(
+    name = "data_parser",
+    srcs = ["data_parser.py"],
+)
 py_library(
    name = "box_predictor",
    srcs = ["box_predictor.py"],

--- a/research/object_detection/core/box_list_ops.py
+++ b/research/object_detection/core/box_list_ops.py
@@ -584,7 +584,8 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
    with tf.control_dependencies([length_assert]):
-      # TODO: Remove with tf.device when top_k operation runs correctly on GPU.
+      # TODO: Remove with tf.device when top_k operation runs
+      # correctly on GPU.
      with tf.device('/cpu:0'):
        _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
@@ -655,7 +656,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
  This op keeps the collection of boxes whose corresponding scores are
  greater than the input threshold.
-  TODO: Change function name to FilterScoresGreaterThan
+  TODO: Change function name to filter_scores_greater_than
  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
@@ -772,18 +773,25 @@ def to_normalized_coordinates(boxlist, height, width,
    return scale(boxlist, 1 / height, 1 / width)
-def to_absolute_coordinates(boxlist, height, width,
+def to_absolute_coordinates(boxlist,
-                            check_range=True, scope=None):
+                            height,
+                            width,
+                            check_range=True,
+                            maximum_normalized_coordinate=1.01,
+                            scope=None):
  """Converts normalized box coordinates to absolute pixel coordinates.
  This function raises an assertion failed error when the maximum box coordinate
-  value is larger than 1.01 (in which case coordinates are already absolute).
+  value is larger than maximum_normalized_coordinate (in which case coordinates
+  are already absolute).
  Args:
    boxlist: BoxList with coordinates in range [0, 1].
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
+    maximum_normalized_coordinate: Maximum coordinate value to be considered
+      as normalized, default to 1.01.
    scope: name scope.
  Returns:
@@ -797,9 +805,10 @@ def to_absolute_coordinates(boxlist, height, width,
    # Ensure range of input boxes is correct.
    if check_range:
      box_maximum = tf.reduce_max(boxlist.get())
-      max_assert = tf.Assert(tf.greater_equal(1.01, box_maximum),
+      max_assert = tf.Assert(
+          tf.greater_equal(maximum_normalized_coordinate, box_maximum),
          ['maximum box coordinate value is larger '
-                              'than 1.01: ', box_maximum])
+           'than %f: ' % maximum_normalized_coordinate, box_maximum])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)
@@ -927,9 +936,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
  iou_ = iou(selected_boxes, pool_boxes)
  match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
  num_matches = tf.reduce_sum(match_indicator, 1)
-  # TODO: Handle the case where some boxes in selected_boxes do not match to any
+  # TODO: Handle the case where some boxes in selected_boxes do not
-  # boxes in pool_boxes. For such boxes without any matches, we should return
+  # match to any boxes in pool_boxes. For such boxes without any matches, we
-  # the original boxes without voting.
+  # should return the original boxes without voting.
  match_assert = tf.Assert(
      tf.reduce_all(tf.greater(num_matches, 0)),
      ['Each box in selected_boxes must match with at least one box '

--- a/research/object_detection/core/box_predictor.py
+++ b/research/object_detection/core/box_predictor.py
@@ -278,6 +278,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
               box_code_size,
               conv_hyperparams=None,
               predict_instance_masks=False,
+               mask_height=14,
+               mask_width=14,
               mask_prediction_conv_depth=256,
               predict_keypoints=False):
    """Constructor.
@@ -300,6 +302,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
        ops.
      predict_instance_masks: Whether to predict object masks inside detection
        boxes.
+      mask_height: Desired output mask height. The default value is 14.
+      mask_width: Desired output mask width. The default value is 14.
      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
        applied to the image_features in the mask prediciton branch.
      predict_keypoints: Whether to predict keypoints insde detection boxes.
@@ -315,10 +319,10 @@ class MaskRCNNBoxPredictor(BoxPredictor):
    self._dropout_keep_prob = dropout_keep_prob
    self._conv_hyperparams = conv_hyperparams
    self._predict_instance_masks = predict_instance_masks
+    self._mask_height = mask_height
+    self._mask_width = mask_width
    self._mask_prediction_conv_depth = mask_prediction_conv_depth
    self._predict_keypoints = predict_keypoints
-    if self._predict_instance_masks:
-      raise ValueError('Mask prediction is unimplemented.')
    if self._predict_keypoints:
      raise ValueError('Keypoint prediction is unimplemented.')
    if ((self._predict_instance_masks or self._predict_keypoints) and
@@ -339,6 +343,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
    have been folded into the batch dimension.  Thus we output 1 for the
    anchors dimension.
+    Also optionally predicts instance masks.
+    The mask prediction head is based on the Mask RCNN paper with the following
+    modifications: We replace the deconvolution layer with a bilinear resize
+    and a convolution.
    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
@@ -397,15 +406,18 @@ class MaskRCNNBoxPredictor(BoxPredictor):
    if self._predict_instance_masks:
      with slim.arg_scope(self._conv_hyperparams):
-        upsampled_features = slim.conv2d_transpose(
+        upsampled_features = tf.image.resize_bilinear(
            image_features,
+            [self._mask_height, self._mask_width],
+            align_corners=True)
+        upsampled_features = slim.conv2d(
+            upsampled_features,
            num_outputs=self._mask_prediction_conv_depth,
-            kernel_size=[2, 2],
+            kernel_size=[2, 2])
-            stride=2)
        mask_predictions = slim.conv2d(upsampled_features,
                                       num_outputs=self.num_classes,
                                       activation_fn=None,
-                                       kernel_size=[1, 1])
+                                       kernel_size=[3, 3])
        instance_masks = tf.expand_dims(tf.transpose(mask_predictions,
                                                     perm=[0, 3, 1, 2]),
                                        axis=1,
@@ -437,7 +449,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
               dropout_keep_prob,
               kernel_size,
               box_code_size,
-               apply_sigmoid_to_scores=False):
+               apply_sigmoid_to_scores=False,
+               class_prediction_bias_init=0.0):
    """Constructor.
    Args:
@@ -464,6 +477,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
      box_code_size: Size of encoding for each box.
      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
        class_predictions.
+      class_prediction_bias_init: constant value to initialize bias of the last
+        conv2d layer before class prediction.
    Raises:
      ValueError: if min_depth > max_depth.
@@ -480,6 +495,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
    self._box_code_size = box_code_size
    self._dropout_keep_prob = dropout_keep_prob
    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
+    self._class_prediction_bias_init = class_prediction_bias_init
  def _predict(self, image_features, num_predictions_per_location):
    """Computes encoded object locations and corresponding confidences.
@@ -499,15 +515,16 @@ class ConvolutionalBoxPredictor(BoxPredictor):
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
-    features_depth = static_shape.get_depth(image_features.get_shape())
-    depth = max(min(features_depth, self._max_depth), self._min_depth)
    # Add a slot for the background class.
    num_class_slots = self.num_classes + 1
    net = image_features
    with slim.arg_scope(self._conv_hyperparams), \
         slim.arg_scope([slim.dropout], is_training=self._is_training):
-      # Add additional conv layers before the predictor.
+      # Add additional conv layers before the class predictor.
+      features_depth = static_shape.get_depth(image_features.get_shape())
+      depth = max(min(features_depth, self._max_depth), self._min_depth)
+      tf.logging.info('depth of additional conv before box predictor: {}'.
+                      format(depth))
      if depth > 0 and self._num_layers_before_predictor > 0:
        for i in range(self._num_layers_before_predictor):
          net = slim.conv2d(
@@ -522,7 +539,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
          net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
        class_predictions_with_background = slim.conv2d(
            net, num_predictions_per_location * num_class_slots,
-            [self._kernel_size, self._kernel_size], scope='ClassPredictor')
+            [self._kernel_size, self._kernel_size], scope='ClassPredictor',
+            biases_initializer=tf.constant_initializer(
+                self._class_prediction_bias_init))
        if self._apply_sigmoid_to_scores:
          class_predictions_with_background = tf.sigmoid(
              class_predictions_with_background)

--- a/research/object_detection/core/data_parser.py
+++ b/research/object_detection/core/data_parser.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Interface for data parsers.
+Data parser parses input data and returns a dictionary of numpy arrays
+keyed by the entries in standard_fields.py. Since the parser parses records
+to numpy arrays (materialized tensors) directly, it is used to read data for
+evaluation/visualization; to parse the data during training, DataDecoder should
+be used.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+class DataToNumpyParser(object):
+  __metaclass__ = ABCMeta
+  @abstractmethod
+  def parse(self, input_data):
+    """Parses input and returns a numpy array or a dictionary of numpy arrays.
+    Args:
+      input_data: the input data to parse.
+    Returns:
+      A numpy array or a dictionary of numpy arrays or None, if input
+      cannot be parsed.
+    """
+    pass
--- a/research/object_detection/core/keypoint_ops.py
+++ b/research/object_detection/core/keypoint_ops.py
@@ -229,3 +229,54 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
    new_keypoints = tf.concat([v, u], 2)
    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
    return new_keypoints
+def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
+  """Flips the keypoints vertically around the flip_point.
+  This operation flips the y coordinate for each keypoint around the flip_point
+  and also permutes the keypoints in a manner specified by flip_permutation.
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the y coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a vertical flip.
+    scope: name scope.
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'FlipVertical'):
+    keypoints = tf.transpose(keypoints, [1, 0, 2])
+    keypoints = tf.gather(keypoints, flip_permutation)
+    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    v = flip_point * 2.0 - v
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+    return new_keypoints
+def rot90(keypoints, scope=None):
+  """Rotates the keypoints counter-clockwise by 90 degrees.
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    scope: name scope.
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'Rot90'):
+    keypoints = tf.transpose(keypoints, [1, 0, 2])
+    v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
+    v = 1.0 - v
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+    return new_keypoints
--- a/research/object_detection/core/keypoint_ops_test.py
+++ b/research/object_detection/core/keypoint_ops_test.py
@@ -163,6 +163,38 @@ class KeypointOpsTest(tf.test.TestCase):
      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
      self.assertAllClose(output_, expected_keypoints_)
+  def test_flip_vertical(self):
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+    ])
+    flip_permutation = [0, 2, 1]
+    expected_keypoints = tf.constant([
+        [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
+        [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
+    ])
+    output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+  def test_rot90(self):
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
+    ])
+    expected_keypoints = tf.constant([
+        [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
+        [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
+    ])
+    output = keypoint_ops.rot90(keypoints)
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/core/losses.py
+++ b/research/object_detection/core/losses.py
@@ -72,7 +72,7 @@ class Loss(object):
  @abstractmethod
  def _compute_loss(self, prediction_tensor, target_tensor, **params):
-    """Method to be overriden by implementations.
+    """Method to be overridden by implementations.
    Args:
      prediction_tensor: a tensor representing predicted quantities
@@ -238,17 +238,85 @@ class WeightedSigmoidClassificationLoss(Loss):
    return tf.reduce_sum(per_entry_cross_ent * weights)
+class SigmoidFocalClassificationLoss(Loss):
+  """Sigmoid focal cross entropy loss.
+  Focal loss down-weights well classified examples and focusses on the hard
+  examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
+  """
+  def __init__(self, anchorwise_output=False, gamma=2.0, alpha=0.25):
+    """Constructor.
+    Args:
+      anchorwise_output: Outputs loss per anchor. (default False)
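+      gamma: exponent of the modulating factor (1 - p_t) ^ gamma. A falsy
+        value (0 or None) disables the modulating factor.
+      alpha: optional alpha weighting factor to balance positives vs negatives.
+        If None, no alpha weighting is applied.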
+    """
+    self._anchorwise_output = anchorwise_output
+    self._alpha = alpha
+    self._gamma = gamma
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Compute loss function.
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    weights = tf.expand_dims(weights, 2)
+    if class_indices is not None:
+      weights *= tf.reshape(
+          ops.indices_to_dense_vector(class_indices,
+                                      tf.shape(prediction_tensor)[2]),
+          [1, 1, -1])
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor))
+    prediction_probabilities = tf.sigmoid(prediction_tensor)
+    p_t = ((target_tensor * prediction_probabilities) +
+           ((1 - target_tensor) * (1 - prediction_probabilities)))
+    modulating_factor = 1.0
+    if self._gamma:
+      modulating_factor = tf.pow(1.0 - p_t, self._gamma)
+    alpha_weight_factor = 1.0
+    if self._alpha is not None:
+      alpha_weight_factor = (target_tensor * self._alpha +
+                             (1 - target_tensor) * (1 - self._alpha))
+    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
+                                per_entry_cross_ent)
+    if self._anchorwise_output:
+      return tf.reduce_sum(focal_cross_entropy_loss * weights, 2)
+    return tf.reduce_sum(focal_cross_entropy_loss * weights)
 class WeightedSoftmaxClassificationLoss(Loss):
  """Softmax loss function."""
-  def __init__(self, anchorwise_output=False):
+  def __init__(self, anchorwise_output=False, logit_scale=1.0):
    """Constructor.
    Args:
      anchorwise_output: Whether to output loss per anchor (default False)
+      logit_scale: Logits are divided by this value before the softmax. When
+                   it is high, the prediction is "diffused"; when it is low,
+                   the prediction is made peakier. (default 1.0)
    """
    self._anchorwise_output = anchorwise_output
+    self._logit_scale = logit_scale
  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.
@@ -264,6 +332,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
      loss: a (scalar) tensor representing the value of the loss function
    """
    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    prediction_tensor = tf.divide(
+        prediction_tensor, self._logit_scale, name='scale_logit')
    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.reshape(target_tensor, [-1, num_classes]),
        logits=tf.reshape(prediction_tensor, [-1, num_classes])))

--- a/research/object_detection/core/losses_test.py
+++ b/research/object_detection/core/losses_test.py
@@ -225,6 +225,286 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
      self.assertAllClose(loss_output, exp_loss)
+def _logit(probability):
+  return math.log(probability / (1. - probability))
+class SigmoidFocalClassificationLossTest(tf.test.TestCase):
+  def testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.97)],
+                                      [_logit(0.90)],
+                                      [_logit(0.73)],
+                                      [_logit(0.27)],
+                                      [_logit(0.09)],
+                                      [_logit(0.03)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAllClose(order_of_ratio, [[1000, 100, 10, 10, 100, 1000]])
+  def testHardExamplesProduceLossComparableToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAllClose(order_of_ratio, [[1., 1., 1., 1., 1.]])
+  def testNonAnchorWiseOutputComparableToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=False)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAlmostEqual(order_of_ratio, 1.)
+  def testIgnoreNegativeExampleLossViaAlphaMultiplier(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=1.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(focal_loss[0][3:], [0., 0.])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss[0][:3] /
+                                                  focal_loss[0][:3])))
+      self.assertAllClose(order_of_ratio, [1., 1., 1.])
+  def testIgnorePositiveExampleLossViaAlphaMultiplier(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(focal_loss[0][:3], [0., 0., 0.])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss[0][3:] /
+                                                  focal_loss[0][3:])))
+      self.assertAllClose(order_of_ratio, [1., 1.])
+  def testSimilarToSigmoidXEntropyWithHalfAlphaAndZeroGammaUpToAScale(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, alpha=0.5, gamma=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(sigmoid_loss, focal_loss * 2)
+  def testSameAsSigmoidXEntropyWithNoAlphaAndZeroGamma(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, alpha=None, gamma=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(sigmoid_loss, focal_loss)
+  def testExpectedLossWithAlphaOneAndZeroGamma(self):
+    # All zeros correspond to 0.5 probability.
+    prediction_tensor = tf.constant([[[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]],
+                                     [[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, alpha=1.0, gamma=0.0)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    with self.test_session() as sess:
+      focal_loss = sess.run(focal_loss)
+      self.assertAllClose(
+          (-math.log(.5) *  # x-entropy per class per anchor
+           1.0 *            # alpha
+           8),              # positives from 8 anchors
+          focal_loss)
+  def testExpectedLossWithAlpha75AndZeroGamma(self):
+    # All zeros correspond to 0.5 probability.
+    prediction_tensor = tf.constant([[[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]],
+                                     [[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, alpha=0.75, gamma=0.0)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    with self.test_session() as sess:
+      focal_loss = sess.run(focal_loss)
+      self.assertAllClose(
+          (-math.log(.5) *  # x-entropy per class per anchor.
+           ((0.75 *         # alpha for positives.
+             8) +           # positives from 8 anchors.
+            (0.25 *         # alpha for negatives.
+             8 * 2))),      # negatives from 8 anchors for two classes.
+          focal_loss)
 class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
  def testReturnsCorrectLoss(self):
@@ -282,6 +562,39 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
+  def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self):
+    """At very high logit_scale, all predictions will be ~0.33."""
+    # TODO(yonib): Also test logit_scale with anchorwise=False.
+    # 10e16 is 1e17. NOTE(review): presumably the loss divides the logits by
+    # logit_scale before the softmax, which would flatten them -- confirm
+    # against losses.WeightedSoftmaxClassificationLoss.
+    logit_scale = 10e16
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [0, 1, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationLoss(
+        anchorwise_output=True, logit_scale=logit_scale)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    # Cross-entropy of a one-hot target against a uniform 3-class prediction.
+    uniform_distribution_loss = - math.log(.33333333333)
+    # anchorwise_output=True: one expected value per anchor, laid out like
+    # `weights` (2 rows of 4 anchors).
+    exp_loss = np.matrix([[uniform_distribution_loss] * 4,
+                          [uniform_distribution_loss] * 4])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
 class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):

--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Abstract detection model.
 This file defines a generic base class for detection models.  Programs that are
@@ -87,6 +86,18 @@ class DetectionModel(object):
      raise RuntimeError('Groundtruth tensor %s has not been provided', field)
    return self._groundtruth_lists[field]
+  def groundtruth_has_field(self, field):
+    """Determines whether the groundtruth includes the given field.
+    Args:
+      field: a string key, options are
+        fields.BoxListFields.{boxes,classes,masks,keypoints}
+    Returns:
+      True if the groundtruth includes the given field, False otherwise.
+    """
+    # Pure membership test on self._groundtruth_lists; nothing is modified.
+    return field in self._groundtruth_lists
  @abstractmethod
  def preprocess(self, inputs):
    """Input preprocessing.
@@ -148,7 +159,8 @@ class DetectionModel(object):
    Outputs adhere to the following conventions:
    * Classes are integers in [0, num_classes); background classes are removed
-      and the first non-background class is mapped to 0.
+      and the first non-background class is mapped to 0. If the model produces
+      class-agnostic detections, then no output is produced for classes.
    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
      format and normalized relative to the image window.
    * `num_detections` is provided for settings where detections are padded to a
@@ -168,6 +180,8 @@ class DetectionModel(object):
        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
+          (If a model is producing class-agnostic detections, this field may be
+          missing)
        instance_masks: [batch, max_detections, image_height, image_width]
          (optional)
        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
@@ -207,13 +221,13 @@ class DetectionModel(object):
      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
        tensors of shape [num_boxes, num_classes] containing the class targets
        with the 0th index assumed to map to the first non-background class.
-      groundtruth_masks_list: a list of 2-D tf.float32 tensors of
+      groundtruth_masks_list: a list of 3-D tf.float32 tensors of
-        shape [max_detections, height_in, width_in] containing instance
+        shape [num_boxes, height_in, width_in] containing instance
        masks with values in {0, 1}.  If None, no masks are provided.
        Mask resolution `height_in`x`width_in` must agree with the resolution
        of the input image tensor provided to the `preprocess` function.
-      groundtruth_keypoints_list: a list of 2-D tf.float32 tensors of
+      groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
-        shape [batch, max_detections, num_keypoints, 2] containing keypoints.
+        shape [num_boxes, num_keypoints, 2] containing keypoints.
        Keypoints are assumed to be provided in normalized coordinates and
        missing keypoints should be encoded as NaN.
    """

--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Preprocess images and bounding boxes for detection.
 We perform two sets of operations in preprocessing stage:
@@ -147,28 +146,12 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
    return image
-def flip_boxes(boxes):
+def retain_boxes_above_threshold(boxes,
-  """Left-right flip the boxes.
+                                 labels,
+                                 label_scores,
-  Args:
+                                 masks=None,
-    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+                                 keypoints=None,
-           Boxes are in normalized form meaning their coordinates vary
+                                 threshold=0.0):
-           between [0, 1].
-           Each row is in the form of [ymin, xmin, ymax, xmax].
-  Returns:
-    Flipped boxes.
-  """
-  # Flip boxes.
-  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
-  flipped_xmin = tf.subtract(1.0, xmax)
-  flipped_xmax = tf.subtract(1.0, xmin)
-  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
-  return flipped_boxes
-def retain_boxes_above_threshold(
-    boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
  """Retains boxes whose label score is above a given threshold.
  If the label score for a box is missing (represented by NaN), the box is
@@ -221,8 +204,68 @@ def retain_boxes_above_threshold(
    return result
-def _flip_masks(masks):
+def _flip_boxes_left_right(boxes):
-  """Left-right flips masks.
+  """Left-right flip the boxes.
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+  Returns:
+    Flipped boxes.
+  """
+  # Split the [N, 4] tensor into four [N, 1] coordinate columns.
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  # Mirroring maps x to 1 - x, so the old xmax yields the new xmin and the
+  # old xmin yields the new xmax; y coordinates are untouched.
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+def _flip_boxes_up_down(boxes):
+  """Up-down flip the boxes.
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+  Returns:
+    Flipped boxes.
+  """
+  # Split the [N, 4] tensor into four [N, 1] coordinate columns.
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  # Mirroring maps y to 1 - y, so the old ymax yields the new ymin and the
+  # old ymin yields the new ymax; x coordinates are untouched.
+  flipped_ymin = tf.subtract(1.0, ymax)
+  flipped_ymax = tf.subtract(1.0, ymin)
+  flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1)
+  return flipped_boxes
+def _rot90_boxes(boxes):
+  """Rotate boxes counter-clockwise by 90 degrees.
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+  Returns:
+    Rotated boxes.
+  """
+  # Split the [N, 4] tensor into four [N, 1] coordinate columns.
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  # The rotation maps a point (y, x) to (1 - x, y): the new y extent comes
+  # from the mirrored old x extent (so old xmax gives the new ymin), and the
+  # new x extent is the old y extent unchanged.
+  rotated_ymin = tf.subtract(1.0, xmax)
+  rotated_ymax = tf.subtract(1.0, xmin)
+  rotated_xmin = ymin
+  rotated_xmax = ymax
+  rotated_boxes = tf.concat(
+      [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1)
+  return rotated_boxes
+def _flip_masks_left_right(masks):
+  """Left-right flip masks.
  Args:
    masks: rank 3 float32 tensor with shape
@@ -235,14 +278,42 @@ def _flip_masks(masks):
  return masks[:, :, ::-1]
-def random_horizontal_flip(
+def _flip_masks_up_down(masks):
-    image,
+  """Up-down flip masks.
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  # Reverse axis 1 (height) so the rows of every mask are mirrored.
+  return masks[:, ::-1, :]
+def _rot90_masks(masks):
+  """Rotate masks counter-clockwise by 90 degrees.
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  Returns:
+    rotated masks: rank 3 float32 tensor with shape
+      [num_instances, width, height] representing instance masks (the two
+      spatial axes of the input are swapped by the transpose).
+  """
+  # Swap the two spatial axes, then reverse the new axis 1 to complete the
+  # quarter turn.
+  masks = tf.transpose(masks, [0, 2, 1])
+  return masks[:, ::-1, :]
+def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
-  """Randomly decides whether to mirror the image and detections or not.
+  """Randomly flips the image and detections horizontally.
  The probability of flipping the image is 50%.
@@ -259,14 +330,14 @@ def random_horizontal_flip(
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
-    keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
+    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
                               permutation.
    seed: random seed
  Returns:
    image: image which is the same shape as input image.
-    If boxes, masks, keypoints, and keypoint_flip_permutation is not None,
+    If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
    the function also returns the following tensors.
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
@@ -280,6 +351,7 @@ def random_horizontal_flip(
  Raises:
    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
  """
  def _flip_image(image):
    # flip image
    image_flipped = tf.image.flip_left_right(image)
@@ -292,10 +364,7 @@ def random_horizontal_flip(
  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
    result = []
    # random variable defining whether to do flip or not
-    do_a_flip_random = tf.random_uniform([], seed=seed)
+    do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
-    # flip only if there are bounding boxes in image!
-    do_a_flip_random = tf.logical_and(
-        tf.greater(tf.size(boxes), 0), tf.greater(do_a_flip_random, 0.5))
    # flip image
    image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
@@ -303,14 +372,14 @@ def random_horizontal_flip(
    # flip boxes
    if boxes is not None:
-      boxes = tf.cond(
+      boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes),
-          do_a_flip_random, lambda: flip_boxes(boxes), lambda: boxes)
+                      lambda: boxes)
      result.append(boxes)
    # flip masks
    if masks is not None:
-      masks = tf.cond(
+      masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks),
-          do_a_flip_random, lambda: _flip_masks(masks), lambda: masks)
+                      lambda: masks)
      result.append(masks)
    # flip keypoints
@@ -325,6 +394,174 @@ def random_horizontal_flip(
    return tuple(result)
+def random_vertical_flip(image,
+                         boxes=None,
+                         masks=None,
+                         keypoints=None,
+                         keypoint_flip_permutation=None,
+                         seed=None):
+  """Randomly flips the image and detections vertically.
+  The probability of flipping the image is 50%.
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+                               permutation. Required when keypoints is given.
+    seed: random seed
+  Returns:
+    A tuple whose first element is always the image, followed by boxes, masks
+    and keypoints (in that order) for each of those inputs that is not None.
+    image: image which is the same shape as input image.
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_up_down(image)
+    return image_flipped
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoints_flip_permutation is not provided')
+  with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
+    result = []
+    # A single random draw decides the flip, so the image and every provided
+    # annotation are flipped together or not at all.
+    do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
+    # flip image
+    image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+    result.append(image)
+    # flip boxes
+    if boxes is not None:
+      boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes),
+                      lambda: boxes)
+      result.append(boxes)
+    # flip masks
+    if masks is not None:
+      masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks),
+                      lambda: masks)
+      result.append(masks)
+    # flip keypoints; the permutation check is already guaranteed by the
+    # ValueError guard above whenever keypoints is given.
+    if keypoints is not None and keypoint_flip_permutation is not None:
+      permutation = keypoint_flip_permutation
+      # NOTE(review): 0.5 is presumably the normalized y coordinate to flip
+      # about -- confirm against keypoint_ops.flip_vertical.
+      keypoints = tf.cond(
+          do_a_flip_random,
+          lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation),
+          lambda: keypoints)
+      result.append(keypoints)
+    return tuple(result)
+def random_rotation90(image,
+                      boxes=None,
+                      masks=None,
+                      keypoints=None,
+                      seed=None):
+  """Randomly rotates the image and detections 90 degrees counter-clockwise.
+  The probability of rotating the image is 50%. This can be combined with
+  random_horizontal_flip and random_vertical_flip to produce an output with a
+  uniform distribution of the eight possible 90 degree rotation / reflection
+  combinations.
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    seed: random seed
+  Returns:
+    A tuple whose first element is always the image, followed by boxes, masks
+    and keypoints (in that order) for each of those inputs that is not None.
+    image: the input image, either rotated or unchanged.
+           NOTE(review): when rotated, height and width are presumably swapped
+           for non-square inputs -- confirm against tf.image.rot90.
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor containing instance masks; when rotated, the
+           two spatial axes of the input masks are swapped.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  def _rot90_image(image):
+    # rotate image
+    image_rotated = tf.image.rot90(image)
+    return image_rotated
+  with tf.name_scope('RandomRotation90', values=[image, boxes]):
+    result = []
+    # A single random draw decides the rotation, so the image and every
+    # provided annotation are rotated together or not at all.
+    do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
+    # rotate image
+    image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
+                    lambda: image)
+    result.append(image)
+    # rotate boxes
+    if boxes is not None:
+      boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes),
+                      lambda: boxes)
+      result.append(boxes)
+    # rotate masks
+    if masks is not None:
+      masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks),
+                      lambda: masks)
+      result.append(masks)
+    # rotate keypoints
+    if keypoints is not None:
+      keypoints = tf.cond(
+          do_a_rot90_random,
+          lambda: keypoint_ops.rot90(keypoints),
+          lambda: keypoints)
+      result.append(keypoints)
+    return tuple(result)
 def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
  """Scales each value in the pixels of the image.
@@ -602,6 +839,7 @@ def random_jitter_boxes(boxes, ratio=0.05, seed=None):
 def _strict_random_crop_image(image,
                              boxes,
                              labels,
+                              label_scores=None,
                              masks=None,
                              keypoints=None,
                              min_object_covered=1.0,
@@ -625,6 +863,8 @@ def _strict_random_crop_image(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: (optional) float32 tensor of shape [num_instances]
+      representing the score for each box.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
@@ -645,8 +885,8 @@ def _strict_random_crop_image(image,
           Boxes are in normalized form.
    labels: new labels.
-    If masks, or keypoints is not None, the function also returns:
+    If label_scores, masks, or keypoints is not None, the function also returns:
+    label_scores: rank 1 float32 tensor with shape [num_instances].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
@@ -682,6 +922,9 @@ def _strict_random_crop_image(image,
    boxlist = box_list.BoxList(boxes)
    boxlist.add_field('labels', labels)
+    if label_scores is not None:
+      boxlist.add_field('label_scores', label_scores)
    im_boxlist = box_list.BoxList(im_box_rank2)
    # remove boxes that are outside cropped image
@@ -702,6 +945,10 @@ def _strict_random_crop_image(image,
    result = [new_image, new_boxes, new_labels]
+    if label_scores is not None:
+      new_label_scores = overlapping_boxlist.get_field('label_scores')
+      result.append(new_label_scores)
    if masks is not None:
      masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
      masks_of_boxes_completely_inside_window = tf.gather(
@@ -729,6 +976,7 @@ def _strict_random_crop_image(image,
 def random_crop_image(image,
                      boxes,
                      labels,
+                      label_scores=None,
                      masks=None,
                      keypoints=None,
                      min_object_covered=1.0,
@@ -761,6 +1009,8 @@ def random_crop_image(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: (optional) float32 tensor of shape [num_instances]
+      representing the score for each box.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
@@ -786,8 +1036,9 @@ def random_crop_image(image,
           form.
    labels: new labels.
-    If masks, or keypoints are not None, the function also returns:
+    If label_scores, masks, or keypoints are not None, the function also
+    returns:
+    label_scores: new scores.
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
@@ -799,6 +1050,7 @@ def random_crop_image(image,
        image,
        boxes,
        labels,
+        label_scores=label_scores,
        masks=masks,
        keypoints=keypoints,
        min_object_covered=min_object_covered,
@@ -814,13 +1066,15 @@ def random_crop_image(image,
    do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
    outputs = [image, boxes, labels]
+    if label_scores is not None:
+      outputs.append(label_scores)
    if masks is not None:
      outputs.append(masks)
    if keypoints is not None:
      outputs.append(keypoints)
-    result = tf.cond(do_a_crop_random,
+    result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
-                     strict_random_crop_image_fn,
                     lambda: tuple(outputs))
  return result
@@ -865,7 +1119,7 @@ def random_pad_image(image,
           form.
  """
  if pad_color is None:
-    pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])
+    pad_color = tf.reduce_mean(image, axis=[0, 1])
  image_shape = tf.shape(image)
  image_height = image_shape[0]
@@ -902,16 +1156,22 @@ def random_pad_image(image,
      lambda: tf.constant(0, dtype=tf.int32))
  new_image = tf.image.pad_to_bounding_box(
-      image, offset_height=offset_height, offset_width=offset_width,
+      image,
-      target_height=target_height, target_width=target_width)
+      offset_height=offset_height,
+      offset_width=offset_width,
+      target_height=target_height,
+      target_width=target_width)
  # Setting color of the padded pixels
  image_ones = tf.ones_like(image)
  image_ones_padded = tf.image.pad_to_bounding_box(
-      image_ones, offset_height=offset_height, offset_width=offset_width,
+      image_ones,
-      target_height=target_height, target_width=target_width)
+      offset_height=offset_height,
-  image_color_paded = (1.0 - image_ones_padded) * pad_color
+      offset_width=offset_width,
-  new_image += image_color_paded
+      target_height=target_height,
+      target_width=target_width)
+  image_color_padded = (1.0 - image_ones_padded) * pad_color
+  new_image += image_color_padded
  # setting boxes
  new_window = tf.to_float(
@@ -931,13 +1191,14 @@ def random_pad_image(image,
 def random_crop_pad_image(image,
                          boxes,
                          labels,
+                          label_scores=None,
                          min_object_covered=1.0,
                          aspect_ratio_range=(0.75, 1.33),
                          area_range=(0.1, 1.0),
                          overlap_thresh=0.3,
                          random_coef=0.0,
-                          min_padded_size_ratio=None,
+                          min_padded_size_ratio=(1.0, 1.0),
-                          max_padded_size_ratio=None,
+                          max_padded_size_ratio=(2.0, 2.0),
                          pad_color=None,
                          seed=None):
  """Randomly crops and pads the image.
@@ -960,6 +1221,7 @@ def random_crop_pad_image(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: rank 1 float32 containing the label scores.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -972,11 +1234,9 @@ def random_crop_pad_image(image,
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    min_padded_size_ratio: min ratio of padded image height and width to the
-                           input image's height and width. If None, it will
+                           input image's height and width.
-                           be set to [0.0, 0.0].
    max_padded_size_ratio: max ratio of padded image height and width to the
-                           input image's height and width. If None, it will
+                           input image's height and width.
-                           be set to [2.0, 2.0].
    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
               if set as None, it will be set to average color of the randomly
               cropped image.
@@ -987,18 +1247,17 @@ def random_crop_pad_image(image,
    padded_boxes: boxes which is the same rank as input boxes. Boxes are in
                  normalized form.
    cropped_labels: cropped labels.
+    if label_scores is not None also returns:
+    cropped_label_scores: cropped label scores.
  """
  image_size = tf.shape(image)
  image_height = image_size[0]
  image_width = image_size[1]
-  if min_padded_size_ratio is None:
+  result = random_crop_image(
-    min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
-  if max_padded_size_ratio is None:
-    max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)
-  cropped_image, cropped_boxes, cropped_labels = random_crop_image(
      image=image,
      boxes=boxes,
      labels=labels,
+      label_scores=label_scores,
      min_object_covered=min_object_covered,
      aspect_ratio_range=aspect_ratio_range,
      area_range=area_range,
@@ -1006,6 +1265,8 @@ def random_crop_pad_image(image,
      random_coef=random_coef,
      seed=seed)
+  cropped_image, cropped_boxes, cropped_labels = result[:3]
  min_image_size = tf.to_int32(
      tf.to_float(tf.stack([image_height, image_width])) *
      min_padded_size_ratio)
@@ -1021,12 +1282,19 @@ def random_crop_pad_image(image,
      pad_color=pad_color,
      seed=seed)
-  return padded_image, padded_boxes, cropped_labels
+  cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
+  if label_scores is not None:
+    cropped_label_scores = result[3]
+    cropped_padded_output += (cropped_label_scores,)
+  return cropped_padded_output
 def random_crop_to_aspect_ratio(image,
                                boxes,
                                labels,
+                                label_scores=None,
                                masks=None,
                                keypoints=None,
                                aspect_ratio=1.0,
@@ -1051,6 +1319,8 @@ def random_crop_to_aspect_ratio(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: (optional) float32 tensor of shape [num_instances]
+      representing the score for each box.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
@@ -1068,8 +1338,8 @@ def random_crop_to_aspect_ratio(image,
           Boxes are in normalized form.
    labels: new labels.
-    If masks, or keypoints is not None, the function also returns:
+    If label_scores, masks, or keypoints is not None, the function also returns:
+    label_scores: new label scores.
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
@@ -1088,21 +1358,16 @@ def random_crop_to_aspect_ratio(image,
    orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
    def target_height_fn():
-      return tf.to_int32(
+      return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio))
-          tf.round(
-              tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))
+    target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
-    target_height = tf.cond(
+                            lambda: orig_height, target_height_fn)
-        orig_aspect_ratio >= new_aspect_ratio,
-        lambda: orig_height,
-        target_height_fn)
    def target_width_fn():
-      return tf.to_int32(
+      return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio))
-          tf.round(
-              tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))
+    target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
-    target_width = tf.cond(
+                           lambda: orig_width, target_width_fn)
-        orig_aspect_ratio <= new_aspect_ratio,
-        lambda: orig_width,
-        target_width_fn)
    # either offset_height = 0 and offset_width is randomly chosen from
    # [0, offset_width - target_width), or else offset_width = 0 and
@@ -1122,6 +1387,9 @@ def random_crop_to_aspect_ratio(image,
    boxlist = box_list.BoxList(boxes)
    boxlist.add_field('labels', labels)
+    if label_scores is not None:
+      boxlist.add_field('label_scores', label_scores)
    im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
    # remove boxes whose overlap with the image is less than overlap_thresh
@@ -1133,13 +1401,16 @@ def random_crop_to_aspect_ratio(image,
    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
                                                       im_box)
    new_boxlist = box_list_ops.clip_to_window(new_boxlist,
-                                              tf.constant(
+                                              tf.constant([0.0, 0.0, 1.0, 1.0],
-                                                  [0.0, 0.0, 1.0, 1.0],
                                                          tf.float32))
    new_boxes = new_boxlist.get()
    result = [new_image, new_boxes, new_labels]
+    if label_scores is not None:
+      new_label_scores = overlapping_boxlist.get_field('label_scores')
+      result.append(new_label_scores)
    if masks is not None:
      masks_inside_window = tf.gather(masks, keep_ids)
      masks_box_begin = tf.stack([0, offset_height, offset_width])
@@ -1158,6 +1429,122 @@ def random_crop_to_aspect_ratio(image,
    return tuple(result)
+def random_pad_to_aspect_ratio(image,
+                               boxes,
+                               masks=None,
+                               keypoints=None,
+                               aspect_ratio=1.0,
+                               min_padded_size_ratio=(1.0, 1.0),
+                               max_padded_size_ratio=(2.0, 2.0),
+                               seed=None):
+  """Randomly zero pads an image to the specified aspect ratio.
+  Pads the image so that the resulting image will have the specified aspect
+  ratio without scaling less than the min_padded_size_ratio or more than the
+  max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio
+  is lower than what is possible to maintain the aspect ratio, then this method
+  will use the least padding to achieve the specified aspect ratio.
+  The image is kept in the top-left corner; padding is only added at the
+  bottom and on the right.
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    aspect_ratio: aspect ratio (width / height) of the final image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width.
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width.
+    seed: random seed.
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form, relative to the padded image.
+    If masks or keypoints is not None, the function also returns (in this
+    order):
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  Raises:
+    ValueError: If image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+  with tf.name_scope('RandomPadToAspectRatio', values=[image]):
+    image_shape = tf.shape(image)
+    image_height = tf.to_float(image_shape[0])
+    image_width = tf.to_float(image_shape[1])
+    image_aspect_ratio = image_width / image_height
+    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+    # Smallest canvas with the requested aspect ratio that still contains the
+    # whole image: one side keeps the image's size, the other one grows.
+    target_height = tf.cond(
+        image_aspect_ratio <= new_aspect_ratio,
+        lambda: image_height,
+        lambda: image_width / new_aspect_ratio)
+    target_width = tf.cond(
+        image_aspect_ratio >= new_aspect_ratio,
+        lambda: image_width,
+        lambda: image_height * new_aspect_ratio)
+    # The requested min/max padded sizes are never allowed to drop below that
+    # smallest canvas.
+    min_height = tf.maximum(
+        min_padded_size_ratio[0] * image_height, target_height)
+    min_width = tf.maximum(
+        min_padded_size_ratio[1] * image_width, target_width)
+    max_height = tf.maximum(
+        max_padded_size_ratio[0] * image_height, target_height)
+    max_width = tf.maximum(
+        max_padded_size_ratio[1] * image_width, target_width)
+    # A single scale is applied to both sides so the aspect ratio is kept.
+    min_scale = tf.maximum(min_height / target_height, min_width / target_width)
+    max_scale = tf.minimum(max_height / target_height, max_width / target_width)
+    scale = tf.random_uniform([], min_scale, max_scale, seed=seed)
+    # Round to whole pixels *before* deriving im_box, so that the window used
+    # to renormalize boxes/keypoints matches the canvas actually produced by
+    # pad_to_bounding_box. Casting the raw product with tf.to_int32 would drop
+    # the fractional part and leave the two out of sync.
+    target_height = tf.round(scale * target_height)
+    target_width = tf.round(scale * target_width)
+    new_image = tf.image.pad_to_bounding_box(
+        image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width))
+    # Padded canvas expressed in the original image's normalized coordinates.
+    im_box = tf.stack([
+        0.0,
+        0.0,
+        target_height / image_height,
+        target_width / image_width
+    ])
+    boxlist = box_list.BoxList(boxes)
+    new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box)
+    new_boxes = new_boxlist.get()
+    result = [new_image, new_boxes]
+    if masks is not None:
+      # pad_to_bounding_box expects a trailing channel axis, so add one for
+      # the padding step and remove it again afterwards.
+      new_masks = tf.expand_dims(masks, -1)
+      new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
+                                               tf.to_int32(target_height),
+                                               tf.to_int32(target_width))
+      new_masks = tf.squeeze(new_masks, [-1])
+      result.append(new_masks)
+    if keypoints is not None:
+      new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box)
+      result.append(new_keypoints)
+    return tuple(result)
 def random_black_patches(image,
                         max_black_patches=10,
                         probability=0.5,
@@ -1213,8 +1600,8 @@ def random_black_patches(image,
  with tf.name_scope('RandomBlackPatchInImage', values=[image]):
    for _ in range(max_black_patches):
-      random_prob = tf.random_uniform([], minval=0.0, maxval=1.0,
+      random_prob = tf.random_uniform(
-                                      dtype=tf.float32, seed=random_seed)
+          [], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed)
      image = tf.cond(
          tf.greater(random_prob, probability), lambda: image,
          lambda: add_black_patch_to_image(image))
@@ -1255,9 +1642,7 @@ def random_resize_method(image, target_size):
  return resized_image
-def _compute_new_static_size(image,
+def _compute_new_static_size(image, min_dimension, max_dimension):
-                             min_dimension,
-                             max_dimension):
  """Compute new static shape for resize_to_range method."""
  image_shape = image.get_shape().as_list()
  orig_height = image_shape[0]
@@ -1292,9 +1677,7 @@ def _compute_new_static_size(image,
  return tf.constant(new_size)
-def _compute_new_dynamic_size(image,
+def _compute_new_dynamic_size(image, min_dimension, max_dimension):
-                              min_dimension,
-                              max_dimension):
  """Compute new dynamic shape for resize_to_range method."""
  image_shape = tf.shape(image)
  orig_height = tf.to_float(image_shape[0])
@@ -1335,6 +1718,7 @@ def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
+                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False):
  """Resizes an image so its dimensions are within the provided value.
@@ -1352,6 +1736,8 @@ def resize_to_range(image,
                   dimension.
    max_dimension: (optional) (scalar) maximum allowed size
                   of the larger image dimension.
+    method: (optional) interpolation method used in resizing. Defaults to
+            BILINEAR.
    align_corners: bool. If true, exactly align all 4 corners of the input
                   and output. Defaults to False.
@@ -1372,25 +1758,71 @@ def resize_to_range(image,
  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
    if image.get_shape().is_fully_defined():
-      new_size = _compute_new_static_size(image, min_dimension,
+      new_size = _compute_new_static_size(image, min_dimension, max_dimension)
-                                          max_dimension)
    else:
-      new_size = _compute_new_dynamic_size(image, min_dimension,
+      new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
-                                           max_dimension)
+    new_image = tf.image.resize_images(
-    new_image = tf.image.resize_images(image, new_size,
+        image, new_size, method=method, align_corners=align_corners)
-                                       align_corners=align_corners)
    result = new_image
    if masks is not None:
      new_masks = tf.expand_dims(masks, 3)
-      new_masks = tf.image.resize_nearest_neighbor(new_masks, new_size,
+      new_masks = tf.image.resize_nearest_neighbor(
-                                                   align_corners=align_corners)
+          new_masks, new_size, align_corners=align_corners)
      new_masks = tf.squeeze(new_masks, 3)
      result = [new_image, new_masks]
    return result
+# TODO: Make sure the static shapes are preserved.
+def resize_to_min_dimension(image, masks=None, min_dimension=600):
+  """Resizes image and masks given the min size maintaining the aspect ratio.
+  If one of the image dimensions is smaller than min_dimension, it will scale
+  the image such that its smallest dimension is equal to min_dimension.
+  Otherwise, will keep the image size as is.
+  Args:
+    image: a tensor of size [height, width, channels].
+    masks: (optional) a tensor of size [num_instances, height, width].
+    min_dimension: minimum image dimension.
+  Returns:
+    If masks is None, the resized image: a tensor of size
+    [new_height, new_width, channels].
+    Otherwise a tuple containing the following:
+      Resized image. A tensor of size [new_height, new_width, channels].
+      Resized masks. A tensor of
+        size [num_instances, new_height, new_width].
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+  with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
+    # Height and width are read from the dynamic shape.
+    image_height = tf.shape(image)[0]
+    image_width = tf.shape(image)[1]
+    min_image_dimension = tf.minimum(image_height, image_width)
+    # Never below the current smaller side, so the scale factor is >= 1 and
+    # the image is only ever enlarged.
+    min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
+    target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
+        min_image_dimension)
+    # NOTE(review): the float product is cast straight to int32; if the cast
+    # drops the fractional part, the smaller side could end up one pixel short
+    # of min_dimension for inexact ratios -- confirm whether rounding is wanted.
+    target_height = tf.to_int32(tf.to_float(image_height) * target_ratio)
+    target_width = tf.to_int32(tf.to_float(image_width) * target_ratio)
+    # A leading batch axis is added for resize_bilinear and removed right
+    # after.
+    image = tf.image.resize_bilinear(
+        tf.expand_dims(image, axis=0),
+        size=[target_height, target_width],
+        align_corners=True)
+    result = tf.squeeze(image, axis=0)
+    if masks is not None:
+      # Masks get a trailing axis for the resize op and are resized with
+      # nearest-neighbor interpolation to the same target size as the image.
+      masks = tf.image.resize_nearest_neighbor(
+          tf.expand_dims(masks, axis=3),
+          size=[target_height, target_width],
+          align_corners=True)
+      result = (result, tf.squeeze(masks, axis=3))
+    return result
 def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
  """Scales boxes from normalized to pixel coordinates.
@@ -1433,7 +1865,8 @@ def resize_image(image,
  with tf.name_scope(
      'ResizeImage',
      values=[image, new_height, new_width, method, align_corners]):
-    new_image = tf.image.resize_images(image, [new_height, new_width],
+    new_image = tf.image.resize_images(
+        image, [new_height, new_width],
        method=method,
        align_corners=align_corners)
    result = new_image
@@ -1451,8 +1884,7 @@ def resize_image(image,
        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
        return new_masks
-      masks = tf.cond(num_instances > 0,
+      masks = tf.cond(num_instances > 0, resize_masks_branch,
-                      resize_masks_branch,
                      reshape_masks_branch)
      result = [new_image, masks]
@@ -1520,6 +1952,7 @@ def rgb_to_gray(image):
 def ssd_random_crop(image,
                    boxes,
                    labels,
+                    label_scores=None,
                    masks=None,
                    keypoints=None,
                    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
@@ -1542,6 +1975,7 @@ def ssd_random_crop(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: rank 1 float32 tensor containing the scores.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
@@ -1567,13 +2001,14 @@ def ssd_random_crop(image,
           Boxes are in normalized form.
    labels: new labels.
-    If masks, or keypoints is not None, the function also returns:
+    If label_scores, masks, or keypoints is not None, the function also returns:
+    label_scores: new label scores.
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  def random_crop_selector(selected_result, index):
    """Applies random_crop_image to selected result.
@@ -1587,8 +2022,12 @@ def ssd_random_crop(image,
    """
    i = 3
    image, boxes, labels = selected_result[:i]
+    selected_label_scores = None
    selected_masks = None
    selected_keypoints = None
+    if label_scores is not None:
+      selected_label_scores = selected_result[i]
+      i += 1
    if masks is not None:
      selected_masks = selected_result[i]
      i += 1
@@ -1599,6 +2038,7 @@ def ssd_random_crop(image,
        image=image,
        boxes=boxes,
        labels=labels,
+        label_scores=selected_label_scores,
        masks=selected_masks,
        keypoints=selected_keypoints,
        min_object_covered=min_object_covered[index],
@@ -1610,7 +2050,8 @@ def ssd_random_crop(image,
  result = _apply_with_random_selector_tuples(
      tuple(
-          t for t in (image, boxes, labels, masks, keypoints) if t is not None),
+          t for t in (image, boxes, labels, label_scores, masks, keypoints)
+          if t is not None),
      random_crop_selector,
      num_cases=len(min_object_covered))
  return result
@@ -1619,13 +2060,14 @@ def ssd_random_crop(image,
 def ssd_random_crop_pad(image,
                        boxes,
                        labels,
+                        label_scores=None,
                        min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                        aspect_ratio_range=((0.5, 2.0),) * 6,
                        area_range=((0.1, 1.0),) * 6,
                        overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                        random_coef=(0.15,) * 6,
-                        min_padded_size_ratio=(None,) * 6,
+                        min_padded_size_ratio=((1.0, 1.0),) * 6,
-                        max_padded_size_ratio=(None,) * 6,
+                        max_padded_size_ratio=((2.0, 2.0),) * 6,
                        pad_color=(None,) * 6,
                        seed=None):
  """Random crop preprocessing with default parameters as in SSD paper.
@@ -1642,6 +2084,8 @@ def ssd_random_crop_pad(image,
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: float32 tensor of shape [num_instances] representing the
+      score for each box.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1654,11 +2098,9 @@ def ssd_random_crop_pad(image,
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    min_padded_size_ratio: min ratio of padded image height and width to the
-                           input image's height and width. If None, it will
+                           input image's height and width.
-                           be set to [0.0, 0.0].
    max_padded_size_ratio: max ratio of padded image height and width to the
-                           input image's height and width. If None, it will
+                           input image's height and width.
-                           be set to [2.0, 2.0].
    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
               if set as None, it will be set to average color of the randomly
               cropped image.
@@ -1669,14 +2111,21 @@ def ssd_random_crop_pad(image,
    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
           form.
    new_labels: new labels.
+    new_label_scores: new label scores.
  """
  def random_crop_pad_selector(image_boxes_labels, index):
-    image, boxes, labels = image_boxes_labels
+    i = 3
+    image, boxes, labels = image_boxes_labels[:i]
+    selected_label_scores = None
+    if label_scores is not None:
+      selected_label_scores = image_boxes_labels[i]
    return random_crop_pad_image(
        image,
        boxes,
        labels,
+        selected_label_scores,
        min_object_covered=min_object_covered[index],
        aspect_ratio_range=aspect_ratio_range[index],
        area_range=area_range[index],
@@ -1687,17 +2136,17 @@ def ssd_random_crop_pad(image,
        pad_color=pad_color[index],
        seed=seed)
-  new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
+  return _apply_with_random_selector_tuples(
-      (image, boxes, labels),
+      tuple(t for t in (image, boxes, labels, label_scores) if t is not None),
      random_crop_pad_selector,
      num_cases=len(min_object_covered))
-  return new_image, new_boxes, new_labels
 def ssd_random_crop_fixed_aspect_ratio(
    image,
    boxes,
    labels,
+    label_scores=None,
    masks=None,
    keypoints=None,
    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
@@ -1722,6 +2171,8 @@ def ssd_random_crop_fixed_aspect_ratio(
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: (optional) float32 tensor of shape [num_instances]
+      representing the score for each box.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
@@ -1747,23 +2198,26 @@ def ssd_random_crop_fixed_aspect_ratio(
           Boxes are in normalized form.
    labels: new labels.
-    If masks, or keypoints is not None, the function also returns:
+    If masks or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
-  crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
+  crop_result = ssd_random_crop(
-                                min_object_covered, aspect_ratio_range,
+      image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
-                                area_range, overlap_thresh, random_coef, seed)
+      aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
  i = 3
  new_image, new_boxes, new_labels = crop_result[:i]
+  new_label_scores = None
  new_masks = None
  new_keypoints = None
+  if label_scores is not None:
+    new_label_scores = crop_result[i]
+    i += 1
  if masks is not None:
    new_masks = crop_result[i]
    i += 1
@@ -1773,6 +2227,7 @@ def ssd_random_crop_fixed_aspect_ratio(
      new_image,
      new_boxes,
      new_labels,
+      new_label_scores,
      new_masks,
      new_keypoints,
      aspect_ratio=aspect_ratio,
@@ -1781,11 +2236,121 @@ def ssd_random_crop_fixed_aspect_ratio(
  return result
-def get_default_func_arg_map(include_instance_masks=False,
+def ssd_random_crop_pad_fixed_aspect_ratio(
+    image,
+    boxes,
+    labels,
+    label_scores=None,
+    masks=None,
+    keypoints=None,
+    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    aspect_ratio=1.0,
+    aspect_ratio_range=((0.5, 2.0),) * 7,
+    area_range=((0.1, 1.0),) * 7,
+    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    random_coef=(0.15,) * 7,
+    min_padded_size_ratio=(1.0, 1.0),
+    max_padded_size_ratio=(2.0, 2.0),
+    seed=None):
+  """Random crop and pad preprocessing with default parameters as in SSD paper.
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+  The only difference is that after the initial crop, images are zero-padded
+  to a fixed aspect ratio instead of being resized to that aspect ratio.
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    label_scores: (optional) float32 tensor of shape [num_instances]
+      representing the score for each box.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio: the final aspect ratio to pad to.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width.
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width.
+    seed: random seed. The same value is passed to both the crop and the pad
+          step.
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+    If label_scores, masks, or keypoints is not None, the function also
+    returns (in this order):
+    label_scores: new label scores.
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  crop_result = ssd_random_crop(
+      image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
+      aspect_ratio_range, area_range, overlap_thresh, random_coef, seed)
+  # crop_result always starts with (image, boxes, labels); the optional
+  # tensors follow in the order label_scores, masks, keypoints, and only those
+  # that were passed in are present. `i` walks over the optional slots.
+  i = 3
+  new_image, new_boxes, new_labels = crop_result[:i]
+  new_label_scores = None
+  new_masks = None
+  new_keypoints = None
+  if label_scores is not None:
+    new_label_scores = crop_result[i]
+    i += 1
+  if masks is not None:
+    new_masks = crop_result[i]
+    i += 1
+  if keypoints is not None:
+    new_keypoints = crop_result[i]
+  # The pad step takes neither labels nor label scores; they are unaffected by
+  # padding and are spliced back into the result below.
+  result = random_pad_to_aspect_ratio(
+      new_image,
+      new_boxes,
+      new_masks,
+      new_keypoints,
+      aspect_ratio=aspect_ratio,
+      min_padded_size_ratio=min_padded_size_ratio,
+      max_padded_size_ratio=max_padded_size_ratio,
+      seed=seed)
+  result = list(result)
+  # Both inserts target index 2: label_scores goes in first, then labels is
+  # inserted in front of it, giving (image, boxes, labels, label_scores, ...).
+  if new_label_scores is not None:
+    result.insert(2, new_label_scores)
+  result.insert(2, new_labels)
+  result = tuple(result)
+  return result
+def get_default_func_arg_map(include_label_scores=False,
+                             include_instance_masks=False,
                             include_keypoints=False):
  """Returns the default mapping from a preprocessor function to its args.
  Args:
+    include_label_scores: If True, preprocessing functions will modify the
+      label scores, too.
    include_instance_masks: If True, preprocessing functions will modify the
      instance masks, too.
    include_keypoints: If True, preprocessing functions will modify the
@@ -1794,6 +2359,10 @@ def get_default_func_arg_map(include_instance_masks=False,
  Returns:
    A map from preprocessing functions to the arguments they receive.
  """
+  groundtruth_label_scores = None
+  if include_label_scores:
+    groundtruth_label_scores = (fields.InputDataFields.groundtruth_label_scores)
  groundtruth_instance_masks = None
  if include_instance_masks:
    groundtruth_instance_masks = (
@@ -1805,12 +2374,24 @@ def get_default_func_arg_map(include_instance_masks=False,
  prep_func_arg_map = {
      normalize_image: (fields.InputDataFields.image,),
-      random_horizontal_flip: (fields.InputDataFields.image,
+      random_horizontal_flip: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      random_vertical_flip: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      random_rotation90: (
+          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_pixel_value_scale: (fields.InputDataFields.image,),
-      random_image_scale: (fields.InputDataFields.image,
+      random_image_scale: (
+          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      random_rgb_to_gray: (fields.InputDataFields.image,),
      random_adjust_brightness: (fields.InputDataFields.image,),
@@ -1819,54 +2400,79 @@ def get_default_func_arg_map(include_instance_masks=False,
      random_adjust_saturation: (fields.InputDataFields.image,),
      random_distort_color: (fields.InputDataFields.image,),
      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
-      random_crop_image: (fields.InputDataFields.image,
+      random_crop_image: (
+          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_scores,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_pad_image: (fields.InputDataFields.image,
                         fields.InputDataFields.groundtruth_boxes),
      random_crop_pad_image: (fields.InputDataFields.image,
                              fields.InputDataFields.groundtruth_boxes,
-                              fields.InputDataFields.groundtruth_classes),
+                              fields.InputDataFields.groundtruth_classes,
-      random_crop_to_aspect_ratio: (fields.InputDataFields.image,
+                              groundtruth_label_scores),
+      random_crop_to_aspect_ratio: (
+          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_scores,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      random_pad_to_aspect_ratio: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_black_patches: (fields.InputDataFields.image,),
      retain_boxes_above_threshold: (
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
-          fields.InputDataFields.groundtruth_label_scores,
+          groundtruth_label_scores,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      image_to_float: (fields.InputDataFields.image,),
      random_resize_method: (fields.InputDataFields.image,),
-      resize_to_range: (fields.InputDataFields.image,
+      resize_to_range: (
+          fields.InputDataFields.image,
+          groundtruth_instance_masks,),
+      resize_to_min_dimension: (
+          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      scale_boxes_to_pixel_coordinates: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          groundtruth_keypoints,),
-      flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
+      resize_image: (
-      resize_image: (fields.InputDataFields.image,
+          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      subtract_channel_mean: (fields.InputDataFields.image,),
      one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
      rgb_to_gray: (fields.InputDataFields.image,),
-      ssd_random_crop: (fields.InputDataFields.image,
+      ssd_random_crop: (
+          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_scores,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      ssd_random_crop_pad: (fields.InputDataFields.image,
                            fields.InputDataFields.groundtruth_boxes,
-                            fields.InputDataFields.groundtruth_classes),
+                            fields.InputDataFields.groundtruth_classes,
+                            groundtruth_label_scores),
      ssd_random_crop_fixed_aspect_ratio: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_scores,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      ssd_random_crop_pad_fixed_aspect_ratio: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_scores,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
  }
@@ -1936,6 +2542,7 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
    def get_arg(key):
      return tensor_dict[key] if key is not None else None
    args = [get_arg(a) for a in arg_names]
    results = func(*args, **params)
    if not isinstance(results, (list, tuple)):

--- a/research/object_detection/core/preprocessor_test.py
+++ b/research/object_detection/core/preprocessor_test.py
@@ -60,6 +60,10 @@ class PreprocessorTest(tf.test.TestCase):
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
+  def createEmptyTestBoxes(self):
+    """Returns a float32 box tensor built from the nested empty list `[[]]`."""
+    # NOTE(review): `[[]]` presumably yields shape [1, 0] rather than [0, 4] --
+    # confirm this is the "no boxes" shape the tests are meant to exercise.
+    boxes = tf.constant([[]], dtype=tf.float32)
+    return boxes
  def createTestBoxes(self):
    boxes = tf.constant(
        [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
@@ -162,7 +166,7 @@ class PreprocessorTest(tf.test.TestCase):
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
-  def expectedImagesAfterMirroring(self):
+  def expectedImagesAfterLeftRightFlip(self):
    images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
                             [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
                           dtype=tf.float32)
@@ -178,17 +182,54 @@ class PreprocessorTest(tf.test.TestCase):
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
-  def expectedBoxesAfterMirroring(self):
+  def expectedImagesAfterUpDownFlip(self):
+    """Builds the image tensor the vertical-flip tests compare against.
+
+    Returns:
+      A float32 tensor of shape [1, 4, 4, 3]: three [1, 4, 4] planes, each
+      given a trailing axis and then concatenated along that axis.
+    """
+    red_plane = [[[0.5, 0.5, 0, 0], [-1, 0, 0, 0],
+                  [-1, -1, 0, 0], [0, 0, 0, 0]]]
+    green_plane = [[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5],
+                    [-1, -1, 0, 0], [-1, -1, 0, 0]]]
+    blue_plane = [[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1],
+                   [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]]
+    channels = [
+        tf.expand_dims(tf.constant(plane, dtype=tf.float32), 3)
+        for plane in (red_plane, green_plane, blue_plane)
+    ]
+    return tf.concat(channels, 3)
+  def expectedImagesAfterRot90(self):
+    """Builds the image tensor the 90-degree rotation tests compare against.
+
+    Returns:
+      A float32 tensor of shape [1, 4, 4, 3] assembled from one [1, 4, 4]
+      plane per colour channel.
+    """
+    def _as_channel(plane):
+      # Append a channel axis so the planes can be joined along axis 3.
+      return tf.expand_dims(tf.constant(plane, dtype=tf.float32), 3)
+
+    red = _as_channel([[[0, 0, 0, 0], [0, 0, 0, 0],
+                        [0, -1, 0, 0.5], [0, -1, -1, 0.5]]])
+    green = _as_channel([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0],
+                          [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]])
+    blue = _as_channel([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5],
+                         [0, -1, 0, 0.5], [0, -1, -1, 0.5]]])
+    return tf.concat([red, green, blue], 3)
+  def expectedBoxesAfterLeftRightFlip(self):
    boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
                        dtype=tf.float32)
    return boxes
-  def expectedBoxesAfterXY(self):
+  def expectedBoxesAfterUpDownFlip(self):
-    boxes = tf.constant([[0.25, 0.0, 1.0, 0.75], [0.5, 0.25, 1, 0.75]],
+    boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]],
                        dtype=tf.float32)
    return boxes
-  def expectedMasksAfterMirroring(self):
+  def expectedBoxesAfterRot90(self):
+    """Returns the two boxes the 90-degree rotation tests compare against."""
+    rotated = [[0.0, 0.0, 0.75, 0.75],
+               [0.0, 0.25, 0.5, 0.75]]
+    return tf.constant(rotated, dtype=tf.float32)
+  def expectedMasksAfterLeftRightFlip(self):
    mask = np.array([
        [[0.0, 0.0, 255.0],
         [0.0, 0.0, 255.0],
@@ -198,6 +239,26 @@ class PreprocessorTest(tf.test.TestCase):
         [0.0, 255.0, 255.0]]])
    return tf.constant(mask, dtype=tf.float32)
+  def expectedMasksAfterUpDownFlip(self):
+    """Returns the two 3x3 masks the up-down flip test compares against.
+
+    Returns:
+      A float32 tensor of shape [2, 3, 3] holding 0.0 / 255.0 values.
+    """
+    first_mask = [[255.0, 0.0, 0.0],
+                  [255.0, 0.0, 0.0],
+                  [255.0, 0.0, 0.0]]
+    second_mask = [[255.0, 255.0, 0.0],
+                   [255.0, 255.0, 0.0],
+                   [255.0, 255.0, 0.0]]
+    flipped = np.array([first_mask, second_mask])
+    return tf.constant(flipped, dtype=tf.float32)
+  def expectedMasksAfterRot90(self):
+    """Returns the two 3x3 masks the 90-degree rotation test compares against.
+
+    Returns:
+      A float32 tensor of shape [2, 3, 3] holding 0.0 / 255.0 values.
+    """
+    first_mask = [[0.0, 0.0, 0.0],
+                  [0.0, 0.0, 0.0],
+                  [255.0, 255.0, 255.0]]
+    second_mask = [[0.0, 0.0, 0.0],
+                   [255.0, 255.0, 255.0],
+                   [255.0, 255.0, 255.0]]
+    rotated = np.array([first_mask, second_mask])
+    return tf.constant(rotated, dtype=tf.float32)
  def expectedLabelScoresAfterThresholding(self):
    return tf.constant([1.0], dtype=tf.float32)
@@ -326,42 +387,62 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllClose(
          retained_label_scores_, expected_retained_label_scores_)
-  def testRandomFlipBoxes(self):
+  def testFlipBoxesLeftRight(self):
+    """Checks `_flip_boxes_left_right` against the expected flipped boxes."""
    boxes = self.createTestBoxes()
+    flipped_boxes = preprocessor._flip_boxes_left_right(boxes)
+    expected_boxes = self.expectedBoxesAfterLeftRightFlip()
+    with self.test_session() as sess:
+      # Evaluate both tensors in one run, then compare the flattened arrays.
+      flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
+      self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-    # Case where the boxes are flipped.
+  def testFlipBoxesUpDown(self):
-    boxes_expected1 = self.expectedBoxesAfterMirroring()
+    boxes = self.createTestBoxes()
+    flipped_boxes = preprocessor._flip_boxes_up_down(boxes)
-    # Case where the boxes are not flipped.
+    expected_boxes = self.expectedBoxesAfterUpDownFlip()
-    boxes_expected2 = boxes
+    with self.test_session() as sess:
+      flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
+      self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
-    # After elementwise multiplication, the result should be all-zero since one
+  def testRot90Boxes(self):
-    # of them is all-zero.
+    boxes = self.createTestBoxes()
-    boxes_diff = tf.multiply(
+    rotated_boxes = preprocessor._rot90_boxes(boxes)
-        tf.squared_difference(boxes, boxes_expected1),
+    expected_boxes = self.expectedBoxesAfterRot90()
-        tf.squared_difference(boxes, boxes_expected2))
+    with self.test_session() as sess:
-    expected_result = tf.zeros_like(boxes_diff)
+      rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes])
+      self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten())
+  def testFlipMasksLeftRight(self):
+    test_mask = self.createTestMasks()
+    flipped_mask = preprocessor._flip_masks_left_right(test_mask)
+    expected_mask = self.expectedMasksAfterLeftRightFlip()
    with self.test_session() as sess:
-      (boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
+      flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
-      self.assertAllEqual(boxes_diff, expected_result)
+      self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
-  def testFlipMasks(self):
+  def testFlipMasksUpDown(self):
    test_mask = self.createTestMasks()
-    flipped_mask = preprocessor._flip_masks(test_mask)
+    flipped_mask = preprocessor._flip_masks_up_down(test_mask)
-    expected_mask = self.expectedMasksAfterMirroring()
+    expected_mask = self.expectedMasksAfterUpDownFlip()
    with self.test_session() as sess:
      flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
      self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
+  def testRot90Masks(self):
+    """Checks that `_rot90_masks` yields the expected rotated masks."""
+    rotated_tensor = preprocessor._rot90_masks(self.createTestMasks())
+    expected_tensor = self.expectedMasksAfterRot90()
+    with self.test_session() as sess:
+      rotated, expected = sess.run([rotated_tensor, expected_tensor])
+      self.assertAllEqual(rotated.flatten(), expected.flatten())
  def testRandomHorizontalFlip(self):
    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
    images = self.expectedImagesAfterNormalization()
    boxes = self.createTestBoxes()
    tensor_dict = {fields.InputDataFields.image: images,
                   fields.InputDataFields.groundtruth_boxes: boxes}
-    images_expected1 = self.expectedImagesAfterMirroring()
+    images_expected1 = self.expectedImagesAfterLeftRightFlip()
-    boxes_expected1 = self.expectedBoxesAfterMirroring()
+    boxes_expected1 = self.expectedBoxesAfterLeftRightFlip()
    images_expected2 = images
    boxes_expected2 = boxes
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
@@ -385,6 +466,31 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllClose(boxes_diff_, boxes_diff_expected_)
      self.assertAllClose(images_diff_, images_diff_expected_)
+  def testRandomHorizontalFlipWithEmptyBoxes(self):
+    """Runs random_horizontal_flip on images with an empty box tensor.
+
+    The squared difference to the flipped expectation is multiplied
+    elementwise by the squared difference to the original input; the product
+    is all zeros when the output matches one of them. The boxes are compared
+    against a fresh empty box tensor.
+    """
+    input_images = self.expectedImagesAfterNormalization()
+    input_boxes = self.createEmptyTestBoxes()
+    flipped_images = self.expectedImagesAfterLeftRightFlip()
+    expected_boxes = self.createEmptyTestBoxes()
+    output_dict = preprocessor.preprocess(
+        {
+            fields.InputDataFields.image: input_images,
+            fields.InputDataFields.groundtruth_boxes: input_boxes,
+        },
+        [(preprocessor.random_horizontal_flip, {})])
+    output_images = output_dict[fields.InputDataFields.image]
+    output_boxes = output_dict[fields.InputDataFields.groundtruth_boxes]
+    images_mismatch = tf.multiply(
+        tf.squared_difference(output_images, flipped_images),
+        tf.squared_difference(output_images, input_images))
+    images_zeros = tf.zeros_like(images_mismatch)
+    with self.test_session() as sess:
+      (images_mismatch_, images_zeros_, output_boxes_,
+       expected_boxes_) = sess.run([images_mismatch, images_zeros,
+                                    output_boxes, expected_boxes])
+      self.assertAllClose(output_boxes_, expected_boxes_)
+      self.assertAllClose(images_mismatch_, images_zeros_)
  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
    image_height = 3
@@ -416,6 +522,176 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertTrue(masks is not None)
      self.assertTrue(keypoints is not None)
+  def testRandomVerticalFlip(self):
+    """Checks random_vertical_flip output is either flipped or untouched.
+
+    For both images and boxes, the squared difference to the flipped
+    expectation is multiplied elementwise by the squared difference to the
+    original input; the product is all zeros when the output matches one of
+    them.
+    """
+    input_images = self.expectedImagesAfterNormalization()
+    input_boxes = self.createTestBoxes()
+    flipped_images = self.expectedImagesAfterUpDownFlip()
+    flipped_boxes = self.expectedBoxesAfterUpDownFlip()
+    output_dict = preprocessor.preprocess(
+        {
+            fields.InputDataFields.image: input_images,
+            fields.InputDataFields.groundtruth_boxes: input_boxes,
+        },
+        [(preprocessor.random_vertical_flip, {})])
+    output_images = output_dict[fields.InputDataFields.image]
+    output_boxes = output_dict[fields.InputDataFields.groundtruth_boxes]
+    boxes_mismatch = tf.multiply(
+        tf.squared_difference(output_boxes, flipped_boxes),
+        tf.squared_difference(output_boxes, input_boxes))
+    boxes_zeros = tf.zeros_like(boxes_mismatch)
+    images_mismatch = tf.multiply(
+        tf.squared_difference(output_images, flipped_images),
+        tf.squared_difference(output_images, input_images))
+    images_zeros = tf.zeros_like(images_mismatch)
+    with self.test_session() as sess:
+      (images_mismatch_, images_zeros_, boxes_mismatch_,
+       boxes_zeros_) = sess.run([images_mismatch, images_zeros,
+                                 boxes_mismatch, boxes_zeros])
+      self.assertAllClose(boxes_mismatch_, boxes_zeros_)
+      self.assertAllClose(images_mismatch_, images_zeros_)
+  def testRandomVerticalFlipWithEmptyBoxes(self):
+    """Runs random_vertical_flip on images with an empty box tensor.
+
+    The image check multiplies the squared differences to the flipped and to
+    the original images, which is all zeros when the output matches one of
+    them. The boxes are compared against a fresh empty box tensor.
+    """
+    source_images = self.expectedImagesAfterNormalization()
+    source_boxes = self.createEmptyTestBoxes()
+    inputs = {fields.InputDataFields.image: source_images,
+              fields.InputDataFields.groundtruth_boxes: source_boxes}
+    flipped_images = self.expectedImagesAfterUpDownFlip()
+    empty_boxes = self.createEmptyTestBoxes()
+    options = [(preprocessor.random_vertical_flip, {})]
+    outputs = preprocessor.preprocess(inputs, options)
+    result_images = outputs[fields.InputDataFields.image]
+    result_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
+    diff_to_flipped = tf.squared_difference(result_images, flipped_images)
+    diff_to_source = tf.squared_difference(result_images, source_images)
+    images_mismatch = tf.multiply(diff_to_flipped, diff_to_source)
+    images_zeros = tf.zeros_like(images_mismatch)
+    with self.test_session() as sess:
+      fetched = sess.run(
+          [images_mismatch, images_zeros, result_boxes, empty_boxes])
+      images_mismatch_, images_zeros_, result_boxes_, empty_boxes_ = fetched
+      self.assertAllClose(result_boxes_, empty_boxes_)
+      self.assertAllClose(images_mismatch_, images_zeros_)
+  def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
+    """Smoke-tests random_vertical_flip with masks and keypoints present.
+
+    Only checks that boxes, masks and keypoints can be evaluated after
+    preprocessing; the flipped values themselves are not verified here.
+    """
+    image_height = 3
+    image_width = 3
+    images = tf.random_uniform([1, image_height, image_width, 3])
+    boxes = self.createTestBoxes()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    keypoint_flip_permutation = self.createKeypointFlipPermutation()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    # Defined once: an earlier argument-less options list was overwritten
+    # before use, so only the variant carrying the permutation is kept.
+    preprocess_options = [
+        (preprocessor.random_vertical_flip,
+         {'keypoint_flip_permutation': keypoint_flip_permutation})]
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+    keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+    with self.test_session() as sess:
+      boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+      self.assertIsNotNone(boxes)
+      self.assertIsNotNone(masks)
+      self.assertIsNotNone(keypoints)
+  def testRandomRotation90(self):
+    """Checks random_rotation90 output is either rotated or untouched.
+
+    For both images and boxes, the squared difference to the rotated
+    expectation is multiplied elementwise by the squared difference to the
+    original input; the product is all zeros when the output matches one of
+    them.
+    """
+    source_images = self.expectedImagesAfterNormalization()
+    source_boxes = self.createTestBoxes()
+    inputs = {fields.InputDataFields.image: source_images,
+              fields.InputDataFields.groundtruth_boxes: source_boxes}
+    rotated_images = self.expectedImagesAfterRot90()
+    rotated_boxes = self.expectedBoxesAfterRot90()
+    options = [(preprocessor.random_rotation90, {})]
+    outputs = preprocessor.preprocess(inputs, options)
+    result_images = outputs[fields.InputDataFields.image]
+    result_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
+    boxes_mismatch = tf.multiply(
+        tf.squared_difference(result_boxes, rotated_boxes),
+        tf.squared_difference(result_boxes, source_boxes))
+    boxes_zeros = tf.zeros_like(boxes_mismatch)
+    images_mismatch = tf.multiply(
+        tf.squared_difference(result_images, rotated_images),
+        tf.squared_difference(result_images, source_images))
+    images_zeros = tf.zeros_like(images_mismatch)
+    with self.test_session() as sess:
+      fetched = sess.run(
+          [images_mismatch, images_zeros, boxes_mismatch, boxes_zeros])
+      images_mismatch_, images_zeros_, boxes_mismatch_, boxes_zeros_ = fetched
+      self.assertAllClose(boxes_mismatch_, boxes_zeros_)
+      self.assertAllClose(images_mismatch_, images_zeros_)
+  def testRandomRotation90WithEmptyBoxes(self):
+    """Runs random_rotation90 on images with an empty box tensor.
+
+    The image check multiplies the squared differences to the rotated and to
+    the original images, which is all zeros when the output matches one of
+    them. The boxes are compared against a fresh empty box tensor.
+    """
+    input_images = self.expectedImagesAfterNormalization()
+    input_boxes = self.createEmptyTestBoxes()
+    rotated_images = self.expectedImagesAfterRot90()
+    expected_boxes = self.createEmptyTestBoxes()
+    output_dict = preprocessor.preprocess(
+        {
+            fields.InputDataFields.image: input_images,
+            fields.InputDataFields.groundtruth_boxes: input_boxes,
+        },
+        [(preprocessor.random_rotation90, {})])
+    output_images = output_dict[fields.InputDataFields.image]
+    output_boxes = output_dict[fields.InputDataFields.groundtruth_boxes]
+    images_mismatch = tf.multiply(
+        tf.squared_difference(output_images, rotated_images),
+        tf.squared_difference(output_images, input_images))
+    images_zeros = tf.zeros_like(images_mismatch)
+    with self.test_session() as sess:
+      (images_mismatch_, images_zeros_, output_boxes_,
+       expected_boxes_) = sess.run([images_mismatch, images_zeros,
+                                    output_boxes, expected_boxes])
+      self.assertAllClose(output_boxes_, expected_boxes_)
+      self.assertAllClose(images_mismatch_, images_zeros_)
+  def testRunRandomRotation90WithMaskAndKeypoints(self):
+    """Smoke-tests random_rotation90 with masks and keypoints present.
+
+    Only checks that boxes, masks and keypoints can be evaluated after
+    preprocessing; the rotated values themselves are not verified here.
+    """
+    preprocess_options = [(preprocessor.random_rotation90, {})]
+    image_height = 3
+    image_width = 3
+    images = tf.random_uniform([1, image_height, image_width, 3])
+    boxes = self.createTestBoxes()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+    keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+    with self.test_session() as sess:
+      boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+      # assertIsNotNone reports the offending value on failure, unlike
+      # assertTrue(x is not None).
+      self.assertIsNotNone(boxes)
+      self.assertIsNotNone(masks)
+      self.assertIsNotNone(keypoints)
  def testRandomPixelValueScale(self):
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
@@ -600,9 +876,11 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
@@ -637,7 +915,7 @@ class PreprocessorTest(tf.test.TestCase):
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-        fields.InputDataFields.groundtruth_classes: labels
+        fields.InputDataFields.groundtruth_classes: labels,
    }
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options)
@@ -671,9 +949,11 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxesOutOfImage()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+        }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
@@ -703,9 +983,13 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    label_scores = self.createTestLabelScores()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores
+    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images = tensor_dict[fields.InputDataFields.image]
@@ -720,6 +1004,8 @@ class PreprocessorTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
+    distorted_label_scores = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_label_scores]
    boxes_shape = tf.shape(boxes)
    distorted_boxes_shape = tf.shape(distorted_boxes)
    images_shape = tf.shape(images)
@@ -728,15 +1014,18 @@ class PreprocessorTest(tf.test.TestCase):
    with self.test_session() as sess:
      (boxes_shape_, distorted_boxes_shape_, images_shape_,
       distorted_images_shape_, images_, distorted_images_,
-       boxes_, distorted_boxes_, labels_, distorted_labels_) = sess.run(
+       boxes_, distorted_boxes_, labels_, distorted_labels_,
+       label_scores_, distorted_label_scores_) = sess.run(
           [boxes_shape, distorted_boxes_shape, images_shape,
            distorted_images_shape, images, distorted_images,
-            boxes, distorted_boxes, labels, distorted_labels])
+            boxes, distorted_boxes, labels, distorted_labels,
+            label_scores, distorted_label_scores])
      self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
      self.assertAllEqual(images_shape_, distorted_images_shape_)
      self.assertAllClose(images_, distorted_images_)
      self.assertAllClose(boxes_, distorted_boxes_)
      self.assertAllEqual(labels_, distorted_labels_)
+      self.assertAllEqual(label_scores_, distorted_label_scores_)
  def testRandomCropWithMockSampleDistortedBoundingBox(self):
    preprocessing_options = [(preprocessor.normalize_image, {
@@ -751,9 +1040,12 @@ class PreprocessorTest(tf.test.TestCase):
                         [0.2, 0.4, 0.75, 0.75],
                         [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
    labels = tf.constant([1, 7, 11], dtype=tf.int32)
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images = tensor_dict[fields.InputDataFields.image]
@@ -786,6 +1078,36 @@ class PreprocessorTest(tf.test.TestCase):
        self.assertAllClose(distorted_boxes_, expected_boxes_)
        self.assertAllEqual(distorted_labels_, expected_labels_)
+  def testStrictRandomCropImageWithLabelScores(self):
+    """Checks `_strict_random_crop_image` when label scores are passed in.
+
+    `tf.image.sample_distorted_bounding_box` is patched to return a fixed
+    triple, so the cropped image shape, boxes and label scores can be
+    compared against constants.
+    """
+    image = self.createColorfulTestImage()[0]
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    with mock.patch.object(
+        tf.image, 'sample_distorted_bounding_box') as mock_sampler:
+      # Fixed stand-in for the sampler's three outputs.
+      mock_sampler.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      cropped = preprocessor._strict_random_crop_image(
+          image, boxes, labels, label_scores)
+      new_image, new_boxes, new_labels, new_label_scores = cropped
+      with self.test_session() as sess:
+        fetched = sess.run(
+            [new_image, new_boxes, new_labels, new_label_scores])
+        new_image, new_boxes, new_labels, new_label_scores = fetched
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(new_image.shape, [190, 237, 3])
+        self.assertAllEqual(new_label_scores, [1.0, 0.5])
+        self.assertAllClose(
+            new_boxes.flatten(), expected_boxes.flatten())
  def testStrictRandomCropImageWithMasks(self):
    image = self.createColorfulTestImage()[0]
    boxes = self.createTestBoxes()
@@ -799,17 +1121,15 @@ class PreprocessorTest(tf.test.TestCase):
          tf.constant([6, 143, 0], dtype=tf.int32),
          tf.constant([190, 237, -1], dtype=tf.int32),
          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
-      (new_image, new_boxes, new_labels,
+      new_image, new_boxes, new_labels, new_masks = (
-       new_masks) = preprocessor._strict_random_crop_image(
+          preprocessor._strict_random_crop_image(
-           image, boxes, labels, masks=masks)
+              image, boxes, labels, masks=masks))
      with self.test_session() as sess:
-        new_image, new_boxes, new_labels, new_masks = sess.run([
+        new_image, new_boxes, new_labels, new_masks = sess.run(
-            new_image, new_boxes, new_labels, new_masks])
+            [new_image, new_boxes, new_labels, new_masks])
+        expected_boxes = np.array(
-        expected_boxes = np.array([
+            [[0.0, 0.0, 0.75789469, 1.0],
-            [0.0, 0.0, 0.75789469, 1.0],
+             [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
-            [0.23157893, 0.24050637, 0.75789469, 1.0],
-        ], dtype=np.float32)
        self.assertAllEqual(new_image.shape, [190, 237, 3])
        self.assertAllEqual(new_masks.shape, [2, 190, 237])
        self.assertAllClose(
@@ -828,17 +1148,16 @@ class PreprocessorTest(tf.test.TestCase):
          tf.constant([6, 143, 0], dtype=tf.int32),
          tf.constant([190, 237, -1], dtype=tf.int32),
          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
-      (new_image, new_boxes, new_labels,
+      new_image, new_boxes, new_labels, new_keypoints = (
-       new_keypoints) = preprocessor._strict_random_crop_image(
+          preprocessor._strict_random_crop_image(
-           image, boxes, labels, keypoints=keypoints)
+              image, boxes, labels, keypoints=keypoints))
      with self.test_session() as sess:
-        new_image, new_boxes, new_labels, new_keypoints = sess.run([
+        new_image, new_boxes, new_labels, new_keypoints = sess.run(
-            new_image, new_boxes, new_labels, new_keypoints])
+            [new_image, new_boxes, new_labels, new_keypoints])
        expected_boxes = np.array([
            [0.0, 0.0, 0.75789469, 1.0],
-            [0.23157893, 0.24050637, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32)
-        ], dtype=np.float32)
        expected_keypoints = np.array([
            [[np.nan, np.nan],
             [np.nan, np.nan],
@@ -1038,9 +1357,10 @@ class PreprocessorTest(tf.test.TestCase):
    preprocessing_options = [
        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
    ]
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_label_scores=True)
    retained_tensor_dict = preprocessor.preprocess(
-        tensor_dict, preprocessing_options)
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
    retained_boxes = retained_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    retained_labels = retained_tensor_dict[
@@ -1076,6 +1396,7 @@ class PreprocessorTest(tf.test.TestCase):
    }
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_label_scores=True,
        include_instance_masks=True)
    preprocessing_options = [
@@ -1107,6 +1428,7 @@ class PreprocessorTest(tf.test.TestCase):
    }
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_label_scores=True,
        include_keypoints=True)
    preprocessing_options = [
@@ -1214,6 +1536,94 @@ class PreprocessorTest(tf.test.TestCase):
        self.assertAllClose(distorted_keypoints_.flatten(),
                            expected_keypoints.flatten())
+  def testRunRandomPadToAspectRatioWithMasks(self):
+    """Runs random_pad_to_aspect_ratio with instance masks in the arg map.
+
+    Uses the op's default parameters and checks the padded image shape, the
+    labels, the rescaled boxes and the padded mask shape.
+    """
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+    inputs = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+    arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+    options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+    outputs = preprocessor.preprocess(inputs, options, func_arg_map=arg_map)
+    fetches = [
+        outputs[fields.InputDataFields.image],
+        outputs[fields.InputDataFields.groundtruth_boxes],
+        outputs[fields.InputDataFields.groundtruth_classes],
+        outputs[fields.InputDataFields.groundtruth_instance_masks],
+    ]
+    with self.test_session() as sess:
+      padded_image_, padded_boxes_, padded_labels_, padded_masks_ = sess.run(
+          fetches)
+      expected_boxes = np.array(
+          [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+      self.assertAllEqual(padded_image_.shape, [1, 400, 400, 3])
+      self.assertAllEqual(padded_labels_, [1, 2])
+      self.assertAllClose(padded_boxes_.flatten(),
+                          expected_boxes.flatten())
+      self.assertAllEqual(padded_masks_.shape, [2, 400, 400])
+  def testRunRandomPadToAspectRatioWithKeypoints(self):
+    """Runs random_pad_to_aspect_ratio with keypoints in the arg map.
+
+    Uses the op's default parameters and checks the padded image shape, the
+    labels, the rescaled boxes and the rescaled keypoints.
+    """
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+    inputs = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    arg_map = preprocessor.get_default_func_arg_map(include_keypoints=True)
+    options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+    outputs = preprocessor.preprocess(inputs, options, func_arg_map=arg_map)
+    fetches = [
+        outputs[fields.InputDataFields.image],
+        outputs[fields.InputDataFields.groundtruth_boxes],
+        outputs[fields.InputDataFields.groundtruth_classes],
+        outputs[fields.InputDataFields.groundtruth_keypoints],
+    ]
+    with self.test_session() as sess:
+      (padded_image_, padded_boxes_, padded_labels_,
+       padded_keypoints_) = sess.run(fetches)
+      expected_boxes = np.array(
+          [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+      expected_keypoints = np.array([
+          [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]],
+          [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]],
+      ], dtype=np.float32)
+      self.assertAllEqual(padded_image_.shape, [1, 400, 400, 3])
+      self.assertAllEqual(padded_labels_, [1, 2])
+      self.assertAllClose(padded_boxes_.flatten(),
+                          expected_boxes.flatten())
+      self.assertAllClose(padded_keypoints_.flatten(),
+                          expected_keypoints.flatten())
  def testRandomPadImage(self):
    preprocessing_options = [(preprocessor.normalize_image, {
        'original_minval': 0,
@@ -1225,9 +1635,11 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images = tensor_dict[fields.InputDataFields.image]
@@ -1269,9 +1681,11 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images = tensor_dict[fields.InputDataFields.image]
@@ -1305,22 +1719,15 @@ class PreprocessorTest(tf.test.TestCase):
          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
  def testRandomCropToAspectRatio(self):
-    preprocessing_options = [(preprocessor.normalize_image, {
-        'original_minval': 0,
-        'original_maxval': 255,
-        'target_minval': 0,
-        'target_maxval': 1
-    })]
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-        fields.InputDataFields.groundtruth_classes: labels
+        fields.InputDataFields.groundtruth_classes: labels,
    }
-    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    tensor_dict = preprocessor.preprocess(tensor_dict, [])
    images = tensor_dict[fields.InputDataFields.image]
    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
@@ -1346,6 +1753,41 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
      self.assertEqual(images_shape_[2], cropped_images_shape_[2])
+  def testRandomPadToAspectRatio(self):
+    """Tests random_pad_to_aspect_ratio with aspect_ratio=2.0 via shapes.
+
+    Asserts that the groundtruth box tensor keeps its shape, that image
+    dimension 1 is unchanged and that image dimension 2 doubles.
+    """
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
+    # First pass uses an empty options list; `images` is re-read from its
+    # output and serves as the reference shape below.
+    tensor_dict = preprocessor.preprocess(tensor_dict, [])
+    images = tensor_dict[fields.InputDataFields.image]
+    preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {
+        'aspect_ratio': 2.0
+    })]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_) = sess.run([
+           boxes_shape, padded_boxes_shape, images_shape, padded_images_shape
+       ])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      # Dimensions 1 and 2 are presumably height and width -- TODO confirm
+      # against createTestImages.
+      self.assertEqual(images_shape_[1], padded_images_shape_[1])
+      self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
  def testRandomBlackPatches(self):
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
@@ -1395,6 +1837,60 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllEqual(expected_images_shape_,
                          resized_images_shape_)
+  def testResizeImageWithMasks(self):
+    """Tests resize_image output shapes for images with instance masks.
+
+    Every image/masks pair is resized with new_height=50 and new_width=100.
+    The image must come out as [50, 100, 3]; the masks must keep their
+    leading dimension and take the new 50 x 100 trailing dimensions.
+    """
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      # Random contents are enough: only the output shapes are asserted.
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+      with self.test_session() as sess:
+        # The evaluated shapes rebind the names of the shape tensors.
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+  def testResizeImageWithNoInstanceMask(self):
+    """Tests resize_image output shapes when the masks tensor is empty.
+
+    The masks inputs have a leading dimension of 0. After resizing with
+    new_height=50 and new_width=100 the masks must be [0, 50, 100] and the
+    image [50, 100, 3].
+    """
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      # Random contents are enough: only the output shapes are asserted.
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+      with self.test_session() as sess:
+        # The evaluated shapes rebind the names of the shape tensors.
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
  def testResizeToRangePreservesStaticSpatialShape(self):
    """Tests image resizing, checking output sizes."""
    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
@@ -1483,10 +1979,10 @@ class PreprocessorTest(tf.test.TestCase):
    """Tests image resizing, checking output sizes."""
    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
-    height = 50
+    min_dim = 50
-    width = 100
+    max_dim = 100
-    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
-    expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
    for (in_image_shape, expected_image_shape, in_masks_shape,
         expected_mask_shape) in zip(in_image_shape_list,
@@ -1495,8 +1991,8 @@ class PreprocessorTest(tf.test.TestCase):
                                     expected_masks_shape_list):
      in_image = tf.random_uniform(in_image_shape)
      in_masks = tf.random_uniform(in_masks_shape)
-      out_image, out_masks = preprocessor.resize_image(
+      out_image, out_masks = preprocessor.resize_to_range(
-          in_image, in_masks, new_height=height, new_width=width)
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
      out_image_shape = tf.shape(out_image)
      out_masks_shape = tf.shape(out_masks)
@@ -1528,6 +2024,67 @@ class PreprocessorTest(tf.test.TestCase):
        out_image_shape = sess.run(out_image_shape)
        self.assertAllEqual(out_image_shape, expected_shape)
+  def testResizeToMinDimensionTensorShapes(self):
+    """Tests resize_to_min_dimension output shapes with placeholder inputs.
+
+    Image and masks are both fed through placeholders, so their spatial
+    shapes are only known at run time. With min_dimension=50 the
+    [60, 55, 3] input is expected to keep its shape and the [15, 30, 3]
+    input is expected to become [50, 100, 3]; masks follow the image.
+    """
+    in_image_shape_list = [[60, 55, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 55], [10, 15, 30]]
+    min_dim = 50
+    expected_image_shape_list = [[60, 55, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]]
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      # Keep both inputs as placeholders. Rebinding in_masks to a statically
+      # shaped tensor would bypass the dynamic-shape path for the masks.
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+      out_image, out_masks = preprocessor.resize_to_min_dimension(
+          in_image, in_masks, min_dimension=min_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+      with self.test_session() as sess:
+        # The evaluated shapes rebind the names of the shape tensors.
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape],
+            feed_dict={
+                in_image: np.random.randn(*in_image_shape),
+                in_masks: np.random.randn(*in_masks_shape)
+            })
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+  def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self):
+    """Tests resize_to_min_dimension output shapes with empty masks.
+
+    The masks inputs have a leading dimension of 0. With min_dimension=50
+    the [60, 40, 3] image is expected to become [75, 50, 3] and the
+    [15, 30, 3] image [50, 100, 3]; the masks keep their leading 0 and take
+    the same resized trailing dimensions.
+    """
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    min_dim = 50
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      # Random contents are enough: only the output shapes are asserted.
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_to_min_dimension(
+          in_image, in_masks, min_dimension=min_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+      with self.test_session() as sess:
+        # The evaluated shapes rebind the names of the shape tensors.
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+  def testResizeToMinDimensionRaisesErrorOn4DImage(self):
+    """Tests that resize_to_min_dimension raises ValueError on a 4-D image."""
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      # Pass the size by keyword: the second positional parameter is the
+      # masks tensor, as the other resize_to_min_dimension calls show.
+      preprocessor.resize_to_min_dimension(image, min_dimension=500)
  def testScaleBoxesToPixelCoordinates(self):
    """Tests box scaling, checking scaled values."""
    in_shape = [60, 40, 3]
@@ -1599,9 +2156,11 @@ class PreprocessorTest(tf.test.TestCase):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
@@ -1633,9 +2192,11 @@ class PreprocessorTest(tf.test.TestCase):
            'target_maxval': 1
        }),
        (preprocessor.ssd_random_crop_pad, {})]
-    tensor_dict = {fields.InputDataFields.image: images,
+    tensor_dict = {
+        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
-                   fields.InputDataFields.groundtruth_classes: labels}
+        fields.InputDataFields.groundtruth_classes: labels,
+    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
@@ -1655,7 +2216,10 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
      self.assertAllEqual(images_rank_, distorted_images_rank_)
-  def testSSDRandomCropFixedAspectRatio(self):
+  def _testSSDRandomCropFixedAspectRatio(self,
+                                         include_label_scores,
+                                         include_instance_masks,
+                                         include_keypoints):
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
@@ -1672,54 +2236,26 @@ class PreprocessorTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels
    }
-    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+    if include_label_scores:
-                                                    preprocessing_options)
+      label_scores = self.createTestLabelScores()
-    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+      tensor_dict[fields.InputDataFields.groundtruth_label_scores] = (
-    distorted_boxes = distorted_tensor_dict[
+          label_scores)
-        fields.InputDataFields.groundtruth_boxes]
+    if include_instance_masks:
-    images_rank = tf.rank(images)
-    distorted_images_rank = tf.rank(distorted_images)
-    boxes_rank = tf.rank(boxes)
-    distorted_boxes_rank = tf.rank(distorted_boxes)
-    with self.test_session() as sess:
-      (boxes_rank_, distorted_boxes_rank_, images_rank_,
-       distorted_images_rank_) = sess.run(
-           [boxes_rank, distorted_boxes_rank, images_rank,
-            distorted_images_rank])
-      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
-      self.assertAllEqual(images_rank_, distorted_images_rank_)
-  def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
-    images = self.createTestImages()
-    boxes = self.createTestBoxes()
-    labels = self.createTestLabels()
      masks = self.createTestMasks()
+      tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
+    if include_keypoints:
      keypoints = self.createTestKeypoints()
-    preprocessing_options = [
+      tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
-        (preprocessor.normalize_image, {
-            'original_minval': 0,
-            'original_maxval': 255,
-            'target_minval': 0,
-            'target_maxval': 1
-        }),
-        (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
-    tensor_dict = {
-        fields.InputDataFields.image: images,
-        fields.InputDataFields.groundtruth_boxes: boxes,
-        fields.InputDataFields.groundtruth_classes: labels,
-        fields.InputDataFields.groundtruth_instance_masks: masks,
-        fields.InputDataFields.groundtruth_keypoints: keypoints,
-    }
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
-        include_instance_masks=True, include_keypoints=True)
+        include_label_scores=include_label_scores,
+        include_instance_masks=include_instance_masks,
+        include_keypoints=include_keypoints)
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    images_rank = tf.rank(images)
    distorted_images_rank = tf.rank(distorted_images)
    boxes_rank = tf.rank(boxes)
@@ -1733,5 +2269,20 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
      self.assertAllEqual(images_rank_, distorted_images_rank_)
+  def testSSDRandomCropFixedAspectRatio(self):
+    """Runs the shared check with no label scores, masks or keypoints."""
+    self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
+                                            include_instance_masks=False,
+                                            include_keypoints=False)
+  def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
+    """Runs the shared check with instance masks and keypoints included."""
+    self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
+                                            include_instance_masks=True,
+                                            include_keypoints=True)
+  def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
+    """Runs the shared check with label scores, masks and keypoints."""
+    self._testSSDRandomCropFixedAspectRatio(include_label_scores=True,
+                                            include_instance_masks=True,
+                                            include_keypoints=True)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -18,6 +18,7 @@
 Specifies:
  InputDataFields: standard fields used by reader/preprocessor/batcher.
+  DetectionResultFields: standard fields returned by object detector.
  BoxListFields: standard field used by BoxList
  TfExampleFields: standard fields for tf-example data format (go/tf-example).
 """
@@ -41,12 +42,17 @@ class InputDataFields(object):
    groundtruth_boxes: coordinates of the ground truth boxes in the image.
    groundtruth_classes: box-level class labels.
    groundtruth_label_types: box-level label types (e.g. explicit negative).
-    groundtruth_is_crowd: is the groundtruth a single object or a crowd.
+    groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
+      is the groundtruth a single object or a crowd.
    groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object
+    groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
+      same class, forming a connected group, where instances are heavily
+      occluding each other.
    proposal_boxes: coordinates of object proposal boxes.
    proposal_objectness: objectness score of each proposal.
    groundtruth_instance_masks: ground truth instance masks.
+    groundtruth_instance_boundaries: ground truth instance boundaries.
    groundtruth_instance_classes: instance mask-level class labels.
    groundtruth_keypoints: ground truth keypoints.
    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
@@ -64,15 +70,43 @@ class InputDataFields(object):
  groundtruth_is_crowd = 'groundtruth_is_crowd'
  groundtruth_area = 'groundtruth_area'
  groundtruth_difficult = 'groundtruth_difficult'
+  groundtruth_group_of = 'groundtruth_group_of'
  proposal_boxes = 'proposal_boxes'
  proposal_objectness = 'proposal_objectness'
  groundtruth_instance_masks = 'groundtruth_instance_masks'
+  groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
  groundtruth_instance_classes = 'groundtruth_instance_classes'
  groundtruth_keypoints = 'groundtruth_keypoints'
  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
  groundtruth_label_scores = 'groundtruth_label_scores'
+class DetectionResultFields(object):
+  """Naming conventions for storing the output of the detector.
+  Attributes:
+    source_id: source of the original image.
+    key: unique key corresponding to image.
+    detection_boxes: coordinates of the detection boxes in the image.
+    detection_scores: detection scores for the detection boxes in the image.
+    detection_classes: detection-level class labels.
+    detection_masks: contains a segmentation mask for each detection box.
+    detection_boundaries: contains an object boundary for each detection box.
+    detection_keypoints: contains detection keypoints for each detection box.
+    num_detections: number of detections in the batch.
+  """
+  source_id = 'source_id'
+  key = 'key'
+  detection_boxes = 'detection_boxes'
+  detection_scores = 'detection_scores'
+  detection_classes = 'detection_classes'
+  detection_masks = 'detection_masks'
+  detection_boundaries = 'detection_boundaries'
+  detection_keypoints = 'detection_keypoints'
+  num_detections = 'num_detections'
 class BoxListFields(object):
  """Naming conventions for BoxLists.
@@ -83,6 +117,7 @@ class BoxListFields(object):
    weights: sample weights per bounding box.
    objectness: objectness score per bounding box.
    masks: masks per bounding box.
+    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
  """
@@ -92,6 +127,7 @@ class BoxListFields(object):
  weights = 'weights'
  objectness = 'objectness'
  masks = 'masks'
+  boundaries = 'boundaries'
  keypoints = 'keypoints'
  keypoint_heatmaps = 'keypoint_heatmaps'
@@ -112,7 +148,7 @@ class TfExampleFields(object):
    width: width of image in pixels, e.g. 581
    source_id: original source of the image
    object_class_text: labels in text format, e.g. ["person", "cat"]
-    object_class_text: labels in numbers, e.g. [16, 8]
+    object_class_label: labels in numbers, e.g. [16, 8]
    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
@@ -121,10 +157,20 @@ class TfExampleFields(object):
    object_truncated: is object truncated, e.g. [true, false]
    object_occluded: is object occluded, e.g. [true, false]
    object_difficult: is object difficult, e.g. [true, false]
-    object_is_crowd: is the object a single object or a crowd
+    object_group_of: is object a single object or a group of objects
+    object_depiction: is object a depiction
+    object_is_crowd: [DEPRECATED, use object_group_of instead]
+      is the object a single object or a crowd
    object_segment_area: the area of the segment.
    instance_masks: instance segmentation masks.
+    instance_boundaries: instance boundaries.
    instance_classes: Classes for each instance segmentation mask.
+    detection_class_label: class label in numbers.
+    detection_bbox_ymin: ymin coordinates of a detection box.
+    detection_bbox_xmin: xmin coordinates of a detection box.
+    detection_bbox_ymax: ymax coordinates of a detection box.
+    detection_bbox_xmax: xmax coordinates of a detection box.
+    detection_score: detection score for the class label and box.
  """
  image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is reserved keyword
@@ -144,7 +190,16 @@ class TfExampleFields(object):
  object_truncated = 'image/object/truncated'
  object_occluded = 'image/object/occluded'
  object_difficult = 'image/object/difficult'
+  object_group_of = 'image/object/group_of'
+  object_depiction = 'image/object/depiction'
  object_is_crowd = 'image/object/is_crowd'
  object_segment_area = 'image/object/segment/area'
  instance_masks = 'image/segmentation/object'
+  instance_boundaries = 'image/boundaries/object'
  instance_classes = 'image/segmentation/object/class'
+  detection_class_label = 'image/detection/label'
+  detection_bbox_ymin = 'image/detection/bbox/ymin'
+  detection_bbox_xmin = 'image/detection/bbox/xmin'
+  detection_bbox_ymax = 'image/detection/bbox/ymax'
+  detection_bbox_xmax = 'image/detection/bbox/xmax'
+  detection_score = 'image/detection/score'
--- a/research/object_detection/core/target_assigner.py
+++ b/research/object_detection/core/target_assigner.py
@@ -50,7 +50,7 @@ class TargetAssigner(object):
  def __init__(self, similarity_calc, matcher, box_coder,
               positive_class_weight=1.0, negative_class_weight=1.0,
               unmatched_cls_target=None):
-    """Construct Multibox Target Assigner.
+    """Construct Object Detection Target Assigner.
    Args:
      similarity_calc: a RegionSimilarityCalculator
@@ -108,7 +108,7 @@ class TargetAssigner(object):
    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
-      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
        to None, groundtruth_labels assumes a binary problem where all
@@ -140,10 +140,16 @@ class TargetAssigner(object):
      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
                                                  0))
      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
-    shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
+    unmatched_shape_assert = tf.assert_equal(
-                                   tf.shape(self._unmatched_cls_target))
+        tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target),
+        message='Unmatched class target shape incompatible '
-    with tf.control_dependencies([shape_assert]):
+        'with groundtruth labels shape!')
+    labels_and_box_shapes_assert = tf.assert_equal(
+        tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(),
+        message='Groundtruth boxes and labels have incompatible shapes!')
+    with tf.control_dependencies(
+        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
      match = self._matcher.match(match_quality_matrix, **params)
@@ -316,8 +322,8 @@ class TargetAssigner(object):
    return self._box_coder
-# TODO: This method pulls in all the implementation dependencies into core.
+# TODO: This method pulls in all the implementation dependencies into
-# Therefore its best to have this factory method outside of core.
+# core. Therefore it's best to have this factory method outside of core.
 def create_target_assigner(reference, stage=None,
                           positive_class_weight=1.0,
                           negative_class_weight=1.0,

--- a/research/object_detection/core/target_assigner_test.py
+++ b/research/object_detection/core/target_assigner_test.py
@@ -327,6 +327,41 @@ class TargetAssignerTest(tf.test.TestCase):
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)
+  def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
+    """Tests that assign fails when box and label counts do not match.
+
+    Four groundtruth boxes are paired with three label rows; evaluating the
+    assignment outputs must raise InvalidArgumentError carrying the
+    'Groundtruth boxes and labels have incompatible shapes!' message.
+    """
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+    # Four groundtruth boxes ...
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.0, 0.0, 0.5, 0.8],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+    # ... but only three label rows, which is the mismatch under test.
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+    with self.test_session() as sess:
+      # The error is expected inside sess.run, not at the assign() call above.
+      with self.assertRaisesWithPredicateMatch(
+          tf.errors.InvalidArgumentError,
+          'Groundtruth boxes and labels have incompatible shapes!'):
+        sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
  def test_raises_error_on_invalid_groundtruth_labels(self):
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()