Unverified Commit fd7b6887 authored by Jonathan Huang's avatar Jonathan Huang Committed by GitHub
Browse files

Merge pull request #3293 from pkulzc/master

Internal changes of object_detection 
parents f98ec55e 1efe98bb
...@@ -20,7 +20,7 @@ import tensorflow as tf ...@@ -20,7 +20,7 @@ import tensorflow as tf
from object_detection.core import matcher from object_detection.core import matcher
class AnchorMatcherTest(tf.test.TestCase): class MatchTest(tf.test.TestCase):
def test_get_correct_matched_columnIndices(self): def test_get_correct_matched_columnIndices(self):
match_results = tf.constant([3, 1, -1, 0, -1, 5, -2]) match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
...@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase): ...@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
self.assertAllEqual(all_indices_sorted, self.assertAllEqual(all_indices_sorted,
np.arange(num_matches, dtype=np.int32)) np.arange(num_matches, dtype=np.int32))
def test_scalar_gather_based_on_match(self):
  """Gathers from a 1-D input: matched rows pull values, others get fills.

  match_results semantics: >= 0 is a matched column index, -1 is unmatched,
  -2 is ignored. Unmatched rows receive unmatched_value (100) and ignored
  rows receive ignored_value (200).
  """
  match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
  input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
  expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=100.,
                                                ignored_value=200.)
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match(self):
  """Gathers rows of a 2-D input; unmatched/ignored rows become zero rows."""
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  # Row 0 matches input row 1; rows 1 (-1, unmatched) and 2 (-2, ignored)
  # are filled with the zero vectors supplied below.
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session():
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
  """Same gather as above, but verifies the matmul-based gather path.

  With use_matmul_gather=True the Match implementation must avoid the
  `Gather` op entirely (e.g. for TPU compatibility), which we check by
  scanning the graph's operations.
  """
  match_results = tf.constant([1, -1, -2])
  input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
                             dtype=tf.float32)
  expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
  match = matcher.Match(match_results, use_matmul_gather=True)
  gathered_tensor = match.gather_based_on_match(input_tensor,
                                                unmatched_value=tf.zeros(4),
                                                ignored_value=tf.zeros(4))
  # assertEquals is a deprecated alias; assertEqual is the supported API.
  self.assertEqual(gathered_tensor.dtype, tf.float32)
  with self.test_session() as sess:
    # Bug fix: the original used `op.name is not 'Gather'`, an identity
    # comparison against a string literal whose result depends on string
    # interning. String comparison must use `!=`.
    self.assertTrue(
        all(op.name != 'Gather' for op in sess.graph.get_operations()))
    gathered_tensor_out = gathered_tensor.eval()
  self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function). ...@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class. of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; However, we support By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely, a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints. instance masks and keypoints.
...@@ -106,12 +117,12 @@ class DetectionModel(object): ...@@ -106,12 +117,12 @@ class DetectionModel(object):
This function is responsible for any scaling/shifting of input values that This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image. is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary as images It is also responsible for any resizing, padding that might be necessary
are assumed to arrive in arbitrary sizes. While this function could as images are assumed to arrive in arbitrary sizes. While this function
conceivably be part of the predict method (below), it is often convenient could conceivably be part of the predict method (below), it is often
to keep these separate --- for example, we may want to preprocess on one convenient to keep these separate --- for example, we may want to preprocess
device, place onto a queue, and let another device (e.g., the GPU) handle on one device, place onto a queue, and let another device (e.g., the GPU)
prediction. handle prediction.
A few important notes about the preprocess function: A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor + We assume that this operation does not have any trainable variables nor
...@@ -134,11 +145,15 @@ class DetectionModel(object): ...@@ -134,11 +145,15 @@ class DetectionModel(object):
Returns: Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32 preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images. tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
""" """
pass pass
@abstractmethod @abstractmethod
def predict(self, preprocessed_inputs): def predict(self, preprocessed_inputs, true_image_shapes):
"""Predict prediction tensors from inputs tensor. """Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions. Outputs of this function can be passed to loss or postprocess functions.
...@@ -146,6 +161,10 @@ class DetectionModel(object): ...@@ -146,6 +161,10 @@ class DetectionModel(object):
Args: Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images. representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns: Returns:
prediction_dict: a dictionary holding prediction tensors to be prediction_dict: a dictionary holding prediction tensors to be
...@@ -154,7 +173,7 @@ class DetectionModel(object): ...@@ -154,7 +173,7 @@ class DetectionModel(object):
pass pass
@abstractmethod @abstractmethod
def postprocess(self, prediction_dict, **params): def postprocess(self, prediction_dict, true_image_shapes, **params):
"""Convert predicted output tensors to final detections. """Convert predicted output tensors to final detections.
Outputs adhere to the following conventions: Outputs adhere to the following conventions:
...@@ -172,6 +191,10 @@ class DetectionModel(object): ...@@ -172,6 +191,10 @@ class DetectionModel(object):
Args: Args:
prediction_dict: a dictionary holding prediction tensors. prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of
DetectionModel. DetectionModel.
...@@ -190,7 +213,7 @@ class DetectionModel(object): ...@@ -190,7 +213,7 @@ class DetectionModel(object):
pass pass
@abstractmethod @abstractmethod
def loss(self, prediction_dict): def loss(self, prediction_dict, true_image_shapes):
"""Compute scalar loss tensors with respect to provided groundtruth. """Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been Calling this function requires that groundtruth tensors have been
...@@ -198,6 +221,10 @@ class DetectionModel(object): ...@@ -198,6 +221,10 @@ class DetectionModel(object):
Args: Args:
prediction_dict: a dictionary holding predicted tensors prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns: Returns:
a dictionary mapping strings (loss names) to scalar tensors representing a dictionary mapping strings (loss names) to scalar tensors representing
...@@ -209,7 +236,8 @@ class DetectionModel(object): ...@@ -209,7 +236,8 @@ class DetectionModel(object):
groundtruth_boxes_list, groundtruth_boxes_list,
groundtruth_classes_list, groundtruth_classes_list,
groundtruth_masks_list=None, groundtruth_masks_list=None,
groundtruth_keypoints_list=None): groundtruth_keypoints_list=None,
groundtruth_weights_list=None):
"""Provide groundtruth tensors. """Provide groundtruth tensors.
Args: Args:
...@@ -230,10 +258,15 @@ class DetectionModel(object): ...@@ -230,10 +258,15 @@ class DetectionModel(object):
shape [num_boxes, num_keypoints, 2] containing keypoints. shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN. missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
""" """
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[ self._groundtruth_lists[
fields.BoxListFields.classes] = groundtruth_classes_list fields.BoxListFields.classes] = groundtruth_classes_list
if groundtruth_weights_list:
self._groundtruth_lists[fields.BoxListFields.
weights] = groundtruth_weights_list
if groundtruth_masks_list: if groundtruth_masks_list:
self._groundtruth_lists[ self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list fields.BoxListFields.masks] = groundtruth_masks_list
......
...@@ -20,6 +20,7 @@ import tensorflow as tf ...@@ -20,6 +20,7 @@ import tensorflow as tf
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def multiclass_non_max_suppression(boxes, def multiclass_non_max_suppression(boxes,
...@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes, ...@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
clip_window=None, clip_window=None,
change_coordinate_frame=False, change_coordinate_frame=False,
masks=None, masks=None,
boundaries=None,
additional_fields=None, additional_fields=None,
scope=None): scope=None):
"""Multi-class version of non maximum suppression. """Multi-class version of non maximum suppression.
...@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes, ...@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class. on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be suppression, all tensors corresponding to the selected boxes will be
...@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes, ...@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
per_class_boxes_list = tf.unstack(boxes, axis=1) per_class_boxes_list = tf.unstack(boxes, axis=1)
if masks is not None: if masks is not None:
per_class_masks_list = tf.unstack(masks, axis=1) per_class_masks_list = tf.unstack(masks, axis=1)
if boundaries is not None:
per_class_boundaries_list = tf.unstack(boundaries, axis=1)
boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
else [0] * num_classes) else [0] * num_classes)
for class_idx, boxes_idx in zip(range(num_classes), boxes_ids): for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
...@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes, ...@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
per_class_masks = per_class_masks_list[boxes_idx] per_class_masks = per_class_masks_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.masks, boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
per_class_masks) per_class_masks)
if boundaries is not None:
per_class_boundaries = per_class_boundaries_list[boxes_idx]
boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
per_class_boundaries)
if additional_fields is not None: if additional_fields is not None:
for key, tensor in additional_fields.items(): for key, tensor in additional_fields.items():
boxlist_and_class_scores.add_field(key, tensor) boxlist_and_class_scores.add_field(key, tensor)
...@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
max_size_per_class: maximum number of retained boxes per class. max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class. default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max] clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
representing the window to clip boxes to before performing non-max of the form [y_min, x_min, y_max, x_max] representing the window to clip
suppression. boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_widow is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window relative to clip_window (this can only be set to True if a clip_window
is provided) is provided)
...@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
if q != 1 and q != num_classes: if q != 1 and q != num_classes:
raise ValueError('third dimension of boxes must be either 1 or equal ' raise ValueError('third dimension of boxes must be either 1 or equal '
'to the third dimension of scores') 'to the third dimension of scores')
if change_coordinate_frame and clip_window is None:
raise ValueError('if change_coordinate_frame is True, then a clip_window'
'must be specified.')
original_masks = masks original_masks = masks
original_additional_fields = additional_fields original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
...@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape) masks = tf.zeros(masks_shape)
if clip_window is None:
clip_window = tf.stack([
tf.reduce_min(boxes[:, :, :, 0]),
tf.reduce_min(boxes[:, :, :, 1]),
tf.reduce_max(boxes[:, :, :, 2]),
tf.reduce_max(boxes[:, :, :, 3])
])
if clip_window.shape.ndims == 1:
clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
if additional_fields is None: if additional_fields is None:
additional_fields = {} additional_fields = {}
...@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32 per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class. or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32 per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...]. tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
...@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_boxes = args[0] per_image_boxes = args[0]
per_image_scores = args[1] per_image_scores = args[1]
per_image_masks = args[2] per_image_masks = args[2]
per_image_clip_window = args[3]
per_image_additional_fields = { per_image_additional_fields = {
key: value key: value
for key, value in zip(additional_fields, args[3:-1]) for key, value in zip(additional_fields, args[4:-1])
} }
per_image_num_valid_boxes = args[-1] per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape( per_image_boxes = tf.reshape(
...@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh, iou_thresh,
max_size_per_class, max_size_per_class,
max_total_size, max_total_size,
clip_window=clip_window, clip_window=per_image_clip_window,
change_coordinate_frame=change_coordinate_frame, change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks, masks=per_image_masks,
additional_fields=per_image_additional_fields) additional_fields=per_image_additional_fields)
...@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
num_additional_fields = len(additional_fields) num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields num_nmsed_outputs = 4 + num_additional_fields
batch_outputs = tf.map_fn( batch_outputs = shape_utils.static_or_dynamic_map_fn(
_single_image_nms_fn, _single_image_nms_fn,
elems=([boxes, scores, masks] + list(additional_fields.values()) + elems=([boxes, scores, masks, clip_window] +
[num_valid_boxes]), list(additional_fields.values()) + [num_valid_boxes]),
dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
parallel_iterations=parallel_iterations) parallel_iterations=parallel_iterations)
......
...@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(nmsed_classes, exp_nms_classes) self.assertAllClose(nmsed_classes, exp_nms_classes)
self.assertAllClose(num_detections, [2, 3]) self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_per_batch_clip_window(self):
  """Batched NMS with one rank-1 clip window shared across the batch.

  clip_window is a [4] tensor ([ymin, xmin, ymax, xmax]); the implementation
  is expected to broadcast it to every image in the batch. Boxes falling
  entirely outside [0, 0, 200, 200] (e.g. the x ~ 1000 boxes in image 2)
  must be clipped away and not appear among the detections.
  """
  # [batch=2, num_anchors=4, q=2 classes, 4 coords]
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  # [batch=2, num_anchors=4, num_classes=2] per-class scores.
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  # Single window applied to all images in the batch.
  clip_window = tf.constant([0., 0., 200., 200.])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  # Rows beyond num_detections are zero-padded.
  exp_nms_corners = np.array([[[0, 10, 1, 11],
                               [0, 0, 1, 1],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]],
                              [[0, 10.1, 1, 11.1],
                               [0, 100, 1, 101],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]]])
  exp_nms_scores = np.array([[.95, .9, 0, 0],
                             [.5, .3, 0, 0]])
  exp_nms_classes = np.array([[0, 0, 0, 0],
                              [0, 0, 0, 0]])
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections
  ) = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      clip_window=clip_window)
  # No masks / additional fields were passed in, so none should come out.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)
  # Check static shapes
  self.assertAllEqual(nmsed_boxes.shape.as_list(),
                      exp_nms_corners.shape)
  self.assertAllEqual(nmsed_scores.shape.as_list(),
                      exp_nms_scores.shape)
  self.assertAllEqual(nmsed_classes.shape.as_list(),
                      exp_nms_classes.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])
  with self.test_session() as sess:
    (nmsed_boxes, nmsed_scores, nmsed_classes,
     num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
                                 num_detections])
    self.assertAllClose(nmsed_boxes, exp_nms_corners)
    self.assertAllClose(nmsed_scores, exp_nms_scores)
    self.assertAllClose(nmsed_classes, exp_nms_classes)
    self.assertAllClose(num_detections, [2, 2])
def test_batch_multiclass_nms_with_per_image_clip_window(self):
  """Batched NMS with a distinct [batch_size, 4] clip window per image.

  Image 1 uses the tight window [0, 0, 5, 5] so only its unit box near the
  origin survives; image 2 uses [0, 0, 200, 200] and keeps two boxes. This
  exercises the per-image clip_window branch as opposed to the broadcast
  rank-1 case.
  """
  # [batch=2, num_anchors=4, q=2 classes, 4 coords] — same boxes as the
  # per-batch clip window test; only clip_window and expectations differ.
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  # One [ymin, xmin, ymax, xmax] window per image.
  clip_window = tf.constant([[0., 0., 5., 5.],
                             [0., 0., 200., 200.]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  # Rows beyond num_detections are zero-padded.
  exp_nms_corners = np.array([[[0, 0, 1, 1],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]],
                              [[0, 10.1, 1, 11.1],
                               [0, 100, 1, 101],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]]])
  exp_nms_scores = np.array([[.9, 0., 0., 0.],
                             [.5, .3, 0, 0]])
  exp_nms_classes = np.array([[0, 0, 0, 0],
                              [0, 0, 0, 0]])
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections
  ) = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      clip_window=clip_window)
  # No masks / additional fields were passed in, so none should come out.
  self.assertIsNone(nmsed_masks)
  self.assertIsNone(nmsed_additional_fields)
  # Check static shapes
  self.assertAllEqual(nmsed_boxes.shape.as_list(),
                      exp_nms_corners.shape)
  self.assertAllEqual(nmsed_scores.shape.as_list(),
                      exp_nms_scores.shape)
  self.assertAllEqual(nmsed_classes.shape.as_list(),
                      exp_nms_classes.shape)
  self.assertEqual(num_detections.shape.as_list(), [2])
  with self.test_session() as sess:
    (nmsed_boxes, nmsed_scores, nmsed_classes,
     num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
                                 num_detections])
    self.assertAllClose(nmsed_boxes, exp_nms_corners)
    self.assertAllClose(nmsed_scores, exp_nms_scores)
    self.assertAllClose(nmsed_classes, exp_nms_classes)
    # Image 1 keeps one box (tight window), image 2 keeps two.
    self.assertAllClose(num_detections, [1, 2])
def test_batch_multiclass_nms_with_masks(self): def test_batch_multiclass_nms_with_masks(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]], boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]], [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
......
...@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax]. ...@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1] their coordinate values range in [0, 1]
To preprocess multiple images with the same operations in cases where
nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
object can be passed into the preprocess function or individual operations.
All nondeterministic operations except random_jitter_boxes support caching.
E.g.
Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
Let preprocess_options contain nondeterministic operation(s) excluding
random_jitter_boxes.
cache1 = preprocessor_cache.PreprocessorCache()
cache2 = preprocessor_cache.PreprocessorCache()
a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
e = preprocess(tensor_dict5, preprocess_options)
Then corresponding tensors of object pairs (a,b) and (c,d)
are guaranteed to be equal element-wise, but the equality of any other object
pair cannot be determined.
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then preprocess function we squeeze the image to become a rank 3 tensor and then
...@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image ...@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4. back to rank 4.
""" """
import functools
import inspect
import sys import sys
import tensorflow as tf import tensorflow as tf
...@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops ...@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops from object_detection.core import keypoint_ops
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.utils import shape_utils
def _apply_with_random_selector(x, func, num_cases): def _apply_with_random_selector(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1]. """Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args: Args:
x: input Tensor. x: input Tensor.
func: Python function to apply. func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from. num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns: Returns:
The result of func(x, sel), where func receives the value of the The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically. selector as a python integer, but sel is sampled dynamically.
""" """
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) generator_func = functools.partial(
tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls. # Pass the real x only to one of the func calls.
return control_flow_ops.merge([func( return control_flow_ops.merge([func(
control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case) control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
for case in range(num_cases)])[0] for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases): def _apply_with_random_selector_tuples(x,
func,
num_cases,
preprocess_vars_cache=None,
key=''):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1]. """Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args: Args:
x: A tuple of input tensors. x: A tuple of input tensors.
func: Python function to apply. func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from. num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns: Returns:
The result of func(x, sel), where func receives the value of the The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically. selector as a python integer, but sel is sampled dynamically.
""" """
num_inputs = len(x) num_inputs = len(x)
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) generator_func = functools.partial(
# Pass the real x only to one of the func calls. tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
rand_sel = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES,
preprocess_vars_cache, key)
# Pass the real x only to one of the func calls.
tuples = [list() for t in x] tuples = [list() for t in x]
for case in range(num_cases): for case in range(num_cases):
new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x] new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
...@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases): ...@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
return tuple(tuples) return tuple(tuples)
def _get_or_create_preprocess_rand_vars(generator_func,
function_id,
preprocess_vars_cache,
key=''):
"""Returns a tensor stored in preprocess_vars_cache or using generator_func.
If the tensor was previously generated and appears in the PreprocessorCache,
the previously generated tensor will be returned. Otherwise, a new tensor
is generated using generator_func and stored in the cache.
Args:
generator_func: A 0-argument function that generates a tensor.
function_id: identifier for the preprocessing function used.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: identifier for the variable stored.
Returns:
The generated tensor.
"""
if preprocess_vars_cache is not None:
var = preprocess_vars_cache.get(function_id, key)
if var is None:
var = generator_func()
preprocess_vars_cache.update(function_id, key, var)
else:
var = generator_func()
return var
def _random_integer(minval, maxval, seed): def _random_integer(minval, maxval, seed):
"""Returns a random 0-D tensor between minval and maxval. """Returns a random 0-D tensor between minval and maxval.
...@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed): ...@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed) [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
# TODO: This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added.
def _rgb_to_grayscale(images, name=None):
  """Converts one or more images from RGB to Grayscale.

  Outputs a tensor of the same `DType` and rank as `images`. The size of the
  last dimension of the output is 1, containing the Grayscale value of the
  pixels.

  Args:
    images: The RGB tensor to convert. Last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = tf.convert_to_tensor(images, name='images')
    # Remember original dtype so we can convert back after computing the
    # luma in float32.
    orig_dtype = images.dtype
    flt_image = tf.image.convert_image_dtype(images, tf.float32)

    # Reference for converting between RGB and grayscale.
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    rgb_weights = [0.2989, 0.5870, 0.1140]
    # Reduce over the last axis (channels); computed dynamically as rank-1 so
    # this works for both single images and batches.
    rank_1 = tf.expand_dims(tf.rank(images) - 1, 0)
    gray_float = tf.reduce_sum(
        flt_image * rgb_weights, rank_1, keepdims=True)
    # Restore the static shape with the channel dimension collapsed to 1.
    gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
    return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name)
def normalize_image(image, original_minval, original_maxval, target_minval, def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval): target_maxval):
"""Normalizes pixel values in the image. """Normalizes pixel values in the image.
...@@ -312,7 +435,8 @@ def random_horizontal_flip(image, ...@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
masks=None, masks=None,
keypoints=None, keypoints=None,
keypoint_flip_permutation=None, keypoint_flip_permutation=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections horizontally. """Randomly flips the image and detections horizontally.
The probability of flipping the image is 50%. The probability of flipping the image is 50%.
...@@ -333,6 +457,10 @@ def random_horizontal_flip(image, ...@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. permutation.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -364,7 +492,12 @@ def random_horizontal_flip(image, ...@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]): with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
result = [] result = []
# random variable defining whether to do flip or not # random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
...@@ -399,7 +532,8 @@ def random_vertical_flip(image, ...@@ -399,7 +532,8 @@ def random_vertical_flip(image,
masks=None, masks=None,
keypoints=None, keypoints=None,
keypoint_flip_permutation=None, keypoint_flip_permutation=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly flips the image and detections vertically. """Randomly flips the image and detections vertically.
The probability of flipping the image is 50%. The probability of flipping the image is 50%.
...@@ -420,6 +554,10 @@ def random_vertical_flip(image, ...@@ -420,6 +554,10 @@ def random_vertical_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. permutation.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -451,7 +589,11 @@ def random_vertical_flip(image, ...@@ -451,7 +589,11 @@ def random_vertical_flip(image,
with tf.name_scope('RandomVerticalFlip', values=[image, boxes]): with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
result = [] result = []
# random variable defining whether to do flip or not # random variable defining whether to do flip or not
do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_flip_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
preprocess_vars_cache)
do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image) image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
...@@ -485,7 +627,8 @@ def random_rotation90(image, ...@@ -485,7 +627,8 @@ def random_rotation90(image,
boxes=None, boxes=None,
masks=None, masks=None,
keypoints=None, keypoints=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly rotates the image and detections 90 degrees counter-clockwise. """Randomly rotates the image and detections 90 degrees counter-clockwise.
The probability of rotating the image is 50%. This can be combined with The probability of rotating the image is 50%. This can be combined with
...@@ -507,6 +650,10 @@ def random_rotation90(image, ...@@ -507,6 +650,10 @@ def random_rotation90(image,
[num_instances, num_keypoints, 2]. The keypoints are in y-x [num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates. normalized coordinates.
seed: random seed seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -532,7 +679,11 @@ def random_rotation90(image, ...@@ -532,7 +679,11 @@ def random_rotation90(image,
result = [] result = []
# random variable defining whether to rotate by 90 degrees or not # random variable defining whether to rotate by 90 degrees or not
do_a_rot90_random = tf.greater(tf.random_uniform([], seed=seed), 0.5) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_rot90_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
preprocess_vars_cache)
do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
# flip image # flip image
image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image), image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
...@@ -562,7 +713,11 @@ def random_rotation90(image, ...@@ -562,7 +713,11 @@ def random_rotation90(image,
return tuple(result) return tuple(result)
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None): def random_pixel_value_scale(image,
minval=0.9,
maxval=1.1,
seed=None,
preprocess_vars_cache=None):
"""Scales each value in the pixels of the image. """Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones. This function scales each pixel independent of the other ones.
...@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None): ...@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
minval: lower ratio of scaling pixel values. minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values. maxval: upper ratio of scaling pixel values.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomPixelValueScale', values=[image]): with tf.name_scope('RandomPixelValueScale', values=[image]):
color_coef = tf.random_uniform( generator_func = functools.partial(
tf.shape(image), tf.random_uniform, tf.shape(image),
minval=minval, minval=minval, maxval=maxval,
maxval=maxval, dtype=tf.float32, seed=seed)
dtype=tf.float32, color_coef = _get_or_create_preprocess_rand_vars(
seed=seed) generator_func,
preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
preprocess_vars_cache)
image = tf.multiply(image, color_coef) image = tf.multiply(image, color_coef)
image = tf.clip_by_value(image, 0.0, 1.0) image = tf.clip_by_value(image, 0.0, 1.0)
...@@ -596,7 +758,8 @@ def random_image_scale(image, ...@@ -596,7 +758,8 @@ def random_image_scale(image,
masks=None, masks=None,
min_scale_ratio=0.5, min_scale_ratio=0.5,
max_scale_ratio=2.0, max_scale_ratio=2.0,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Scales the image size. """Scales the image size.
Args: Args:
...@@ -607,6 +770,10 @@ def random_image_scale(image, ...@@ -607,6 +770,10 @@ def random_image_scale(image,
min_scale_ratio: minimum scaling ratio. min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio. max_scale_ratio: maximum scaling ratio.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -618,10 +785,14 @@ def random_image_scale(image, ...@@ -618,10 +785,14 @@ def random_image_scale(image,
image_shape = tf.shape(image) image_shape = tf.shape(image)
image_height = image_shape[0] image_height = image_shape[0]
image_width = image_shape[1] image_width = image_shape[1]
size_coef = tf.random_uniform([], generator_func = functools.partial(
minval=min_scale_ratio, tf.random_uniform, [],
maxval=max_scale_ratio, minval=min_scale_ratio, maxval=max_scale_ratio,
dtype=tf.float32, seed=seed) dtype=tf.float32, seed=seed)
size_coef = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE,
preprocess_vars_cache)
image_newysize = tf.to_int32( image_newysize = tf.to_int32(
tf.multiply(tf.to_float(image_height), size_coef)) tf.multiply(tf.to_float(image_height), size_coef))
image_newxsize = tf.to_int32( image_newxsize = tf.to_int32(
...@@ -636,7 +807,10 @@ def random_image_scale(image, ...@@ -636,7 +807,10 @@ def random_image_scale(image,
return tuple(result) return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None): def random_rgb_to_gray(image,
probability=0.1,
seed=None,
preprocess_vars_cache=None):
"""Changes the image from RGB to Grayscale with the given probability. """Changes the image from RGB to Grayscale with the given probability.
Args: Args:
...@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None): ...@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
probability: the probability of returning a grayscale image. probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1]. The probability should be a number between [0, 1].
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
def _image_to_gray(image): def _image_to_gray(image):
image_gray1 = tf.image.rgb_to_grayscale(image) image_gray1 = _rgb_to_grayscale(image)
image_gray3 = tf.image.grayscale_to_rgb(image_gray1) image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
return image_gray3 return image_gray3
with tf.name_scope('RandomRGBtoGray', values=[image]): with tf.name_scope('RandomRGBtoGray', values=[image]):
# random variable defining whether to do flip or not # random variable defining whether to change to grayscale or not
do_gray_random = tf.random_uniform([], seed=seed) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_gray_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY,
preprocess_vars_cache)
image = tf.cond( image = tf.cond(
tf.greater(do_gray_random, probability), lambda: image, tf.greater(do_gray_random, probability), lambda: image,
...@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None): ...@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
return image return image
def random_adjust_brightness(image, max_delta=0.2): def random_adjust_brightness(image,
max_delta=0.2,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts brightness. """Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2): ...@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1). max_delta: how much to change the brightness. A value between [0, 1).
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes. boxes: boxes which is the same shape as input boxes.
""" """
with tf.name_scope('RandomAdjustBrightness', values=[image]): with tf.name_scope('RandomAdjustBrightness', values=[image]):
image = tf.image.random_brightness(image, max_delta) generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS,
preprocess_vars_cache)
image = tf.image.adjust_brightness(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25): def random_adjust_contrast(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts contrast. """Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25): ...@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the contrast. Contrast will change with a max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image. multiplied to the current contrast of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustContrast', values=[image]): with tf.name_scope('RandomAdjustContrast', values=[image]):
image = tf.image.random_contrast(image, min_delta, max_delta) generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
contrast_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST,
preprocess_vars_cache)
image = tf.image.adjust_contrast(image, contrast_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_hue(image, max_delta=0.02): def random_adjust_hue(image,
max_delta=0.02,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts hue. """Randomly adjusts hue.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02): ...@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta. max_delta: change hue randomly with a value between 0 and max_delta.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustHue', values=[image]): with tf.name_scope('RandomAdjustHue', values=[image]):
image = tf.image.random_hue(image, max_delta) generator_func = functools.partial(tf.random_uniform, [],
-max_delta, max_delta, seed=seed)
delta = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE,
preprocess_vars_cache)
image = tf.image.adjust_hue(image, delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25): def random_adjust_saturation(image,
min_delta=0.8,
max_delta=1.25,
seed=None,
preprocess_vars_cache=None):
"""Randomly adjusts saturation. """Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1. Makes sure the output image is still between 0 and 1.
...@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25): ...@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the saturation. Saturation will change with a max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image. multiplied to the current saturation of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
""" """
with tf.name_scope('RandomAdjustSaturation', values=[image]): with tf.name_scope('RandomAdjustSaturation', values=[image]):
image = tf.image.random_saturation(image, min_delta, max_delta) generator_func = functools.partial(tf.random_uniform, [],
min_delta, max_delta, seed=seed)
saturation_factor = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADJUST_SATURATION,
preprocess_vars_cache)
image = tf.image.adjust_saturation(image, saturation_factor)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0) image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image return image
def random_distort_color(image, color_ordering=0): def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
"""Randomly distorts color. """Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast Randomly distorts color using a combination of brightness, hue, contrast
...@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0): ...@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels] image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1]. with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1). color_ordering: Python int, a type of distortion (valid values: 0, 1).
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same shape as input image. image: image which is the same shape as input image.
...@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0): ...@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
""" """
with tf.name_scope('RandomDistortColor', values=[image]): with tf.name_scope('RandomDistortColor', values=[image]):
if color_ordering == 0: if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.) image = random_adjust_brightness(
image = tf.image.random_saturation(image, lower=0.5, upper=1.5) image, max_delta=32. / 255.,
image = tf.image.random_hue(image, max_delta=0.2) preprocess_vars_cache=preprocess_vars_cache)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5) image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
elif color_ordering == 1: elif color_ordering == 1:
image = tf.image.random_brightness(image, max_delta=32. / 255.) image = random_adjust_brightness(
image = tf.image.random_contrast(image, lower=0.5, upper=1.5) image, max_delta=32. / 255.,
image = tf.image.random_saturation(image, lower=0.5, upper=1.5) preprocess_vars_cache=preprocess_vars_cache)
image = tf.image.random_hue(image, max_delta=0.2) image = random_adjust_contrast(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_saturation(
image, min_delta=0.5, max_delta=1.5,
preprocess_vars_cache=preprocess_vars_cache)
image = random_adjust_hue(
image, max_delta=0.2,
preprocess_vars_cache=preprocess_vars_cache)
else: else:
raise ValueError('color_ordering must be in {0, 1}') raise ValueError('color_ordering must be in {0, 1}')
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image return image
...@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image, ...@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
min_object_covered=1.0, min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33), aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0), area_range=(0.1, 1.0),
overlap_thresh=0.3): overlap_thresh=0.3,
preprocess_vars_cache=None):
"""Performs random crop. """Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are Note: boxes will be clipped to the crop. Keypoint coordinates that are
...@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image, ...@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
original image. original image.
overlap_thresh: minimum overlap thresh with new cropped overlap_thresh: minimum overlap thresh with new cropped
image to keep the box. image to keep the box.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image, ...@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
tf.clip_by_value( tf.clip_by_value(
boxes, clip_value_min=0.0, clip_value_max=1.0), 1) boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( generator_func = functools.partial(
tf.image.sample_distorted_bounding_box,
image_shape, image_shape,
bounding_boxes=boxes_expanded, bounding_boxes=boxes_expanded,
min_object_covered=min_object_covered, min_object_covered=min_object_covered,
...@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image, ...@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
max_attempts=100, max_attempts=100,
use_image_if_no_bounding_boxes=True) use_image_if_no_bounding_boxes=True)
# for ssd cropping, each value of min_object_covered has its own
# cached random variable
sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE,
preprocess_vars_cache, key=min_object_covered)
im_box_begin, im_box_size, im_box = sample_distorted_bounding_box im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
new_image = tf.slice(image, im_box_begin, im_box_size) new_image = tf.slice(image, im_box_begin, im_box_size)
...@@ -984,7 +1254,8 @@ def random_crop_image(image, ...@@ -984,7 +1254,8 @@ def random_crop_image(image,
area_range=(0.1, 1.0), area_range=(0.1, 1.0),
overlap_thresh=0.3, overlap_thresh=0.3,
random_coef=0.0, random_coef=0.0,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops the image. """Randomly crops the image.
Given the input image and its bounding boxes, this op randomly Given the input image and its bounding boxes, this op randomly
...@@ -1029,6 +1300,10 @@ def random_crop_image(image, ...@@ -1029,6 +1300,10 @@ def random_crop_image(image,
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -1056,13 +1331,17 @@ def random_crop_image(image, ...@@ -1056,13 +1331,17 @@ def random_crop_image(image,
min_object_covered=min_object_covered, min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range, aspect_ratio_range=aspect_ratio_range,
area_range=area_range, area_range=area_range,
overlap_thresh=overlap_thresh) overlap_thresh=overlap_thresh,
preprocess_vars_cache=preprocess_vars_cache)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645. # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if random_coef < sys.float_info.min: if random_coef < sys.float_info.min:
result = strict_random_crop_image_fn() result = strict_random_crop_image_fn()
else: else:
do_a_crop_random = tf.random_uniform([], seed=seed) generator_func = functools.partial(tf.random_uniform, [], seed=seed)
do_a_crop_random = _get_or_create_preprocess_rand_vars(
generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE,
preprocess_vars_cache)
do_a_crop_random = tf.greater(do_a_crop_random, random_coef) do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
outputs = [image, boxes, labels] outputs = [image, boxes, labels]
...@@ -1084,7 +1363,8 @@ def random_pad_image(image, ...@@ -1084,7 +1363,8 @@ def random_pad_image(image,
min_image_size=None, min_image_size=None,
max_image_size=None, max_image_size=None,
pad_color=None, pad_color=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly pads the image. """Randomly pads the image.
This function randomly pads the image with zeros. The final size of the This function randomly pads the image with zeros. The final size of the
...@@ -1110,8 +1390,11 @@ def random_pad_image(image, ...@@ -1110,8 +1390,11 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32. pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input if set as None, it will be set to average color of the input
image. image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -1155,6 +1438,12 @@ def random_pad_image(image, ...@@ -1155,6 +1438,12 @@ def random_pad_image(image,
lambda: _random_integer(0, target_width - image_width, seed), lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32)) lambda: tf.constant(0, dtype=tf.int32))
gen_func = lambda: (target_height, target_width, offset_height, offset_width)
params = _get_or_create_preprocess_rand_vars(
gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
preprocess_vars_cache)
target_height, target_width, offset_height, offset_width = params
new_image = tf.image.pad_to_bounding_box( new_image = tf.image.pad_to_bounding_box(
image, image,
offset_height=offset_height, offset_height=offset_height,
...@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image, ...@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
pad_color=None, pad_color=None,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops and pads the image. """Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops Given an input image and its bounding boxes, this op first randomly crops
...@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image, ...@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
if set as None, it will be set to average color of the randomly if set as None, it will be set to average color of the randomly
cropped image. cropped image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
padded_image: padded image. padded_image: padded image.
...@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image, ...@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
area_range=area_range, area_range=area_range,
overlap_thresh=overlap_thresh, overlap_thresh=overlap_thresh,
random_coef=random_coef, random_coef=random_coef,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_image, cropped_boxes, cropped_labels = result[:3] cropped_image, cropped_boxes, cropped_labels = result[:3]
...@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image, ...@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
min_image_size=min_image_size, min_image_size=min_image_size,
max_image_size=max_image_size, max_image_size=max_image_size,
pad_color=pad_color, pad_color=pad_color,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
cropped_padded_output = (padded_image, padded_boxes, cropped_labels) cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
...@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image, ...@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
keypoints=None, keypoints=None,
aspect_ratio=1.0, aspect_ratio=1.0,
overlap_thresh=0.3, overlap_thresh=0.3,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly crops an image to the specified aspect ratio. """Randomly crops an image to the specified aspect ratio.
Randomly crops the a portion of the image such that the crop is of the Randomly crops the a portion of the image such that the crop is of the
...@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image, ...@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped overlap_thresh: minimum overlap thresh with new cropped
image to keep the box. image to keep the box.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image, ...@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
# offset_height is randomly chosen from [0, offset_height - target_height) # offset_height is randomly chosen from [0, offset_height - target_height)
offset_height = _random_integer(0, orig_height - target_height + 1, seed) offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed) offset_width = _random_integer(0, orig_width - target_width + 1, seed)
generator_func = lambda: (offset_height, offset_width)
offset_height, offset_width = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
preprocess_vars_cache)
new_image = tf.image.crop_to_bounding_box( new_image = tf.image.crop_to_bounding_box(
image, offset_height, offset_width, target_height, target_width) image, offset_height, offset_width, target_height, target_width)
...@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image, ...@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
aspect_ratio=1.0, aspect_ratio=1.0,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
seed=None): seed=None,
preprocess_vars_cache=None):
"""Randomly zero pads an image to the specified aspect ratio. """Randomly zero pads an image to the specified aspect ratio.
Pads the image so that the resulting image will have the specified aspect Pads the image so that the resulting image will have the specified aspect
...@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image, ...@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
max_padded_size_ratio: max ratio of padded image height and width to the max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. input image's height and width.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image, ...@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
min_scale = tf.maximum(min_height / target_height, min_width / target_width) min_scale = tf.maximum(min_height / target_height, min_width / target_width)
max_scale = tf.minimum(max_height / target_height, max_width / target_width) max_scale = tf.minimum(max_height / target_height, max_width / target_width)
scale = tf.random_uniform([], min_scale, max_scale, seed=seed)
generator_func = functools.partial(tf.random_uniform, [],
min_scale, max_scale, seed=seed)
scale = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO,
preprocess_vars_cache)
target_height = scale * target_height target_height = scale * target_height
target_width = scale * target_width target_width = scale * target_width
...@@ -1549,7 +1868,8 @@ def random_black_patches(image, ...@@ -1549,7 +1868,8 @@ def random_black_patches(image,
max_black_patches=10, max_black_patches=10,
probability=0.5, probability=0.5,
size_to_image_ratio=0.1, size_to_image_ratio=0.1,
random_seed=None): random_seed=None,
preprocess_vars_cache=None):
"""Randomly adds some black patches to the image. """Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size This op adds up to max_black_patches square black patches of a fixed size
...@@ -1566,15 +1886,20 @@ def random_black_patches(image, ...@@ -1566,15 +1886,20 @@ def random_black_patches(image,
box_size = size_to_image_ratio * box_size = size_to_image_ratio *
min(image_width, image_height) min(image_width, image_height)
random_seed: random seed. random_seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image image
""" """
def add_black_patch_to_image(image): def add_black_patch_to_image(image, idx):
"""Function for adding one patch to the image. """Function for adding one patch to the image.
Args: Args:
image: image image: image
idx: counter for number of patches that could have been added
Returns: Returns:
image with a randomly added black box image with a randomly added black box
...@@ -1586,10 +1911,19 @@ def random_black_patches(image, ...@@ -1586,10 +1911,19 @@ def random_black_patches(image,
tf.multiply( tf.multiply(
tf.minimum(tf.to_float(image_height), tf.to_float(image_width)), tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
size_to_image_ratio)) size_to_image_ratio))
normalized_y_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
normalized_x_min = tf.random_uniform( maxval=(1.0 - size_to_image_ratio),
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed) seed=random_seed)
normalized_y_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'y')
normalized_x_min = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
preprocess_vars_cache, key=str(idx) + 'x')
y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height)) y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width)) x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32) black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
...@@ -1599,13 +1933,17 @@ def random_black_patches(image, ...@@ -1599,13 +1933,17 @@ def random_black_patches(image,
return image return image
with tf.name_scope('RandomBlackPatchInImage', values=[image]): with tf.name_scope('RandomBlackPatchInImage', values=[image]):
for _ in range(max_black_patches): for idx in range(max_black_patches):
random_prob = tf.random_uniform( generator_func = functools.partial(tf.random_uniform, [],
[], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed) minval=0.0, maxval=1.0,
dtype=tf.float32, seed=random_seed)
random_prob = _get_or_create_preprocess_rand_vars(
generator_func,
preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
preprocess_vars_cache, key=idx)
image = tf.cond( image = tf.cond(
tf.greater(random_prob, probability), lambda: image, tf.greater(random_prob, probability), lambda: image,
lambda: add_black_patch_to_image(image)) functools.partial(add_black_patch_to_image, image=image, idx=idx))
return image return image
...@@ -1623,12 +1961,16 @@ def image_to_float(image): ...@@ -1623,12 +1961,16 @@ def image_to_float(image):
return image return image
def random_resize_method(image, target_size): def random_resize_method(image, target_size, preprocess_vars_cache=None):
"""Uses a random resize method to resize the image to target size. """Uses a random resize method to resize the image to target size.
Args: Args:
image: a rank 3 tensor. image: a rank 3 tensor.
target_size: a list of [target_height, target_width] target_size: a list of [target_height, target_width]
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
resized image. resized image.
...@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size): ...@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
resized_image = _apply_with_random_selector( resized_image = _apply_with_random_selector(
image, image,
lambda x, method: tf.image.resize_images(x, target_size, method), lambda x, method: tf.image.resize_images(x, target_size, method),
num_cases=4) num_cases=4,
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
return resized_image return resized_image
...@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension): ...@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
image_shape = image.get_shape().as_list() image_shape = image.get_shape().as_list()
orig_height = image_shape[0] orig_height = image_shape[0]
orig_width = image_shape[1] orig_width = image_shape[1]
num_channels = image_shape[2]
orig_min_dim = min(orig_height, orig_width) orig_min_dim = min(orig_height, orig_width)
# Calculates the larger of the possible sizes # Calculates the larger of the possible sizes
large_scale_factor = min_dimension / float(orig_min_dim) large_scale_factor = min_dimension / float(orig_min_dim)
...@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension): ...@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
new_size = small_size new_size = small_size
else: else:
new_size = large_size new_size = large_size
return tf.constant(new_size) return tf.constant(new_size + [num_channels])
def _compute_new_dynamic_size(image, min_dimension, max_dimension): def _compute_new_dynamic_size(image, min_dimension, max_dimension):
...@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension): ...@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
image_shape = tf.shape(image) image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0]) orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1]) orig_width = tf.to_float(image_shape[1])
num_channels = image_shape[2]
orig_min_dim = tf.minimum(orig_height, orig_width) orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes # Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32) min_dimension = tf.constant(min_dimension, dtype=tf.float32)
...@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension): ...@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
lambda: small_size, lambda: large_size) lambda: small_size, lambda: large_size)
else: else:
new_size = large_size new_size = large_size
return new_size return tf.stack(tf.unstack(new_size) + [num_channels])
def resize_to_range(image, def resize_to_range(image,
...@@ -1719,7 +2065,8 @@ def resize_to_range(image, ...@@ -1719,7 +2065,8 @@ def resize_to_range(image,
min_dimension=None, min_dimension=None,
max_dimension=None, max_dimension=None,
method=tf.image.ResizeMethod.BILINEAR, method=tf.image.ResizeMethod.BILINEAR,
align_corners=False): align_corners=False,
pad_to_max_dimension=False):
"""Resizes an image so its dimensions are within the provided value. """Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases: The output size can be described by two cases:
...@@ -1740,15 +2087,22 @@ def resize_to_range(image, ...@@ -1740,15 +2087,22 @@ def resize_to_range(image,
BILINEAR. BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False. and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
Returns: Returns:
A 3D tensor of shape [new_height, new_width, channels], Note that the position of the resized_image_shape changes based on whether
where the image has been resized (with bilinear interpolation) so that masks are present.
min(new_height, new_width) == min_dimension or resized_image: A 3D tensor of shape [new_height, new_width, channels],
max(new_height, new_width) == max_dimension. where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
If masks is not None, also outputs masks: max(new_height, new_width) == max_dimension.
A 3D tensor of shape [num_instances, new_height, new_width] resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width].
resized_image_shape: A 1D tensor of shape [3] containing shape of the
resized image.
Raises: Raises:
ValueError: if the image is not a 3D tensor. ValueError: if the image is not a 3D tensor.
...@@ -1762,16 +2116,27 @@ def resize_to_range(image, ...@@ -1762,16 +2116,27 @@ def resize_to_range(image,
else: else:
new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension) new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
new_image = tf.image.resize_images( new_image = tf.image.resize_images(
image, new_size, method=method, align_corners=align_corners) image, new_size[:-1], method=method, align_corners=align_corners)
result = new_image if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
result = [new_image]
if masks is not None: if masks is not None:
new_masks = tf.expand_dims(masks, 3) new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor( new_masks = tf.image.resize_images(
new_masks, new_size, align_corners=align_corners) new_masks,
new_size[:-1],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=align_corners)
new_masks = tf.squeeze(new_masks, 3) new_masks = tf.squeeze(new_masks, 3)
result = [new_image, new_masks] if pad_to_max_dimension:
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, max_dimension, max_dimension)
result.append(new_masks)
result.append(new_size)
return result return result
...@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
min_dimension: minimum image dimension. min_dimension: minimum image dimension.
Returns: Returns:
a tuple containing the following: Note that the position of the resized_image_shape changes based on whether
Resized image. A tensor of size [new_height, new_width, channels]. masks are present.
(optional) Resized masks. A tensor of resized_image: A tensor of size [new_height, new_width, channels].
size [num_instances, new_height, new_width]. resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
Raises: Raises:
ValueError: if the image is not a 3D tensor. ValueError: if the image is not a 3D tensor.
...@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]): with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
image_height = tf.shape(image)[0] image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1] image_width = tf.shape(image)[1]
num_channels = tf.shape(image)[2]
min_image_dimension = tf.minimum(image_height, image_width) min_image_dimension = tf.minimum(image_height, image_width)
min_target_dimension = tf.maximum(min_image_dimension, min_dimension) min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
target_ratio = tf.to_float(min_target_dimension) / tf.to_float( target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
...@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600): ...@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
tf.expand_dims(image, axis=0), tf.expand_dims(image, axis=0),
size=[target_height, target_width], size=[target_height, target_width],
align_corners=True) align_corners=True)
result = tf.squeeze(image, axis=0) result = [tf.squeeze(image, axis=0)]
if masks is not None: if masks is not None:
masks = tf.image.resize_nearest_neighbor( masks = tf.image.resize_nearest_neighbor(
tf.expand_dims(masks, axis=3), tf.expand_dims(masks, axis=3),
size=[target_height, target_width], size=[target_height, target_width],
align_corners=True) align_corners=True)
result = (result, tf.squeeze(masks, axis=3)) result.append(tf.squeeze(masks, axis=3))
result.append(tf.stack([target_height, target_width, num_channels]))
return result return result
...@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): ...@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return tuple(result) return tuple(result)
# TODO: Investigate if instead the function should return None if
# masks is None.
# pylint: disable=g-doc-return-or-yield # pylint: disable=g-doc-return-or-yield
def resize_image(image, def resize_image(image,
masks=None, masks=None,
...@@ -1861,7 +2235,28 @@ def resize_image(image, ...@@ -1861,7 +2235,28 @@ def resize_image(image,
new_width=1024, new_width=1024,
method=tf.image.ResizeMethod.BILINEAR, method=tf.image.ResizeMethod.BILINEAR,
align_corners=False): align_corners=False):
"""See `tf.image.resize_images` for detailed doc.""" """Resizes images to the given height and width.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
new_height: (optional) (scalar) desired height of the image.
new_width: (optional) (scalar) desired width of the image.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
"""
with tf.name_scope( with tf.name_scope(
'ResizeImage', 'ResizeImage',
values=[image, new_height, new_width, method, align_corners]): values=[image, new_height, new_width, method, align_corners]):
...@@ -1869,7 +2264,8 @@ def resize_image(image, ...@@ -1869,7 +2264,8 @@ def resize_image(image,
image, [new_height, new_width], image, [new_height, new_width],
method=method, method=method,
align_corners=align_corners) align_corners=align_corners)
result = new_image image_shape = shape_utils.combined_static_and_dynamic_shape(image)
result = [new_image]
if masks is not None: if masks is not None:
num_instances = tf.shape(masks)[0] num_instances = tf.shape(masks)[0]
new_size = tf.constant([new_height, new_width], dtype=tf.int32) new_size = tf.constant([new_height, new_width], dtype=tf.int32)
...@@ -1886,8 +2282,9 @@ def resize_image(image, ...@@ -1886,8 +2282,9 @@ def resize_image(image,
masks = tf.cond(num_instances > 0, resize_masks_branch, masks = tf.cond(num_instances > 0, resize_masks_branch,
reshape_masks_branch) reshape_masks_branch)
result = [new_image, masks] result.append(masks)
result.append(tf.stack([new_height, new_width, image_shape[2]]))
return result return result
...@@ -1946,7 +2343,7 @@ def rgb_to_gray(image): ...@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
Returns: Returns:
image: A single channel grayscale image -> [image, height, 1]. image: A single channel grayscale image -> [image, height, 1].
""" """
return tf.image.rgb_to_grayscale(image) return _rgb_to_grayscale(image)
def ssd_random_crop(image, def ssd_random_crop(image,
...@@ -1960,7 +2357,8 @@ def ssd_random_crop(image, ...@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
area_range=((0.1, 1.0),) * 7, area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -1994,6 +2392,10 @@ def ssd_random_crop(image, ...@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2046,14 +2448,17 @@ def ssd_random_crop(image, ...@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
area_range=area_range[index], area_range=area_range[index],
overlap_thresh=overlap_thresh[index], overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index], random_coef=random_coef[index],
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = _apply_with_random_selector_tuples( result = _apply_with_random_selector_tuples(
tuple( tuple(
t for t in (image, boxes, labels, label_scores, masks, keypoints) t for t in (image, boxes, labels, label_scores, masks, keypoints)
if t is not None), if t is not None),
random_crop_selector, random_crop_selector,
num_cases=len(min_object_covered)) num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID)
return result return result
...@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image, ...@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=((1.0, 1.0),) * 6, min_padded_size_ratio=((1.0, 1.0),) * 6,
max_padded_size_ratio=((2.0, 2.0),) * 6, max_padded_size_ratio=((2.0, 2.0),) * 6,
pad_color=(None,) * 6, pad_color=(None,) * 6,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image, ...@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
if set as None, it will be set to average color of the randomly if set as None, it will be set to average color of the randomly
cropped image. cropped image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: Image shape will be [new_height, new_width, channels]. image: Image shape will be [new_height, new_width, channels].
...@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image, ...@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio=min_padded_size_ratio[index], min_padded_size_ratio=min_padded_size_ratio[index],
max_padded_size_ratio=max_padded_size_ratio[index], max_padded_size_ratio=max_padded_size_ratio[index],
pad_color=pad_color[index], pad_color=pad_color[index],
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return _apply_with_random_selector_tuples( return _apply_with_random_selector_tuples(
tuple(t for t in (image, boxes, labels, label_scores) if t is not None), tuple(t for t in (image, boxes, labels, label_scores) if t is not None),
random_crop_pad_selector, random_crop_pad_selector,
num_cases=len(min_object_covered)) num_cases=len(min_object_covered),
preprocess_vars_cache=preprocess_vars_cache,
key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID)
def ssd_random_crop_fixed_aspect_ratio( def ssd_random_crop_fixed_aspect_ratio(
...@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
area_range=((0.1, 1.0),) * 7, area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0), overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop preprocessing with default parameters as in SSD paper. """Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
cropped image, and if it is 1.0, we will always get the cropped image, and if it is 1.0, we will always get the
original image. original image.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
crop_result = ssd_random_crop( crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered, image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3 i = 3
new_image, new_boxes, new_labels = crop_result[:i] new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None new_label_scores = None
...@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio( ...@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
new_masks, new_masks,
new_keypoints, new_keypoints,
aspect_ratio=aspect_ratio, aspect_ratio=aspect_ratio,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
return result return result
...@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
random_coef=(0.15,) * 7, random_coef=(0.15,) * 7,
min_padded_size_ratio=(1.0, 1.0), min_padded_size_ratio=(1.0, 1.0),
max_padded_size_ratio=(2.0, 2.0), max_padded_size_ratio=(2.0, 2.0),
seed=None): seed=None,
preprocess_vars_cache=None):
"""Random crop and pad preprocessing with default parameters as in SSD paper. """Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector. Liu et al., SSD: Single shot multibox detector.
...@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
max_padded_size_ratio: max ratio of padded image height and width to the max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. input image's height and width.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
image: image which is the same rank as input image. image: image which is the same rank as input image.
...@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
""" """
crop_result = ssd_random_crop( crop_result = ssd_random_crop(
image, boxes, labels, label_scores, masks, keypoints, min_object_covered, image, boxes, labels, label_scores, masks, keypoints, min_object_covered,
aspect_ratio_range, area_range, overlap_thresh, random_coef, seed) aspect_ratio_range, area_range, overlap_thresh, random_coef, seed,
preprocess_vars_cache)
i = 3 i = 3
new_image, new_boxes, new_labels = crop_result[:i] new_image, new_boxes, new_labels = crop_result[:i]
new_label_scores = None new_label_scores = None
...@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
aspect_ratio=aspect_ratio, aspect_ratio=aspect_ratio,
min_padded_size_ratio=min_padded_size_ratio, min_padded_size_ratio=min_padded_size_ratio,
max_padded_size_ratio=max_padded_size_ratio, max_padded_size_ratio=max_padded_size_ratio,
seed=seed) seed=seed,
preprocess_vars_cache=preprocess_vars_cache)
result = list(result) result = list(result)
if new_label_scores is not None: if new_label_scores is not None:
...@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False, ...@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
return prep_func_arg_map return prep_func_arg_map
def preprocess(tensor_dict, preprocess_options, func_arg_map=None): def preprocess(tensor_dict,
preprocess_options,
func_arg_map=None,
preprocess_vars_cache=None):
"""Preprocess images and bounding boxes. """Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the Various types of preprocessing (to be implemented) based on the
...@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None): ...@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
their values. their values.
func_arg_map: mapping from preprocessing functions to arguments that they func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return. expect to receive and return.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns: Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc. tensor_dict: which contains the preprocessed images, bounding boxes, etc.
...@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None): ...@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
return tensor_dict[key] if key is not None else None return tensor_dict[key] if key is not None else None
args = [get_arg(a) for a in arg_names] args = [get_arg(a) for a in arg_names]
if (preprocess_vars_cache is not None and
'preprocess_vars_cache' in inspect.getargspec(func).args):
params['preprocess_vars_cache'] = preprocess_vars_cache
results = func(*args, **params) results = func(*args, **params)
if not isinstance(results, (list, tuple)): if not isinstance(results, (list, tuple)):
results = (results,) results = (results,)
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Records previous preprocessing operations and allows them to be repeated.
Used with object_detection.core.preprocessor. Passing a PreprocessorCache
into individual data augmentation functions or the general preprocess() function
will store all randomly generated variables in the PreprocessorCache. When
a preprocessor function is called multiple times with the same
PreprocessorCache object, that function will perform the same augmentation
on all calls.
"""
from collections import defaultdict
class PreprocessorCache(object):
  """Dictionary wrapper storing random variables generated during preprocessing.
  """

  # Constant keys representing different preprocessing functions
  ROTATION90 = 'rotation90'
  HORIZONTAL_FLIP = 'horizontal_flip'
  VERTICAL_FLIP = 'vertical_flip'
  PIXEL_VALUE_SCALE = 'pixel_value_scale'
  IMAGE_SCALE = 'image_scale'
  RGB_TO_GRAY = 'rgb_to_gray'
  ADJUST_BRIGHTNESS = 'adjust_brightness'
  ADJUST_CONTRAST = 'adjust_contrast'
  ADJUST_HUE = 'adjust_hue'
  ADJUST_SATURATION = 'adjust_saturation'
  DISTORT_COLOR = 'distort_color'
  STRICT_CROP_IMAGE = 'strict_crop_image'
  CROP_IMAGE = 'crop_image'
  PAD_IMAGE = 'pad_image'
  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
  RESIZE_METHOD = 'resize_method'
  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
  BLACK_PATCHES = 'black_patches'
  ADD_BLACK_PATCH = 'add_black_patch'
  SELECTOR = 'selector'
  SELECTOR_TUPLES = 'selector_tuples'
  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'

  # 23 permitted function ids
  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
                SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID, SSD_CROP_PAD_SELECTOR_ID]

  def __init__(self):
    # Maps function_id -> {key: value}. Must be a defaultdict(dict) because
    # get() indexes self._history[function_id] directly and relies on
    # auto-creation of an empty inner dict for ids never updated.
    self._history = defaultdict(dict)

  def clear(self):
    """Resets cache."""
    # Bug fix: previously rebound to a plain {}, so get() on a valid but
    # never-updated function id raised KeyError after clear() instead of
    # returning None like a freshly constructed cache.
    self._history = defaultdict(dict)

  def get(self, function_id, key):
    """Gets stored value given a function id and key.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
    Returns:
      value: the corresponding value, expected to be a tensor or
             nested structure of tensors; None if the key is absent.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    return self._history[function_id].get(key)

  def update(self, function_id, key, value):
    """Adds a value to the dictionary.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
      value: the value to store, expected to be a tensor or nested structure
             of tensors.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    self._history[function_id][key] = value
...@@ -21,6 +21,7 @@ import six ...@@ -21,6 +21,7 @@ import six
import tensorflow as tf import tensorflow as tf
from object_detection.core import preprocessor from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
if six.PY2: if six.PY2:
...@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
def expectedLabelsAfterThresholdingWithMissingScore(self): def expectedLabelsAfterThresholdingWithMissingScore(self):
return tf.constant([2], dtype=tf.float32) return tf.constant([2], dtype=tf.float32)
def testRgbToGrayscale(self):
images = self.createTestImages()
grayscale_images = preprocessor._rgb_to_grayscale(images)
expected_images = tf.image.rgb_to_grayscale(images)
with self.test_session() as sess:
(grayscale_images, expected_images) = sess.run(
[grayscale_images, expected_images])
self.assertAllEqual(expected_images, grayscale_images)
def testNormalizeImage(self): def testNormalizeImage(self):
preprocess_options = [(preprocessor.normalize_image, { preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask]) rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten()) self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
  def _testPreprocessorCache(self,
                             preprocess_options,
                             test_boxes=False,
                             test_masks=False,
                             test_keypoints=False,
                             num_runs=4):
    """Runs preprocess() num_runs times with one shared cache and checks that
    all runs produce identical outputs.

    Args:
      preprocess_options: list of (function, kwargs) tuples passed through to
        preprocessor.preprocess.
      test_boxes: if True, also feed groundtruth boxes/classes and compare the
        box outputs across runs.
      test_masks: if True, also feed instance masks and compare them.
      test_keypoints: if True, also feed keypoints and compare them.
      num_runs: number of times preprocess() is invoked with the same cache.
    """
    cache = preprocessor_cache.PreprocessorCache()
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    classes = self.createTestLabels()
    masks = self.createTestMasks()
    keypoints = self.createTestKeypoints()
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
        include_instance_masks=test_masks, include_keypoints=test_keypoints)
    out = []
    for i in range(num_runs):
      tensor_dict = {
          fields.InputDataFields.image: images,
      }
      # num_outputs counts how many tensors per run are fetched below; it is
      # the comparison stride for the assertions at the end.
      num_outputs = 1
      if test_boxes:
        tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes
        tensor_dict[fields.InputDataFields.groundtruth_classes] = classes
        num_outputs += 1
      if test_masks:
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
        num_outputs += 1
      if test_keypoints:
        tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
        num_outputs += 1
      out.append(preprocessor.preprocess(
          tensor_dict, preprocess_options, preprocessor_arg_map, cache))
    with self.test_session() as sess:
      # to_run is laid out as num_runs consecutive groups of num_outputs
      # tensors; the append order here must mirror the num_outputs counting
      # above so the strided comparison below lines up.
      to_run = []
      for i in range(num_runs):
        to_run.append(out[i][fields.InputDataFields.image])
        if test_boxes:
          to_run.append(out[i][fields.InputDataFields.groundtruth_boxes])
        if test_masks:
          to_run.append(
              out[i][fields.InputDataFields.groundtruth_instance_masks])
        if test_keypoints:
          to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints])
      out_array = sess.run(to_run)
      # Each output must match the corresponding output of the previous run.
      for i in range(num_outputs, len(out_array)):
        self.assertAllClose(out_array[i], out_array[i - num_outputs])
def testRandomHorizontalFlip(self): def testRandomHorizontalFlip(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})] preprocess_options = [(preprocessor.random_horizontal_flip, {})]
images = self.expectedImagesAfterNormalization() images = self.expectedImagesAfterNormalization()
...@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomHorizontalFlipWithCache(self):
keypoint_flip_permutation = self.createKeypointFlipPermutation()
preprocess_options = [
(preprocessor.random_horizontal_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self): def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})] preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3 image_height = 3
...@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomVerticalFlipWithCache(self):
keypoint_flip_permutation = self.createKeypointFlipPermutation()
preprocess_options = [
(preprocessor.random_vertical_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomVerticalFlipWithMaskAndKeypoints(self): def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_vertical_flip, {})] preprocess_options = [(preprocessor.random_vertical_flip, {})]
image_height = 3 image_height = 3
...@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(boxes_, boxes_expected_) self.assertAllClose(boxes_, boxes_expected_)
self.assertAllClose(images_diff_, images_diff_expected_) self.assertAllClose(images_diff_, images_diff_expected_)
def testRandomRotation90WithCache(self):
preprocess_options = [(preprocessor.random_rotation90, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomRotation90WithMaskAndKeypoints(self): def testRunRandomRotation90WithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_rotation90, {})] preprocess_options = [(preprocessor.random_rotation90, {})]
image_height = 3 image_height = 3
...@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(values_greater_, values_true_) self.assertAllClose(values_greater_, values_true_)
self.assertAllClose(values_less_, values_true_) self.assertAllClose(values_less_, values_true_)
def testRandomPixelValueScaleWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_pixel_value_scale, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRandomImageScale(self): def testRandomImageScale(self):
preprocess_options = [(preprocessor.random_image_scale, {})] preprocess_options = [(preprocessor.random_image_scale, {})]
images_original = self.createTestImages() images_original = self.createTestImages()
...@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue( self.assertTrue(
images_original_shape_[2] * 2.0 >= images_scaled_shape_[2]) images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
def testRandomImageScaleWithCache(self):
preprocess_options = [(preprocessor.random_image_scale, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomRGBtoGray(self): def testRandomRGBtoGray(self):
preprocess_options = [(preprocessor.random_rgb_to_gray, {})] preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
images_original = self.createTestImages() images_original = self.createTestImages()
...@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(images_g_diff_, image_zero1_) self.assertAllClose(images_g_diff_, image_zero1_)
self.assertAllClose(images_b_diff_, image_zero1_) self.assertAllClose(images_b_diff_, image_zero1_)
def testRandomRGBtoGrayWithCache(self):
preprocess_options = [(
preprocessor.random_rgb_to_gray, {'probability': 0.5})]
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustBrightness(self): def testRandomAdjustBrightness(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_bright_shape]) [image_original_shape, image_bright_shape])
self.assertAllEqual(image_original_shape_, image_bright_shape_) self.assertAllEqual(image_original_shape_, image_bright_shape_)
def testRandomAdjustBrightnessWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_brightness, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustContrast(self): def testRandomAdjustContrast(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_contrast_shape]) [image_original_shape, image_contrast_shape])
self.assertAllEqual(image_original_shape_, image_contrast_shape_) self.assertAllEqual(image_original_shape_, image_contrast_shape_)
def testRandomAdjustContrastWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_contrast, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomAdjustHue(self): def testRandomAdjustHue(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
[image_original_shape, image_hue_shape]) [image_original_shape, image_hue_shape])
self.assertAllEqual(image_original_shape_, image_hue_shape_) self.assertAllEqual(image_original_shape_, image_hue_shape_)
def testRandomAdjustHueWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_adjust_hue, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomDistortColor(self): def testRandomDistortColor(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
[images_original_shape, images_distorted_color_shape]) [images_original_shape, images_distorted_color_shape])
self.assertAllEqual(images_original_shape_, images_distorted_color_shape_) self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
def testRandomDistortColorWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_distort_color, {}))
self._testPreprocessorCache(preprocess_options,
test_boxes=False,
test_masks=False,
test_keypoints=False)
def testRandomJitterBoxes(self): def testRandomJitterBoxes(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.random_jitter_boxes, {})) preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
...@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_) self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithCache(self):
preprocess_options = [(preprocessor.random_rgb_to_gray,
{'probability': 0.5}),
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,
}),
(preprocessor.random_crop_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRandomCropImageGrayscale(self): def testRandomCropImageGrayscale(self):
preprocessing_options = [(preprocessor.rgb_to_gray, {}), preprocessing_options = [(preprocessor.rgb_to_gray, {}),
(preprocessor.normalize_image, { (preprocessor.normalize_image, {
...@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
self.expectedKeypointsAfterThresholding()]) self.expectedKeypointsAfterThresholding()])
self.assertAllClose(retained_keypoints_, expected_keypoints_) self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRandomCropToAspectRatioWithCache(self):
preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testRunRandomCropToAspectRatioWithMasks(self): def testRunRandomCropToAspectRatioWithMasks(self):
image = self.createColorfulTestImage() image = self.createColorfulTestImage()
boxes = self.createTestBoxes() boxes = self.createTestBoxes()
...@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(), self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten()) expected_keypoints.flatten())
def testRandomPadToAspectRatioWithCache(self):
preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRunRandomPadToAspectRatioWithMasks(self): def testRunRandomPadToAspectRatioWithMasks(self):
image = self.createColorfulTestImage() image = self.createColorfulTestImage()
boxes = self.createTestBoxes() boxes = self.createTestBoxes()
...@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_keypoints_.flatten(), self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten()) expected_keypoints.flatten())
def testRandomPadImageWithCache(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,}), (preprocessor.random_pad_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomPadImage(self): def testRandomPadImage(self):
preprocessing_options = [(preprocessor.normalize_image, { preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1]))) padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithCache(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,}), (preprocessor.random_crop_pad_image, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomCropPadImageWithRandomCoefOne(self): def testRandomCropPadImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, { preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0, 'original_minval': 0,
...@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
self.assertEqual(images_shape_[1], padded_images_shape_[1]) self.assertEqual(images_shape_[1], padded_images_shape_[1])
self.assertEqual(2 * images_shape_[2], padded_images_shape_[2]) self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
def testRandomBlackPatchesWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_black_patches, {
'size_to_image_ratio': 0.5
}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomBlackPatches(self): def testRandomBlackPatches(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
[images_shape, blacked_images_shape]) [images_shape, blacked_images_shape])
self.assertAllEqual(images_shape_, blacked_images_shape_) self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethodWithCache(self):
preprocess_options = []
preprocess_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocess_options.append((preprocessor.random_resize_method, {
'target_size': (75, 150)
}))
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=True,
test_keypoints=True)
def testRandomResizeMethod(self): def testRandomResizeMethod(self):
preprocessing_options = [] preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, { preprocessing_options.append((preprocessor.normalize_image, {
...@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image( out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width) in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image( out_image, out_masks, _ = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width) in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape) in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_image.get_shape().as_list(), expected_shape) self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
...@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
with self.test_session() as sess: with self.test_session() as sess:
...@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape) self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape) self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
...@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range( out_image, out_masks, _ = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim) in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list): for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape) in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range( out_image, _ = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim) in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
...@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3)) in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None)) in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension( out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim) in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list): expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape) in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape) in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_min_dimension( out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
in_image, in_masks, min_dimension=min_dim) in_image, in_masks, min_dimension=min_dim)
out_image_shape = tf.shape(out_image) out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks) out_masks_shape = tf.shape(out_masks)
...@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual([0, 1, 1, 0, 1], one_hot) self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
def testSSDRandomCropWithCache(self):
preprocess_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def testSSDRandomCrop(self): def testSSDRandomCrop(self):
preprocessing_options = [ preprocessing_options = [
(preprocessor.normalize_image, { (preprocessor.normalize_image, {
...@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_) self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_) self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatioWithCache(self):
preprocess_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
self._testPreprocessorCache(preprocess_options,
test_boxes=True,
test_masks=False,
test_keypoints=False)
def _testSSDRandomCropFixedAspectRatio(self, def _testSSDRandomCropFixedAspectRatio(self,
include_label_scores, include_label_scores,
include_instance_masks, include_instance_masks,
......
...@@ -57,6 +57,10 @@ class InputDataFields(object): ...@@ -57,6 +57,10 @@ class InputDataFields(object):
groundtruth_keypoints: ground truth keypoints. groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities. groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores. groundtruth_label_scores: groundtruth label scores.
groundtruth_weights: groundtruth weight factor for bounding boxes.
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shapes: true shapes of images in the resized images, as resized
images can be padded with zeros.
""" """
image = 'image' image = 'image'
original_image = 'original_image' original_image = 'original_image'
...@@ -79,10 +83,13 @@ class InputDataFields(object): ...@@ -79,10 +83,13 @@ class InputDataFields(object):
groundtruth_keypoints = 'groundtruth_keypoints' groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores' groundtruth_label_scores = 'groundtruth_label_scores'
groundtruth_weights = 'groundtruth_weights'
num_groundtruth_boxes = 'num_groundtruth_boxes'
true_image_shape = 'true_image_shape'
class DetectionResultFields(object): class DetectionResultFields(object):
"""Naming converntions for storing the output of the detector. """Naming conventions for storing the output of the detector.
Attributes: Attributes:
source_id: source of the original image. source_id: source of the original image.
...@@ -162,6 +169,7 @@ class TfExampleFields(object): ...@@ -162,6 +169,7 @@ class TfExampleFields(object):
object_is_crowd: [DEPRECATED, use object_group_of instead] object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd is the object a single object or a crowd
object_segment_area: the area of the segment. object_segment_area: the area of the segment.
object_weight: a weight factor for the object's bounding box.
instance_masks: instance segmentation masks. instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries. instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask. instance_classes: Classes for each instance segmentation mask.
...@@ -194,6 +202,7 @@ class TfExampleFields(object): ...@@ -194,6 +202,7 @@ class TfExampleFields(object):
object_depiction = 'image/object/depiction' object_depiction = 'image/object/depiction'
object_is_crowd = 'image/object/is_crowd' object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area' object_segment_area = 'image/object/segment/area'
object_weight = 'image/object/weight'
instance_masks = 'image/segmentation/object' instance_masks = 'image/segmentation/object'
instance_boundaries = 'image/boundaries/object' instance_boundaries = 'image/boundaries/object'
instance_classes = 'image/segmentation/object/class' instance_classes = 'image/segmentation/object/class'
......
...@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder ...@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder from object_detection.core import box_coder as bcoder
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher from object_detection.matchers import bipartite_matcher
from object_detection.utils import shape_utils
class TargetAssigner(object): class TargetAssigner(object):
"""Target assigner to compute classification and regression targets.""" """Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder, def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0, negative_class_weight=1.0, unmatched_cls_target=None):
unmatched_cls_target=None):
"""Construct Object Detection Target Assigner. """Construct Object Detection Target Assigner.
Args: Args:
...@@ -58,10 +58,8 @@ class TargetAssigner(object): ...@@ -58,10 +58,8 @@ class TargetAssigner(object):
anchors. anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors. groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0) anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be anchor (and can be empty for scalar targets). This shape must thus be
...@@ -82,7 +80,6 @@ class TargetAssigner(object): ...@@ -82,7 +80,6 @@ class TargetAssigner(object):
self._similarity_calc = similarity_calc self._similarity_calc = similarity_calc
self._matcher = matcher self._matcher = matcher
self._box_coder = box_coder self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None: if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32) self._unmatched_cls_target = tf.constant([0], tf.float32)
...@@ -94,7 +91,7 @@ class TargetAssigner(object): ...@@ -94,7 +91,7 @@ class TargetAssigner(object):
return self._box_coder return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params): groundtruth_weights=None, **params):
"""Assign classification and regression targets to each anchor. """Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors For a given set of anchors and groundtruth detections, match anchors
...@@ -113,6 +110,9 @@ class TargetAssigner(object): ...@@ -113,6 +110,9 @@ class TargetAssigner(object):
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1). ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of
the Matcher. the Matcher.
...@@ -140,14 +140,21 @@ class TargetAssigner(object): ...@@ -140,14 +140,21 @@ class TargetAssigner(object):
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0)) 0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
unmatched_shape_assert = tf.assert_equal( unmatched_shape_assert = shape_utils.assert_shape_equal(
tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target), shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
message='Unmatched class target shape incompatible ' shape_utils.combined_static_and_dynamic_shape(
'with groundtruth labels shape!') self._unmatched_cls_target))
labels_and_box_shapes_assert = tf.assert_equal( labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(), shape_utils.combined_static_and_dynamic_shape(
message='Groundtruth boxes and labels have incompatible shapes!') groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
with tf.control_dependencies( with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]): [unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
...@@ -158,16 +165,16 @@ class TargetAssigner(object): ...@@ -158,16 +165,16 @@ class TargetAssigner(object):
match) match)
cls_targets = self._create_classification_targets(groundtruth_labels, cls_targets = self._create_classification_targets(groundtruth_labels,
match) match)
reg_weights = self._create_regression_weights(match) reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights( cls_weights = self._create_classification_weights(match,
match, self._positive_class_weight, self._negative_class_weight) groundtruth_weights)
num_anchors = anchors.num_boxes_static() num_anchors = anchors.num_boxes_static()
if num_anchors is not None: if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors) reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors) cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors) reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors) cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match return cls_targets, cls_weights, reg_targets, reg_weights, match
...@@ -198,23 +205,31 @@ class TargetAssigner(object): ...@@ -198,23 +205,31 @@ class TargetAssigner(object):
Returns: Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension] reg_targets: a float32 tensor with shape [N, box_code_dimension]
""" """
matched_anchor_indices = match.matched_column_indices() matched_gt_boxes = match.gather_based_on_match(
unmatched_ignored_anchor_indices = (match. groundtruth_boxes.get(),
unmatched_or_ignored_column_indices()) unmatched_value=tf.zeros(4),
matched_gt_indices = match.matched_row_indices() ignored_value=tf.zeros(4))
matched_anchors = box_list_ops.gather(anchors, matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
matched_anchor_indices) if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes, groundtruth_keypoints = groundtruth_boxes.get_field(
matched_gt_indices) fields.BoxListFields.keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes, matched_keypoints = match.gather_based_on_match(
matched_anchors) groundtruth_keypoints,
unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
matched_keypoints)
matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
match_results_shape = shape_utils.combined_static_and_dynamic_shape(
match.match_results)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets = tf.tile( unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(), self._default_regression_target(), [match_results_shape[0], 1])
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1])) matched_anchors_mask = match.matched_column_indicator()
reg_targets = tf.dynamic_stitch( reg_targets = tf.where(matched_anchors_mask,
[matched_anchor_indices, unmatched_ignored_anchor_indices], matched_reg_targets,
[matched_reg_targets, unmatched_ignored_reg_targets]) unmatched_ignored_reg_targets)
# TODO: summarize the number of matches on average.
return reg_targets return reg_targets
def _default_regression_target(self): def _default_regression_target(self):
...@@ -245,27 +260,16 @@ class TargetAssigner(object): ...@@ -245,27 +260,16 @@ class TargetAssigner(object):
and groundtruth boxes. and groundtruth boxes.
Returns: Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
which has shape [num_gt_boxes, d_1, d_2, ... d_k]. shape [num_gt_boxes, d_1, d_2, ... d_k].
""" """
matched_anchor_indices = match.matched_column_indices() return match.gather_based_on_match(
unmatched_ignored_anchor_indices = (match. groundtruth_labels,
unmatched_or_ignored_column_indices()) unmatched_value=self._unmatched_cls_target,
matched_gt_indices = match.matched_row_indices() ignored_value=self._unmatched_cls_target)
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
def _create_regression_weights(self, match, groundtruth_weights):
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
"""Set regression weight for each anchor. """Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this Only positive anchors are set to contribute to the regression loss, so this
...@@ -275,18 +279,18 @@ class TargetAssigner(object): ...@@ -275,18 +279,18 @@ class TargetAssigner(object):
Args: Args:
match: a matcher.Match object that provides a matching between anchors match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes. and groundtruth boxes.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box.
Returns: Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing a float32 tensor with shape [num_anchors] representing regression weights.
regression weights
""" """
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32) return match.gather_based_on_match(
return reg_weights groundtruth_weights, ignored_value=0., unmatched_value=0.)
def _create_classification_weights(self, def _create_classification_weights(self,
match, match,
positive_class_weight=1.0, groundtruth_weights):
negative_class_weight=1.0):
"""Create classification weights for each anchor. """Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of Positive (matched) anchors are associated with a weight of
...@@ -299,25 +303,23 @@ class TargetAssigner(object): ...@@ -299,25 +303,23 @@ class TargetAssigner(object):
Args: Args:
match: a matcher.Match object that provides a matching between anchors match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes. and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors groundtruth_weights: a float tensor of shape [M] indicating the weight to
negative_class_weight: weight to be associated to negative anchors assign to all anchors match to a particular groundtruth box.
Returns: Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing a float32 tensor with shape [num_anchors] representing classification
classification weights. weights.
""" """
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32) return match.gather_based_on_match(
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32) groundtruth_weights,
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator ignored_value=0.,
cls_weights = (positive_class_weight * matched_indicator unmatched_value=self._negative_class_weight)
+ negative_class_weight * unmatched_indicator)
return cls_weights
def get_box_coder(self): def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner. """Get BoxCoder of this TargetAssigner.
Returns: Returns:
BoxCoder: BoxCoder object. BoxCoder object.
""" """
return self._box_coder return self._box_coder
...@@ -325,7 +327,6 @@ class TargetAssigner(object): ...@@ -325,7 +327,6 @@ class TargetAssigner(object):
# TODO: This method pulls in all the implementation dependencies into # TODO: This method pulls in all the implementation dependencies into
# core. Therefore its best to have this factory method outside of core. # core. Therefore its best to have this factory method outside of core.
def create_target_assigner(reference, stage=None, def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None): unmatched_cls_target=None):
"""Factory function for creating standard target assigners. """Factory function for creating standard target assigners.
...@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None, ...@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
Args: Args:
reference: string referencing the type of TargetAssigner. reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}. stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0) anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
...@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None, ...@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
raise ValueError('No valid combination of reference and stage.') raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder, return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight, negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
...@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None, ...@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
def batch_assign_targets(target_assigner, def batch_assign_targets(target_assigner,
anchors_batch, anchors_batch,
gt_box_batch, gt_box_batch,
gt_class_targets_batch): gt_class_targets_batch,
gt_weights_batch=None):
"""Batched assignment of classification and regression targets. """Batched assignment of classification and regression targets.
Args: Args:
...@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner, ...@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
each tensor has shape [num_gt_boxes_i, classification_target_size] and each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch. gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
Returns: Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors, batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner, ...@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
reg_targets_list = [] reg_targets_list = []
reg_weights_list = [] reg_weights_list = []
match_list = [] match_list = []
for anchors, gt_boxes, gt_class_targets in zip( if gt_weights_batch is None:
anchors_batch, gt_box_batch, gt_class_targets_batch): gt_weights_batch = [None] * len(gt_class_targets_batch)
for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
(cls_targets, cls_weights, reg_targets, (cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign( reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets) anchors, gt_boxes, gt_class_targets, gt_weights)
cls_targets_list.append(cls_targets) cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights) cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets) reg_targets_list.append(reg_targets)
......
...@@ -17,135 +17,238 @@ ...@@ -17,135 +17,238 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.box_coders import mean_stddev_box_coder from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import region_similarity_calculator from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as targetassigner from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher from object_detection.matchers import bipartite_matcher
from object_detection.utils import test_case
class TargetAssignerTest(tf.test.TestCase): class TargetAssignerTest(test_case.TestCase):
def test_assign_agnostic(self): def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
matcher = bipartite_matcher.GreedyBipartiteMatcher() similarity_calc = region_similarity_calculator.IouSimilarity()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, unmatched_cls_target=None) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], similarity_calc, matcher, box_coder, unmatched_cls_target=None)
[0.5, 0.5, 1.0, 0.8], anchors_boxlist = box_list.BoxList(anchor_means)
[0, 0.5, .5, 1.0]]) anchors_boxlist.add_field('stddev', anchor_stddevs)
prior_stddevs = tf.constant(3 * [4 * [.1]]) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
priors = box_list.BoxList(prior_means) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
priors.add_field('stddev', prior_stddevs) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners)) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]] exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1] exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0], exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1], [0, 0, -1, 1],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0] exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
(cls_targets_out, cls_weights_out, reg_targets_out,
result = target_assigner.assign(priors, boxes, num_valid_rows=2) reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result groundtruth_box_corners])
self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets) def test_assign_class_agnostic_with_ignored_matches(self):
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU # Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means # of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0]. # That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected # Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0]. # classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners):
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5, similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_threshold=0.3) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() unmatched_threshold=0.3)
target_assigner = targetassigner.TargetAssigner( box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
similarity_calc, matcher, box_coder) target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], anchors_boxlist = box_list.BoxList(anchor_means)
[0.5, 0.5, 1.0, 0.8], anchors_boxlist.add_field('stddev', anchor_stddevs)
[0.0, 0.5, .9, 1.0]]) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
prior_stddevs = tf.constant(3 * [4 * [.1]]) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
priors = box_list.BoxList(prior_means) (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
priors.add_field('stddev', prior_stddevs) return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]] [0.5, 0.5, 1.0, 0.8],
boxes = box_list.BoxList(tf.constant(box_corners)) [0.0, 0.5, .9, 1.0]], dtype=np.float32)
anchor_stddevs = np.array(3 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
exp_cls_targets = [[1], [1], [0]] exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0] exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0], exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1], [0, 0, -1, 1],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0] exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes) groundtruth_box_corners])
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets) def test_assign_agnostic_with_keypoints(self):
self.assertAllClose(reg_weights_out, exp_reg_weights) def graph_fn(anchor_means, groundtruth_box_corners,
self.assertAllClose(matching_anchors_out, exp_matching_anchors) groundtruth_keypoints):
self.assertEquals(cls_targets_out.dtype, np.float32) similarity_calc = region_similarity_calculator.IouSimilarity()
self.assertEquals(cls_weights_out.dtype, np.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
self.assertEquals(reg_targets_out.dtype, np.float32) unmatched_threshold=0.5)
self.assertEquals(reg_weights_out.dtype, np.float32) box_coder = keypoint_box_coder.KeypointBoxCoder(
self.assertEquals(matching_anchors_out.dtype, np.int32) num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
groundtruth_keypoints)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, .9, 1.0]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.45, 0.45, 0.95, 0.95]],
dtype=np.float32)
groundtruth_keypoints = np.array(
[[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
-11, -7],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means,
groundtruth_box_corners,
groundtruth_keypoints])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
def graph_fn(anchor_means, groundtruth_box_corners,
groundtruth_keypoints):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
anchors_boxlist = box_list.BoxList(anchor_means)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
groundtruth_keypoints)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 1.0],
[0.0, 0.5, .9, 1.0]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.45, 0.45, 0.95, 0.95]],
dtype=np.float32)
groundtruth_keypoints = np.array(
[[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
[[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
dtype=np.float32)
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
-5],
[-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
-11, -7],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
(cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means,
groundtruth_box_corners,
groundtruth_keypoints])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
def test_assign_multiclass(self): def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], similarity_calc, matcher, box_coder,
[0.5, 0.5, 1.0, 0.8], unmatched_cls_target=unmatched_cls_target)
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]]) anchors_boxlist = box_list.BoxList(anchor_means)
prior_stddevs = tf.constant(4 * [4 * [.1]]) anchors_boxlist.add_field('stddev', anchor_stddevs)
priors = box_list.BoxList(prior_means) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
priors.add_field('stddev', prior_stddevs) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels)
box_corners = [[0.0, 0.0, 0.5, 0.5], (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
[0.5, 0.5, 0.9, 0.9], return (cls_targets, cls_weights, reg_targets, reg_weights)
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners)) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], [0, 0.5, .5, 1.0],
[0, 0, 0, 0, 0, 1, 0], [.75, 0, 1.0, .25]], dtype=np.float32)
[0, 0, 0, 1, 0, 0, 0]], tf.float32) anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0], exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0],
...@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, -.5, .2]] [0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1] exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], (cls_targets_out, cls_weights_out, reg_targets_out,
[0.5, 0.5, 1.0, 0.8], reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
[0, 0.5, .5, 1.0], groundtruth_box_corners,
[.75, 0, 1.0, .25]]) groundtruth_labels])
prior_stddevs = tf.constant(4 * [4 * [.1]]) self.assertAllClose(cls_targets_out, exp_cls_targets)
priors = box_list.BoxList(prior_means) self.assertAllClose(cls_weights_out, exp_cls_weights)
priors.add_field('stddev', prior_stddevs) self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
box_corners = [[0.0, 0.0, 0.5, 0.5], self.assertEquals(cls_targets_out.dtype, np.float32)
[0.5, 0.5, 0.9, 0.9], self.assertEquals(cls_weights_out.dtype, np.float32)
[.75, 0, .95, .27]] self.assertEquals(reg_targets_out.dtype, np.float32)
boxes = box_list.BoxList(tf.constant(box_corners)) self.assertEquals(reg_weights_out.dtype, np.float32)
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], def test_assign_multiclass_with_groundtruth_weights(self):
[0, 0, 0, 0, 0, 1, 0], def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
[0, 0, 0, 1, 0, 0, 0]], tf.float32) groundtruth_labels, groundtruth_weights):
similarity_calc = region_similarity_calculator.IouSimilarity()
exp_cls_weights = [1, 1, .5, 1] matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
result = target_assigner.assign(priors, boxes, groundtruth_labels, unmatched_threshold=0.5)
num_valid_rows=3) box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
(_, cls_weights, _, _, _) = result unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
with self.test_session() as sess: target_assigner = targetassigner.TargetAssigner(
cls_weights_out = sess.run(cls_weights) similarity_calc, matcher, box_coder,
self.assertAllClose(cls_weights_out, exp_cls_weights) unmatched_cls_target=unmatched_cls_target)
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
groundtruth_labels,
groundtruth_weights)
(_, cls_weights, _, reg_weights, _) = result
return (cls_weights, reg_weights)
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)
exp_cls_weights = [0.3, 0., 1, 0.5] # background class gets weight of 1.
exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0.
(cls_weights_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_box_corners,
groundtruth_labels,
groundtruth_weights])
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_assign_multidimensional_class_targets(self): def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target)
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], target_assigner = targetassigner.TargetAssigner(
[0.5, 0.5, 1.0, 0.8], similarity_calc, matcher, box_coder,
[0, 0.5, .5, 1.0], unmatched_cls_target=unmatched_cls_target)
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]]) anchors_boxlist = box_list.BoxList(anchor_means)
priors = box_list.BoxList(prior_means) anchors_boxlist.add_field('stddev', anchor_stddevs)
priors.add_field('stddev', prior_stddevs) groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
box_corners = [[0.0, 0.0, 0.5, 0.5], groundtruth_labels)
[0.5, 0.5, 0.9, 0.9], (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
[.75, 0, .95, .27]] return (cls_targets, cls_weights, reg_targets, reg_weights)
boxes = box_list.BoxList(tf.constant(box_corners))
anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
groundtruth_labels = tf.constant([[[0, 1], [1, 0]], [0.5, 0.5, 1.0, 0.8],
[[1, 0], [0, 1]], [0, 0.5, .5, 1.0],
[[0, 1], [1, .5]]], tf.float32) [.75, 0, 1.0, .25]], dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]], dtype=np.float32)
groundtruth_labels = np.array([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], np.float32)
exp_cls_targets = [[[0, 1], [1, 0]], exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]], [[1, 0], [0, 1]],
...@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, -.5, .2]] [0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1] exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes, groundtruth_labels, groundtruth_box_corners,
num_valid_rows=3) groundtruth_labels])
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_weights_out, exp_reg_weights)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertEquals(cls_targets_out.dtype, np.float32)
match.matched_column_indices()]) self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_empty_groundtruth(self): def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
matcher = bipartite_matcher.GreedyBipartiteMatcher() groundtruth_labels):
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() similarity_calc = region_similarity_calculator.IouSimilarity()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32) matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
target_assigner = targetassigner.TargetAssigner( unmatched_threshold=0.5)
similarity_calc, matcher, box_coder, box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
anchors_boxlist = box_list.BoxList(anchor_means)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], anchors_boxlist.add_field('stddev', anchor_stddevs)
[0.5, 0.5, 1.0, 0.8], groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
[0, 0.5, .5, 1.0], target_assigner = targetassigner.TargetAssigner(
[.75, 0, 1.0, .25]]) similarity_calc, matcher, box_coder,
prior_stddevs = tf.constant(4 * [4 * [.1]]) unmatched_cls_target=unmatched_cls_target)
priors = box_list.BoxList(prior_means) result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
priors.add_field('stddev', prior_stddevs) groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]]) return (cls_targets, cls_weights, reg_targets, reg_weights)
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners) groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
groundtruth_labels = np.zeros((0, 3), dtype=np.float32)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32) anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3]) [0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]],
dtype=np.float32)
anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
exp_cls_targets = [[0, 0, 0], exp_cls_targets = [[0, 0, 0],
[0, 0, 0], [0, 0, 0],
[0, 0, 0], [0, 0, 0],
...@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0]] [0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0] exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = [] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
result = target_assigner.assign(priors, boxes, groundtruth_labels) groundtruth_box_corners,
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result groundtruth_labels])
with self.test_session() as sess: self.assertAllClose(cls_targets_out, exp_cls_targets)
(cls_targets_out, cls_weights_out, self.assertAllClose(cls_weights_out, exp_cls_weights)
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run( self.assertAllClose(reg_targets_out, exp_reg_targets)
[cls_targets, cls_weights, reg_targets, reg_weights, self.assertAllClose(reg_weights_out, exp_reg_weights)
match.matched_column_indices()]) self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertAllClose(cls_targets_out, exp_cls_targets) self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertAllClose(cls_weights_out, exp_cls_weights) self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self): def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
...@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0], groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32) [0, 0, 0, 1, 0, 0, 0]], tf.float32)
result = target_assigner.assign(priors, boxes, groundtruth_labels, with self.assertRaisesRegexp(ValueError, 'Unequal shapes'):
num_valid_rows=3) target_assigner.assign(priors, boxes, groundtruth_labels,
(cls_targets, cls_weights, reg_targets, reg_weights, _) = result num_valid_rows=3)
with self.test_session() as sess:
with self.assertRaisesWithPredicateMatch(
tf.errors.InvalidArgumentError,
'Groundtruth boxes and labels have incompatible shapes!'):
sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
def test_raises_error_on_invalid_groundtruth_labels(self): def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
...@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
[0.5, 0.5, 0.9, 0.9], [0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]] [.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners)) boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32) groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
...@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase): ...@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
num_valid_rows=3) num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase): class BatchTargetAssignerTest(test_case.TestCase):
def _get_agnostic_target_assigner(self): def _get_agnostic_target_assigner(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None) unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes): def _get_multi_class_target_assigner(self, num_classes):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32) unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions): def _get_multi_dimensional_target_assigner(self, target_dimensions):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity() similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher() matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.5)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder() box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant(np.zeros(target_dimensions), unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
tf.float32) tf.float32)
return targetassigner.TargetAssigner( return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target) unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self): def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
box_list2 = box_list.BoxList(tf.constant( groundtruth_boxlist2):
[[0, 0.25123152, 1, 1], box_list1 = box_list.BoxList(groundtruth_boxlist1)
[0.015789, 0.0985, 0.55789, 0.3842]] box_list2 = box_list.BoxList(groundtruth_boxlist2)
)) gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
gt_box_batch = [box_list1, box_list2] anchors_boxlist = box_list.BoxList(anchor_means)
gt_class_targets = [None, None] anchors_boxlist.add_field('stddev', anchor_stddevs)
agnostic_target_assigner = self._get_agnostic_target_assigner()
prior_means = tf.constant([[0, 0, .25, .25], (cls_targets, cls_weights, reg_targets, reg_weights,
[0, .25, 1, 1], _) = targetassigner.batch_assign_targets(
[0, .1, .5, .5], agnostic_target_assigner, anchors_boxlist, gt_box_batch,
[.75, .75, 1, 1]]) gt_class_targets)
prior_stddevs = tf.constant([[.1, .1, .1, .1], return (cls_targets, cls_weights, reg_targets, reg_weights)
[.1, .1, .1, .1],
[.1, .1, .1, .1], groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
[.1, .1, .1, .1]]) groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
priors = box_list.BoxList(prior_means) [0.015789, 0.0985, 0.55789, 0.3842]],
priors.add_field('stddev', prior_stddevs) dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
...@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[[0], [1], [1], [0]]] [[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
groundtruth_boxlist1,
groundtruth_boxlist2])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32) def test_batch_assign_multiclass_targets(self):
class_targets2 = tf.constant([[0, 0, 0, 1], def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
[0, 0, 1, 0]], tf.float32) groundtruth_boxlist2, class_targets1, class_targets2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]],
dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
gt_class_targets = [class_targets1, class_targets2] exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]],
[[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
prior_means = tf.constant([[0, 0, .25, .25], (cls_targets_out, cls_weights_out, reg_targets_out,
[0, .25, 1, 1], reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
[0, .1, .5, .5], groundtruth_boxlist1,
[.75, .75, 1, 1]]) groundtruth_boxlist2,
prior_stddevs = tf.constant([[.1, .1, .1, .1], class_targets1,
[.1, .1, .1, .1], class_targets2])
[.1, .1, .1, .1], self.assertAllClose(cls_targets_out, exp_cls_targets)
[.1, .1, .1, .1]]) self.assertAllClose(cls_weights_out, exp_cls_weights)
priors = box_list.BoxList(prior_means) self.assertAllClose(reg_targets_out, exp_reg_targets)
priors.add_field('stddev', prior_stddevs) self.assertAllClose(reg_weights_out, exp_reg_weights)
def test_batch_assign_multiclass_targets_with_padded_groundtruth(self):
def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2,
groundtruth_weights1, groundtruth_weights2):
box_list1 = box_list.BoxList(groundtruth_boxlist1)
box_list2 = box_list.BoxList(groundtruth_boxlist2)
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [class_targets1, class_targets2]
gt_weights = [groundtruth_weights1, groundtruth_weights2]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist, gt_box_batch,
gt_class_targets, gt_weights)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
[0., 0., 0., 0.]], dtype=np.float32)
groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842],
[0, 0, 0, 0]],
dtype=np.float32)
groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
class_targets2 = np.array([[0, 0, 0, 1],
[0, 0, 1, 0],
[0, 0, 0, 0]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 0, 0, 0,],
[0, 0, 0, 0]], [0, 0, 0, 0,],],
[[0, 0, 0, 0], [[0, 0, 0, 0,],
[0, 0.01231521, 0, 0], [0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987], [0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
...@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[1, 0, 0, 0]]] [1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
multiclass_target_assigner = self._get_multi_class_target_assigner( groundtruth_boxlist1,
num_classes=3) groundtruth_boxlist2,
class_targets1,
(cls_targets, cls_weights, reg_targets, reg_weights, class_targets2,
match_list) = targetassigner.batch_assign_targets( groundtruth_weights1,
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) groundtruth_weights2])
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) self.assertAllClose(cls_targets_out, exp_cls_targets)
with self.test_session() as sess: self.assertAllClose(cls_weights_out, exp_cls_weights)
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out, self.assertAllClose(reg_targets_out, exp_reg_targets)
match_out_0, match_out_1) = sess.run([ self.assertAllClose(reg_weights_out, exp_reg_weights)
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multidimensional_targets(self): def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]])) def graph_fn(anchor_means, anchor_stddevs, groundtruth_boxlist1,
groundtruth_boxlist2, class_targets1, class_targets2):
box_list2 = box_list.BoxList(tf.constant( box_list1 = box_list.BoxList(groundtruth_boxlist1)
[[0, 0.25123152, 1, 1], box_list2 = box_list.BoxList(groundtruth_boxlist2)
[0.015789, 0.0985, 0.55789, 0.3842]] gt_box_batch = [box_list1, box_list2]
)) gt_class_targets = [class_targets1, class_targets2]
anchors_boxlist = box_list.BoxList(anchor_means)
gt_box_batch = [box_list1, box_list2] anchors_boxlist.add_field('stddev', anchor_stddevs)
class_targets1 = tf.constant([[[0, 1, 1], multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
[1, 1, 0]]], tf.float32) target_dimensions=(2, 3))
class_targets2 = tf.constant([[[0, 1, 1], (cls_targets, cls_weights, reg_targets, reg_weights,
[1, 1, 0]], _) = targetassigner.batch_assign_targets(
[[0, 0, 1], multiclass_target_assigner, anchors_boxlist, gt_box_batch,
[0, 0, 1]]], tf.float32) gt_class_targets)
return (cls_targets, cls_weights, reg_targets, reg_weights)
gt_class_targets = [class_targets1, class_targets2]
groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
prior_means = tf.constant([[0, 0, .25, .25], groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
[0, .25, 1, 1], [0.015789, 0.0985, 0.55789, 0.3842]],
[0, .1, .5, .5], dtype=np.float32)
[.75, .75, 1, 1]]) class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
prior_stddevs = tf.constant([[.1, .1, .1, .1], class_targets2 = np.array([[0, 0, 0, 1],
[.1, .1, .1, .1], [0, 0, 1, 0]], dtype=np.float32)
[.1, .1, .1, .1], class_targets1 = np.array([[[0, 1, 1],
[.1, .1, .1, .1]]) [1, 1, 0]]], dtype=np.float32)
priors = box_list.BoxList(prior_means) class_targets2 = np.array([[[0, 1, 1],
priors.add_field('stddev', prior_stddevs) [1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, -0.5, -0.5], exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 0, 0, 0,],
[0, 0, 0, 0]], [0, 0, 0, 0,],],
[[0, 0, 0, 0], [[0, 0, 0, 0,],
[0, 0.01231521, 0, 0], [0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987], [0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1], exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]] [1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.], exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]], [1., 1., 0.]],
[[0., 0., 0.], [[0., 0., 0.],
...@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase): ...@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[0., 0., 0.]]]] [0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0], exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]] [0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2] (cls_targets_out, cls_weights_out, reg_targets_out,
reg_weights_out) = self.execute(graph_fn, [anchor_means, anchor_stddevs,
multiclass_target_assigner = self._get_multi_dimensional_target_assigner( groundtruth_boxlist1,
target_dimensions=(2, 3)) groundtruth_boxlist2,
class_targets1,
(cls_targets, cls_weights, reg_targets, reg_weights, class_targets2])
match_list) = targetassigner.batch_assign_targets( self.assertAllClose(cls_targets_out, exp_cls_targets)
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets) self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2) self.assertAllClose(reg_targets_out, exp_reg_targets)
with self.test_session() as sess: self.assertAllClose(reg_weights_out, exp_reg_weights)
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self): def test_batch_assign_empty_groundtruth(self):
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets):
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
gt_box_batch = [groundtruth_boxlist]
gt_class_targets_batch = [gt_class_targets]
anchors_boxlist = box_list.BoxList(anchor_means)
anchors_boxlist.add_field('stddev', anchor_stddevs)
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
_) = targetassigner.batch_assign_targets(
multiclass_target_assigner, anchors_boxlist,
gt_box_batch, gt_class_targets_batch)
return (cls_targets, cls_weights, reg_targets, reg_weights)
groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
anchor_means = np.array([[0, 0, .25, .25],
[0, .25, 1, 1]], dtype=np.float32)
anchor_stddevs = np.array([[.1, .1, .1, .1],
[.1, .1, .1, .1]], dtype=np.float32)
exp_reg_targets = [[[0, 0, 0, 0], exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]] [0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]] exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0], exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]] [1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]] exp_reg_weights = [[0, 0]]
exp_match_0 = []
num_classes = 3 num_classes = 3
pad = 1 pad = 1
gt_class_targets = tf.zeros((0, num_classes + pad)) gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32)
gt_class_targets_batch = [gt_class_targets]
(cls_targets_out, cls_weights_out, reg_targets_out,
multiclass_target_assigner = self._get_multi_class_target_assigner( reg_weights_out) = self.execute(
num_classes=3) graph_fn, [anchor_means, anchor_stddevs, groundtruth_box_corners,
gt_class_targets])
(cls_targets, cls_weights, reg_targets, reg_weights, self.assertAllClose(cls_targets_out, exp_cls_targets)
match_list) = targetassigner.batch_assign_targets( self.assertAllClose(cls_weights_out, exp_cls_weights)
multiclass_target_assigner, priors, self.assertAllClose(reg_targets_out, exp_reg_targets)
gt_box_batch, gt_class_targets_batch) self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase): class CreateTargetAssignerTest(tf.test.TestCase):
......
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pet_label_map.pbtxt",
])
...@@ -12,9 +12,10 @@ py_library( ...@@ -12,9 +12,10 @@ py_library(
srcs = ["tf_example_decoder.py"], srcs = ["tf_example_decoder.py"],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder", "//tensorflow/models/research/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -24,6 +25,7 @@ py_test( ...@@ -24,6 +25,7 @@ py_test(
deps = [ deps = [
":tf_example_decoder", ":tf_example_decoder",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
], ],
) )
...@@ -22,6 +22,7 @@ import tensorflow as tf ...@@ -22,6 +22,7 @@ import tensorflow as tf
from object_detection.core import data_decoder from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util from object_detection.utils import label_map_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder slim_example_decoder = tf.contrib.slim.tfexample_decoder
...@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
def __init__(self, def __init__(self,
load_instance_masks=False, load_instance_masks=False,
instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
label_map_proto_file=None, label_map_proto_file=None,
use_display_name=False): use_display_name=False):
"""Constructor sets keys_to_features and items_to_handlers. """Constructor sets keys_to_features and items_to_handlers.
Args: Args:
load_instance_masks: whether or not to load and handle instance masks. load_instance_masks: whether or not to load and handle instance masks.
instance_mask_type: type of instance masks. Options are provided in
input_reader.proto. This is only used if `load_instance_masks` is True.
label_map_proto_file: a file path to a label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. If provided, then the object_detection.protos.StringIntLabelMap proto. If provided, then the
mapped IDs of 'image/object/class/text' will take precedence over the mapped IDs of 'image/object/class/text' will take precedence over the
...@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
use_display_name: whether or not to use the `display_name` for label use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is mapping (instead of `name`). Only used if label_map_proto_file is
provided. provided.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
""" """
self.keys_to_features = { self.keys_to_features = {
'image/encoded': 'image/encoded':
...@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.VarLenFeature(tf.int64), tf.VarLenFeature(tf.int64),
'image/object/group_of': 'image/object/group_of':
tf.VarLenFeature(tf.int64), tf.VarLenFeature(tf.int64),
'image/object/weight':
tf.VarLenFeature(tf.float32),
} }
self.items_to_handlers = { self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image( fields.InputDataFields.image: slim_example_decoder.Image(
...@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields.InputDataFields.groundtruth_difficult: ( fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')), slim_example_decoder.Tensor('image/object/difficult')),
fields.InputDataFields.groundtruth_group_of: ( fields.InputDataFields.groundtruth_group_of: (
slim_example_decoder.Tensor('image/object/group_of')) slim_example_decoder.Tensor('image/object/group_of')),
fields.InputDataFields.groundtruth_weights: (
slim_example_decoder.Tensor('image/object/weight')),
} }
if load_instance_masks: if load_instance_masks:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.float32) if instance_mask_type in (input_reader_pb2.DEFAULT,
self.items_to_handlers[ input_reader_pb2.NUMERICAL_MASKS):
fields.InputDataFields.groundtruth_instance_masks] = ( self.keys_to_features['image/object/mask'] = (
slim_example_decoder.ItemHandlerCallback( tf.VarLenFeature(tf.float32))
['image/object/mask', 'image/height', 'image/width'], self.items_to_handlers[
self._reshape_instance_masks)) fields.InputDataFields.groundtruth_instance_masks] = (
# TODO: Add label_handler that decodes from 'image/object/class/text' slim_example_decoder.ItemHandlerCallback(
# primarily after the recent tf.contrib.slim changes make into a release ['image/object/mask', 'image/height', 'image/width'],
# supported by cloudml. self._reshape_instance_masks))
label_handler = slim_example_decoder.Tensor('image/object/class/label') elif instance_mask_type == input_reader_pb2.PNG_MASKS:
self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
self.items_to_handlers[
fields.InputDataFields.groundtruth_instance_masks] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/mask', 'image/height', 'image/width'],
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if label_map_proto_file:
label_map = label_map_util.get_label_map_dict(label_map_proto_file,
use_display_name)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(label_map.keys())),
values=tf.constant(list(label_map.values()), dtype=tf.int64)),
default_value=-1)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
label_handler = slim_example_decoder.BackupHandler(
slim_example_decoder.LookupTensor(
'image/object/class/text', table, default_value=''),
slim_example_decoder.Tensor('image/object/class/label'))
else:
label_handler = slim_example_decoder.Tensor('image/object/class/label')
self.items_to_handlers[ self.items_to_handlers[
fields.InputDataFields.groundtruth_classes] = label_handler fields.InputDataFields.groundtruth_classes] = label_handler
...@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None, 4] containing box corners. [None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes. [None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared. [None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd. [None] indicating if the boxes enclose a crowd.
Optional: Optional:
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances. [None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
[None] indicating if the boxes represent `group_of` instances. [None] indicating if the boxes represent `group_of` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
shape [None, None, None] containing instance masks. shape [None, None, None] containing instance masks.
""" """
serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
...@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd = fields.InputDataFields.groundtruth_is_crowd is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
def default_groundtruth_weights():
return tf.ones(
[tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
dtype=tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
tf.greater(
tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
default_groundtruth_weights)
return tensor_dict return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors): def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks. """Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height, The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory. width].
Args: Args:
keys_to_tensors: a dictionary from keys to tensors. keys_to_tensors: a dictionary from keys to tensors.
...@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
masks = tf.sparse_tensor_to_dense(masks) masks = tf.sparse_tensor_to_dense(masks)
masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape) masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
return tf.cast(masks, tf.float32) return tf.cast(masks, tf.float32)
def _decode_png_instance_masks(self, keys_to_tensors):
"""Decode PNG instance segmentation masks and stack into dense tensor.
The instance segmentation masks are reshaped to [num_instances, height,
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float tensor of shape [num_instances, height, width] with values
in {0, 1}.
"""
def decode_png_mask(image_buffer):
image = tf.squeeze(
tf.image.decode_image(image_buffer, channels=1), axis=2)
image.set_shape([None, None])
image = tf.to_float(tf.greater(image, 0))
return image
png_masks = keys_to_tensors['image/object/mask']
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
if isinstance(png_masks, tf.SparseTensor):
png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')
return tf.cond(
tf.greater(tf.size(png_masks), 0),
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.to_int32(tf.stack([0, height, width]))))
...@@ -21,6 +21,7 @@ import tensorflow as tf ...@@ -21,6 +21,7 @@ import tensorflow as tf
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
class TfExampleDecoderTest(tf.test.TestCase): class TfExampleDecoderTest(tf.test.TestCase):
...@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self): def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg) decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self): def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg), 'image/encoded': self._BytesFeature(encoded_jpeg),
...@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename]) self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self): def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png') encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png') decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image]) self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id]) self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodePngInstanceMasks(self):
image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
encoded_masks = [encoded_png_1, encoded_png_2]
decoded_masks = np.stack([decoded_png_1, decoded_png_2])
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks)
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
decoded_masks,
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
def testDecodeEmptyPngInstanceMasks(self):
image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
encoded_masks = []
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks),
'image/height': self._Int64Feature([10]),
'image/width': self._Int64Feature([10]),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
[0, 10, 10])
def testDecodeBoundingBox(self): def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0] bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0] bbox_xmins = [1.0, 5.0]
...@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_ymaxs, bbox_xmaxs]).transpose() bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes, self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes]) tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
def testDecodeDefaultGroundtruthWeights(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
np.ones(2, dtype=np.float32))
def testDecodeObjectLabel(self): def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1] bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(bbox_classes, self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes]) tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeObjectLabelNoText(self):
    """Numeric 'image/object/class/label' is used even when a map is given."""
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Only numeric labels; no 'image/object/class/text' feature is written.
    bbox_classes = [1, 2]
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': self._BytesFeature(encoded_jpeg),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/label': self._Int64Feature(bbox_classes),
    })).SerializeToString()
    label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    # Statically-unknown number of objects.
    self.assertAllEqual((tensor_dict[
        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
                        [None])
    # The label-map lookup table must be initialized before evaluation.
    init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      tensor_dict = sess.run(tensor_dict)
      # Numeric labels pass through unchanged.
      self.assertAllEqual(bbox_classes,
                          tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelUnrecognizedName(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes_text = ['cat', 'cheetah']
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
'image/format':
self._BytesFeature('jpeg'),
'image/object/class/text':
self._BytesFeature(bbox_classes_text),
})).SerializeToString()
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
example_decoder = tf_example_decoder.TfExampleDecoder(
label_map_proto_file=label_map_path)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [None])
with self.test_session() as sess:
sess.run(tf.tables_initializer())
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([2, -1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeObjectLabelWithMapping(self):
    """Class-name strings are mapped to ids via the supplied label map."""
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Only textual class names; the decoder must resolve them to ids.
    bbox_classes_text = ['cat', 'dog']
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    self._BytesFeature(encoded_jpeg),
                'image/format':
                    self._BytesFeature('jpeg'),
                'image/object/class/text':
                    self._BytesFeature(bbox_classes_text),
            })).SerializeToString()
    # Ids are intentionally non-sequential to prove real lookup happens.
    label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                         .get_shape().as_list()), [None])
    with self.test_session() as sess:
      # Initialize the hash table backing the name -> id lookup.
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)
      # 'cat' -> 3, 'dog' -> 1 per the label map above.
      self.assertAllEqual([3, 1],
                          tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self): def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.] object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[fields.InputDataFields.groundtruth_area]) tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self): def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1] object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_is_crowd]) fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self): def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1] object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={ example = tf.train.Example(features=tf.train.Features(feature={
...@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_difficult]) fields.InputDataFields.groundtruth_difficult])
def testDecodeObjectGroupOf(self): def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
object_group_of = [0, 1] object_group_of = [0, 1]
example = tf.train.Example(features=tf.train.Features( example = tf.train.Example(features=tf.train.Features(
...@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
[bool(item) for item in object_group_of], [bool(item) for item in object_group_of],
tensor_dict[fields.InputDataFields.groundtruth_group_of]) tensor_dict[fields.InputDataFields.groundtruth_group_of])
def testDecodeObjectWeight(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_weights = [0.75, 1.0]
example = tf.train.Example(features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/weight': self._FloatFeature(object_weights),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
def testDecodeInstanceSegmentation(self): def testDecodeInstanceSegmentation(self):
num_instances = 4 num_instances = 4
image_height = 5 image_height = 5
image_width = 3 image_width = 3
# Randomly generate image. # Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height, image_tensor = np.random.randint(256, size=(image_height,
image_width, image_width,
3)).astype(np.uint8) 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
...@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_height = 5 image_height = 5
image_width = 3 image_width = 3
# Randomly generate image. # Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height, image_tensor = np.random.randint(256, size=(image_height,
image_width, image_width,
3)).astype(np.uint8) 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
......
# Tensorflow Object Detection API: main runnables. # Tensorflow Object Detection API: dataset tools.
package( package(
default_visibility = ["//visibility:public"], default_visibility = ["//visibility:public"],
...@@ -8,18 +8,43 @@ licenses(["notice"]) ...@@ -8,18 +8,43 @@ licenses(["notice"])
# Apache 2.0 # Apache 2.0
# Converter from raw COCO annotation JSON to TFRecord; depends on
# pycocotools for mask decoding and PIL for image handling.
py_binary(
    name = "create_coco_tf_record",
    srcs = [
        "create_coco_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//pycocotools",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)

# Unit tests for the COCO converter above.
py_test(
    name = "create_coco_tf_record_test",
    srcs = [
        "create_coco_tf_record_test.py",
    ],
    deps = [
        ":create_coco_tf_record",
        "//tensorflow",
    ],
)
py_binary( py_binary(
name = "create_kitti_tf_record", name = "create_kitti_tf_record",
srcs = [ srcs = [
"create_kitti_tf_record.py", "create_kitti_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:np_box_ops", "//tensorflow/models/research/object_detection/utils:np_box_ops",
], ],
) )
...@@ -40,11 +65,11 @@ py_binary( ...@@ -40,11 +65,11 @@ py_binary(
"create_pascal_tf_record.py", "create_pascal_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -65,11 +90,11 @@ py_binary( ...@@ -65,11 +90,11 @@ py_binary(
"create_pet_tf_record.py", "create_pet_tf_record.py",
], ],
deps = [ deps = [
"//third_party/py/PIL:pil", "//PIL:pil",
"//third_party/py/lxml", "//lxml",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
...@@ -78,8 +103,8 @@ py_library( ...@@ -78,8 +103,8 @@ py_library(
srcs = ["oid_tfrecord_creation.py"], srcs = ["oid_tfrecord_creation.py"],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields", "//tensorflow/models/research/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:dataset_util", "//tensorflow/models/research/object_detection/utils:dataset_util",
], ],
) )
...@@ -88,9 +113,9 @@ py_test( ...@@ -88,9 +113,9 @@ py_test(
srcs = ["oid_tfrecord_creation_test.py"], srcs = ["oid_tfrecord_creation_test.py"],
deps = [ deps = [
":oid_tfrecord_creation", ":oid_tfrecord_creation",
"//third_party/py/contextlib2", "//contextlib2",
"//third_party/py/pandas", "//pandas",
"//third_party/py/tensorflow", "//tensorflow",
], ],
) )
...@@ -99,9 +124,9 @@ py_binary( ...@@ -99,9 +124,9 @@ py_binary(
srcs = ["create_oid_tf_record.py"], srcs = ["create_oid_tf_record.py"],
deps = [ deps = [
":oid_tfrecord_creation", ":oid_tfrecord_creation",
"//third_party/py/contextlib2", "//contextlib2",
"//third_party/py/pandas", "//pandas",
"//tensorflow", "//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util", "//tensorflow/models/research/object_detection/utils:label_map_util",
], ],
) )
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import os
import numpy as np
import PIL.Image
from pycocotools import mask
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags for the converter.
# NOTE(review): flags are defined via `tf.flags` while the alias below is
# `tf.app.flags` — presumably the same flags module in TF1; confirm.
flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False,
                        'Whether to include instance segmentations masks '
                        '(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '',
                       'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '',
                       'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '',
                       'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
                       'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
                       'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
                       'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  # Open (without binding the result) so PIL sniffs the header and raises on
  # unreadable image data.  Deliberately not assigned: the original bound this
  # to `image`, shadowing the metadata dict parameter above.
  PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes ...
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    # ... and boxes that extend beyond the image frame.
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    # Convert COCO [x, y, w, h] absolute coords to normalized corners.
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      # COCO polygons / RLE -> binary mask -> PNG bytes.
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Non-crowd polygon segmentations decode with one channel per
        # polygon; merge them into a single instance mask.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    # Group annotations by image id so each image's records can be looked up
    # in O(1) while writing.
    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    # Images without annotations (e.g. test-dev) still get an (empty) entry so
    # every image is written.
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
    writer.close()
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
def main(_):
  """Converts the train/val/test-dev COCO splits to TFRecord files."""
  # NOTE(review): `assert` is stripped under `python -O`; flag validation via
  # exceptions would be more robust, kept as-is here.
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'
  assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
  assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
  assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)
  train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')

  # One output record file per split.
  _create_tf_record_from_coco_annotations(
      FLAGS.train_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
      FLAGS.include_masks)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import io
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Tests for create_coco_tf_record.create_tf_example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    # list() materializes the repeated field directly; the original used an
    # identity list comprehension.
    self.assertListEqual(list(proto_field), expectation)

  def test_create_tf_example(self):
    """A single valid annotation converts with normalized box corners."""
    image_file_name = 'tmp_image.jpg'
    # NOTE(review): float64 data handed to PIL.Image.fromarray with mode
    # 'RGB'; recent Pillow versions reject this — confirm against pinned PIL.
    image_data = np.random.rand(256, 256, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
        2: {
            'name': 'cat',
            'id': 2
        },
        3: {
            'name': 'human',
            'id': 3
        }
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    # bbox [64, 64, 128, 128] on a 256x256 image -> corners at 0.25 / 0.75.
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])

  def test_create_tf_example_with_instance_masks(self):
    """include_masks=True emits one PNG-encoded mask per annotation."""
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(8, 8, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }

    # Two triangular polygons in opposite corners of the 8x8 image.
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    # Round-trip the emitted PNG masks and compare against the rasterized
    # union of the two polygons.
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
    ]
    pil_masks = [
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertEqual(len(pil_masks), 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])


if __name__ == '__main__':
  tf.test.main()
...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, ...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and # Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region. # don't overlap with a dontcare region.
# TODO(talremez) filter out targets that are truncated or heavily occluded. # TODO filter out targets that are truncated or heavily occluded.
annotation_for_image = filter_annotations(img_anno, classes_to_use) annotation_for_image = filter_annotations(img_anno, classes_to_use)
example = prepare_example(image_path, annotation_for_image, label_map_dict) example = prepare_example(image_path, annotation_for_image, label_map_dict)
......
...@@ -24,7 +24,7 @@ import tensorflow as tf ...@@ -24,7 +24,7 @@ import tensorflow as tf
from object_detection.dataset_tools import create_kitti_tf_record from object_detection.dataset_tools import create_kitti_tf_record
class DictToTFExampleTest(tf.test.TestCase): class CreateKittiTFRecordTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation): def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value. """Helper function to assert if a proto field equals some value.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment