Commit 27b4acd4 authored by Aman Gupta

Merge remote-tracking branch 'upstream/master'

parents 5133522f d4e1f97f
@@ -93,8 +93,7 @@ class TargetAssigner(object):
              groundtruth_boxes,
              groundtruth_labels=None,
              unmatched_class_label=None,
-             groundtruth_weights=None,
-             **params):
+             groundtruth_weights=None):
    """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
@@ -121,9 +120,11 @@ class TargetAssigner(object):
        If set to None, unmatched_cls_target is set to be [0] for each anchor.
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors match to a particular groundtruth box. The weights
-        must be in [0., 1.]. If None, all weights are set to 1.
-      **params: Additional keyword arguments for specific implementations of
-        the Matcher.
+        must be in [0., 1.]. If None, all weights are set to 1. Generally no
+        groundtruth boxes with zero weight match to any anchors as matchers are
+        aware of groundtruth weights. Additionally, `cls_weights` and
+        `reg_weights` are calculated using groundtruth weights as an added
+        safety.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
@@ -177,7 +178,8 @@ class TargetAssigner(object):
        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
-      match = self._matcher.match(match_quality_matrix, **params)
+      match = self._matcher.match(match_quality_matrix,
+                                  valid_rows=tf.greater(groundtruth_weights, 0))
      reg_targets = self._create_regression_targets(anchors,
                                                    groundtruth_boxes,
                                                    match)
...
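In place of the removed `**params` pass-through, the matcher now receives an explicit `valid_rows` mask derived from `groundtruth_weights`, so zero-weight groundtruth boxes can never produce matches. A minimal sketch of the idea, using made-up toy values and a simplified argmax matcher rather than the library's `Matcher` classes:

import tensorflow as tf

# Toy [num_groundtruth, num_anchors] match quality matrix (e.g. IOU scores);
# all values here are invented for illustration.
match_quality = tf.constant([[0.9, 0.1],
                             [0.8, 0.7],   # this box gets weight 0 below
                             [0.2, 0.6]])
groundtruth_weights = tf.constant([1.0, 0.0, 0.5])

# The new call site computes valid_rows exactly like this.
valid_rows = tf.greater(groundtruth_weights, 0)

# One way a matcher can honor valid_rows: clamp invalid rows to -1 so they
# never win the per-anchor argmax.
masked = tf.where(tf.tile(tf.expand_dims(valid_rows, 1), [1, 2]),
                  match_quality, -tf.ones_like(match_quality))
matches = tf.argmax(masked, axis=0)

with tf.Session() as sess:
  print(sess.run(matches))  # [0 2]: the zero-weight box (row 1) never matches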
@@ -495,8 +495,7 @@ class TargetAssignerTest(test_case.TestCase):
          priors,
          boxes,
          groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

  def test_raises_error_on_invalid_groundtruth_labels(self):
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
@@ -520,8 +519,7 @@ class TargetAssignerTest(test_case.TestCase):
          priors,
          boxes,
          groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

class BatchTargetAssignerTest(test_case.TestCase):
...
This source diff could not be displayed because it is too large.
@@ -19,9 +19,6 @@ protos for object detection.
 """
 import tensorflow as tf

-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
@@ -30,14 +27,12 @@ from object_detection.utils import label_map_util
 slim_example_decoder = tf.contrib.slim.tfexample_decoder

-# TODO(lzc): keep LookupTensor and BackupHandler in sync with
-# tf.contrib.slim.tfexample_decoder version.
-class LookupTensor(slim_example_decoder.Tensor):
-  """An ItemHandler that returns a parsed Tensor, the result of a lookup."""
+class _ClassTensorHandler(slim_example_decoder.Tensor):
+  """An ItemHandler to fetch class ids from class text."""

   def __init__(self,
                tensor_key,
-               table,
+               label_map_proto_file,
                shape_keys=None,
                shape=None,
                default_value=''):
@@ -47,7 +42,8 @@ class LookupTensor(slim_example_decoder.Tensor):
     Args:
       tensor_key: the name of the `TFExample` feature to read the tensor from.
-      table: A tf.lookup table.
+      label_map_proto_file: File path to a text format LabelMapProto message
+        mapping class text to id.
       shape_keys: Optional name or list of names of the TF-Example feature in
         which the tensor shape is stored. If a list, then each corresponds to
         one dimension of the shape.
@@ -59,16 +55,39 @@ class LookupTensor(slim_example_decoder.Tensor):
     Raises:
       ValueError: if both `shape_keys` and `shape` are specified.
     """
-    self._table = table
-    super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
-                                       default_value)
+    name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=False)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(name_to_id.keys())),
+            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    display_name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=True)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    display_name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(display_name_to_id.keys())),
+            values=tf.constant(
+                list(display_name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    self._name_to_id_table = name_to_id_table
+    self._display_name_to_id_table = display_name_to_id_table
+    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
+                                              default_value)

   def tensors_to_item(self, keys_to_tensors):
-    unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
-    return self._table.lookup(unmapped_tensor)
+    unmapped_tensor = super(_ClassTensorHandler,
+                            self).tensors_to_item(keys_to_tensors)
+    return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
+                      self._display_name_to_id_table.lookup(unmapped_tensor))


-class BackupHandler(slim_example_decoder.ItemHandler):
+class _BackupHandler(slim_example_decoder.ItemHandler):
   """An ItemHandler that tries two ItemHandlers in order."""

   def __init__(self, handler, backup):
@@ -92,12 +111,12 @@ class BackupHandler(slim_example_decoder.ItemHandler):
         'Backup handler is of type %s instead of ItemHandler' % type(backup))
     self._handler = handler
     self._backup = backup
-    super(BackupHandler, self).__init__(handler.keys + backup.keys)
+    super(_BackupHandler, self).__init__(handler.keys + backup.keys)

   def tensors_to_item(self, keys_to_tensors):
     item = self._handler.tensors_to_item(keys_to_tensors)
-    return control_flow_ops.cond(
-        pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
+    return tf.cond(
+        pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
         true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
         false_fn=lambda: item)
@@ -140,6 +159,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
         input_reader_pb2.PNG_MASKS.
     """
+    # TODO(rathodv): delete unused `use_display_name` argument once we change
+    # other decoders to handle label maps similarly.
+    del use_display_name
     self.keys_to_features = {
         'image/encoded':
             tf.FixedLenFeature((), tf.string, default_value=''),
@@ -267,27 +289,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     else:
       raise ValueError('Did not recognize the `instance_mask_type` option.')
     if label_map_proto_file:
-      label_map = label_map_util.get_label_map_dict(label_map_proto_file,
-                                                    use_display_name)
-      # We use a default_value of -1, but we expect all labels to be contained
-      # in the label map.
-      table = tf.contrib.lookup.HashTable(
-          initializer=tf.contrib.lookup.KeyValueTensorInitializer(
-              keys=tf.constant(list(label_map.keys())),
-              values=tf.constant(list(label_map.values()), dtype=tf.int64)),
-          default_value=-1)
       # If the label_map_proto is provided, try to use it in conjunction with
       # the class text, and fall back to a materialized ID.
-      # TODO(lzc): note that here we are using BackupHandler defined in this
-      # file(which is branching slim_example_decoder.BackupHandler). Need to
-      # switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
-      # more popular.
-      label_handler = BackupHandler(
-          LookupTensor('image/object/class/text', table, default_value=''),
+      label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              'image/object/class/text', label_map_proto_file,
+              default_value=''),
          slim_example_decoder.Tensor('image/object/class/label'))
-      image_label_handler = BackupHandler(
-          LookupTensor(
-              fields.TfExampleFields.image_class_text, table, default_value=''),
+      image_label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              fields.TfExampleFields.image_class_text,
+              label_map_proto_file,
+              default_value=''),
           slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
     else:
       label_handler = slim_example_decoder.Tensor('image/object/class/label')
@@ -309,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       A dictionary of the following tensors.
       fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
         containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
       fields.InputDataFields.source_id - string tensor containing original
         image id.
       fields.InputDataFields.key - string tensor with unique sha256 hash key.
@@ -352,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       is_crowd = fields.InputDataFields.groundtruth_is_crowd
       tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
       tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+      tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+          tensor_dict[fields.InputDataFields.image])[:2]
       tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
           tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
...
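`_ClassTensorHandler` above builds two lookup tables so `image/object/class/text` can be resolved whether a label map entry carries the string under `name` or `display_name`. Because both tables default to -1 on a miss, an elementwise `tf.maximum` of the two lookups picks out whichever table matched. A standalone sketch of that trick, with invented label values:

import tensorflow as tf

# Hypothetical label map: 'cat' is a `name` entry, 'kitty' a `display_name`
# entry; both tables return -1 for unknown keys.
name_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['cat']), values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)
display_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['kitty']), values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)

class_text = tf.constant(['cat', 'kitty', 'dog'])
# Elementwise maximum resolves whichever lookup succeeded.
class_ids = tf.maximum(name_table.lookup(class_text),
                       display_table.lookup(class_text))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(class_ids))  # [ 3  3 -1]  ('dog' is not in the map)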
@@ -12,24 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Tests for object_detection.data_decoders.tf_example_decoder."""

 import os
 import numpy as np
 import tensorflow as tf

-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
 from object_detection.core import standard_fields as fields
 from object_detection.data_decoders import tf_example_decoder
 from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util

 slim_example_decoder = tf.contrib.slim.tfexample_decoder
@@ -56,25 +49,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
       raise ValueError('Invalid encoding type.')
     return image_decoded

-  def _Int64Feature(self, value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-  def _FloatFeature(self, value):
-    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-  def _BytesFeature(self, value):
-    if isinstance(value, list):
-      return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-  def _Int64FeatureFromList(self, ndarray):
-    return feature_pb2.Feature(
-        int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
-
-  def _BytesFeatureFromList(self, ndarray):
-    values = ndarray.flatten().tolist()
-    return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
-
   def testDecodeAdditionalChannels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -88,14 +62,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/additional_channels/encoded':
-                    self._BytesFeatureFromList(
-                        np.array([encoded_additional_channel] * 2)),
+                    dataset_util.bytes_list_feature(
+                        [encoded_additional_channel] * 2),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/source_id':
-                    self._BytesFeature('image_id'),
+                    dataset_util.bytes_feature('image_id'),
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -108,118 +82,44 @@ class TfExampleDecoderTest(tf.test.TestCase):
           np.concatenate([decoded_additional_channel] * 2, axis=2),
           tensor_dict[fields.InputDataFields.image_additional_channels])

-  def testDecodeExampleWithBranchedBackupHandler(self):
-    example1 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 900]))
-            }))
-    example2 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    example3 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 901]))
-            }))
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    keys_to_features = {
-        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
-    }
-    backup_handler = tf_example_decoder.BackupHandler(
-        handler=slim_example_decoder.Tensor('image/object/class/label'),
-        backup=tf_example_decoder.LookupTensor('image/object/class/text',
-                                               table))
-    items_to_handlers = {
-        'labels': backup_handler,
-    }
-    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                    items_to_handlers)
-    obtained_class_ids_each_example = []
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      for example in [example1, example2, example3]:
-        serialized_example = array_ops.reshape(
-            example.SerializeToString(), shape=[])
-        obtained_class_ids_each_example.append(
-            decoder.decode(serialized_example)[0].eval())
-
-    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
-    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
-    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
-
-  def testDecodeExampleWithBranchedLookup(self):
-    example = example_pb2.Example(features=feature_pb2.Features(feature={
-        'image/object/class/text': self._BytesFeatureFromList(
-            np.array(['cat', 'dog', 'guinea pig'])),
-    }))
-    serialized_example = example.SerializeToString()
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      serialized_example = array_ops.reshape(serialized_example, shape=[])
-      keys_to_features = {
-          'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-      }
-      items_to_handlers = {
-          'labels':
-              tf_example_decoder.LookupTensor('image/object/class/text', table),
-      }
-      decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                      items_to_handlers)
-      obtained_class_ids = decoder.decode(serialized_example)[0].eval()
-
-    self.assertAllClose([2, 0, 1], obtained_class_ids)
-
   def testDecodeJpegImage(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     decoded_jpeg = self._DecodeImage(encoded_jpeg)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/source_id': self._BytesFeature('image_id'),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/source_id': dataset_util.bytes_feature('image_id'),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodeImageKeyAndFilename(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/key/sha256': self._BytesFeature('abc'),
-        'image/filename': self._BytesFeature('filename')
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/key/sha256': dataset_util.bytes_feature('abc'),
+                'image/filename': dataset_util.bytes_feature('filename')
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -234,21 +134,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
     decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_png),
-        'image/format': self._BytesFeature('png'),
-        'image/source_id': self._BytesFeature('image_id')
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_png),
+                'image/format': dataset_util.bytes_feature('png'),
+                'image/source_id': dataset_util.bytes_feature('image_id')
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodePngInstanceMasks(self):
@@ -265,9 +172,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks)
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks)
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -288,11 +198,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks),
-                'image/height': self._Int64Feature([10]),
-                'image/width': self._Int64Feature([10]),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks),
+                'image/height':
+                    dataset_util.int64_feature(10),
+                'image/width':
+                    dataset_util.int64_feature(10),
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -312,25 +227,33 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
-                                bbox_ymaxs, bbox_xmaxs]).transpose()
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
@@ -346,30 +269,40 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmaxs = [3.0, 7.0]
     keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-        'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
-        'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                'image/object/keypoint/y':
+                    dataset_util.float_list_feature(keypoint_ys),
+                'image/object/keypoint/x':
+                    dataset_util.float_list_feature(keypoint_xs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
-    self.assertAllEqual((tensor_dict[fields.InputDataFields.
-                         groundtruth_keypoints].
-                         get_shape().as_list()), [2, 3, 2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
+         .as_list()), [2, 3, 2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
-                                bbox_ymaxs, bbox_xmaxs]).transpose()
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
@@ -377,9 +310,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     expected_keypoints = (
         np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
-    self.assertAllEqual(expected_keypoints,
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_keypoints])
+    self.assertAllEqual(
+        expected_keypoints,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoints])

   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -388,20 +321,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -414,18 +355,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/class/label': self._Int64Feature(bbox_classes),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -437,11 +382,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [1, 2]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/class/label': self._Int64Feature(bbox_classes),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     label_map_string = """
       item {
        id:1
@@ -460,9 +410,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
         label_map_proto_file=label_map_path)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
     init = tf.tables_initializer()
     with self.test_session() as sess:
@@ -480,11 +429,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()

     label_map_string = """
@@ -514,7 +463,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual([2, -1],
                         tensor_dict[fields.InputDataFields.groundtruth_classes])

-  def testDecodeObjectLabelWithMapping(self):
+  def testDecodeObjectLabelWithMappingWithDisplayName(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes_text = ['cat', 'dog']
@@ -522,11 +471,53 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
+            })).SerializeToString()
+
+    label_map_string = """
+      item {
+        id:3
+        display_name:'cat'
+      }
+      item {
+        id:1
+        display_name:'dog'
+      }
+    """
+    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_path, 'wb') as f:
+      f.write(label_map_string)
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        label_map_proto_file=label_map_path)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
+
+    with self.test_session() as sess:
+      sess.run(tf.tables_initializer())
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual([3, 1],
+                        tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+  def testDecodeObjectLabelWithMappingWithName(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_classes_text = ['cat', 'dog']
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/text':
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()

     label_map_string = """
@@ -561,17 +552,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_area = [100., 174.]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/area': self._FloatFeature(object_area),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/area':
+                    dataset_util.float_list_feature(object_area),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
-                         get_shape().as_list()), [2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
+                         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -583,67 +579,81 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_is_crowd = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/is_crowd': self._Int64Feature(object_is_crowd),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/is_crowd':
+                    dataset_util.int64_list_feature(object_is_crowd),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual([bool(item) for item in object_is_crowd],
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_is_crowd])
+    self.assertAllEqual(
+        [bool(item) for item in object_is_crowd],
+        tensor_dict[fields.InputDataFields.groundtruth_is_crowd])

   @test_util.enable_c_shapes
   def testDecodeObjectDifficult(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_difficult = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/difficult': self._Int64Feature(object_difficult),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/difficult':
+                    dataset_util.int64_list_feature(object_difficult),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual([bool(item) for item in object_difficult],
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_difficult])
+    self.assertAllEqual(
+        [bool(item) for item in object_difficult],
+        tensor_dict[fields.InputDataFields.groundtruth_difficult])

   @test_util.enable_c_shapes
   def testDecodeObjectGroupOf(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_group_of = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(
-        feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/group_of': self._Int64Feature(object_group_of),
-        })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/group_of':
+                    dataset_util.int64_list_feature(object_group_of),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -655,25 +665,27 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_weights = [0.75, 1.0]
-    example = tf.train.Example(features=tf.train.Features(
-        feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/weight': self._FloatFeature(object_weights),
-        })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/weight':
+                    dataset_util.float_list_feature(object_weights),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
+                         .get_shape().as_list()), [None])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual(
-        object_weights,
-        tensor_dict[fields.InputDataFields.groundtruth_weights])
+    self.assertAllEqual(object_weights,
+                        tensor_dict[fields.InputDataFields.groundtruth_weights])

   @test_util.enable_c_shapes
   def testDecodeInstanceSegmentation(self):
@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_width = 3

     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)

     # Randomly generate instance segmentation masks.
     instance_masks = (
-        np.random.randint(2, size=(num_instances,
-                                   image_height,
+        np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -698,25 +708,32 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)

-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
         load_instance_masks=True)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
-        get_shape().as_list()), [4, 5, 3])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+         .get_shape().as_list()), [4, 5, 3])

-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_classes].
-        get_shape().as_list()), [4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [4])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -724,24 +741,21 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual(
         instance_masks.astype(np.float32),
         tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
-    self.assertAllEqual(
-        object_classes,
-        tensor_dict[fields.InputDataFields.groundtruth_classes])
+    self.assertAllEqual(object_classes,
+                        tensor_dict[fields.InputDataFields.groundtruth_classes])

   def testInstancesNotAvailableByDefault(self):
     num_instances = 4
     image_height = 5
     image_width = 3
     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)

     # Randomly generate instance segmentation masks.
     instance_masks = (
-        np.random.randint(2, size=(num_instances,
-                                   image_height,
+        np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -749,18 +763,26 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)

-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
-                    not in tensor_dict)
+    self.assertTrue(
+        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -768,9 +790,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/label': self._Int64Feature([1, 2]),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/class/label': dataset_util.int64_list_feature([1, 2]),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -784,9 +806,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/text': self._BytesFeature(['dog', 'cat']),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/class/text':
+                    dataset_util.bytes_list_feature(['dog', 'cat']),
             })).SerializeToString()
     label_map_string = """
       item {
...
...@@ -177,8 +177,8 @@ def create_tf_example(image,
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/text':
          dataset_util.bytes_list_feature(category_names),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
......
...@@ -106,6 +106,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        ['cat'])

  def test_create_tf_example_with_instance_masks(self):
    image_file_name = 'tmp_image.jpg'
...@@ -169,6 +172,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        ['dog'])
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
......
...@@ -14,7 +14,6 @@
# ==============================================================================
"""Common utility functions for evaluation."""
import collections
import os
import time
...@@ -53,15 +52,15 @@ def write_metrics(metrics, global_step, summary_dir):
    global_step: Global step at which the metrics are computed.
    summary_dir: Directory to write tensorflow summaries to.
  """
  tf.logging.info('Writing metrics to tf summary.')
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  for key in sorted(metrics):
    summary = tf.Summary(value=[
        tf.Summary.Value(tag=key, simple_value=metrics[key]),
    ])
    summary_writer.add_summary(summary, global_step)
    tf.logging.info('%s: %f', key, metrics[key])
  tf.logging.info('Metrics written to tf summary.')
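For reference, a minimal usage sketch of `write_metrics`; the metric names, step, and directory here are hypothetical:

```python
# Hypothetical metrics; any {name: scalar} mapping works.
metrics = {'DetectionBoxes_Precision/mAP': 0.31, 'Loss/total_loss': 1.7}
write_metrics(metrics, global_step=20000, summary_dir='/tmp/eval_summaries')
```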
# TODO(rathodv): Add tests.
...@@ -141,7 +140,7 @@ def visualize_detection_results(result_dict,
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')
  tf.logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  image = np.squeeze(result_dict[input_fields.original_image], axis=0)
...@@ -205,7 +204,8 @@ def visualize_detection_results(result_dict,
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  summary_writer.add_summary(summary, global_step)
  tf.logging.info('Detection visualizations written to summary with tag %s.',
                  tag)
def _run_checkpoint_once(tensor_dict,
...@@ -218,7 +218,8 @@ def _run_checkpoint_once(tensor_dict,
                         master='',
                         save_graph=False,
                         save_graph_dir='',
                         losses_dict=None,
                         eval_export_path=None):
  """Evaluates metrics defined in evaluators and returns summaries.

  This function loads the latest checkpoint in checkpoint_dirs and evaluates
...@@ -258,6 +259,8 @@ def _run_checkpoint_once(tensor_dict,
    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a JSON file that contains the detection
      results.

  Returns:
    global_step: the count of global steps.
...@@ -292,7 +295,8 @@ def _run_checkpoint_once(tensor_dict,
    try:
      for batch in range(int(num_batches)):
        if (batch + 1) % 100 == 0:
          tf.logging.info('Running eval ops batch %d/%d', batch + 1,
                          num_batches)
        if not batch_processor:
          try:
            if not losses_dict:
...@@ -301,7 +305,7 @@ def _run_checkpoint_once(tensor_dict,
                losses_dict])
            counters['success'] += 1
          except tf.errors.InvalidArgumentError:
            tf.logging.info('Skipping image')
            counters['skipped'] += 1
            result_dict = {}
        else:
...@@ -316,18 +320,31 @@ def _run_checkpoint_once(tensor_dict,
          # decoders to return correct image_id.
          # TODO(akuznetsa): result_dict contains batches of images, while
          # add_single_ground_truth_image_info expects a single image. Fix
          if (isinstance(result_dict, dict) and
              result_dict[fields.InputDataFields.key]):
            image_id = result_dict[fields.InputDataFields.key]
          else:
            image_id = batch
          evaluator.add_single_ground_truth_image_info(
              image_id=image_id, groundtruth_dict=result_dict)
          evaluator.add_single_detected_image_info(
              image_id=image_id, detections_dict=result_dict)
      tf.logging.info('Running eval batches done.')
    except tf.errors.OutOfRangeError:
      tf.logging.info('Done evaluating -- epoch limit reached')
    finally:
      # When done, ask the threads to stop.
      tf.logging.info('# success: %d', counters['success'])
      tf.logging.info('# skipped: %d', counters['skipped'])
      all_evaluator_metrics = {}
      if eval_export_path and eval_export_path is not None:
        for evaluator in evaluators:
          if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
              isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
            tf.logging.info('Started dumping to json file.')
            evaluator.dump_detections_to_json_file(
                json_output_path=eval_export_path)
            tf.logging.info('Finished dumping to json file.')
      for evaluator in evaluators:
        metrics = evaluator.evaluate()
        evaluator.clear()
...@@ -356,7 +373,8 @@ def repeated_checkpoint_run(tensor_dict,
                            master='',
                            save_graph=False,
                            save_graph_dir='',
                            losses_dict=None,
                            eval_export_path=None):
  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

  This function repeatedly loads a checkpoint and evaluates a desired
...@@ -397,6 +415,8 @@ def repeated_checkpoint_run(tensor_dict,
    save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a JSON file that contains the detection
      results.

  Returns:
    metrics: A dictionary containing metric names and values in the latest
...@@ -417,31 +437,36 @@ def repeated_checkpoint_run(tensor_dict,
  number_of_evaluations = 0
  while True:
    start = time.time()
    tf.logging.info('Starting evaluation at ' + time.strftime(
        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
    if not model_path:
      tf.logging.info('No model found in %s. Will try again in %d seconds',
                      checkpoint_dirs[0], eval_interval_secs)
    elif model_path == last_evaluated_model_path:
      tf.logging.info('Found already evaluated checkpoint. Will try again in '
                      '%d seconds', eval_interval_secs)
    else:
      last_evaluated_model_path = model_path
      global_step, metrics = _run_checkpoint_once(
          tensor_dict,
          evaluators,
          batch_processor,
          checkpoint_dirs,
          variables_to_restore,
          restore_fn,
          num_batches,
          master,
          save_graph,
          save_graph_dir,
          losses_dict=losses_dict,
          eval_export_path=eval_export_path)
      write_metrics(metrics, global_step, summary_dir)
      number_of_evaluations += 1

      if (max_number_of_evaluations and
          number_of_evaluations >= max_number_of_evaluations):
        tf.logging.info('Finished evaluation!')
        break
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
...@@ -680,4 +705,3 @@ def evaluator_options_from_eval_config(eval_config):
          eval_config.include_metrics_per_category)
  }
  return evaluator_options
...@@ -2,13 +2,12 @@
We provide a collection of detection models pre-trained on the [COCO
dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
the [Open Images dataset](https://github.com/openimages/dataset), the
[AVA v2.1 dataset](https://research.google.com/ava/) and the
[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
These models can be useful for out-of-the-box inference if you are interested in
categories already in those datasets. They are also useful for initializing your
models when training on novel datasets.

In the table below, we list each such pre-trained model including:
...@@ -113,6 +112,13 @@ Model name
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
## iNaturalist Species-trained models
Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
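A model from the tables above can be fetched and unpacked as follows (a minimal sketch using only the standard library; the URL is taken from the iNaturalist table, and each tarball is expected to contain a frozen inference graph and checkpoint files):

```python
import tarfile
import urllib.request

MODEL = 'faster_rcnn_resnet101_fgvc_2018_07_19'
URL = ('http://download.tensorflow.org/models/object_detection/'
       '%s.tar.gz' % MODEL)

# Download the archive and extract it into the current directory.
urllib.request.urlretrieve(URL, MODEL + '.tar.gz')
with tarfile.open(MODEL + '.tar.gz') as tar:
  tar.extractall()
```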
## AVA v2.1 trained models
......
...@@ -37,12 +37,12 @@ A local training job can be run with the following command:
PIPELINE_CONFIG_PATH={path to pipeline config file}
MODEL_DIR={path to model directory}
NUM_TRAIN_STEPS=50000
SAMPLE_1_OF_N_EVAL_EXAMPLES=1
python object_detection/model_main.py \
    --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
    --model_dir=${MODEL_DIR} \
    --num_train_steps=${NUM_TRAIN_STEPS} \
    --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
    --alsologtostderr
```
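Here `SAMPLE_1_OF_N_EVAL_EXAMPLES=1` evaluates every example in the eval set; a value of `n` samples one of every n eval examples, trading evaluation coverage for speed.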
......
...@@ -216,7 +216,7 @@ To start training and evaluation, execute the following command from the
```bash
# From tensorflow/models/research/
gcloud ml-engine jobs submit training `whoami`_object_detection_pets_`date +%m_%d_%Y_%H_%M_%S` \
    --runtime-version 1.8 \
    --job-dir=gs://${YOUR_GCS_BUCKET}/model_dir \
    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
    --module-name object_detection.model_main \
......
...@@ -52,7 +52,8 @@ def transform_input_data(tensor_dict,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_bfloat16=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
...@@ -86,6 +87,7 @@ def transform_input_data(tensor_dict,
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
...@@ -101,7 +103,8 @@ def transform_input_data(tensor_dict,
  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
...@@ -111,6 +114,9 @@ def transform_input_data(tensor_dict,
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.to_float(image), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
...@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.sparse_to_dense(
            zero_indexed_groundtruth_classes,
            [num_classes],
            groundtruth_confidences,
            validate_indices=False))
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  return tensor_dict
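As a worked illustration of the confidences branch above (hypothetical values): `tf.sparse_to_dense` scatters each box's confidence into its class slot of a length-`num_classes` vector, and `validate_indices=False` tolerates the unsorted class indices.

```python
import tensorflow as tf

# Hypothetical example: 3 classes, two boxes labeled [2, 0] with
# per-box confidences [0.8, 0.6].
zero_indexed_groundtruth_classes = tf.constant([2, 0], dtype=tf.int32)
groundtruth_confidences = tf.constant([0.8, 0.6], dtype=tf.float32)

dense_confidences = tf.sparse_to_dense(
    zero_indexed_groundtruth_classes, [3], groundtruth_confidences,
    validate_indices=False)

with tf.Session() as sess:
  print(sess.run(dense_confidences))  # [0.6 0.  0.8]
```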
...@@ -174,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
...@@ -183,6 +210,8 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
...@@ -198,11 +227,12 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
          max_num_boxes, num_classes + 1 if num_classes is not None else None
      ],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
...@@ -252,9 +282,12 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                           in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_label_scores = (fields.InputDataFields.groundtruth_confidences in
                          tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_scores=include_label_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
...@@ -275,6 +308,7 @@ def _get_labels_dict(input_dict):
    labels_dict[key] = input_dict[key]
  optional_label_keys = [
      fields.InputDataFields.groundtruth_confidences,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_area,
...@@ -291,16 +325,50 @@ def _get_labels_dict(input_dict):
  return labels_dict
def _replace_empty_string_with_random_number(string_tensor):
"""Returns string unchanged if non-empty, and random string tensor otherwise.
  The random string is an integer between 0 and 2**63 - 1, cast as a string.
Args:
string_tensor: A tf.tensor of dtype string.
Returns:
out_string: A tf.tensor of dtype string. If string_tensor contains the empty
      string, out_string will contain a random integer cast to a string.
Otherwise string_tensor is returned unchanged.
"""
empty_string = tf.constant('', dtype=tf.string, name='EmptyString')
random_source_id = tf.as_string(
tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64))
out_string = tf.cond(
tf.equal(string_tensor, empty_string),
true_fn=lambda: random_source_id,
false_fn=lambda: string_tensor)
return out_string
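A minimal sketch of this helper's behavior, using the TF 1.x session API and mirroring the tests further below:

```python
import tensorflow as tf

# Scalar string input, as fed by the tests.
string_ph = tf.placeholder(tf.string, shape=[])
out = _replace_empty_string_with_random_number(string_ph)

with tf.Session() as sess:
  # Non-empty strings pass through unchanged.
  print(sess.run(out, feed_dict={string_ph: 'image_42'}))  # b'image_42'
  # Empty strings are replaced by a random integer rendered as a string.
  print(sess.run(out, feed_dict={string_ph: ''}))  # e.g. b'2798129067578209328'
```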
def _get_features_dict(input_dict):
  """Extracts features dict from input dict."""
  source_id = _replace_empty_string_with_random_number(
      input_dict[fields.InputDataFields.source_id])
  hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
  features = {
      fields.InputDataFields.image:
          input_dict[fields.InputDataFields.image],
      HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
      fields.InputDataFields.true_image_shape:
          input_dict[fields.InputDataFields.true_image_shape],
      fields.InputDataFields.original_image_spatial_shape:
          input_dict[fields.InputDataFields.original_image_spatial_shape]
  }
  if fields.InputDataFields.original_image in input_dict:
    features[fields.InputDataFields.original_image] = input_dict[
...@@ -392,7 +460,8 @@ def create_train_input_fn(train_config, train_input_config,
        num_classes=config_util.get_number_of_classes(model_config),
        data_augmentation_fn=data_augmentation_fn,
        merge_multiple_boxes=train_config.merge_multiple_label_boxes,
        retain_original_image=train_config.retain_original_images,
        use_bfloat16=train_config.use_bfloat16)

    tensor_dict = pad_input_data_to_static_shapes(
        tensor_dict=transform_data_fn(tensor_dict),
...@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def create_eval_input_fn(eval_config, eval_input_config, model_config):
  """Creates an eval `input` function for `Estimator`.

  Args:
    eval_config: An eval_pb2.EvalConfig.
    eval_input_config: An input_reader_pb2.InputReader.
...@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
    return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))

  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
      eval_input_config,
      batch_size=params['batch_size'] if params else eval_config.batch_size,
      transform_input_data_fn=transform_and_pad_input_data_fn)
  return dataset
......
...@@ -20,6 +20,7 @@ from __future__ import print_function
import functools
import os

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
...@@ -28,6 +29,7 @@ from object_detection import inputs
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import test_case

FLAGS = tf.flags.FLAGS
...@@ -41,11 +43,13 @@ def _get_configs_for_model(model_name):
  data_path = os.path.join(tf.resource_loader.get_data_files_path(),
                           'test_data/pets_examples.record')
  configs = config_util.get_configs_from_pipeline_file(fname)
  override_dict = {
      'train_input_path': data_path,
      'eval_input_path': data_path,
      'label_map_path': label_map_path
  }
  return config_util.merge_external_params_with_configs(
      configs, kwargs_dict=override_dict)


def _make_initializable_iterator(dataset):
...@@ -62,7 +66,7 @@ def _make_initializable_iterator(dataset):
  return iterator


class InputsTest(test_case.TestCase, parameterized.TestCase):

  def test_faster_rcnn_resnet50_train_input(self):
    """Tests the training input function for FasterRcnnResnet50."""
...@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [1, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [1, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)
  @parameterized.parameters(
      {'eval_batch_size': 1},
      {'eval_batch_size': 8}
  )
  def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for FasterRcnnResnet50."""
    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
    model_config = configs['model']
    model_config.faster_rcnn.num_classes = 37
    eval_config = configs['eval_config']
    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
    self.assertAllEqual([eval_batch_size, None, None, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
        [eval_batch_size, None, None, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
    self.assertAllEqual([eval_batch_size],
                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [batch_size, 100, model_config.ssd.num_classes],
        labels[
            fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [batch_size, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)
  @parameterized.parameters(
      {'eval_batch_size': 1},
      {'eval_batch_size': 8}
  )
  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for SSDInceptionV2."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    model_config = configs['model']
    model_config.ssd.num_classes = 37
    eval_config = configs['eval_config']
    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
        [eval_batch_size, 300, 300, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
    self.assertAllEqual([eval_batch_size],
                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[
            fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...@@ -225,7 +269,7 @@ class InputsTest(tf.test.TestCase):
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...@@ -238,10 +282,10 @@ class InputsTest(tf.test.TestCase):
  def test_predict_input_with_additional_channels(self):
    """Tests the predict input function with additional channels."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    configs['eval_input_configs'][0].num_additional_channels = 2
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...@@ -291,7 +335,7 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['train_config'],  # Expecting `EvalConfig`.
        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['model'])
    with self.assertRaises(TypeError):
      eval_input_fn()
...@@ -313,13 +357,45 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['eval_config'],
        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['eval_config'])  # Expecting `DetectionModel`.
    with self.assertRaises(TypeError):
      eval_input_fn()
def test_output_equal_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
test_string = 'hello world'
feed_dict = {string_placeholder: test_string}
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
self.assertEqual(test_string, out_string)
def test_output_is_integer_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
empty_string = ''
feed_dict = {string_placeholder: empty_string}
tf.set_random_seed(0)
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
# Test whether out_string is a string which represents an integer.
int(out_string) # throws an error if out_string is not castable to int.
self.assertEqual(out_string, '2798129067578209328')
class DataAugmentationFnTest(test_case.TestCase):
  def test_apply_image_and_box_augmentation(self):
    data_augmentation_options = [
...@@ -352,6 +428,50 @@ class DataAugmentationFnTest(tf.test.TestCase):
        [[10, 10, 20, 20]]
    )
def test_apply_image_and_box_augmentation_with_scores(self):
data_augmentation_options = [
(preprocessor.resize_image, {
'new_height': 20,
'new_width': 20,
'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
}),
(preprocessor.scale_boxes_to_pixel_coordinates, {}),
]
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
tensor_dict = {
fields.InputDataFields.image:
tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
fields.InputDataFields.groundtruth_boxes:
tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([1.0], np.float32)),
fields.InputDataFields.groundtruth_confidences:
tf.constant(np.array([0.8], np.float32)),
}
augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
with self.test_session() as sess:
augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
self.assertAllEqual(
augmented_tensor_dict_out[fields.InputDataFields.image].shape,
[20, 20, 3]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
[[10, 10, 20, 20]]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
[1.0]
)
self.assertAllClose(
augmented_tensor_dict_out[
fields.InputDataFields.groundtruth_confidences],
[0.8]
)
  def test_include_masks_in_data_augmentation(self):
    data_augmentation_options = [
        (preprocessor.resize_image, {
...@@ -425,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
  return (image, mask, tf.shape(image))


class DataTransformationFnTest(test_case.TestCase):

  def test_combine_additional_channels_if_present(self):
    image = np.random.rand(4, 4, 3).astype(np.float32)
...@@ -476,6 +596,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[0, 0, 1], [1, 0, 0]])
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
        [[0, 0, 1], [1, 0, 0]])

  def test_returns_correct_merged_boxes(self):
    tensor_dict = {
...@@ -504,6 +627,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[1, 0, 1]])
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
        [[1, 0, 1]])

  def test_returns_resized_masks(self):
    tensor_dict = {
...@@ -512,8 +638,11 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_instance_masks:
            tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32)),
        fields.InputDataFields.original_image_spatial_shape:
            tf.constant(np.array([4, 4], np.int32))
    }

    def fake_image_resizer_fn(image, masks=None):
      resized_image = tf.image.resize_images(image, [8, 8])
      results = [resized_image]
...@@ -538,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].dtype, tf.uint8)
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image_spatial_shape], [4, 4])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].shape, [8, 8, 3])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
...@@ -550,6 +681,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def fake_model_preprocessor_fn(image):
      return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
...@@ -577,6 +709,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def add_one_data_augmentation_fn(tensor_dict):
      return {key: value + 1 for key, value in tensor_dict.items()}
...@@ -605,8 +738,10 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def mul_two_model_preprocessor_fn(image):
      return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))

    def add_five_to_image_data_augmentation_fn(tensor_dict):
      tensor_dict[fields.InputDataFields.image] += 5
      return tensor_dict
...@@ -626,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        (np_image + 5) * 2)


class PadInputDataToStaticShapesFnTest(test_case.TestCase):

  def test_pad_images_boxes_and_classes(self):
    input_tensor_dict = {
...@@ -636,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
            tf.placeholder(tf.float32, [None, 4]),
        fields.InputDataFields.groundtruth_classes:
            tf.placeholder(tf.int32, [None, 3]),
        fields.InputDataFields.true_image_shape:
            tf.placeholder(tf.int32, [3]),
        fields.InputDataFields.original_image_spatial_shape:
            tf.placeholder(tf.int32, [2])
    }
    padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
        tensor_dict=input_tensor_dict,
...@@ -650,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.true_image_shape]
        .shape.as_list(), [3])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
        .shape.as_list(), [2])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .shape.as_list(), [3, 4])
......
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 r"""Evaluation executable for detection models.

 This executable is used to evaluate DetectionModels. There are two ways of
@@ -54,29 +53,30 @@ from object_detection.legacy import evaluator
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util

 tf.logging.set_verbosity(tf.logging.INFO)

 flags = tf.app.flags
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
-                    'Directory containing checkpoints to evaluate, typically '
-                    'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '',
-                    'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
-                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
-                    'file. If provided, other configs are ignored')
+flags.DEFINE_string(
+    'checkpoint_dir', '',
+    'Directory containing checkpoints to evaluate, typically '
+    'set to `train_dir` used in the training job.')
+flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
+flags.DEFINE_string(
+    'pipeline_config_path', '',
+    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+    'file. If provided, other configs are ignored')
 flags.DEFINE_string('eval_config_path', '',
                     'Path to an eval_pb2.EvalConfig config file.')
 flags.DEFINE_string('input_config_path', '',
                     'Path to an input_reader_pb2.InputReader config file.')
 flags.DEFINE_string('model_config_path', '',
                     'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
-                     'evaluation. Overrides the `max_evals` parameter in the '
-                     'provided config.')
+flags.DEFINE_boolean(
+    'run_once', False, 'Option to only run a single pass of '
+    'evaluation. Overrides the `max_evals` parameter in the '
+    'provided config.')
 FLAGS = flags.FLAGS
@@ -88,9 +88,10 @@ def main(unused_argv):
   if FLAGS.pipeline_config_path:
     configs = config_util.get_configs_from_pipeline_file(
         FLAGS.pipeline_config_path)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
-                  overwrite=True)
+    tf.gfile.Copy(
+        FLAGS.pipeline_config_path,
+        os.path.join(FLAGS.eval_dir, 'pipeline.config'),
+        overwrite=True)
   else:
     configs = config_util.get_configs_from_multiple_files(
         model_config_path=FLAGS.model_config_path,
@@ -99,9 +100,7 @@ def main(unused_argv):
     for name, config in [('model.config', FLAGS.model_config_path),
                          ('eval.config', FLAGS.eval_config_path),
                          ('input.config', FLAGS.input_config_path)]:
-      tf.gfile.Copy(config,
-                    os.path.join(FLAGS.eval_dir, name),
-                    overwrite=True)
+      tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name), overwrite=True)

   model_config = configs['model']
   eval_config = configs['eval_config']
@@ -110,9 +109,7 @@ def main(unused_argv):
     input_config = configs['train_input_config']

   model_fn = functools.partial(
-      model_builder.build,
-      model_config=model_config,
-      is_training=False)
+      model_builder.build, model_config=model_config, is_training=False)

   def get_next(config):
     return dataset_builder.make_initializable_iterator(
@@ -120,10 +117,8 @@ def main(unused_argv):
   create_input_dict_fn = functools.partial(get_next, input_config)

-  label_map = label_map_util.load_labelmap(input_config.label_map_path)
-  max_num_classes = max([item.id for item in label_map.item])
-  categories = label_map_util.convert_label_map_to_categories(
-      label_map, max_num_classes)
+  categories = label_map_util.create_categories_from_labelmap(
+      input_config.label_map_path)

   if FLAGS.run_once:
     eval_config.max_evals = 1
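A hedged sketch of the new one-call helper, assuming the usual TF Object Detection label map format; the file path here is illustrative only:

# Loads the label map, derives max_num_classes from the largest item id, and
# returns category dicts of the form {'id': 1, 'name': 'person'}.
categories = label_map_util.create_categories_from_labelmap(
    'data/label_map.pbtxt')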
...
@@ -273,6 +273,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
       master=eval_config.eval_master,
       save_graph=eval_config.save_graph,
       save_graph_dir=(eval_dir if eval_config.save_graph else ''),
-      losses_dict=losses_dict)
+      losses_dict=losses_dict,
+      eval_export_path=eval_config.export_path)
   return metrics
@@ -99,17 +99,19 @@ class ArgMaxMatcher(matcher.Matcher):
       if self._unmatched_threshold == self._matched_threshold:
         raise ValueError('When negatives are in between matched and '
                          'unmatched thresholds, these cannot be of equal '
-                         'value. matched: %s, unmatched: %s',
-                         self._matched_threshold, self._unmatched_threshold)
+                         'value. matched: {}, unmatched: {}'.format(
+                             self._matched_threshold,
+                             self._unmatched_threshold))
     self._force_match_for_each_row = force_match_for_each_row
     self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

-  def _match(self, similarity_matrix):
+  def _match(self, similarity_matrix, valid_rows):
     """Tries to match each column of the similarity matrix to a row.

     Args:
       similarity_matrix: tensor of shape [N, M] representing any similarity
         metric.
+      valid_rows: a boolean tensor of shape [N] indicating valid rows.

     Returns:
       Match object with corresponding matches for each of M columns.
@@ -167,8 +169,10 @@ class ArgMaxMatcher(matcher.Matcher):
                                            similarity_matrix)
         force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                            output_type=tf.int32)
-        force_match_column_indicators = tf.one_hot(
-            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_column_indicators = (
+            tf.one_hot(
+                force_match_column_ids, depth=similarity_matrix_shape[1]) *
+            tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
         force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                         output_type=tf.int32)
         force_match_column_mask = tf.cast(
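A toy sketch (not part of this diff) of why multiplying the one-hot indicators by `valid_rows` keeps padded groundtruth rows from force-matching: an invalid row contributes an all-zero indicator row, so the per-column argmax that assigns forced matches can never land on it.

import tensorflow as tf

similarity = tf.constant([[3., 1.],
                          [5., 5.]])  # row 1 is padding
valid_rows = tf.constant([True, False])
ids = tf.argmax(similarity, 1, output_type=tf.int32)  # best column per row
indicators = (tf.one_hot(ids, depth=2) *
              tf.cast(tf.expand_dims(valid_rows, axis=-1), tf.float32))
# indicators == [[1., 0.], [0., 0.]]: the padded row is masked out.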
...
@@ -182,6 +182,34 @@ class ArgMaxMatcherTest(test_case.TestCase):
     self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
                         expected_unmatched_cols)

+  def test_return_correct_matches_using_force_match_padded_groundtruth(self):
+    def graph_fn(similarity, valid_rows):
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
+                                             unmatched_threshold=2.,
+                                             force_match_for_each_row=True)
+      match = matcher.match(similarity, valid_rows)
+      matched_cols = match.matched_column_indicator()
+      unmatched_cols = match.unmatched_column_indicator()
+      match_results = match.match_results
+      return (matched_cols, unmatched_cols, match_results)
+
+    similarity = np.array([[1, 1, 1, 3, 1],
+                           [-1, 0, -2, -2, -1],
+                           [0, 0, 0, 0, 0],
+                           [3, 0, -1, 2, 0],
+                           [0, 0, 0, 0, 0]], dtype=np.float32)
+    valid_rows = np.array([True, True, False, True, False])
+    expected_matched_cols = np.array([0, 1, 3])
+    expected_matched_rows = np.array([3, 1, 0])
+    expected_unmatched_cols = np.array([2, 4])  # col 2 has too high max val
+
+    (res_matched_cols, res_unmatched_cols,
+     match_results) = self.execute(graph_fn, [similarity, valid_rows])
+    self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
+    self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
+    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
+                        expected_unmatched_cols)
+
   def test_valid_arguments_corner_case(self):
     argmax_matcher.ArgMaxMatcher(matched_threshold=1,
                                  unmatched_threshold=1)
...
@@ -35,7 +35,7 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     super(GreedyBipartiteMatcher, self).__init__(
         use_matmul_gather=use_matmul_gather)

-  def _match(self, similarity_matrix, num_valid_rows=-1):
+  def _match(self, similarity_matrix, valid_rows):
     """Bipartite matches a collection rows and columns. A greedy bi-partite.

     TODO(rathodv): Add num_valid_columns options to match only that many columns
@@ -44,21 +44,27 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     Args:
       similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
         where higher values mean more similar.
-      num_valid_rows: A scalar or a 1-D tensor with one element describing the
-        number of valid rows of similarity_matrix to consider for the bipartite
-        matching. If set to be negative, then all rows from similarity_matrix
-        are used.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid.

     Returns:
       match_results: int32 tensor of shape [M] with match_results[i]=-1
         meaning that column i is not matched and otherwise that it is matched to
         row match_results[i].
     """
+    valid_row_sim_matrix = tf.gather(similarity_matrix,
+                                     tf.squeeze(tf.where(valid_rows), axis=-1))
+    invalid_row_sim_matrix = tf.gather(
+        similarity_matrix,
+        tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
+    similarity_matrix = tf.concat(
+        [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
     # Convert similarity matrix to distance matrix as tf.image.bipartite tries
     # to find minimum distance matches.
     distance_matrix = -1 * similarity_matrix
+    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
     _, match_results = image_ops.bipartite_match(
-        distance_matrix, num_valid_rows)
+        distance_matrix, num_valid_rows=num_valid_rows)
     match_results = tf.reshape(match_results, [-1])
     match_results = tf.cast(match_results, tf.int32)
     return match_results
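The gather/concat above exists because `image_ops.bipartite_match` only accepts a *count* of valid rows, so the valid rows must form a contiguous prefix. A hedged sketch of the reordering on a tiny matrix; note that the returned row indices then refer to the reordered matrix, which is what the new `..._at_bottom` test below expects:

import tensorflow as tf

valid_rows = tf.constant([False, True])
sim = tf.constant([[0.15, 0.2, 0.3],
                   [0.50, 0.1, 0.8]])
top = tf.gather(sim, tf.squeeze(tf.where(valid_rows), axis=-1))
bottom = tf.gather(sim, tf.squeeze(tf.where(tf.logical_not(valid_rows)),
                                   axis=-1))
reordered = tf.concat([top, bottom], axis=0)        # valid row is now row 0
num_valid = tf.reduce_sum(tf.to_float(valid_rows))  # scalar count == 1.0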
@@ -24,44 +24,54 @@ class GreedyBipartiteMatcherTest(tf.test.TestCase):
   def test_get_expected_matches_when_all_rows_are_valid(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 2
+    valid_rows = tf.ones([2], dtype=tf.bool)
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows=valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

-  def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
+  def test_get_expected_matches_with_all_rows_be_default(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = -1
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_no_matches_with_zero_valid_rows(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 0
+    valid_rows = tf.zeros([2], dtype=tf.bool)
     expected_match_results = [-1, -1, -1]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_expected_matches_with_only_one_valid_row(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 1
+    valid_rows = tf.constant([True, False], dtype=tf.bool)
     expected_match_results = [-1, -1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

+  def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
+    similarity_matrix = tf.constant([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]])
+    valid_rows = tf.constant([False, True], dtype=tf.bool)
+    expected_match_results = [-1, -1, 0]
+
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    match = matcher.match(similarity_matrix, valid_rows)
+    with self.test_session() as sess:
+      match_results_out = sess.run(match._match_results)
+      self.assertAllEqual(match_results_out, expected_match_results)
...
@@ -103,7 +103,6 @@ from object_detection.core import box_list_ops
 from object_detection.core import box_predictor
 from object_detection.core import losses
 from object_detection.core import model
-from object_detection.core import post_processing
 from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner
 from object_detection.utils import ops
@@ -234,11 +233,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
                first_stage_box_predictor_depth,
                first_stage_minibatch_size,
                first_stage_sampler,
-               first_stage_nms_score_threshold,
-               first_stage_nms_iou_threshold,
+               first_stage_non_max_suppression_fn,
                first_stage_max_proposals,
                first_stage_localization_loss_weight,
                first_stage_objectness_loss_weight,
+               crop_and_resize_fn,
                initial_crop_size,
                maxpool_kernel_size,
                maxpool_stride,
@@ -255,8 +254,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
                hard_example_miner=None,
                parallel_iterations=16,
                add_summaries=True,
-               use_matmul_crop_and_resize=False,
-               clip_anchors_to_image=False):
+               clip_anchors_to_image=False,
+               use_static_shapes=False,
+               resize_masks=True):
"""FasterRCNNMetaArch Constructor. """FasterRCNNMetaArch Constructor.
Args: Args:
...@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
to the loss function for any given image within the image batch and is to the loss function for any given image within the image batch and is
only called "batch_size" due to terminology from the Faster R-CNN paper. only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: Sampler to use for first stage loss (RPN loss). first_stage_sampler: Sampler to use for first stage loss (RPN loss).
first_stage_nms_score_threshold: Score threshold for non max suppression first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
for the Region Proposal Network (RPN). This value is expected to be in callable that takes `boxes`, `scores` and optional `clip_window`(with
[0, 1] as it is applied directly after a softmax transformation. The all other inputs already set) and returns a dictionary containing
recommended value for Faster R-CNN is 0. tensors with keys: `detection_boxes`, `detection_scores`,
first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold `detection_classes`, `num_detections`. This is used to perform non max
for performing Non-Max Suppression (NMS) on the boxes predicted by the suppression on the boxes predicted by the Region Proposal Network
Region Proposal Network (RPN). (RPN).
See `post_processing.batch_multiclass_non_max_suppression` for the type
and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN). Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float first_stage_objectness_loss_weight: A float
crop_and_resize_fn: A differentiable resampler to use for cropping RPN
proposal features.
initial_crop_size: A single integer indicating the output size initial_crop_size: A single integer indicating the output size
(width and height are set to be the same) of the initial bilinear (width and height are set to be the same) of the initial bilinear
interpolation based cropping during ROI pooling. interpolation based cropping during ROI pooling.
...@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
in parallel for calls to tf.map_fn. in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph. should be added to tensorflow graph.
use_matmul_crop_and_resize: Force the use of matrix multiplication based
crop and resize instead of standard tf.image.crop_and_resize while
computing second stage input feature maps.
clip_anchors_to_image: Normally, anchors generated for a given image size clip_anchors_to_image: Normally, anchors generated for a given image size
are pruned during training if they lie outside the image window. This are pruned during training if they lie outside the image window. This
option clips the anchors to be within the image instead of pruning. option clips the anchors to be within the image instead of pruning.
use_static_shapes: If True, uses implementation of ops with static shape
guarantees.
resize_masks: Indicates whether the masks presend in the groundtruth
should be resized in the model with `image_resizer_fn`
Raises: Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
@@ -384,9 +389,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # in the future.
     super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

-    if is_training and second_stage_batch_size > first_stage_max_proposals:
-      raise ValueError('second_stage_batch_size should be no greater than '
-                       'first_stage_max_proposals.')
     if not isinstance(first_stage_anchor_generator,
                       grid_anchor_generator.GridAnchorGenerator):
       raise ValueError('first_stage_anchor_generator must be of type '
@@ -394,6 +396,7 @@ class FasterRCNNMetaArch(model.DetectionModel):

     self._is_training = is_training
     self._image_resizer_fn = image_resizer_fn
+    self._resize_masks = resize_masks
     self._feature_extractor = feature_extractor
     self._number_of_stages = number_of_stages
@@ -425,9 +428,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
             min_depth=0,
             max_depth=0))

-    self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
-    self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
+    self._first_stage_nms_fn = first_stage_non_max_suppression_fn
     self._first_stage_max_proposals = first_stage_max_proposals
+    self._use_static_shapes = use_static_shapes

     self._first_stage_localization_loss = (
         losses.WeightedSmoothL1LocalizationLoss())
@@ -437,6 +440,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight

     # Per-region cropping parameters
+    self._crop_and_resize_fn = crop_and_resize_fn
     self._initial_crop_size = initial_crop_size
     self._maxpool_kernel_size = maxpool_kernel_size
     self._maxpool_stride = maxpool_stride
@@ -458,7 +462,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
     self._second_stage_mask_loss_weight = (
         second_stage_mask_prediction_loss_weight)
-    self._use_matmul_crop_and_resize = use_matmul_crop_and_resize
     self._hard_example_miner = hard_example_miner
     self._parallel_iterations = parallel_iterations
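For context, a hedged sketch of how an injectable `first_stage_non_max_suppression_fn` can be produced. In the real pipeline this partial is built from the config by the builders; the threshold values below are illustrative only:

import functools
from object_detection.core import post_processing

# Pre-binds everything except boxes, scores and clip_window, which the
# meta-architecture supplies at call time.
first_stage_nms_fn = functools.partial(
    post_processing.batch_multiclass_non_max_suppression,
    score_thresh=0.0,        # illustrative; normally from the pipeline config
    iou_thresh=0.7,          # illustrative
    max_size_per_class=300,  # illustrative
    max_total_size=300)      # illustrative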
@@ -673,9 +676,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     }

     if self._number_of_stages >= 2:
+      # If mixed-precision training on TPU is enabled, rpn_box_encodings and
+      # rpn_objectness_predictions_with_background are bfloat16 tensors.
+      # As prediction results, they need to be cast to float32 tensors for
+      # correct postprocess_rpn computation in predict_second_stage.
       prediction_dict.update(self._predict_second_stage(
-          rpn_box_encodings,
-          rpn_objectness_predictions_with_background,
+          tf.to_float(rpn_box_encodings),
+          tf.to_float(rpn_objectness_predictions_with_background),
           rpn_features_to_crop,
           self._anchors.get(), image_shape, true_image_shapes))
@@ -719,7 +726,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
         [batch_size, num_valid_anchors, 2] containing class
         predictions (logits) for each of the anchors. Note that this
         tensor *includes* background class predictions (at class index 0).
-      rpn_features_to_crop: A 4-D float32 tensor with shape
+      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
         [batch_size, height, width, depth] representing image features to crop
         using the proposal boxes predicted by the RPN.
       anchors: 2-D float tensor of shape
@@ -758,17 +765,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
           boxes proposed by the RPN, thus enabling one to extract features and
           get box classification and prediction for externally selected areas
           of the image.
-        6) box_classifier_features: a 4-D float32 tensor representing the
-           features for each proposal.
+        6) box_classifier_features: a 4-D float32 or bfloat16 tensor
+           representing the features for each proposal.
     """
     image_shape_2d = self._image_batch_shape_2d(image_shape)
     proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
         rpn_box_encodings, rpn_objectness_predictions_with_background,
         anchors, image_shape_2d, true_image_shapes)

+    # If mixed-precision training on TPU is enabled, the dtype of
+    # rpn_features_to_crop is bfloat16, otherwise it is float32. tf.cast is
+    # used to match the dtype of proposal_boxes_normalized to that of
+    # rpn_features_to_crop for further computation.
     flattened_proposal_feature_maps = (
         self._compute_second_stage_input_feature_maps(
-            rpn_features_to_crop, proposal_boxes_normalized))
+            rpn_features_to_crop,
+            tf.cast(proposal_boxes_normalized, rpn_features_to_crop.dtype)))

     box_classifier_features = (
         self._feature_extractor.extract_box_classifier_features(
@@ -956,8 +968,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
       image_shape: A 1-D tensor representing the input image shape.
     """
     image_shape = tf.shape(preprocessed_inputs)

-    rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features(
-        preprocessed_inputs, scope=self.first_stage_feature_extractor_scope)
+    rpn_features_to_crop, self.endpoints = (
+        self._feature_extractor.extract_proposal_features(
+            preprocessed_inputs,
+            scope=self.first_stage_feature_extractor_scope))

     feature_map_shape = tf.shape(rpn_features_to_crop)
     anchors = box_list_ops.concatenate(
@@ -965,12 +980,15 @@ class FasterRCNNMetaArch(model.DetectionModel):
                             feature_map_shape[2])]))
     with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
       kernel_size = self._first_stage_box_predictor_kernel_size
+      reuse = tf.get_variable_scope().reuse
       rpn_box_predictor_features = slim.conv2d(
           rpn_features_to_crop,
           self._first_stage_box_predictor_depth,
           kernel_size=[kernel_size, kernel_size],
           rate=self._first_stage_atrous_rate,
-          activation_fn=tf.nn.relu6)
+          activation_fn=tf.nn.relu6,
+          scope='Conv',
+          reuse=reuse)
     return (rpn_box_predictor_features, rpn_features_to_crop,
             anchors, image_shape)
@@ -1223,14 +1241,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         rpn_objectness_predictions_with_background_batch)[:, :, 1]
     clip_window = self._compute_clip_window(image_shapes)
     (proposal_boxes, proposal_scores, _, _, _,
-     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
+     num_proposals) = self._first_stage_nms_fn(
          tf.expand_dims(proposal_boxes, axis=2),
-         tf.expand_dims(rpn_objectness_softmax_without_background,
-                        axis=2),
-         self._first_stage_nms_score_threshold,
-         self._first_stage_nms_iou_threshold,
-         self._first_stage_max_proposals,
-         self._first_stage_max_proposals,
+         tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
          clip_window=clip_window)
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
@@ -1377,16 +1390,19 @@ class FasterRCNNMetaArch(model.DetectionModel):
     groundtruth_masks_list = self._groundtruth_lists.get(
         fields.BoxListFields.masks)
-    if groundtruth_masks_list is not None:
+    # TODO(rathodv): Remove mask resizing once the legacy pipeline is deleted.
+    if groundtruth_masks_list is not None and self._resize_masks:
       resized_masks_list = []
       for mask in groundtruth_masks_list:
         _, resized_mask, _ = self._image_resizer_fn(
             # Reuse the given `image_resizer_fn` to resize groundtruth masks.
             # `mask` tensor for an image is of the shape [num_masks,
             # image_height, image_width]. Below we create a dummy image of
             # the shape [image_height, image_width, 1] to use with
             # `image_resizer_fn`.
-            image=tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2], 1])),
+            image=tf.zeros(tf.stack([tf.shape(mask)[1],
                                     tf.shape(mask)[2], 1])),
             masks=mask)
         resized_masks_list.append(resized_mask)
@@ -1443,11 +1459,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
         cls_weights > 0
     )
-    sampled_indices = self._second_stage_sampler.subsample(
+    selected_positions = self._second_stage_sampler.subsample(
         valid_indicator,
         self._second_stage_batch_size,
         positive_indicator)
-    return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
+    return box_list_ops.boolean_mask(
+        proposal_boxlist,
+        selected_positions,
+        use_static_shapes=self._use_static_shapes,
+        indicator_sum=(self._second_stage_batch_size
+                       if self._use_static_shapes else None))

   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized):
@@ -1467,35 +1488,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
-    def get_box_inds(proposals):
-      proposals_shape = proposals.get_shape().as_list()
-      if any(dim is None for dim in proposals_shape):
-        proposals_shape = tf.shape(proposals)
-      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
-      multiplier = tf.expand_dims(
-          tf.range(start=0, limit=proposals_shape[0]), 1)
-      return tf.reshape(ones_mat * multiplier, [-1])
-
-    if self._use_matmul_crop_and_resize:
-      def _single_image_crop_and_resize(inputs):
-        single_image_features_to_crop, proposal_boxes_normalized = inputs
-        return ops.matmul_crop_and_resize(
-            tf.expand_dims(single_image_features_to_crop, 0),
-            proposal_boxes_normalized,
-            [self._initial_crop_size, self._initial_crop_size])
-
-      cropped_regions = self._flatten_first_two_dimensions(
-          shape_utils.static_or_dynamic_map_fn(
-              _single_image_crop_and_resize,
-              elems=[features_to_crop, proposal_boxes_normalized],
-              dtype=tf.float32,
-              parallel_iterations=self._parallel_iterations))
-    else:
-      cropped_regions = tf.image.crop_and_resize(
-          features_to_crop,
-          self._flatten_first_two_dimensions(proposal_boxes_normalized),
-          get_box_inds(proposal_boxes_normalized),
-          (self._initial_crop_size, self._initial_crop_size))
+    cropped_regions = self._flatten_first_two_dimensions(
+        self._crop_and_resize_fn(
+            features_to_crop, proposal_boxes_normalized,
+            [self._initial_crop_size, self._initial_crop_size]))
     return slim.max_pool2d(
         cropped_regions,
         [self._maxpool_kernel_size, self._maxpool_kernel_size],
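The injected `crop_and_resize_fn` is expected to take features of shape [batch, H, W, C] and boxes of shape [batch, num_boxes, 4] and return crops of shape [batch, num_boxes, size, size, C], which the caller then flattens. A hedged sketch of a tf.image-based callable satisfying that contract; this is an illustrative equivalent, not the library's exact implementation:

import tensorflow as tf

def native_crop_and_resize(features, boxes, crop_size):
  """Crops [batch, num_boxes, 4] boxes from [batch, H, W, C] features."""
  batch = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  # Map every flattened box back to the batch element it came from.
  box_inds = tf.reshape(
      tf.tile(tf.expand_dims(tf.range(batch), 1), [1, num_boxes]), [-1])
  crops = tf.image.crop_and_resize(
      features, tf.reshape(boxes, [-1, 4]), box_inds, crop_size)
  # Restore the [batch, num_boxes, ...] leading dimensions.
  return tf.reshape(
      crops, tf.concat([tf.shape(boxes)[:2], tf.shape(crops)[1:]], axis=0))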
@@ -1738,11 +1734,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
     sampled_reg_indices = tf.multiply(batch_sampled_indices,
                                       batch_reg_weights)

+    losses_mask = None
+    if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+      losses_mask = tf.stack(self.groundtruth_lists(
+          fields.InputDataFields.is_annotated))
     localization_losses = self._first_stage_localization_loss(
-        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
+        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices,
+        losses_mask=losses_mask)
     objectness_losses = self._first_stage_objectness_loss(
         rpn_objectness_predictions_with_background,
-        batch_one_hot_targets, weights=batch_sampled_indices)
+        batch_one_hot_targets, weights=batch_sampled_indices,
+        losses_mask=losses_mask)
     localization_loss = tf.reduce_mean(
         tf.reduce_sum(localization_losses, axis=1) / normalizer)
     objectness_loss = tf.reduce_mean(
@@ -1866,32 +1868,32 @@ class FasterRCNNMetaArch(model.DetectionModel):
       # for just one class to avoid over-counting for regression loss and
       # (optionally) mask loss.
       else:
-        # We only predict refined location encodings for the non background
-        # classes, but we now pad it to make it compatible with the class
-        # predictions
-        refined_box_encodings_with_background = tf.pad(
-            refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-        refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-            refined_box_encodings_with_background,
-            tf.greater(one_hot_flat_cls_targets_with_background, 0))
-        reshaped_refined_box_encodings = tf.reshape(
-            refined_box_encodings_masked_by_class_targets,
-            [batch_size, self.max_num_proposals, self._box_coder.code_size])
+        reshaped_refined_box_encodings = (
+            self._get_refined_encodings_for_postitive_class(
+                refined_box_encodings,
+                one_hot_flat_cls_targets_with_background, batch_size))

+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       second_stage_loc_losses = self._second_stage_localization_loss(
           reshaped_refined_box_encodings,
-          batch_reg_targets, weights=batch_reg_weights) / normalizer
+          batch_reg_targets,
+          weights=batch_reg_weights,
+          losses_mask=losses_mask) / normalizer
       second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
           self._second_stage_classification_loss(
               class_predictions_with_background,
               batch_cls_targets_with_background,
-              weights=batch_cls_weights),
+              weights=batch_cls_weights,
+              losses_mask=losses_mask),
           ndims=2) / normalizer
       second_stage_loc_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
+          second_stage_loc_losses * tf.to_float(paddings_indicator))
       second_stage_cls_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
+          second_stage_cls_losses * tf.to_float(paddings_indicator))

       if self._hard_example_miner:
         (second_stage_loc_loss, second_stage_cls_loss
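A toy sketch (not from this diff) of why the multiply-by-indicator rewrite preserves the loss value: summing a boolean_mask selection equals summing the losses multiplied by a 0/1 indicator, but the latter keeps the tensor shape static, which static-shape (TPU) compilation needs.

import tensorflow as tf

losses = tf.constant([[1., 2.], [3., 4.]])
indicator = tf.constant([[True, False], [True, True]])
masked = tf.reduce_sum(tf.boolean_mask(losses, indicator))  # == 8.0
static = tf.reduce_sum(losses * tf.to_float(indicator))     # == 8.0, static shape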
@@ -1954,10 +1956,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
         image_shape[1], image_shape[2]).get()

-    flat_cropped_gt_mask = tf.image.crop_and_resize(
+    flat_cropped_gt_mask = self._crop_and_resize_fn(
         tf.expand_dims(flat_gt_masks, -1),
-        flat_normalized_proposals,
-        tf.range(flat_normalized_proposals.shape[0].value),
+        tf.expand_dims(flat_normalized_proposals, axis=1),
         [mask_height, mask_width])

     batch_cropped_gt_mask = tf.reshape(
@@ -1968,14 +1969,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
           self._second_stage_mask_loss(
               reshaped_prediction_masks,
               batch_cropped_gt_mask,
-              weights=batch_mask_target_weights),
+              weights=batch_mask_target_weights,
+              losses_mask=losses_mask),
           ndims=2) / (
               mask_height * mask_width * tf.maximum(
                   tf.reduce_sum(
                       batch_mask_target_weights, axis=1, keep_dims=True
                   ), tf.ones((batch_size, 1))))
-      second_stage_mask_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
+      second_stage_mask_loss = tf.reduce_sum(
+          tf.where(paddings_indicator, second_stage_mask_losses,
+                   tf.zeros_like(second_stage_mask_losses)))

     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
+  def _get_refined_encodings_for_postitive_class(
+      self, refined_box_encodings, flat_cls_targets_with_background,
+      batch_size):
+    # We only predict refined location encodings for the non background
+    # classes, but we now pad it to make it compatible with the class
+    # predictions
+    refined_box_encodings_with_background = tf.pad(refined_box_encodings,
+                                                   [[0, 0], [1, 0], [0, 0]])
+    refined_box_encodings_masked_by_class_targets = (
+        box_list_ops.boolean_mask(
+            box_list.BoxList(
+                tf.reshape(refined_box_encodings_with_background,
+                           [-1, self._box_coder.code_size])),
+            tf.reshape(tf.greater(flat_cls_targets_with_background, 0), [-1]),
+            use_static_shapes=self._use_static_shapes,
+            indicator_sum=batch_size * self.max_num_proposals
+            if self._use_static_shapes else None).get())
+    return tf.reshape(
+        refined_box_encodings_masked_by_class_targets, [
+            batch_size, self.max_num_proposals,
+            self._box_coder.code_size
+        ])
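A toy numeric sketch of the pad-then-mask step in the method above: encodings exist only for the non-background classes, so padding inserts a background slot at index 0, letting the one-hot class targets (which include background) select exactly one encoding per proposal.

import tensorflow as tf

refined = tf.constant([[[1., 1., 1., 1.],
                        [2., 2., 2., 2.]]])         # [1 proposal, 2 classes, 4]
padded = tf.pad(refined, [[0, 0], [1, 0], [0, 0]])  # background slot at index 0
one_hot = tf.constant([[0, 0, 1]])                  # proposal targets class 2
selected = tf.boolean_mask(padded, tf.greater(one_hot, 0))
# selected == [[2., 2., 2., 2.]]: one encoding per proposal, chosen by class.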

   def _padded_batched_proposals_indicator(self,
                                           num_proposals,
                                           max_num_proposals):
...