Commit 9139a7b9 authored by Jonathan Huang, committed by TF Object Detection Team

Plumb LVIS specific fields (e.g. `neg_category_ids`, `not_exhaustive_category_ids`) through input pipelines.

PiperOrigin-RevId: 339614575
parent 24e41ffe
......@@ -313,7 +313,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
groundtruth_labeled_classes=None,
groundtruth_verified_neg_classes=None,
groundtruth_not_exhaustive_classes=None):
"""Provide groundtruth tensors.
Args:
......@@ -371,6 +373,12 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_labeled_classes: A list of 1-D tf.float32 tensors of shape
[num_classes], containing label indices encoded as k-hot of the classes
that are exhaustively annotated.
groundtruth_verified_neg_classes: A list of 1-D tf.float32 tensors of
shape [num_classes], containing a K-hot representation of classes
which were verified as not present in the image.
groundtruth_not_exhaustive_classes: A list of 1-D tf.float32 tensors of
shape [num_classes], containing a K-hot representation of classes
which don't have all of their instances marked exhaustively.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
......@@ -430,6 +438,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.InputDataFields
.groundtruth_labeled_classes] = groundtruth_labeled_classes
if groundtruth_verified_neg_classes:
self._groundtruth_lists[
fields.InputDataFields
.groundtruth_verified_neg_classes] = groundtruth_verified_neg_classes
if groundtruth_not_exhaustive_classes:
self._groundtruth_lists[
fields.InputDataFields
.groundtruth_not_exhaustive_classes] = (
groundtruth_not_exhaustive_classes)
@abc.abstractmethod
def regularization_losses(self):
......
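For reference, a minimal, self-contained sketch of the K-hot encoding these new arguments expect (one [num_classes] float32 vector per image). The helper below is illustrative only, assuming 1-indexed LVIS category ids and a label offset of 1; it is not the library's own conversion utility.

# --- Illustrative sketch, not part of this commit ---
import tensorflow as tf

def category_ids_to_k_hot(category_ids, num_classes, label_offset=1):
  """Builds a [num_classes] float32 K-hot vector from sparse category ids.

  Assumes `category_ids` are 1-indexed (LVIS convention) and shifts them by
  `label_offset`; duplicates and empty inputs are handled gracefully.
  """
  zero_indexed = tf.cast(category_ids, tf.int32) - label_offset
  one_hots = tf.one_hot(zero_indexed, depth=num_classes, dtype=tf.float32)
  return tf.minimum(tf.reduce_sum(one_hots, axis=0), 1.0)

# E.g. categories {1, 6, 9} verified absent out of 10 classes:
verified_neg = category_ids_to_k_hot(tf.constant([1, 6, 9]), num_classes=10)
# -> [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]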
......@@ -203,6 +203,10 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.VarLenFeature(tf.string),
'image/class/label':
tf.VarLenFeature(tf.int64),
'image/neg_category_ids':
tf.VarLenFeature(tf.int64),
'image/not_exhaustive_category_ids':
tf.VarLenFeature(tf.int64),
'image/class/confidence':
tf.VarLenFeature(tf.float32),
# Object boxes and classes.
......@@ -264,6 +268,10 @@ class TfExampleDecoder(data_decoder.DataDecoder):
# Image-level labels.
fields.InputDataFields.groundtruth_image_confidences: (
slim_example_decoder.Tensor('image/class/confidence')),
fields.InputDataFields.groundtruth_verified_neg_classes: (
slim_example_decoder.Tensor('image/neg_category_ids')),
fields.InputDataFields.groundtruth_not_exhaustive_classes: (
slim_example_decoder.Tensor('image/not_exhaustive_category_ids')),
# Object boxes and classes.
fields.InputDataFields.groundtruth_boxes: (
slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
......
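For context, the two new keys are plain VarLenFeatures that the slim Tensor item handler densifies into 1-D int64 tensors of category ids. The snippet below sketches the equivalent behaviour with raw TF ops; it is illustrative only and not the decoder's actual code path.

# --- Illustrative sketch, not part of this commit ---
import tensorflow as tf

def parse_lvis_image_level_ids(serialized_example):
  """Roughly what the decoder does for the two new image-level keys."""
  keys_to_features = {
      'image/neg_category_ids': tf.io.VarLenFeature(tf.int64),
      'image/not_exhaustive_category_ids': tf.io.VarLenFeature(tf.int64),
  }
  parsed = tf.io.parse_single_example(serialized_example, keys_to_features)
  return {
      'groundtruth_verified_neg_classes':
          tf.sparse.to_dense(parsed['image/neg_category_ids']),
      'groundtruth_not_exhaustive_classes':
          tf.sparse.to_dense(parsed['image/not_exhaustive_category_ids']),
  }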
......@@ -841,6 +841,61 @@ class TfExampleDecoderTest(test_case.TestCase):
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeVerifiedNegClasses(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
neg_category_ids = [0, 5, 8]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/neg_category_ids':
dataset_util.int64_list_feature(neg_category_ids),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
output = example_decoder.decode(tf.convert_to_tensor(example))
return output
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertAllEqual(
neg_category_ids,
tensor_dict[fields.InputDataFields.groundtruth_verified_neg_classes])
def testDecodeNotExhaustiveClasses(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
not_exhaustive_category_ids = [0, 5, 8]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/not_exhaustive_category_ids':
dataset_util.int64_list_feature(
not_exhaustive_category_ids),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
output = example_decoder.decode(tf.convert_to_tensor(example))
return output
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertAllEqual(
not_exhaustive_category_ids,
tensor_dict[fields.InputDataFields.groundtruth_not_exhaustive_classes])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
......
......@@ -33,6 +33,7 @@ from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.metrics import lvis_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
......@@ -54,6 +55,8 @@ EVAL_METRICS_CLASS_DICT = {
coco_evaluation.CocoMaskEvaluator,
'coco_panoptic_metrics':
coco_evaluation.CocoPanopticSegmentationEvaluator,
'lvis_mask_metrics':
lvis_evaluation.LVISMaskEvaluator,
'oid_challenge_detection_metrics':
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
'oid_challenge_segmentation_metrics':
......@@ -548,10 +551,36 @@ def _scale_box_to_absolute(args):
box_list.BoxList(boxes), image_shape[0], image_shape[1]).get()
def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
def _resize_detection_masks(arg_tuple):
"""Resizes detection masks.
Args:
arg_tuple: A (detection_boxes, detection_masks, image_shape, pad_shape)
tuple where
detection_boxes is a tf.float32 tensor of size [num_masks, 4] containing
the box corners. Row i contains [ymin, xmin, ymax, xmax] of the box
corresponding to mask i. Note that the box corners are in
normalized coordinates.
detection_masks is a tensor of size
[num_masks, mask_height, mask_width].
image_shape is a tensor of shape [2]
pad_shape is a tensor of shape [2] --- this is assumed to be greater
than or equal to image_shape along both dimensions and represents a
shape to-be-padded-to.
  Returns:
    detection masks, reframed to the full image and padded out to `pad_shape`,
    as a [num_masks, pad_shape[0], pad_shape[1]] uint8 tensor.
  """
detection_boxes, detection_masks, image_shape, pad_shape = arg_tuple
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
paddings = tf.concat(
[tf.zeros([3, 1], dtype=tf.int32),
tf.expand_dims(
tf.concat([tf.zeros([1], dtype=tf.int32),
pad_shape-image_shape], axis=0),
1)], axis=1)
detection_masks_reframed = tf.pad(detection_masks_reframed, paddings)
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
......@@ -569,7 +598,7 @@ def resize_detection_masks(detection_boxes, detection_masks,
Args:
detection_boxes: A [batch_size, num_instances, 4] float tensor containing
bounding boxes.
detection_masks: A [batch_suze, num_instances, height, width] float tensor
detection_masks: A [batch_size, num_instances, height, width] float tensor
containing binary instance masks per box.
original_image_spatial_shapes: a [batch_size, 3] shaped int tensor
holding the spatial dimensions of each image in the batch.
......@@ -577,15 +606,26 @@ def resize_detection_masks(detection_boxes, detection_masks,
masks: Masks resized to the spatial extents given by
(original_image_spatial_shapes[0, 0], original_image_spatial_shapes[0, 1])
"""
  # Modify the original image spatial shapes to be the max along each dim;
  # in the evaluator, the original_image_spatial_shape field should be
  # available via add_eval_dict.
max_spatial_shape = tf.reduce_max(
original_image_spatial_shapes, axis=0, keep_dims=True)
tiled_max_spatial_shape = tf.tile(
max_spatial_shape,
multiples=[tf.shape(original_image_spatial_shapes)[0], 1])
return shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks, original_image_spatial_shapes],
elems=[detection_boxes,
detection_masks,
original_image_spatial_shapes,
tiled_max_spatial_shape],
dtype=tf.uint8)
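The padding math in _resize_detection_masks (and mirrored below in _resize_groundtruth_masks) brings every mask up to the per-batch maximum spatial shape so the per-image results can be stacked. A small worked example with made-up shapes; the values here are hypothetical:

# --- Illustrative sketch, not part of this commit ---
import tensorflow as tf

image_shape = tf.constant([100, 150], dtype=tf.int32)  # this image's H, W
pad_shape = tf.constant([120, 160], dtype=tf.int32)    # per-batch max H, W
masks = tf.zeros([5, 100, 150], dtype=tf.float32)      # [num_masks, H, W]

# Same construction as above: no padding on the mask dimension, and
# (pad_shape - image_shape) extra rows/columns after the spatial dims.
paddings = tf.concat(
    [tf.zeros([3, 1], dtype=tf.int32),
     tf.expand_dims(
         tf.concat([tf.zeros([1], dtype=tf.int32),
                    pad_shape - image_shape], axis=0), 1)], axis=1)
# paddings == [[0, 0], [0, 20], [0, 10]]
padded_masks = tf.pad(masks, paddings)  # -> shape [5, 120, 160]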
def _resize_groundtruth_masks(args):
"""Resizes groundgtruth masks to the original image size."""
mask, true_image_shape, original_image_shape = args
"""Resizes groundtruth masks to the original image size."""
mask, true_image_shape, original_image_shape, pad_shape = args
true_height = true_image_shape[0]
true_width = true_image_shape[1]
mask = mask[:, :true_height, :true_width]
......@@ -595,7 +635,15 @@ def _resize_groundtruth_masks(args):
original_image_shape,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
paddings = tf.concat(
[tf.zeros([3, 1], dtype=tf.int32),
tf.expand_dims(
tf.concat([tf.zeros([1], dtype=tf.int32),
pad_shape-original_image_shape], axis=0),
1)], axis=1)
mask = tf.pad(tf.squeeze(mask, 3), paddings)
return tf.cast(mask, tf.uint8)
def _resize_surface_coordinate_masks(args):
......@@ -932,10 +980,17 @@ def result_dict_for_batched_example(images,
if input_data_fields.groundtruth_instance_masks in groundtruth:
masks = groundtruth[input_data_fields.groundtruth_instance_masks]
max_spatial_shape = tf.reduce_max(
original_image_spatial_shapes, axis=0, keep_dims=True)
tiled_max_spatial_shape = tf.tile(
max_spatial_shape,
multiples=[tf.shape(original_image_spatial_shapes)[0], 1])
groundtruth[input_data_fields.groundtruth_instance_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_groundtruth_masks,
elems=[masks, true_image_shapes, original_image_spatial_shapes],
elems=[masks, true_image_shapes,
original_image_spatial_shapes,
tiled_max_spatial_shape],
dtype=tf.uint8))
output_dict.update(groundtruth)
......@@ -1116,7 +1171,8 @@ def evaluator_options_from_eval_config(eval_config):
eval_metric_fn_keys = eval_config.metrics_set
evaluator_options = {}
for eval_metric_fn_key in eval_metric_fn_keys:
if eval_metric_fn_key in ('coco_detection_metrics', 'coco_mask_metrics'):
if eval_metric_fn_key in (
'coco_detection_metrics', 'coco_mask_metrics', 'lvis_mask_metrics'):
evaluator_options[eval_metric_fn_key] = {
'include_metrics_per_category': (
eval_config.include_metrics_per_category)
......
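With this change, LVIS mask metrics are selected the same way as the COCO metrics. A minimal sketch of the corresponding eval config wiring, assuming the usual eval.proto field names:

# --- Illustrative sketch, not part of this commit ---
from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('lvis_mask_metrics')
eval_config.include_metrics_per_category = True

# evaluator_options_from_eval_config now forwards the per-category flag
# to the LVIS evaluator as well as to the COCO ones.
options = eval_util.evaluator_options_from_eval_config(eval_config)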
......@@ -198,53 +198,53 @@ def transform_input_data(tensor_dict,
"""
out_tensor_dict = tensor_dict.copy()
labeled_classes_field = fields.InputDataFields.groundtruth_labeled_classes
image_classes_field = fields.InputDataFields.groundtruth_image_classes
input_fields = fields.InputDataFields
labeled_classes_field = input_fields.groundtruth_labeled_classes
image_classes_field = input_fields.groundtruth_image_classes
verified_neg_classes_field = input_fields.groundtruth_verified_neg_classes
not_exhaustive_field = input_fields.groundtruth_not_exhaustive_classes
if (labeled_classes_field in out_tensor_dict and
image_classes_field in out_tensor_dict):
    raise KeyError('groundtruth_labeled_classes and groundtruth_image_classes '
                   'are provided by the decoder, but only one should be set.')
if labeled_classes_field in out_tensor_dict:
# tf_example_decoder casts unrecognized labels to -1. Remove these
# unrecognized labels before converting labeled_classes to k-hot vector.
out_tensor_dict[labeled_classes_field] = _remove_unrecognized_classes(
out_tensor_dict[labeled_classes_field], unrecognized_label=-1)
out_tensor_dict[labeled_classes_field] = _convert_labeled_classes_to_k_hot(
out_tensor_dict[labeled_classes_field], num_classes)
if image_classes_field in out_tensor_dict:
out_tensor_dict[image_classes_field] = _remove_unrecognized_classes(
out_tensor_dict[image_classes_field], unrecognized_label=-1)
out_tensor_dict[labeled_classes_field] = _convert_labeled_classes_to_k_hot(
out_tensor_dict[image_classes_field], num_classes)
if fields.InputDataFields.multiclass_scores in out_tensor_dict:
for field in [labeled_classes_field,
image_classes_field,
verified_neg_classes_field,
not_exhaustive_field]:
if field in out_tensor_dict:
out_tensor_dict[field] = _remove_unrecognized_classes(
out_tensor_dict[field], unrecognized_label=-1)
out_tensor_dict[field] = _convert_labeled_classes_to_k_hot(
out_tensor_dict[field], num_classes)
if input_fields.multiclass_scores in out_tensor_dict:
out_tensor_dict[
fields.InputDataFields
input_fields
.multiclass_scores] = _multiclass_scores_or_one_hot_labels(
out_tensor_dict[fields.InputDataFields.multiclass_scores],
out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
out_tensor_dict[fields.InputDataFields.groundtruth_classes],
out_tensor_dict[input_fields.multiclass_scores],
out_tensor_dict[input_fields.groundtruth_boxes],
out_tensor_dict[input_fields.groundtruth_classes],
num_classes)
if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
if input_fields.groundtruth_boxes in out_tensor_dict:
out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
out_tensor_dict)
out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)
if retain_original_image:
out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
out_tensor_dict[input_fields.original_image] = tf.cast(
image_resizer_fn(out_tensor_dict[input_fields.image],
None)[0], tf.uint8)
if fields.InputDataFields.image_additional_channels in out_tensor_dict:
channels = out_tensor_dict[fields.InputDataFields.image_additional_channels]
out_tensor_dict[fields.InputDataFields.image] = tf.concat(
[out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
if input_fields.image_additional_channels in out_tensor_dict:
channels = out_tensor_dict[input_fields.image_additional_channels]
out_tensor_dict[input_fields.image] = tf.concat(
[out_tensor_dict[input_fields.image], channels], axis=2)
if retain_original_image_additional_channels:
out_tensor_dict[
fields.InputDataFields.image_additional_channels] = tf.cast(
input_fields.image_additional_channels] = tf.cast(
image_resizer_fn(channels, None)[0], tf.uint8)
# Apply data augmentation ops.
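The new loop in this hunk routes the two LVIS fields through the same cleanup as the labeled/image classes: drop the -1 sentinel the decoder assigns to unrecognized class names, then convert the remaining ids to a K-hot vector. A tiny sketch of the sentinel-filtering step, illustrative only and not the library's _remove_unrecognized_classes:

# --- Illustrative sketch, not part of this commit ---
import tensorflow as tf

def remove_unrecognized_classes_sketch(class_ids, unrecognized_label=-1):
  """Drops sentinel ids before the K-hot conversion (illustrative only)."""
  keep = tf.not_equal(class_ids, unrecognized_label)
  return tf.boolean_mask(class_ids, keep)

# E.g. [-1, 3, 7, -1] -> [3, 7]
filtered = remove_unrecognized_classes_sketch(tf.constant([-1, 3, 7, -1]))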
......@@ -252,7 +252,7 @@ def transform_input_data(tensor_dict,
out_tensor_dict = data_augmentation_fn(out_tensor_dict)
# Apply model preprocessing ops and resize instance masks.
image = out_tensor_dict[fields.InputDataFields.image]
image = out_tensor_dict[input_fields.image]
preprocessed_resized_image, true_image_shape = model_preprocess_fn(
tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
......@@ -265,25 +265,25 @@ def transform_input_data(tensor_dict,
tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1])
])
if fields.InputDataFields.groundtruth_boxes in tensor_dict:
bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes]
if input_fields.groundtruth_boxes in tensor_dict:
bboxes = out_tensor_dict[input_fields.groundtruth_boxes]
boxlist = box_list.BoxList(bboxes)
realigned_bboxes = box_list_ops.change_coordinate_frame(boxlist, im_box)
realigned_boxes_tensor = realigned_bboxes.get()
valid_boxes_tensor = assert_or_prune_invalid_boxes(realigned_boxes_tensor)
out_tensor_dict[
fields.InputDataFields.groundtruth_boxes] = valid_boxes_tensor
input_fields.groundtruth_boxes] = valid_boxes_tensor
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
keypoints = out_tensor_dict[fields.InputDataFields.groundtruth_keypoints]
if input_fields.groundtruth_keypoints in tensor_dict:
keypoints = out_tensor_dict[input_fields.groundtruth_keypoints]
realigned_keypoints = keypoint_ops.change_coordinate_frame(keypoints,
im_box)
out_tensor_dict[
fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints
flds_gt_kpt = fields.InputDataFields.groundtruth_keypoints
flds_gt_kpt_vis = fields.InputDataFields.groundtruth_keypoint_visibilities
flds_gt_kpt_weights = fields.InputDataFields.groundtruth_keypoint_weights
input_fields.groundtruth_keypoints] = realigned_keypoints
flds_gt_kpt = input_fields.groundtruth_keypoints
flds_gt_kpt_vis = input_fields.groundtruth_keypoint_visibilities
flds_gt_kpt_weights = input_fields.groundtruth_keypoint_weights
if flds_gt_kpt_vis not in out_tensor_dict:
out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
out_tensor_dict[flds_gt_kpt][:, :, 0],
......@@ -293,7 +293,7 @@ def transform_input_data(tensor_dict,
out_tensor_dict[flds_gt_kpt_vis],
keypoint_type_weight))
dp_surface_coords_fld = fields.InputDataFields.groundtruth_dp_surface_coords
dp_surface_coords_fld = input_fields.groundtruth_dp_surface_coords
if dp_surface_coords_fld in tensor_dict:
dp_surface_coords = out_tensor_dict[dp_surface_coords_fld]
realigned_dp_surface_coords = densepose_ops.change_coordinate_frame(
......@@ -303,60 +303,60 @@ def transform_input_data(tensor_dict,
if use_bfloat16:
preprocessed_resized_image = tf.cast(
preprocessed_resized_image, tf.bfloat16)
if fields.InputDataFields.context_features in out_tensor_dict:
out_tensor_dict[fields.InputDataFields.context_features] = tf.cast(
out_tensor_dict[fields.InputDataFields.context_features], tf.bfloat16)
out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
if input_fields.context_features in out_tensor_dict:
out_tensor_dict[input_fields.context_features] = tf.cast(
out_tensor_dict[input_fields.context_features], tf.bfloat16)
out_tensor_dict[input_fields.image] = tf.squeeze(
preprocessed_resized_image, axis=0)
out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
out_tensor_dict[input_fields.true_image_shape] = tf.squeeze(
true_image_shape, axis=0)
if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
if input_fields.groundtruth_instance_masks in out_tensor_dict:
masks = out_tensor_dict[input_fields.groundtruth_instance_masks]
_, resized_masks, _ = image_resizer_fn(image, masks)
if use_bfloat16:
resized_masks = tf.cast(resized_masks, tf.bfloat16)
out_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks] = resized_masks
input_fields.groundtruth_instance_masks] = resized_masks
zero_indexed_groundtruth_classes = out_tensor_dict[
fields.InputDataFields.groundtruth_classes] - _LABEL_OFFSET
input_fields.groundtruth_classes] - _LABEL_OFFSET
if use_multiclass_scores:
out_tensor_dict[
fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
fields.InputDataFields.multiclass_scores]
input_fields.groundtruth_classes] = out_tensor_dict[
input_fields.multiclass_scores]
else:
out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
out_tensor_dict[input_fields.groundtruth_classes] = tf.one_hot(
zero_indexed_groundtruth_classes, num_classes)
out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)
out_tensor_dict.pop(input_fields.multiclass_scores, None)
if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
if input_fields.groundtruth_confidences in out_tensor_dict:
groundtruth_confidences = out_tensor_dict[
fields.InputDataFields.groundtruth_confidences]
input_fields.groundtruth_confidences]
# Map the confidences to the one-hot encoding of classes
out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
out_tensor_dict[input_fields.groundtruth_confidences] = (
tf.reshape(groundtruth_confidences, [-1, 1]) *
out_tensor_dict[fields.InputDataFields.groundtruth_classes])
out_tensor_dict[input_fields.groundtruth_classes])
else:
groundtruth_confidences = tf.ones_like(
zero_indexed_groundtruth_classes, dtype=tf.float32)
out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
out_tensor_dict[fields.InputDataFields.groundtruth_classes])
out_tensor_dict[input_fields.groundtruth_confidences] = (
out_tensor_dict[input_fields.groundtruth_classes])
if merge_multiple_boxes:
merged_boxes, merged_classes, merged_confidences, _ = (
util_ops.merge_boxes_with_multiple_labels(
out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
out_tensor_dict[input_fields.groundtruth_boxes],
zero_indexed_groundtruth_classes,
groundtruth_confidences,
num_classes))
merged_classes = tf.cast(merged_classes, tf.float32)
out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
out_tensor_dict[input_fields.groundtruth_boxes] = merged_boxes
out_tensor_dict[input_fields.groundtruth_classes] = merged_classes
out_tensor_dict[input_fields.groundtruth_confidences] = (
merged_confidences)
if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
if input_fields.groundtruth_boxes in out_tensor_dict:
out_tensor_dict[input_fields.num_groundtruth_boxes] = tf.shape(
out_tensor_dict[input_fields.groundtruth_boxes])[0]
return out_tensor_dict
......@@ -406,120 +406,128 @@ def pad_input_data_to_static_shapes(tensor_dict,
else:
height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence
input_fields = fields.InputDataFields
num_additional_channels = 0
if fields.InputDataFields.image_additional_channels in tensor_dict:
if input_fields.image_additional_channels in tensor_dict:
num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[
fields.InputDataFields.image_additional_channels].shape[2])
input_fields.image_additional_channels].shape[2])
# We assume that if num_additional_channels > 0, then it has already been
# concatenated to the base image (but not the ground truth).
num_channels = 3
if fields.InputDataFields.image in tensor_dict:
if input_fields.image in tensor_dict:
num_channels = shape_utils.get_dim_as_int(
tensor_dict[fields.InputDataFields.image].shape[2])
tensor_dict[input_fields.image].shape[2])
if num_additional_channels:
if num_additional_channels >= num_channels:
raise ValueError(
'Image must be already concatenated with additional channels.')
if (fields.InputDataFields.original_image in tensor_dict and
if (input_fields.original_image in tensor_dict and
shape_utils.get_dim_as_int(
tensor_dict[fields.InputDataFields.original_image].shape[2]) ==
tensor_dict[input_fields.original_image].shape[2]) ==
num_channels):
raise ValueError(
'Image must be already concatenated with additional channels.')
if fields.InputDataFields.context_features in tensor_dict and (
if input_fields.context_features in tensor_dict and (
max_num_context_features is None):
raise ValueError('max_num_context_features must be specified in the model '
'config if include_context is specified in the input '
'config')
padding_shapes = {
fields.InputDataFields.image: [height, width, num_channels],
fields.InputDataFields.original_image_spatial_shape: [2],
fields.InputDataFields.image_additional_channels: [
input_fields.image: [height, width, num_channels],
input_fields.original_image_spatial_shape: [2],
input_fields.image_additional_channels: [
height, width, num_additional_channels
],
fields.InputDataFields.source_id: [],
fields.InputDataFields.filename: [],
fields.InputDataFields.key: [],
fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
fields.InputDataFields.groundtruth_instance_masks: [
input_fields.source_id: [],
input_fields.filename: [],
input_fields.key: [],
input_fields.groundtruth_difficult: [max_num_boxes],
input_fields.groundtruth_boxes: [max_num_boxes, 4],
input_fields.groundtruth_classes: [max_num_boxes, num_classes],
input_fields.groundtruth_instance_masks: [
max_num_boxes, height, width
],
fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
fields.InputDataFields.groundtruth_area: [max_num_boxes],
fields.InputDataFields.groundtruth_weights: [max_num_boxes],
fields.InputDataFields.groundtruth_confidences: [
input_fields.groundtruth_is_crowd: [max_num_boxes],
input_fields.groundtruth_group_of: [max_num_boxes],
input_fields.groundtruth_area: [max_num_boxes],
input_fields.groundtruth_weights: [max_num_boxes],
input_fields.groundtruth_confidences: [
max_num_boxes, num_classes
],
fields.InputDataFields.num_groundtruth_boxes: [],
fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
fields.InputDataFields.groundtruth_label_weights: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3],
fields.InputDataFields.groundtruth_image_classes: [num_classes],
fields.InputDataFields.groundtruth_image_confidences: [num_classes],
fields.InputDataFields.groundtruth_labeled_classes: [num_classes],
input_fields.num_groundtruth_boxes: [],
input_fields.groundtruth_label_types: [max_num_boxes],
input_fields.groundtruth_label_weights: [max_num_boxes],
input_fields.true_image_shape: [3],
input_fields.groundtruth_image_classes: [num_classes],
input_fields.groundtruth_image_confidences: [num_classes],
input_fields.groundtruth_labeled_classes: [num_classes],
}
if fields.InputDataFields.original_image in tensor_dict:
padding_shapes[fields.InputDataFields.original_image] = [
if input_fields.original_image in tensor_dict:
padding_shapes[input_fields.original_image] = [
height, width,
shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields.
shape_utils.get_dim_as_int(tensor_dict[input_fields.
original_image].shape[2])
]
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
if input_fields.groundtruth_keypoints in tensor_dict:
tensor_shape = (
tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape)
tensor_dict[input_fields.groundtruth_keypoints].shape)
padding_shape = [max_num_boxes,
shape_utils.get_dim_as_int(tensor_shape[1]),
shape_utils.get_dim_as_int(tensor_shape[2])]
padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape
if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict:
tensor_shape = tensor_dict[fields.InputDataFields.
padding_shapes[input_fields.groundtruth_keypoints] = padding_shape
if input_fields.groundtruth_keypoint_visibilities in tensor_dict:
tensor_shape = tensor_dict[input_fields.
groundtruth_keypoint_visibilities].shape
padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
padding_shapes[fields.InputDataFields.
padding_shapes[input_fields.
groundtruth_keypoint_visibilities] = padding_shape
if fields.InputDataFields.groundtruth_keypoint_weights in tensor_dict:
if input_fields.groundtruth_keypoint_weights in tensor_dict:
tensor_shape = (
tensor_dict[fields.InputDataFields.groundtruth_keypoint_weights].shape)
tensor_dict[input_fields.groundtruth_keypoint_weights].shape)
padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])]
padding_shapes[fields.InputDataFields.
padding_shapes[input_fields.
groundtruth_keypoint_weights] = padding_shape
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
if input_fields.groundtruth_dp_num_points in tensor_dict:
padding_shapes[
fields.InputDataFields.groundtruth_dp_num_points] = [max_num_boxes]
input_fields.groundtruth_dp_num_points] = [max_num_boxes]
padding_shapes[
fields.InputDataFields.groundtruth_dp_part_ids] = [
input_fields.groundtruth_dp_part_ids] = [
max_num_boxes, max_dp_points]
padding_shapes[
fields.InputDataFields.groundtruth_dp_surface_coords] = [
input_fields.groundtruth_dp_surface_coords] = [
max_num_boxes, max_dp_points, 4]
if fields.InputDataFields.groundtruth_track_ids in tensor_dict:
if input_fields.groundtruth_track_ids in tensor_dict:
padding_shapes[
input_fields.groundtruth_track_ids] = [max_num_boxes]
if input_fields.groundtruth_verified_neg_classes in tensor_dict:
padding_shapes[
input_fields.groundtruth_verified_neg_classes] = [num_classes]
if input_fields.groundtruth_not_exhaustive_classes in tensor_dict:
padding_shapes[
fields.InputDataFields.groundtruth_track_ids] = [max_num_boxes]
input_fields.groundtruth_not_exhaustive_classes] = [num_classes]
# Prepare for ContextRCNN related fields.
if fields.InputDataFields.context_features in tensor_dict:
if input_fields.context_features in tensor_dict:
padding_shape = [max_num_context_features, context_feature_length]
padding_shapes[fields.InputDataFields.context_features] = padding_shape
padding_shapes[input_fields.context_features] = padding_shape
tensor_shape = tf.shape(
tensor_dict[fields.InputDataFields.context_features])
tensor_dict[fields.InputDataFields.valid_context_size] = tensor_shape[0]
padding_shapes[fields.InputDataFields.valid_context_size] = []
if fields.InputDataFields.context_feature_length in tensor_dict:
padding_shapes[fields.InputDataFields.context_feature_length] = []
tensor_dict[input_fields.context_features])
tensor_dict[input_fields.valid_context_size] = tensor_shape[0]
padding_shapes[input_fields.valid_context_size] = []
if input_fields.context_feature_length in tensor_dict:
padding_shapes[input_fields.context_feature_length] = []
if fields.InputDataFields.is_annotated in tensor_dict:
padding_shapes[fields.InputDataFields.is_annotated] = []
if input_fields.is_annotated in tensor_dict:
padding_shapes[input_fields.is_annotated] = []
padded_tensor_dict = {}
for tensor_name in tensor_dict:
......@@ -528,10 +536,10 @@ def pad_input_data_to_static_shapes(tensor_dict,
# Make sure that the number of groundtruth boxes now reflects the
# padded/clipped tensors.
if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict:
padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = (
if input_fields.num_groundtruth_boxes in padded_tensor_dict:
padded_tensor_dict[input_fields.num_groundtruth_boxes] = (
tf.minimum(
padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
padded_tensor_dict[input_fields.num_groundtruth_boxes],
max_num_boxes))
return padded_tensor_dict
......@@ -609,7 +617,9 @@ def _get_labels_dict(input_dict):
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_track_ids
fields.InputDataFields.groundtruth_track_ids,
fields.InputDataFields.groundtruth_verified_neg_classes,
fields.InputDataFields.groundtruth_not_exhaustive_classes
]
for key in optional_label_keys:
......
......@@ -108,6 +108,12 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes.
'groundtruth_verified_neg_classes': [batch_size, num_classes] float32
K-hot representation of 1-indexed classes which were verified as not
present in the image.
'groundtruth_not_exhaustive_classes': [batch_size, num_classes] K-hot
representation of 1-indexed classes which don't have all of their
instances marked exhaustively.
class_agnostic: Boolean indicating whether detections are class agnostic.
"""
input_data_fields = fields.InputDataFields()
......@@ -129,6 +135,7 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
input_data_fields.groundtruth_boxes: groundtruth_boxes,
input_data_fields.groundtruth_classes: groundtruth_classes
}
if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
detection_model.groundtruth_lists(fields.BoxListFields.masks))
......@@ -156,23 +163,17 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
detection_model.groundtruth_lists(fields.BoxListFields.group_of))
if detection_model.groundtruth_has_field(
fields.InputDataFields.groundtruth_labeled_classes):
labeled_classes_list = detection_model.groundtruth_lists(
fields.InputDataFields.groundtruth_labeled_classes)
labeled_classes = [
tf.where(x)[:, 0] + label_id_offset for x in labeled_classes_list
]
if len(labeled_classes) > 1:
num_classes = labeled_classes_list[0].shape[0]
padded_labeled_classes = []
for x in labeled_classes:
padding = num_classes - tf.shape(x)[0]
padded_labeled_classes.append(tf.pad(x, [[0, padding]]))
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
padded_labeled_classes)
else:
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes)
input_data_fields.groundtruth_verified_neg_classes):
groundtruth[input_data_fields.groundtruth_verified_neg_classes] = tf.stack(
detection_model.groundtruth_lists(
input_data_fields.groundtruth_verified_neg_classes))
if detection_model.groundtruth_has_field(
input_data_fields.groundtruth_not_exhaustive_classes):
groundtruth[
input_data_fields.groundtruth_not_exhaustive_classes] = tf.stack(
detection_model.groundtruth_lists(
input_data_fields.groundtruth_not_exhaustive_classes))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
......@@ -194,6 +195,25 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_track_ids] = tf.stack(
detection_model.groundtruth_lists(fields.BoxListFields.track_ids))
if detection_model.groundtruth_has_field(
input_data_fields.groundtruth_labeled_classes):
labeled_classes_list = detection_model.groundtruth_lists(
input_data_fields.groundtruth_labeled_classes)
labeled_classes = [
tf.where(x)[:, 0] + label_id_offset for x in labeled_classes_list
]
if len(labeled_classes) > 1:
num_classes = labeled_classes_list[0].shape[0]
padded_labeled_classes = []
for x in labeled_classes:
padding = num_classes - tf.shape(x)[0]
padded_labeled_classes.append(tf.pad(x, [[0, padding]]))
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
padded_labeled_classes)
else:
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes)
groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth
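The labeled-classes handling that moved here converts each per-image K-hot vector back into a padded vector of 1-indexed class ids so the per-image lists can be stacked into one dense batch tensor. A self-contained sketch of that conversion with made-up values (the bool cast is added for robustness; it is not verbatim library code):

# --- Illustrative sketch, not part of this commit ---
import tensorflow as tf

label_id_offset = 1
labeled_classes_list = [
    tf.constant([1., 0., 1.]),  # classes 1 and 3 labeled in image 0
    tf.constant([0., 1., 0.]),  # class 2 labeled in image 1
]
num_classes = labeled_classes_list[0].shape[0]

padded = []
for k_hot in labeled_classes_list:
  ids = tf.where(tf.cast(k_hot, tf.bool))[:, 0] + label_id_offset
  padding = num_classes - tf.shape(ids)[0]
  padded.append(tf.pad(ids, [[0, padding]]))
batched_labeled_classes = tf.stack(padded)  # [[1, 3, 0], [2, 0, 0]]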
......@@ -339,6 +359,14 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_labeled_classes in labels:
gt_labeled_classes = labels[
fields.InputDataFields.groundtruth_labeled_classes]
gt_verified_neg_classes = None
if fields.InputDataFields.groundtruth_verified_neg_classes in labels:
gt_verified_neg_classes = labels[
fields.InputDataFields.groundtruth_verified_neg_classes]
gt_not_exhaustive_classes = None
if fields.InputDataFields.groundtruth_not_exhaustive_classes in labels:
gt_not_exhaustive_classes = labels[
fields.InputDataFields.groundtruth_not_exhaustive_classes]
model.provide_groundtruth(
groundtruth_boxes_list=gt_boxes_list,
groundtruth_classes_list=gt_classes_list,
......@@ -354,7 +382,9 @@ def provide_groundtruth(model, labels):
groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list,
groundtruth_track_ids_list=gt_track_ids_list)
groundtruth_track_ids_list=gt_track_ids_list,
groundtruth_verified_neg_classes=gt_verified_neg_classes,
groundtruth_not_exhaustive_classes=gt_not_exhaustive_classes)
def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
......
......@@ -703,6 +703,7 @@ def eager_eval_loop(
evaluator_options = eval_util.evaluator_options_from_eval_config(
eval_config)
batch_size = eval_config.batch_size
class_agnostic_category_index = (
label_map_util.create_class_agnostic_category_index())
......@@ -731,7 +732,9 @@ def eager_eval_loop(
# must be unpadded.
boxes_shape = (
labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
unpad_groundtruth_tensors = (boxes_shape[1] is not None
and not use_tpu
and batch_size == 1)
labels = model_lib.unstack_batch(
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
......@@ -799,7 +802,8 @@ def eager_eval_loop(
tf.logging.info('Finished eval step %d', i)
use_original_images = fields.InputDataFields.original_image in features
if use_original_images and i < eval_config.num_visualizations:
if (use_original_images and i < eval_config.num_visualizations
and batch_size == 1):
sbys_image_list = vutils.draw_side_by_side_evaluation_image(
eval_dict,
category_index=category_index,
......