"vscode:/vscode.git/clone" did not exist on "2ef9352b941bce4280d5a42a7781984b1625f7df"
Commit 7cb41cec authored by A. Unique TensorFlower, committed by TF Object Detection Team

Fixes three bugs in online OpenImageChallenge evaluation:

1. Propagates the label offset for image-level labels (previously it was
   dropped).
2. Adds an option to treat an empty image-level label set as a sign that no
   labels were verified for a given image.
3. Updates image ids in object detection evaluation when merging multiple
   states.

PiperOrigin-RevId: 439537865
parent 80f6738b
research/object_detection/inputs.py

@@ -54,18 +54,20 @@ INPUT_BUILDER_UTIL_MAP = {
 }


-def _multiclass_scores_or_one_hot_labels(multiclass_scores,
-                                         groundtruth_boxes,
+def _multiclass_scores_or_one_hot_labels(multiclass_scores, groundtruth_boxes,
                                          groundtruth_classes, num_classes):
   """Returns one-hot encoding of classes when multiclass_scores is empty."""
   # Replace groundtruth_classes tensor with multiclass_scores tensor when it's
   # non-empty. If multiclass_scores is empty fall back on groundtruth_classes
   # tensor.
+
   def true_fn():
     return tf.reshape(multiclass_scores,
                       [tf.shape(groundtruth_boxes)[0], num_classes])

+
   def false_fn():
     return tf.one_hot(groundtruth_classes, num_classes)

+
   return tf.cond(tf.size(multiclass_scores) > 0, true_fn, false_fn)
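For reference, the fallback this function implements can be run standalone. A minimal sketch, assuming TF2 eager execution; the names mirror the signature above, but the harness is illustrative only:

import tensorflow as tf

def _scores_or_one_hot(multiclass_scores, groundtruth_boxes,
                       groundtruth_classes, num_classes):
  # Prefer the per-box score matrix when it is non-empty; otherwise
  # fall back to a one-hot encoding of the groundtruth class ids.
  def true_fn():
    return tf.reshape(multiclass_scores,
                      [tf.shape(groundtruth_boxes)[0], num_classes])

  def false_fn():
    return tf.one_hot(groundtruth_classes, num_classes)

  return tf.cond(tf.size(multiclass_scores) > 0, true_fn, false_fn)

# With empty scores, the one-hot branch is taken for the two boxes.
boxes = tf.zeros([2, 4])
print(_scores_or_one_hot(tf.constant([], tf.float32), boxes,
                         tf.constant([0, 2]), 3))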
@@ -132,8 +134,7 @@ def assert_or_prune_invalid_boxes(boxes):
     This is not supported on TPUs.
   """
-  ymin, xmin, ymax, xmax = tf.split(
-      boxes, num_or_size_splits=4, axis=1)
+  ymin, xmin, ymax, xmax = tf.split(boxes, num_or_size_splits=4, axis=1)

   height_check = tf.Assert(tf.reduce_all(ymax >= ymin), [ymin, ymax])
   width_check = tf.Assert(tf.reduce_all(xmax >= xmin), [xmin, xmax])
@@ -157,7 +158,8 @@ def transform_input_data(tensor_dict,
                          use_multiclass_scores=False,
                          use_bfloat16=False,
                          retain_original_image_additional_channels=False,
-                         keypoint_type_weight=None):
+                         keypoint_type_weight=None,
+                         image_classes_field_map_empty_to_ones=True):
   """A single function that is responsible for all input data transformations.

   Data transformation functions are applied in the following order.
@@ -206,6 +208,9 @@ def transform_input_data(tensor_dict,
     keypoint_type_weight: A list (of length num_keypoints) containing
       groundtruth loss weights to use for each keypoint. If None, will use a
       weight of 1.
+    image_classes_field_map_empty_to_ones: A boolean flag indicating whether an
+      empty image classes field means that all classes have been labeled on
+      this image [true] or none of them have [false].

   Returns:
     A dictionary keyed by fields.InputDataFields containing the tensors obtained
@@ -229,9 +234,9 @@ def transform_input_data(tensor_dict,
     raise KeyError('groundtruth_labeled_classes and groundtruth_image_classes '
                    'are provided by the decoder, but only one should be set.')
-  for field, map_empty_to_ones in [
-      (labeled_classes_field, True),
-      (image_classes_field, True),
-      (verified_neg_classes_field, False),
-      (not_exhaustive_field, False)]:
+  for field, map_empty_to_ones in [(labeled_classes_field, True),
+                                   (image_classes_field,
+                                    image_classes_field_map_empty_to_ones),
+                                   (verified_neg_classes_field, False),
+                                   (not_exhaustive_field, False)]:
     if field in out_tensor_dict:
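The conversion helper that consumes map_empty_to_ones is not part of this hunk. A rough sketch of the intended semantics, with classes_to_k_hot as a hypothetical stand-in for the real convert_labeled_classes_to_k_hot:

import tensorflow as tf

def classes_to_k_hot(class_ids, num_classes, map_empty_to_ones,
                     label_id_offset=1):
  # Hypothetical stand-in: shift the 1-based ids down by label_id_offset
  # and scatter them into a k-hot vector. An empty id list maps to all
  # ones (every class treated as verified) or all zeros (nothing
  # verified), depending on map_empty_to_ones.
  def empty_fn():
    return tf.fill([num_classes], 1.0 if map_empty_to_ones else 0.0)

  def nonempty_fn():
    one_hot = tf.one_hot(class_ids - label_id_offset, num_classes)
    return tf.reduce_max(one_hot, axis=0)

  return tf.cond(tf.size(class_ids) > 0, nonempty_fn, empty_fn)

print(classes_to_k_hot(tf.constant([1, 3]), 4, True))         # [1 0 1 0]
print(classes_to_k_hot(tf.constant([], tf.int32), 4, True))   # [1 1 1 1]
print(classes_to_k_hot(tf.constant([], tf.int32), 4, False))  # [0 0 0 0]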
@@ -1044,7 +1049,9 @@ def eval_input(eval_config, eval_input_config, model_config,
       retain_original_image=eval_config.retain_original_images,
       retain_original_image_additional_channels=
       eval_config.retain_original_image_additional_channels,
-      keypoint_type_weight=keypoint_type_weight)
+      keypoint_type_weight=keypoint_type_weight,
+      image_classes_field_map_empty_to_ones=eval_config
+      .image_classes_field_map_empty_to_ones)
   tensor_dict = pad_input_data_to_static_shapes(
       tensor_dict=transform_data_fn(tensor_dict),
       max_num_boxes=eval_input_config.max_number_of_boxes,
research/object_detection/model_lib.py

@@ -63,7 +63,8 @@ MODEL_BUILD_UTIL_MAP = {
     'create_eval_input_fn':
         inputs.create_eval_input_fn,
     'create_predict_input_fn':
         inputs.create_predict_input_fn,
-    'detection_model_fn_base': model_builder.build,
+    'detection_model_fn_base':
+        model_builder.build,
 }
@@ -146,12 +147,13 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
     groundtruth_image_classes_k_hot = tf.stack(
         detection_model.groundtruth_lists(
             input_data_fields.groundtruth_image_classes))
-    # We do not add label_id_offset here because it was not added when encoding
-    # groundtruth_image_classes.
     groundtruth_image_classes = tf.expand_dims(
         tf.where(groundtruth_image_classes_k_hot > 0)[:, 1], 0)
+    # Adds back label_id_offset as it is subtracted in
+    # convert_labeled_classes_to_k_hot.
     groundtruth[
-        input_data_fields.groundtruth_image_classes] = groundtruth_image_classes
+        input_data_fields.
+        groundtruth_image_classes] = groundtruth_image_classes + label_id_offset
   if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
     groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
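The round trip behind the first bug fix, in isolation. A minimal sketch assuming a label_id_offset of 1 and a single image:

import tensorflow as tf

label_id_offset = 1
# One k-hot row per image; the indices were shifted down by
# label_id_offset when the field was encoded.
k_hot = tf.constant([[0., 1., 0., 1.]])
# Recover the sparse class indices, then add the offset back, as the
# fixed code above now does.
class_indices = tf.expand_dims(tf.where(k_hot > 0)[:, 1], 0)
print(class_indices + label_id_offset)  # tf.Tensor([[2 4]], ...)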
@@ -192,15 +194,16 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
   if detection_model.groundtruth_has_field(
       input_data_fields.groundtruth_verified_neg_classes):
     groundtruth[input_data_fields.groundtruth_verified_neg_classes] = tf.pad(
-        tf.stack(detection_model.groundtruth_lists(
-            input_data_fields.groundtruth_verified_neg_classes)),
+        tf.stack(
+            detection_model.groundtruth_lists(
+                input_data_fields.groundtruth_verified_neg_classes)),
         label_id_offset_paddings)
   if detection_model.groundtruth_has_field(
       input_data_fields.groundtruth_not_exhaustive_classes):
-    groundtruth[
-        input_data_fields.groundtruth_not_exhaustive_classes] = tf.pad(
-            tf.stack(detection_model.groundtruth_lists(
-                input_data_fields.groundtruth_not_exhaustive_classes)),
-            label_id_offset_paddings)
+    groundtruth[input_data_fields.groundtruth_not_exhaustive_classes] = tf.pad(
+        tf.stack(
+            detection_model.groundtruth_lists(
+                input_data_fields.groundtruth_not_exhaustive_classes)),
+        label_id_offset_paddings)
@@ -272,8 +275,8 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
       key: tf.unstack(tensor) for key, tensor in tensor_dict.items()
   }
   if unpad_groundtruth_tensors:
-    if (fields.InputDataFields.num_groundtruth_boxes not in
-        unbatched_tensor_dict):
+    if (fields.InputDataFields.num_groundtruth_boxes
+        not in unbatched_tensor_dict):
       raise ValueError('`num_groundtruth_boxes` not found in tensor_dict. '
                        'Keys available: {}'.format(
                            unbatched_tensor_dict.keys()))
@@ -330,15 +333,14 @@ def provide_groundtruth(model, labels, training_step=None):
   Args:
     model: The detection model to provide groundtruth to.
     labels: The labels for the training or evaluation inputs.
-    training_step: int, optional. The training step for the model. Useful
-      for models which want to anneal loss weights.
+    training_step: int, optional. The training step for the model. Useful for
+      models which want to anneal loss weights.
   """
   gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
   gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
   gt_masks_list = None
   if fields.InputDataFields.groundtruth_instance_masks in labels:
-    gt_masks_list = labels[
-        fields.InputDataFields.groundtruth_instance_masks]
+    gt_masks_list = labels[fields.InputDataFields.groundtruth_instance_masks]
   gt_mask_weights_list = None
   if fields.InputDataFields.groundtruth_instance_mask_weights in labels:
     gt_mask_weights_list = labels[
@@ -363,23 +365,20 @@ def provide_groundtruth(model, labels, training_step=None):
         fields.InputDataFields.groundtruth_dp_num_points]
   gt_dp_part_ids_list = None
   if fields.InputDataFields.groundtruth_dp_part_ids in labels:
-    gt_dp_part_ids_list = labels[
-        fields.InputDataFields.groundtruth_dp_part_ids]
+    gt_dp_part_ids_list = labels[fields.InputDataFields.groundtruth_dp_part_ids]
   gt_dp_surface_coords_list = None
   if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
     gt_dp_surface_coords_list = labels[
         fields.InputDataFields.groundtruth_dp_surface_coords]
   gt_track_ids_list = None
   if fields.InputDataFields.groundtruth_track_ids in labels:
-    gt_track_ids_list = labels[
-        fields.InputDataFields.groundtruth_track_ids]
+    gt_track_ids_list = labels[fields.InputDataFields.groundtruth_track_ids]
   gt_weights_list = None
   if fields.InputDataFields.groundtruth_weights in labels:
     gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
   gt_confidences_list = None
   if fields.InputDataFields.groundtruth_confidences in labels:
-    gt_confidences_list = labels[
-        fields.InputDataFields.groundtruth_confidences]
+    gt_confidences_list = labels[fields.InputDataFields.groundtruth_confidences]
   gt_is_crowd_list = None
   if fields.InputDataFields.groundtruth_is_crowd in labels:
     gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
@@ -430,7 +429,10 @@ def provide_groundtruth(model, labels, training_step=None):
       training_step=training_step)


-def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
+def create_model_fn(detection_model_fn,
+                    configs,
+                    hparams=None,
+                    use_tpu=False,
                     postprocess_on_cpu=False):
   """Creates a model function for `Estimator`.
@@ -438,8 +440,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
     detection_model_fn: Function that returns a `DetectionModel` instance.
     configs: Dictionary of pipeline config objects.
     hparams: `HParams` object.
-    use_tpu: Boolean indicating whether model should be constructed for
-      use on TPU.
+    use_tpu: Boolean indicating whether model should be constructed for use on
+      TPU.
     postprocess_on_cpu: When use_tpu and postprocess_on_cpu is true, postprocess
       is scheduled on the host cpu.
# For evaling on train data, it is necessary to check whether groundtruth
# must be unpadded.
boxes_shape = (
labels[fields.InputDataFields.groundtruth_boxes].get_shape()
.as_list())
labels[
fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
labels = unstack_batch(
labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)
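The boxes_shape probe above relies on static shape information. A small illustration with a concrete tensor standing in for the real groundtruth_boxes batch (in graph mode a padded batch keeps an integer dimension where a fully dynamic one reports None):

import tensorflow as tf

# A padded batch has a known boxes dimension at index 1, so the check
# below is True; an unknown (None) dimension would make it False.
boxes = tf.zeros([8, 100, 4])              # stands in for groundtruth_boxes
boxes_shape = boxes.get_shape().as_list()  # [8, 100, 4]
unpad_groundtruth_tensors = boxes_shape[1] is not None
print(boxes_shape, unpad_groundtruth_tensors)  # [8, 100, 4] True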
@@ -522,8 +524,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
           (prediction_dict,
            features[fields.InputDataFields.true_image_shape]))
     else:
-      detections = postprocess_wrapper((
-          prediction_dict,
+      detections = postprocess_wrapper(
+          (prediction_dict,
            features[fields.InputDataFields.true_image_shape]))

   if mode == tf_estimator.ModeKeys.TRAIN:
@@ -649,8 +651,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
       eval_images = features[fields.InputDataFields.original_image]
       true_image_shapes = tf.slice(
           features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
-      original_image_spatial_shapes = features[fields.InputDataFields
-                                               .original_image_spatial_shape]
+      original_image_spatial_shapes = features[
+          fields.InputDataFields.original_image_spatial_shape]
     else:
       eval_images = features[fields.InputDataFields.image]
       true_image_shapes = None
@@ -677,8 +679,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
         eval_input_config.label_map_path)
     vis_metric_ops = None
     if not use_tpu and use_original_images:
-      keypoint_edges = [
-          (kp.start, kp.end) for kp in eval_config.keypoint_edge]
+      keypoint_edges = [(kp.start, kp.end) for kp in eval_config.keypoint_edge
+                       ]
       eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
           category_index,
@@ -777,16 +779,13 @@ def create_estimator_and_inputs(run_config,
       data for evaluation.
     model_fn_creator: A function that creates a `model_fn` for `Estimator`.
       Follows the signature:
       * Args:
         * `detection_model_fn`: Function that returns `DetectionModel` instance.
         * `configs`: Dictionary of pipeline config objects.
         * `hparams`: `HParams` object.
-      * Returns:
-        `model_fn` for `Estimator`.
-    use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False,
-      an `Estimator` will be returned.
+      * Returns: `model_fn` for `Estimator`.
+    use_tpu_estimator: Whether a `TPUEstimator` should be returned. If False, an
+      `Estimator` will be returned.
     use_tpu: Boolean, whether training and evaluation should run on TPU. Only
       used if `use_tpu_estimator` is True.
     num_shards: Number of shards (TPU cores). Only used if `use_tpu_estimator`
@@ -833,9 +832,7 @@ def create_estimator_and_inputs(run_config,
       'use_bfloat16': configs['train_config'].use_bfloat16 and use_tpu
   })
   if sample_1_of_n_eval_examples >= 1:
-    kwargs.update({
-        'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
-    })
+    kwargs.update({'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples})
   if override_eval_num_epochs:
     kwargs.update({'eval_num_epochs': 1})
     tf.logging.warning(
@@ -959,8 +956,8 @@ def create_train_and_eval_specs(train_input_fn,
     eval_spec_names = [str(i) for i in range(len(eval_input_fns))]

   eval_specs = []
-  for index, (eval_spec_name, eval_input_fn) in enumerate(
-      zip(eval_spec_names, eval_input_fns)):
+  for index, (eval_spec_name,
+              eval_input_fn) in enumerate(zip(eval_spec_names, eval_input_fns)):
     # Uses final_exporter_name as exporter_name for the first eval spec for
     # backward compatibility.
     if index == 0:
@@ -1120,14 +1117,11 @@ def populate_experiment(run_config,
       number of evaluation steps is set from the `EvalConfig` proto.
     model_fn_creator: A function that creates a `model_fn` for `Estimator`.
       Follows the signature:
       * Args:
         * `detection_model_fn`: Function that returns `DetectionModel` instance.
         * `configs`: Dictionary of pipeline config objects.
         * `hparams`: `HParams` object.
-      * Returns:
-        `model_fn` for `Estimator`.
+      * Returns: `model_fn` for `Estimator`.
     **kwargs: Additional keyword arguments for configuration override.

   Returns:
research/object_detection/protos/eval.proto

@@ -3,7 +3,7 @@ syntax = "proto2";
 package object_detection.protos;

 // Message for configuring DetectionModel evaluation jobs (eval.py).
-// Next id - 36
+// Next id - 37
 message EvalConfig {
   optional uint32 batch_size = 25 [default = 1];
   // Number of visualization images to generate.
@@ -118,6 +118,11 @@ message EvalConfig {
   // will be ignored. This is useful for evaluating on test data that are not
   // exhaustively labeled.
   optional bool skip_predictions_for_unlabeled_class = 33 [default = false];
+
+  // If image_classes_field for a given image is empty and this field is set
+  // to true, it is interpreted as if the annotations on this image were
+  // exhaustive.
+  optional bool image_classes_field_map_empty_to_ones = 36 [default = true];
 }

 // A message to configure parameterized evaluation metric.
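A minimal sketch of overriding the new field from Python, assuming the generated eval_pb2 module that ships with the object_detection protos:

from google.protobuf import text_format
from object_detection.protos import eval_pb2

# Opt out of the default: an empty image_classes field will now mean that
# no image-level labels were verified, instead of all of them.
eval_config = text_format.Parse(
    'image_classes_field_map_empty_to_ones: false', eval_pb2.EvalConfig())
print(eval_config.image_classes_field_map_empty_to_ones)  # False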
research/object_detection/utils/object_detection_evaluation.py

@@ -255,6 +255,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
     for image_id in image_ids:
       if image_id in self._image_ids:
         logging.warning('Image with id %s already added.', image_id)
+    self._image_ids.update(image_ids)

     self._evaluation.merge_internal_state(state_tuple)
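The intent of the added line is easier to see stripped down. The class below is hypothetical and mirrors only the _image_ids bookkeeping:

import logging

class MergeableEvaluator:
  """Hypothetical sketch of the duplicate-id bookkeeping fixed above."""

  def __init__(self):
    self._image_ids = set()

  def merge_internal_state(self, image_ids, state_tuple):
    for image_id in image_ids:
      if image_id in self._image_ids:
        logging.warning('Image with id %s already added.', image_id)
    # Without this update, merged ids were forgotten, so duplicates in
    # later merges went undetected.
    self._image_ids.update(image_ids)

evaluator = MergeableEvaluator()
evaluator.merge_internal_state(['img_1'], state_tuple=None)
evaluator.merge_internal_state(['img_1'], state_tuple=None)  # warns now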
@@ -329,8 +330,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
     # If the key is not present in the groundtruth_dict or the array is empty
     # (unless there are no annotations for the groundtruth on this image)
     # use values from the dictionary or insert None otherwise.
-    if (standard_fields.InputDataFields.groundtruth_difficult in six.viewkeys(
-        groundtruth_dict) and
+    if (standard_fields.InputDataFields.groundtruth_difficult
+        in six.viewkeys(groundtruth_dict) and
         (groundtruth_dict[standard_fields.InputDataFields.groundtruth_difficult]
          .size or not groundtruth_classes.size)):
       groundtruth_difficult = groundtruth_dict[
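The comment above describes a three-way condition. The same logic rendered with numpy, using get_difficult as a hypothetical helper:

import numpy as np
import six

def get_difficult(groundtruth_dict, groundtruth_classes):
  # Use the difficult flags only if the key is present and either the
  # array is non-empty or the image has no annotations at all.
  key = 'groundtruth_difficult'
  if (key in six.viewkeys(groundtruth_dict) and
      (groundtruth_dict[key].size or not groundtruth_classes.size)):
    return groundtruth_dict[key]
  return None

print(get_difficult({'groundtruth_difficult': np.array([0, 1])},
                    np.array([3, 7])))      # [0 1]
print(get_difficult({}, np.array([3, 7])))  # None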
@@ -343,8 +344,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator):
                       image_id)
     groundtruth_masks = None
     if self._evaluate_masks:
-      if (standard_fields.InputDataFields.groundtruth_instance_masks not in
-          groundtruth_dict):
+      if (standard_fields.InputDataFields.groundtruth_instance_masks
+          not in groundtruth_dict):
         raise ValueError('Instance masks not in groundtruth dictionary.')
       groundtruth_masks = groundtruth_dict[
           standard_fields.InputDataFields.groundtruth_instance_masks]
@@ -1246,7 +1247,6 @@ class ObjectDetectionEvaluation(object):
             groundtruth_is_group_of_list=groundtruth_is_group_of_list,
             detected_masks=detected_masks,
             groundtruth_masks=groundtruth_masks))
-
     for i in range(self.num_class):
       if scores[i].shape[0] > 0:
         self.scores_per_class[i].append(scores[i])