Commit c814f04d authored by Huizhong Chen, committed by TF Object Detection Team

Partially labeled groundtruth online eval implementation.

PiperOrigin-RevId: 328477362
parent 4af7e0d8
@@ -1121,6 +1121,15 @@ def evaluator_options_from_eval_config(eval_config):
'include_metrics_per_category': (
eval_config.include_metrics_per_category)
}
# For coco detection eval, if the eval_config proto contains the
# "skip_predictions_for_unlabeled_class" field, include this field in
# evaluator_options.
if eval_metric_fn_key == 'coco_detection_metrics' and hasattr(
eval_config, 'skip_predictions_for_unlabeled_class'):
evaluator_options[eval_metric_fn_key].update({
'skip_predictions_for_unlabeled_class':
(eval_config.skip_predictions_for_unlabeled_class)
})
elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
evaluator_options[eval_metric_fn_key] = {
'recall_lower_bound': (eval_config.recall_lower_bound),
......
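For illustration, a minimal sketch (not part of this commit) of how the new flag flows from an EvalConfig into evaluator_options, assuming the usual object_detection import paths (eval_util, eval_pb2):

from object_detection import eval_util
from object_detection.protos import eval_pb2

# Build an eval config that requests COCO detection metrics and enables the
# new partially-labeled-groundtruth behavior.
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('coco_detection_metrics')
eval_config.skip_predictions_for_unlabeled_class = True

# The flag is copied into the per-metric options dict and later forwarded to
# the CocoDetectionEvaluator constructor.
evaluator_options = eval_util.evaluator_options_from_eval_config(eval_config)
assert evaluator_options['coco_detection_metrics'][
    'skip_predictions_for_unlabeled_class']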
@@ -239,6 +239,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
eval_config)
self.assertTrue(evaluator_options['coco_detection_metrics']
['include_metrics_per_category'])
self.assertFalse(evaluator_options['coco_detection_metrics']
['skip_predictions_for_unlabeled_class'])
self.assertTrue(
evaluator_options['coco_mask_metrics']['include_metrics_per_category'])
self.assertAlmostEqual(
@@ -253,6 +255,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
eval_config.metrics_set.extend(
['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
eval_config.include_metrics_per_category = True
eval_config.skip_predictions_for_unlabeled_class = True
eval_config.recall_lower_bound = 0.2
eval_config.recall_upper_bound = 0.6
categories = self._get_categories_list()
@@ -263,6 +266,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
evaluator_options)
self.assertTrue(evaluator[0]._include_metrics_per_category)
self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
eval_config.recall_lower_bound)
self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
......
@@ -34,7 +34,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
def __init__(self,
categories,
include_metrics_per_category=False,
all_metrics_per_category=False):
all_metrics_per_category=False,
skip_predictions_for_unlabeled_class=False):
"""Constructor.
Args:
@@ -46,6 +47,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
each category in per_category_ap. Be careful with setting it to true if
you have more than handful of categories, because it will pollute
your mldash.
skip_predictions_for_unlabeled_class: Skip predictions whose class does not
match any of the labeled classes for the image.
"""
super(CocoDetectionEvaluator, self).__init__(categories)
# _image_ids is a dictionary that maps unique image ids to Booleans which
@@ -58,6 +61,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
self._metrics = None
self._include_metrics_per_category = include_metrics_per_category
self._all_metrics_per_category = all_metrics_per_category
self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
self._groundtruth_labeled_classes = {}
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
@@ -92,6 +97,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
numpy array of keypoint visibilities with shape [num_gt_boxes,
num_keypoints]. Integer is treated as an enum with 0=not labeled,
1=labeled but not visible and 2=labeled and visible.
InputDataFields.groundtruth_labeled_classes (optional): a 1-indexed
integer numpy array indicating which classes have been annotated on
this image. Detections whose class is not in this array are skipped
when skip_predictions_for_unlabeled_class is True.
"""
if image_id in self._image_ids:
tf.logging.warning('Ignoring ground truth with image id %s since it was '
@@ -134,6 +143,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
self._annotation_id += groundtruth_dict[standard_fields.InputDataFields.
groundtruth_boxes].shape[0]
self._groundtruth_labeled_classes[image_id] = groundtruth_dict.get(
standard_fields.InputDataFields.groundtruth_labeled_classes)
# Boolean to indicate whether a detection has been added for this image.
self._image_ids[image_id] = False
@@ -173,6 +184,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
standard_fields.DetectionResultFields.detection_keypoints)
if detection_keypoints is not None and not detection_keypoints.shape[0]:
detection_keypoints = None
if self._skip_predictions_for_unlabeled_class:
det_classes = detections_dict[
standard_fields.DetectionResultFields.detection_classes]
num_det_boxes = det_classes.shape[0]
keep_box_ids = []
for box_id in range(num_det_boxes):
if det_classes[box_id] in self._groundtruth_labeled_classes[image_id]:
keep_box_ids.append(box_id)
self._detection_boxes_list.extend(
coco_tools.ExportSingleImageDetectionBoxesToCoco(
image_id=image_id,
category_id_set=self._category_id_set,
detection_boxes=detections_dict[
standard_fields.DetectionResultFields.detection_boxes]
[keep_box_ids],
detection_scores=detections_dict[
standard_fields.DetectionResultFields.detection_scores]
[keep_box_ids],
detection_classes=detections_dict[
standard_fields.DetectionResultFields.detection_classes]
[keep_box_ids],
detection_keypoints=detection_keypoints))
else:
self._detection_boxes_list.extend(
coco_tools.ExportSingleImageDetectionBoxesToCoco(
image_id=image_id,
@@ -271,24 +306,20 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
None when executing eagerly, or an update_op that can be used to update
the eval metrics in `tf.estimator.EstimatorSpec`.
"""
def update_op(
image_id_batched,
groundtruth_boxes_batched,
groundtruth_classes_batched,
groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
detection_boxes_batched,
detection_scores_batched,
detection_classes_batched,
num_det_boxes_per_image,
def update_op(image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
"""Update operation for adding batch of images to Coco evaluator."""
for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
det_score, det_class, num_det_box, is_annotated) in zip(
for (image_id, gt_box, gt_class, gt_is_crowd, gt_labeled_classes,
num_gt_box, det_box, det_score, det_class,
num_det_box, is_annotated) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
@@ -297,7 +328,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box],
'groundtruth_labeled_classes': gt_labeled_classes
})
self.add_single_detected_image_info(
image_id,
@@ -313,6 +345,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
groundtruth_is_crowd = eval_dict.get(
input_data_fields.groundtruth_is_crowd, None)
groundtruth_labeled_classes = eval_dict.get(
input_data_fields.groundtruth_labeled_classes, None)
detection_boxes = eval_dict[detection_fields.detection_boxes]
detection_scores = eval_dict[detection_fields.detection_scores]
detection_classes = eval_dict[detection_fields.detection_classes]
@@ -323,12 +357,21 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
# If groundtruth_labeled_classes is not provided, set it equal to
# detection_classes so that all predictions are kept when computing eval
# metrics.
if groundtruth_labeled_classes is None:
groundtruth_labeled_classes = detection_classes
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
groundtruth_labeled_classes = tf.expand_dims(groundtruth_labeled_classes,
0)
detection_boxes = tf.expand_dims(detection_boxes, 0)
detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
@@ -359,16 +402,12 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool)
return tf.py_func(update_op, [image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_is_crowd,
num_gt_boxes_per_image,
detection_boxes,
detection_scores,
detection_classes,
num_det_boxes_per_image,
is_annotated], [])
return tf.py_func(update_op, [
image_id, groundtruth_boxes, groundtruth_classes, groundtruth_is_crowd,
groundtruth_labeled_classes, num_gt_boxes_per_image, detection_boxes,
detection_scores, detection_classes, num_det_boxes_per_image,
is_annotated
], [])
def get_estimator_eval_metric_ops(self, eval_dict):
"""Returns a dictionary of eval metric ops.
......
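For reference, a minimal sketch (not part of this commit) of the evaluator-level behavior added above, assuming the usual object_detection import paths; the categories, boxes and scores are illustrative only:

import numpy as np

from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation

categories = [{'id': 1, 'name': 'person'}, {'id': 2, 'name': 'dog'}]
evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories, skip_predictions_for_unlabeled_class=True)

# Only class 1 is exhaustively labeled for this image, so the class-2
# detection below is dropped before the COCO metrics are computed.
evaluator.add_single_ground_truth_image_info(
    'image1', {
        standard_fields.InputDataFields.groundtruth_boxes:
            np.array([[100., 100., 200., 200.]]),
        standard_fields.InputDataFields.groundtruth_classes: np.array([1]),
        standard_fields.InputDataFields.groundtruth_labeled_classes:
            np.array([1]),
    })
evaluator.add_single_detected_image_info(
    'image1', {
        standard_fields.DetectionResultFields.detection_boxes:
            np.array([[100., 100., 200., 200.], [100., 100., 200., 200.]]),
        standard_fields.DetectionResultFields.detection_scores:
            np.array([.8, .95]),
        standard_fields.DetectionResultFields.detection_classes:
            np.array([1, 2]),
    })
metrics = evaluator.evaluate()  # e.g. metrics['DetectionBoxes_Precision/mAP']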
@@ -330,6 +330,107 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
# Test skipping unmatched detector predictions with
# groundtruth_labeled_classes.
def testGetMAPWithSkipUnmatchedPredictions(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list(), skip_predictions_for_unlabeled_class=True)
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_labeled_classes = tf.placeholder(tf.float32, shape=(None))
detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key:
image_id,
input_data_fields.groundtruth_boxes:
groundtruth_boxes,
input_data_fields.groundtruth_classes:
groundtruth_classes,
input_data_fields.groundtruth_labeled_classes:
groundtruth_labeled_classes,
detection_fields.detection_boxes:
detection_boxes,
detection_fields.detection_scores:
detection_scores,
detection_fields.detection_classes:
detection_classes
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id:
'image1',
groundtruth_boxes:
np.array([[100., 100., 200., 200.]]),
groundtruth_classes:
np.array([1]),
# Only class 1 is exhaustively labeled for image1.
groundtruth_labeled_classes:
np.array([1]),
detection_boxes:
np.array([[100., 100., 200., 200.], [100., 100., 200.,
200.]]),
detection_scores:
np.array([.8, .95]),
detection_classes:
np.array([1, 2])
})
sess.run(
update_op,
feed_dict={
image_id: 'image2',
groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
groundtruth_classes: np.array([3]),
groundtruth_labeled_classes: np.array([3]),
detection_boxes: np.array([[50., 50., 100., 100.]]),
detection_scores: np.array([.7]),
detection_classes: np.array([3])
})
sess.run(
update_op,
feed_dict={
image_id: 'image3',
groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
groundtruth_classes: np.array([2]),
groundtruth_labeled_classes: np.array([2]),
detection_boxes: np.array([[25., 25., 50., 50.]]),
detection_scores: np.array([.9]),
detection_classes: np.array([2])
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
......
@@ -3,7 +3,7 @@ syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
// Next id - 33
// Next id - 34
message EvalConfig {
optional uint32 batch_size = 25 [default = 1];
// Number of visualization images to generate.
@@ -103,6 +103,13 @@ message EvalConfig {
// visualization. An example would be human pose estimation where certain
// joints can be connected.
repeated KeypointEdge keypoint_edge = 32;
// The "groundtruth_labeled_classes" field indicates which classes have been
// labeled on the images. If skip_predictions_for_unlabeled_class is set,
// detector predictions that do not match to the groundtruth_labeled_classes
// will be ignored. This is useful for evaluating on test data that are not
// exhaustively labeled.
optional bool skip_predictions_for_unlabeled_class = 33 [default = false];
}
// A message to configure parameterized evaluation metric.
......
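At the config level, the new field is set alongside the existing eval options; below is a hypothetical text-format fragment (not part of this commit), assuming eval_pb2 has been regenerated from the updated eval.proto and that the input pipeline provides per-image groundtruth_labeled_classes:

from google.protobuf import text_format

from object_detection.protos import eval_pb2

# Illustrative EvalConfig fragment; only the field names come from eval.proto.
eval_config = text_format.Parse(
    """
    metrics_set: 'coco_detection_metrics'
    skip_predictions_for_unlabeled_class: true
    """, eval_pb2.EvalConfig())
assert eval_config.skip_predictions_for_unlabeled_class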