Commit 240e75aa authored by A. Unique TensorFlower, committed by TF Object Detection Team

Add partial label evaluation to PrecisionAtRecallEvaluation.

PiperOrigin-RevId: 455588418
parent 529efcdf
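For orientation before the diff: this change lets PrecisionAtRecallDetectionEvaluator drop detections whose class is not annotated for a given image. Below is a minimal usage sketch (not part of the commit), assuming the standard object_detection import paths and the hypothetical image/box values shown:

import numpy as np
from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation

categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'},
              {'id': 3, 'name': 'elephant'}]
evaluator = object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
    categories,
    recall_lower_bound=0.0,
    recall_upper_bound=1.0,
    skip_predictions_for_unlabeled_class=True)

# groundtruth_labeled_classes is a one-hot vector of shape [num_classes + 1];
# here only cat (1) and dog (2) are labeled, so elephant detections are ignored.
evaluator.add_single_ground_truth_image_info(
    'img1', {
        standard_fields.InputDataFields.groundtruth_boxes:
            np.array([[10., 10., 11., 11.]], dtype=float),
        standard_fields.InputDataFields.groundtruth_classes:
            np.array([1], dtype=int),
        standard_fields.InputDataFields.groundtruth_labeled_classes:
            np.array([0, 1, 1, 0], dtype=int),
    })
evaluator.add_single_detected_image_info(
    'img1', {
        standard_fields.DetectionResultFields.detection_boxes:
            np.array([[10., 10., 11., 11.], [10., 10., 11., 11.]], dtype=float),
        standard_fields.DetectionResultFields.detection_scores:
            np.array([0.8, 0.9], dtype=float),
        # The class-3 (elephant) detection is filtered out before matching.
        standard_fields.DetectionResultFields.detection_classes:
            np.array([1, 3], dtype=int),
    })
metrics = evaluator.evaluate()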
@@ -1209,7 +1209,9 @@ def evaluator_options_from_eval_config(eval_config):
    elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
      evaluator_options[eval_metric_fn_key] = {
          'recall_lower_bound': (eval_config.recall_lower_bound),
          'recall_upper_bound': (eval_config.recall_upper_bound),
          'skip_predictions_for_unlabeled_class':
              eval_config.skip_predictions_for_unlabeled_class,
      }
  return evaluator_options
...
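A hedged sketch of how the new flag flows from an EvalConfig into the options built by evaluator_options_from_eval_config above (assuming the usual repeated metrics_set field on eval.proto; the recall bounds and skip_predictions_for_unlabeled_class fields are the ones referenced in the hunk):

from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('precision_at_recall_detection_metrics')
eval_config.recall_lower_bound = 0.0
eval_config.recall_upper_bound = 1.0
eval_config.skip_predictions_for_unlabeled_class = True

options = eval_util.evaluator_options_from_eval_config(eval_config)
# options['precision_at_recall_detection_metrics'] now carries the recall
# bounds plus 'skip_predictions_for_unlabeled_class', which is forwarded to
# the PrecisionAtRecallDetectionEvaluator constructor.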
@@ -257,6 +257,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_upper_bound'], eval_config.recall_upper_bound)
    self.assertFalse(evaluator_options['precision_at_recall_detection_metrics']
                     ['skip_predictions_for_unlabeled_class'])

  def test_get_evaluator_with_evaluator_options(self):
    eval_config = eval_pb2.EvalConfig()
...
@@ -275,6 +277,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertTrue(evaluator[0]._include_metrics_per_category)
    self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
    self.assertTrue(evaluator[1]._skip_predictions_for_unlabeled_class)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
                           eval_config.recall_lower_bound)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
...
@@ -606,7 +606,8 @@ class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator):
               categories,
               matching_iou_threshold=0.5,
               recall_lower_bound=0.0,
               recall_upper_bound=1.0,
               skip_predictions_for_unlabeled_class=False):
    super(PrecisionAtRecallDetectionEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
...
@@ -615,6 +616,83 @@ class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator):
        evaluate_corlocs=False,
        metric_prefix='PrecisionAtRecallBoxes',
        use_weighted_mean_ap=False)
self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
self._expected_keys.update(
[standard_fields.InputDataFields.groundtruth_labeled_classes])
self.groundtruth_labeled_classes = {}
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
If the labeled classes field is present, a map of image_id to
groundtruth_labeled_classes is populated with the one-hot labeled classes.
Args:
image_id: A unique string/integer identifier for the image.
groundtruth_dict: A dictionary containing -
standard_fields.InputDataFields.groundtruth_labeled_classes: Optional
numpy one-hot integer array of shape [num_classes+1] containing 1
for classes that are labeled in the image and 0 otherwise.
Raises:
ValueError: If shape of labeled classes field is not as expected.
"""
super(PrecisionAtRecallDetectionEvaluator,
self).add_single_ground_truth_image_info(image_id, groundtruth_dict)
labeled_classes = groundtruth_dict.get(
standard_fields.InputDataFields.groundtruth_labeled_classes, None)
if self._skip_predictions_for_unlabeled_class and labeled_classes is not None:
if labeled_classes.shape != (self._num_classes + 1,):
raise ValueError('Invalid shape for groundtruth labeled classes: {}, '
'num_categories_including_background: {}'.format(
labeled_classes, self._num_classes + 1))
labeled_classes = np.flatnonzero(labeled_classes == 1).tolist()
self.groundtruth_labeled_classes[image_id] = labeled_classes
else:
self.groundtruth_labeled_classes[image_id] = None
def add_single_detected_image_info(self, image_id, detections_dict):
"""Adds detections for a single image to be used for evaluation.
If the labeled classes field has been populated for the given image_id,
the detections for classes that are not in the labeled classes are
filtered out.
Args:
image_id: A unique string/integer identifier for the image.
detections_dict: A dictionary containing -
standard_fields.DetectionResultFields.detection_boxes: float32 numpy
array of shape [num_boxes, 4] containing `num_boxes` detection boxes of
the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [num_boxes] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: integer numpy
array of shape [num_boxes] containing 1-indexed detection classes for
the boxes.
"""
groundtruth_labeled_classes = self.groundtruth_labeled_classes[image_id]
if groundtruth_labeled_classes is not None:
detection_classes_key = standard_fields.DetectionResultFields.detection_classes
detected_boxes_key = standard_fields.DetectionResultFields.detection_boxes
detected_scores_key = standard_fields.DetectionResultFields.detection_scores
# Only keep detection if label is in groundtruth_labeled_classes.
allowed = np.isin(detections_dict[detection_classes_key],
groundtruth_labeled_classes)
detections_dict[detection_classes_key] = detections_dict[
detection_classes_key][allowed]
detections_dict[detected_boxes_key] = detections_dict[detected_boxes_key][
allowed]
detections_dict[detected_scores_key] = detections_dict[
detected_scores_key][allowed]
super(PrecisionAtRecallDetectionEvaluator,
self).add_single_detected_image_info(image_id, detections_dict)
class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
...
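The core of the filtering added above is a one-hot to index conversion followed by an np.isin mask. A small self-contained illustration with hypothetical values:

import numpy as np

# One-hot labeled-classes vector of shape [num_classes + 1] (background slot at
# index 0). Here cat (1) and dog (2) are labeled; elephant (3) is not.
labeled_one_hot = np.array([0, 1, 1, 0], dtype=int)
labeled_classes = np.flatnonzero(labeled_one_hot == 1).tolist()  # -> [1, 2]

detection_classes = np.array([1, 2, 3])      # 1-indexed detection classes
detection_scores = np.array([0.6, 0.9, 0.9])

# Keep a detection only if its class is among the labeled classes.
allowed = np.isin(detection_classes, labeled_classes)  # -> [True, True, False]
print(detection_classes[allowed])  # [1 2]
print(detection_scores[allowed])   # [0.6 0.9]
# Dropped detections (class 3 here) count neither as true nor false positives.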
@@ -761,6 +761,159 @@ class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
    self.wp_eval.clear()
    self.assertFalse(self.wp_eval._image_ids)

def test_returns_correct_metric_values_partial_labels(self):
# Create partial label evaluation object.
self.wp_eval_partial = (
object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
self.categories,
recall_lower_bound=0.0,
recall_upper_bound=1.0,
skip_predictions_for_unlabeled_class=True))
# The first test case is as follows:
#
# Labeled classes: [cat, dog]
# +--------------------------+-------------------------------------+------+
# | Groundtruth | Matched Detection | Type |
# +--------------------------+-------------------------------------+------+
# | cat [10, 10, 11, 11] | cat (0.6) [10, 10, 11, 11] | TP |
# | cat [100, 100, 220, 220] | - | FN |
# | dog [100, 100, 120, 120] | dog (0.8) [100, 100, 120, 120] | TP |
# | - | dog (0.9) [10, 10, 11, 11] | FP |
# | - | elephant (0.9) [100, 100, 220, 220] | IGN |
# +--------------------------+-------------------------------------+------+
image_key1 = 'img1'
# Add groundtruth boxes for img1.
groundtruth_boxes1 = np.array(
[[10, 10, 11, 11], [100, 100, 220, 220], [100, 100, 120, 120]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 1, 2], dtype=int)
groundtruth_labeled_classes1 = np.array([0, 1, 1, 0], dtype=int)
self.wp_eval_partial.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1,
standard_fields.InputDataFields.groundtruth_labeled_classes:
groundtruth_labeled_classes1
})
# Add detected boxes for img1.
detected_boxes1 = np.array([[10, 10, 11, 11], [10, 10, 11, 11],
[100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels1 = np.array([1, 2, 2, 3], dtype=int)
detected_scores1 = np.array([0.6, 0.9, 0.8, 0.9], dtype=float)
self.wp_eval_partial.add_single_detected_image_info(
image_key1, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes1,
standard_fields.DetectionResultFields.detection_scores:
detected_scores1,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels1
})
# The second test case is as follows:
#
# Labeled classes: [dog, elephant]
# +---------------------------+---------------------------------+------+
# | Groundtruth | Matched Detection | Type |
# +---------------------------+---------------------------------+------+
# | - | cat (0.8) [100, 100, 120, 120] | IGN |
# | dog [100, 100, 120, 120] | - | FN |
# | elephant [10, 10, 11, 11] | elephant (0.9) [10, 10, 11, 11] | TP |
# +---------------------------+---------------------------------+------+
image_key2 = 'img2'
# Add groundtruth boxes for img2.
groundtruth_boxes2 = np.array(
[[100, 100, 120, 120], [10, 10, 11, 11]], dtype=float)
groundtruth_class_labels2 = np.array([2, 3], dtype=int)
groundtruth_labeled_classes2 = np.array([0, 0, 1, 1], dtype=int)
self.wp_eval_partial.add_single_ground_truth_image_info(
image_key2, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_labeled_classes:
groundtruth_labeled_classes2
})
# Add detected boxes for img2.
detected_boxes2 = np.array(
[[100, 100, 120, 120], [10, 10, 11, 11]],
dtype=float)
detected_class_labels2 = np.array([1, 3], dtype=int)
detected_scores2 = np.array([0.8, 0.9], dtype=float)
self.wp_eval_partial.add_single_detected_image_info(
image_key2, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes2,
standard_fields.DetectionResultFields.detection_scores:
detected_scores2,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels2
})
# Compute AP metrics.
metrics = self.wp_eval_partial.evaluate()
# Precision and recall for cat.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.8 | IGN | - | - |
# | 0.6 | TP | 1.0 | 0.5 |
# +------------+------+-----------+--------+
# Expected AP: 0.5
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/cat'], 0.5)
# Precision and recall for dog.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | FP | 0.0 | 0.0 |
# | 0.8 | TP | 0.5 | 0.5 |
# +------------+------+-----------+--------+
#
# After non-decreasing preprocessing of precision in
# third_party/tensorflow_models/object_detection/utils/metrics.py.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | FP | 0.5 | 0.0 |
# | 0.8 | TP | 0.5 | 0.5 |
# +------------+------+-----------+--------+
# Expected AP: 0.25
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/dog'], 0.25)
# Precision and recall for elephant.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | IGN | - | - |
# | 0.9 | TP | 1.0 | 1.0 |
# +------------+------+-----------+--------+
# Expected AP: 1.0
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/elephant'], 1.0)
# Expected mAP: (AP_cat + AP_dog + AP_elephant) / 3 = (0.5 + 0.25 + 1.0) / 3
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'Precision/mAP@0.5IOU'], (0.25 + 1.0 + 0.5) / 3)
self.wp_eval_partial.clear()
self.assertFalse(self.wp_eval_partial._image_ids)
class ObjectDetectionEvaluationTest(tf.test.TestCase):
...