Commit 240e75aa authored by A. Unique TensorFlower, committed by TF Object Detection Team

Add partial label evaluation to PrecisionAtRecallEvaluation.

PiperOrigin-RevId: 455588418
parent 529efcdf
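For orientation before the diff: this change lets PrecisionAtRecallDetectionEvaluator drop detections whose class is not annotated for a given image. Below is a minimal usage sketch (not part of the commit), assuming the standard object_detection import paths and the hypothetical image/box values shown:

import numpy as np
from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation

categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'},
              {'id': 3, 'name': 'elephant'}]
evaluator = object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
    categories,
    recall_lower_bound=0.0,
    recall_upper_bound=1.0,
    skip_predictions_for_unlabeled_class=True)

# groundtruth_labeled_classes is a one-hot vector of shape [num_classes + 1];
# here only cat (1) and dog (2) are labeled, so elephant detections are ignored.
evaluator.add_single_ground_truth_image_info(
    'img1', {
        standard_fields.InputDataFields.groundtruth_boxes:
            np.array([[10., 10., 11., 11.]], dtype=float),
        standard_fields.InputDataFields.groundtruth_classes:
            np.array([1], dtype=int),
        standard_fields.InputDataFields.groundtruth_labeled_classes:
            np.array([0, 1, 1, 0], dtype=int),
    })
evaluator.add_single_detected_image_info(
    'img1', {
        standard_fields.DetectionResultFields.detection_boxes:
            np.array([[10., 10., 11., 11.], [10., 10., 11., 11.]], dtype=float),
        standard_fields.DetectionResultFields.detection_scores:
            np.array([0.8, 0.9], dtype=float),
        # The class-3 (elephant) detection is filtered out before matching.
        standard_fields.DetectionResultFields.detection_classes:
            np.array([1, 3], dtype=int),
    })
metrics = evaluator.evaluate()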
@@ -1209,7 +1209,9 @@ def evaluator_options_from_eval_config(eval_config):
    elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
      evaluator_options[eval_metric_fn_key] = {
          'recall_lower_bound': (eval_config.recall_lower_bound),
          'recall_upper_bound': (eval_config.recall_upper_bound),
          'skip_predictions_for_unlabeled_class':
              eval_config.skip_predictions_for_unlabeled_class,
      }
  return evaluator_options
...
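A hedged sketch of how the new flag flows from an EvalConfig into the options built by evaluator_options_from_eval_config above (assuming the usual repeated metrics_set field on eval.proto; the recall bounds and skip_predictions_for_unlabeled_class fields are the ones referenced in the hunk):

from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('precision_at_recall_detection_metrics')
eval_config.recall_lower_bound = 0.0
eval_config.recall_upper_bound = 1.0
eval_config.skip_predictions_for_unlabeled_class = True

options = eval_util.evaluator_options_from_eval_config(eval_config)
# options['precision_at_recall_detection_metrics'] now carries the recall
# bounds plus 'skip_predictions_for_unlabeled_class', which is forwarded to
# the PrecisionAtRecallDetectionEvaluator constructor.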
@@ -257,6 +257,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_upper_bound'], eval_config.recall_upper_bound)
    self.assertFalse(evaluator_options['precision_at_recall_detection_metrics']
                     ['skip_predictions_for_unlabeled_class'])

  def test_get_evaluator_with_evaluator_options(self):
    eval_config = eval_pb2.EvalConfig()
...
@@ -275,6 +277,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertTrue(evaluator[0]._include_metrics_per_category)
    self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
    self.assertTrue(evaluator[1]._skip_predictions_for_unlabeled_class)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
                           eval_config.recall_lower_bound)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
...
@@ -606,7 +606,8 @@ class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator):
               categories,
               matching_iou_threshold=0.5,
               recall_lower_bound=0.0,
               recall_upper_bound=1.0,
               skip_predictions_for_unlabeled_class=False):
    super(PrecisionAtRecallDetectionEvaluator, self).__init__(
        categories,
        matching_iou_threshold=matching_iou_threshold,
...
@@ -615,6 +616,83 @@ class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator):
        evaluate_corlocs=False,
        metric_prefix='PrecisionAtRecallBoxes',
        use_weighted_mean_ap=False)
self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
self._expected_keys.update(
[standard_fields.InputDataFields.groundtruth_labeled_classes])
self.groundtruth_labeled_classes = {}
def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
"""Adds groundtruth for a single image to be used for evaluation.
If the labeled classes field is present, a map of image_id to
groundtruth_labeled_classes is populated with the one-hot labeled classes.
Args:
image_id: A unique string/integer identifier for the image.
groundtruth_dict: A dictionary containing -
standard_fields.InputDataFields.groundtruth_labeled_classes: Optional
numpy one-hot integer array of shape [num_classes+1] containing 1
for classes that are labeled in the image and 0 otherwise.
Raises:
ValueError: If shape of labeled classes field is not as expected.
"""
super(PrecisionAtRecallDetectionEvaluator,
self).add_single_ground_truth_image_info(image_id, groundtruth_dict)
labeled_classes = groundtruth_dict.get(
standard_fields.InputDataFields.groundtruth_labeled_classes, None)
if self._skip_predictions_for_unlabeled_class and labeled_classes is not None:
if labeled_classes.shape != (self._num_classes + 1,):
raise ValueError('Invalid shape for groundtruth labeled classes: {}, '
'num_categories_including_background: {}'.format(
labeled_classes, self._num_classes + 1))
labeled_classes = np.flatnonzero(labeled_classes == 1).tolist()
self.groundtruth_labeled_classes[image_id] = labeled_classes
else:
self.groundtruth_labeled_classes[image_id] = None
def add_single_detected_image_info(self, image_id, detections_dict):
"""Adds detections for a single image to be used for evaluation.
If the labeled classes field has been populated for the given image_id,
the detections for classes that are not in the labeled classes are
filtered out.
Args:
image_id: A unique string/integer identifier for the image.
detections_dict: A dictionary containing -
standard_fields.DetectionResultFields.detection_boxes: float32 numpy
array of shape [num_boxes, 4] containing `num_boxes` detection boxes of
the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
standard_fields.DetectionResultFields.detection_scores: float32 numpy
array of shape [num_boxes] containing detection scores for the boxes.
standard_fields.DetectionResultFields.detection_classes: integer numpy
array of shape [num_boxes] containing 1-indexed detection classes for
the boxes.
"""
groundtruth_labeled_classes = self.groundtruth_labeled_classes[image_id]
if groundtruth_labeled_classes is not None:
detection_classes_key = standard_fields.DetectionResultFields.detection_classes
detected_boxes_key = standard_fields.DetectionResultFields.detection_boxes
detected_scores_key = standard_fields.DetectionResultFields.detection_scores
# Only keep detection if label is in groundtruth_labeled_classes.
allowed = np.isin(detections_dict[detection_classes_key],
groundtruth_labeled_classes)
detections_dict[detection_classes_key] = detections_dict[
detection_classes_key][allowed]
detections_dict[detected_boxes_key] = detections_dict[detected_boxes_key][
allowed]
detections_dict[detected_scores_key] = detections_dict[
detected_scores_key][allowed]
super(PrecisionAtRecallDetectionEvaluator,
self).add_single_detected_image_info(image_id, detections_dict)
class PascalInstanceSegmentationEvaluator(ObjectDetectionEvaluator):
...
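The core of the filtering added above is a one-hot to index conversion followed by an np.isin mask. A small self-contained illustration with hypothetical values:

import numpy as np

# One-hot labeled-classes vector of shape [num_classes + 1] (background slot at
# index 0). Here cat (1) and dog (2) are labeled; elephant (3) is not.
labeled_one_hot = np.array([0, 1, 1, 0], dtype=int)
labeled_classes = np.flatnonzero(labeled_one_hot == 1).tolist()  # -> [1, 2]

detection_classes = np.array([1, 2, 3])      # 1-indexed detection classes
detection_scores = np.array([0.6, 0.9, 0.9])

# Keep a detection only if its class is among the labeled classes.
allowed = np.isin(detection_classes, labeled_classes)  # -> [True, True, False]
print(detection_classes[allowed])  # [1 2]
print(detection_scores[allowed])   # [0.6 0.9]
# Dropped detections (class 3 here) count neither as true nor false positives.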
@@ -761,6 +761,159 @@ class PrecisionAtRecallEvaluationTest(tf.test.TestCase):
    self.wp_eval.clear()
    self.assertFalse(self.wp_eval._image_ids)

def test_returns_correct_metric_values_partial_labels(self):
# Create partial label evaluation object.
self.wp_eval_partial = (
object_detection_evaluation.PrecisionAtRecallDetectionEvaluator(
self.categories,
recall_lower_bound=0.0,
recall_upper_bound=1.0,
skip_predictions_for_unlabeled_class=True))
# The first test case is as follows:
#
# Labeled classes: [cat, dog]
# +--------------------------+-------------------------------------+------+
# | Groundtruth | Matched Detection | Type |
# +--------------------------+-------------------------------------+------+
# | cat [10, 10, 11, 11] | cat (0.6) [10, 10, 11, 11] | TP |
# | cat [100, 100, 220, 220] | - | FN |
# | dog [100, 100, 120, 120] | dog (0.8) [100, 100, 120, 120] | TP |
# | - | dog (0.9) [10, 10, 11, 11] | FP |
# | - | elephant (0.9) [100, 100, 220, 220] | IGN |
# +--------------------------+-------------------------------------+------+
image_key1 = 'img1'
# Add groundtruth boxes for img1.
groundtruth_boxes1 = np.array(
[[10, 10, 11, 11], [100, 100, 220, 220], [100, 100, 120, 120]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 1, 2], dtype=int)
groundtruth_labeled_classes1 = np.array([0, 1, 1, 0], dtype=int)
self.wp_eval_partial.add_single_ground_truth_image_info(
image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1,
standard_fields.InputDataFields.groundtruth_labeled_classes:
groundtruth_labeled_classes1
})
# Add detected boxes for img1.
detected_boxes1 = np.array([[10, 10, 11, 11], [10, 10, 11, 11],
[100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels1 = np.array([1, 2, 2, 3], dtype=int)
detected_scores1 = np.array([0.6, 0.9, 0.8, 0.9], dtype=float)
self.wp_eval_partial.add_single_detected_image_info(
image_key1, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes1,
standard_fields.DetectionResultFields.detection_scores:
detected_scores1,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels1
})
# The second test case is as follows:
#
# Labeled classes: [dog, elephant]
# +---------------------------+---------------------------------+------+
# | Groundtruth | Matched Detection | Type |
# +---------------------------+---------------------------------+------+
# | - | cat (0.8) [100, 100, 120, 120] | IGN |
# | dog [100, 100, 120, 120] | - | FN |
# | elephant [10, 10, 11, 11] | elephant (0.9) [10, 10, 11, 11] | TP |
# +---------------------------+---------------------------------+------+
image_key2 = 'img2'
# Add groundtruth boxes for img2.
groundtruth_boxes2 = np.array(
[[100, 100, 120, 120], [10, 10, 11, 11]], dtype=float)
groundtruth_class_labels2 = np.array([2, 3], dtype=int)
groundtruth_labeled_classes2 = np.array([0, 0, 1, 1], dtype=int)
self.wp_eval_partial.add_single_ground_truth_image_info(
image_key2, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_labeled_classes:
groundtruth_labeled_classes2
})
# Add detected boxes for img2.
detected_boxes2 = np.array(
[[100, 100, 120, 120], [10, 10, 11, 11]],
dtype=float)
detected_class_labels2 = np.array([1, 3], dtype=int)
detected_scores2 = np.array([0.8, 0.9], dtype=float)
self.wp_eval_partial.add_single_detected_image_info(
image_key2, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes2,
standard_fields.DetectionResultFields.detection_scores:
detected_scores2,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels2
})
# Compute AP metrics.
metrics = self.wp_eval_partial.evaluate()
# Precision and recall for cat.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.8 | IGN | - | - |
# | 0.6 | TP | 1.0 | 0.5 |
# +------------+------+-----------+--------+
# Expected AP: 0.5
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/cat'], 0.5)
# Precision and recall for dog.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | FP | 0.0 | 0.0 |
# | 0.8 | TP | 0.5 | 0.5 |
# +------------+------+-----------+--------+
#
# After non-decreasing preprocessing of precision in
# third_party/tensorflow_models/object_detection/utils/metrics.py.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | FP | 0.5 | 0.0 |
# | 0.8 | TP | 0.5 | 0.5 |
# +------------+------+-----------+--------+
# Expected AP: 0.25
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/dog'], 0.25)
# Precision and recall for elephant.
# +------------+------+-----------+--------+
# | Confidence | Type | Precision | Recall |
# +------------+------+-----------+--------+
# | 0.9 | IGN | - | - |
# | 0.9 | TP | 1.0 | 1.0 |
# +------------+------+-----------+--------+
# Expected AP: 1.0
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/elephant'], 1.0)
# Expected mAP: (AP_cat + AP_dog + AP_elephant) / 3 = (0.5 + 0.25 + 1.0) / 3
self.assertAlmostEqual(
metrics[self.wp_eval_partial._metric_prefix +
'Precision/mAP@0.5IOU'], (0.25 + 1.0 + 0.5) / 3)
self.wp_eval_partial.clear()
self.assertFalse(self.wp_eval_partial._image_ids)
class ObjectDetectionEvaluationTest(tf.test.TestCase):
...