Commit c814f04d authored by Huizhong Chen, committed by TF Object Detection Team

Partially labeled groundtruth online eval implementation.

PiperOrigin-RevId: 328477362
parent 4af7e0d8
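In short: the COCO detection evaluator can now drop detections whose class is not among an image's exhaustively labeled classes, so partially labeled eval data does not produce spurious false positives. A minimal usage sketch of the new flag (module paths follow the standard object_detection layout; the category list and arrays are illustrative, not from the commit):

import numpy as np
from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation

evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories=[{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}],
    skip_predictions_for_unlabeled_class=True)

# Groundtruth: one class-1 box; only class 1 is exhaustively labeled here.
evaluator.add_single_ground_truth_image_info(
    image_id='image1',
    groundtruth_dict={
        standard_fields.InputDataFields.groundtruth_boxes:
            np.array([[100., 100., 200., 200.]]),
        standard_fields.InputDataFields.groundtruth_classes: np.array([1]),
        standard_fields.InputDataFields.groundtruth_labeled_classes:
            np.array([1]),
    })

# Detections: the class-2 box is skipped because class 2 is unlabeled for
# this image, so it cannot count as a false positive.
evaluator.add_single_detected_image_info(
    image_id='image1',
    detections_dict={
        standard_fields.DetectionResultFields.detection_boxes:
            np.array([[100., 100., 200., 200.], [100., 100., 200., 200.]]),
        standard_fields.DetectionResultFields.detection_scores:
            np.array([.8, .95]),
        standard_fields.DetectionResultFields.detection_classes:
            np.array([1, 2]),
    })

metrics = evaluator.evaluate()  # mAP is unaffected by the class-2 box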
@@ -1121,6 +1121,15 @@ def evaluator_options_from_eval_config(eval_config):
           'include_metrics_per_category': (
               eval_config.include_metrics_per_category)
       }
+      # For coco detection eval, if the eval_config proto contains the
+      # "skip_predictions_for_unlabeled_class" field, include this field in
+      # evaluator_options.
+      if eval_metric_fn_key == 'coco_detection_metrics' and hasattr(
+          eval_config, 'skip_predictions_for_unlabeled_class'):
+        evaluator_options[eval_metric_fn_key].update({
+            'skip_predictions_for_unlabeled_class':
+                (eval_config.skip_predictions_for_unlabeled_class)
+        })
     elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
       evaluator_options[eval_metric_fn_key] = {
           'recall_lower_bound': (eval_config.recall_lower_bound),
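For context, here is a sketch of how the new field flows from EvalConfig into evaluator options (a minimal example, assuming the usual object_detection package layout):

from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('coco_detection_metrics')
eval_config.skip_predictions_for_unlabeled_class = True

# The option dict is later splatted into the evaluator constructor, e.g.
# CocoDetectionEvaluator(categories, **options['coco_detection_metrics']).
options = eval_util.evaluator_options_from_eval_config(eval_config)
print(options['coco_detection_metrics']
      ['skip_predictions_for_unlabeled_class'])  # True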
@@ -239,6 +239,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
         eval_config)
     self.assertTrue(evaluator_options['coco_detection_metrics']
                     ['include_metrics_per_category'])
+    self.assertFalse(evaluator_options['coco_detection_metrics']
+                     ['skip_predictions_for_unlabeled_class'])
     self.assertTrue(
         evaluator_options['coco_mask_metrics']['include_metrics_per_category'])
     self.assertAlmostEqual(
@@ -253,6 +255,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
     eval_config.metrics_set.extend(
         ['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
     eval_config.include_metrics_per_category = True
+    eval_config.skip_predictions_for_unlabeled_class = True
     eval_config.recall_lower_bound = 0.2
     eval_config.recall_upper_bound = 0.6
     categories = self._get_categories_list()
@@ -263,6 +266,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
         evaluator_options)
     self.assertTrue(evaluator[0]._include_metrics_per_category)
+    self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
     self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
                            eval_config.recall_lower_bound)
     self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
@@ -34,7 +34,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
   def __init__(self,
                categories,
                include_metrics_per_category=False,
-               all_metrics_per_category=False):
+               all_metrics_per_category=False,
+               skip_predictions_for_unlabeled_class=False):
     """Constructor.

     Args:
@@ -46,6 +47,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         each category in per_category_ap. Be careful with setting it to true if
         you have more than a handful of categories, because it will pollute
         your mldash.
+      skip_predictions_for_unlabeled_class: Skip predictions that do not match
+        the labeled classes for the image.
     """
     super(CocoDetectionEvaluator, self).__init__(categories)
     # _image_ids is a dictionary that maps unique image ids to Booleans which
@@ -58,6 +61,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     self._metrics = None
     self._include_metrics_per_category = include_metrics_per_category
     self._all_metrics_per_category = all_metrics_per_category
+    self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
+    self._groundtruth_labeled_classes = {}

   def clear(self):
     """Clears the state to prepare for a fresh evaluation."""
@@ -92,6 +97,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         numpy array of keypoint visibilities with shape [num_gt_boxes,
         num_keypoints]. Integer is treated as an enum with 0=not labeled,
         1=labeled but not visible and 2=labeled and visible.
+      InputDataFields.groundtruth_labeled_classes (optional): a 1-indexed
+        integer numpy array indicating which classes have been annotated
+        on the image. The evaluator stores one such array per image_id and
+        uses it to filter detections when skip_predictions_for_unlabeled_class
+        is set.
     """
     if image_id in self._image_ids:
       tf.logging.warning('Ignoring ground truth with image id %s since it was '
@@ -134,6 +143,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     self._annotation_id += groundtruth_dict[standard_fields.InputDataFields.
                                             groundtruth_boxes].shape[0]
+    self._groundtruth_labeled_classes[image_id] = groundtruth_dict.get(
+        standard_fields.InputDataFields.groundtruth_labeled_classes)
     # Boolean to indicate whether a detection has been added for this image.
     self._image_ids[image_id] = False
@@ -173,17 +184,41 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         standard_fields.DetectionResultFields.detection_keypoints)
     if detection_keypoints is not None and not detection_keypoints.shape[0]:
       detection_keypoints = None
-    self._detection_boxes_list.extend(
-        coco_tools.ExportSingleImageDetectionBoxesToCoco(
-            image_id=image_id,
-            category_id_set=self._category_id_set,
-            detection_boxes=detections_dict[
-                standard_fields.DetectionResultFields.detection_boxes],
-            detection_scores=detections_dict[
-                standard_fields.DetectionResultFields.detection_scores],
-            detection_classes=detections_dict[
-                standard_fields.DetectionResultFields.detection_classes],
-            detection_keypoints=detection_keypoints))
+
+    if self._skip_predictions_for_unlabeled_class:
+      det_classes = detections_dict[
+          standard_fields.DetectionResultFields.detection_classes]
+      num_det_boxes = det_classes.shape[0]
+      keep_box_ids = []
+      for box_id in range(num_det_boxes):
+        if det_classes[box_id] in self._groundtruth_labeled_classes[image_id]:
+          keep_box_ids.append(box_id)
+      self._detection_boxes_list.extend(
+          coco_tools.ExportSingleImageDetectionBoxesToCoco(
+              image_id=image_id,
+              category_id_set=self._category_id_set,
+              detection_boxes=detections_dict[
+                  standard_fields.DetectionResultFields.detection_boxes]
+              [keep_box_ids],
+              detection_scores=detections_dict[
+                  standard_fields.DetectionResultFields.detection_scores]
+              [keep_box_ids],
+              detection_classes=detections_dict[
+                  standard_fields.DetectionResultFields.detection_classes]
+              [keep_box_ids],
+              detection_keypoints=detection_keypoints))
+    else:
+      self._detection_boxes_list.extend(
+          coco_tools.ExportSingleImageDetectionBoxesToCoco(
+              image_id=image_id,
+              category_id_set=self._category_id_set,
+              detection_boxes=detections_dict[
+                  standard_fields.DetectionResultFields.detection_boxes],
+              detection_scores=detections_dict[
+                  standard_fields.DetectionResultFields.detection_scores],
+              detection_classes=detections_dict[
+                  standard_fields.DetectionResultFields.detection_classes],
+              detection_keypoints=detection_keypoints))
     self._image_ids[image_id] = True

   def dump_detections_to_json_file(self, json_output_path):
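The keep_box_ids loop above is the core of the change: detections are filtered before COCO export. The same logic in isolation, as a small numpy sketch (array names are illustrative):

import numpy as np

det_classes = np.array([1, 2, 3])    # predicted class per box
det_scores = np.array([.9, .95, .7])
labeled_classes = np.array([1, 3])   # exhaustively labeled on this image

keep_box_ids = [box_id for box_id in range(det_classes.shape[0])
                if det_classes[box_id] in labeled_classes]

# Fancy indexing with the kept ids, as in the diff above.
print(det_classes[keep_box_ids])  # [1 3]
print(det_scores[keep_box_ids])   # [0.9 0.7]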
@@ -271,24 +306,20 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
       None when executing eagerly, or an update_op that can be used to update
       the eval metrics in `tf.estimator.EstimatorSpec`.
     """
-    def update_op(
-        image_id_batched,
-        groundtruth_boxes_batched,
-        groundtruth_classes_batched,
-        groundtruth_is_crowd_batched,
-        num_gt_boxes_per_image,
-        detection_boxes_batched,
-        detection_scores_batched,
-        detection_classes_batched,
-        num_det_boxes_per_image,
-        is_annotated_batched):
-      """Update operation for adding batch of images to Coco evaluator."""
-      for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
-           det_score, det_class, num_det_box, is_annotated) in zip(
+    def update_op(image_id_batched, groundtruth_boxes_batched,
+                  groundtruth_classes_batched, groundtruth_is_crowd_batched,
+                  groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
+                  detection_boxes_batched, detection_scores_batched,
+                  detection_classes_batched, num_det_boxes_per_image,
+                  is_annotated_batched):
+      """Update operation for adding batch of images to Coco evaluator."""
+      for (image_id, gt_box, gt_class, gt_is_crowd, gt_labeled_classes,
+           num_gt_box, det_box, det_score, det_class,
+           num_det_box, is_annotated) in zip(
           image_id_batched, groundtruth_boxes_batched,
           groundtruth_classes_batched, groundtruth_is_crowd_batched,
-          num_gt_boxes_per_image,
+          groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
          detection_boxes_batched, detection_scores_batched,
          detection_classes_batched, num_det_boxes_per_image,
          is_annotated_batched):
@@ -297,7 +328,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
             image_id, {
                 'groundtruth_boxes': gt_box[:num_gt_box],
                 'groundtruth_classes': gt_class[:num_gt_box],
-                'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
+                'groundtruth_is_crowd': gt_is_crowd[:num_gt_box],
+                'groundtruth_labeled_classes': gt_labeled_classes
             })
         self.add_single_detected_image_info(
             image_id,
@@ -313,6 +345,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
     groundtruth_is_crowd = eval_dict.get(
         input_data_fields.groundtruth_is_crowd, None)
+    groundtruth_labeled_classes = eval_dict.get(
+        input_data_fields.groundtruth_labeled_classes, None)
     detection_boxes = eval_dict[detection_fields.detection_boxes]
     detection_scores = eval_dict[detection_fields.detection_scores]
     detection_classes = eval_dict[detection_fields.detection_classes]
@@ -323,12 +357,21 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     if groundtruth_is_crowd is None:
       groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
+
+    # If groundtruth_labeled_classes is not provided, make it equal to the
+    # detection_classes. This assumes that all predictions will be kept to
+    # compute eval metrics.
+    if groundtruth_labeled_classes is None:
+      groundtruth_labeled_classes = detection_classes
+
     if not image_id.shape.as_list():
       # Apply a batch dimension to all tensors.
       image_id = tf.expand_dims(image_id, 0)
       groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
       groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
       groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
+      groundtruth_labeled_classes = tf.expand_dims(groundtruth_labeled_classes,
+                                                   0)
       detection_boxes = tf.expand_dims(detection_boxes, 0)
       detection_scores = tf.expand_dims(detection_scores, 0)
       detection_classes = tf.expand_dims(detection_classes, 0)
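When the labeled classes default to detection_classes as in the hunk above, the later membership test keeps every box, i.e. the filter becomes a no-op. A one-line check (illustrative):

import numpy as np

det_classes = np.array([4, 2, 7])
labeled = det_classes  # the default applied above
assert all(c in labeled for c in det_classes)  # every prediction is kept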
@@ -359,16 +402,12 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     if is_annotated is None:
       is_annotated = tf.ones_like(image_id, dtype=tf.bool)
-    return tf.py_func(update_op, [image_id,
-                                  groundtruth_boxes,
-                                  groundtruth_classes,
-                                  groundtruth_is_crowd,
-                                  num_gt_boxes_per_image,
-                                  detection_boxes,
-                                  detection_scores,
-                                  detection_classes,
-                                  num_det_boxes_per_image,
-                                  is_annotated], [])
+    return tf.py_func(update_op, [
+        image_id, groundtruth_boxes, groundtruth_classes, groundtruth_is_crowd,
+        groundtruth_labeled_classes, num_gt_boxes_per_image, detection_boxes,
+        detection_scores, detection_classes, num_det_boxes_per_image,
+        is_annotated
+    ], [])

   def get_estimator_eval_metric_ops(self, eval_dict):
     """Returns a dictionary of eval metric ops.
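The batched update_op is wrapped with tf.py_func, which is why the labeled-classes bookkeeping can use plain numpy arrays and dicts inside a TF1 graph. A self-contained sketch of that pattern (the filtering function is illustrative, not the evaluator's own):

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def _keep_labeled(det_classes, labeled_classes):
  # Ordinary numpy code, executed by the py_func at graph run time.
  return det_classes[np.isin(det_classes, labeled_classes)]

kept = tf.py_func(_keep_labeled,
                  [tf.constant([1, 2, 3]), tf.constant([1, 3])],
                  tf.int32)
with tf.Session() as sess:
  print(sess.run(kept))  # [1 3]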
@@ -330,6 +330,107 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)

+  # Test skipping unmatched detector predictions with
+  # groundtruth_labeled_classes.
+  def testGetMAPWithSkipUnmatchedPredictions(self):
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list(), skip_predictions_for_unlabeled_class=True)
+    image_id = tf.placeholder(tf.string, shape=())
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
+    groundtruth_labeled_classes = tf.placeholder(tf.float32, shape=(None))
+    detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(None))
+    detection_classes = tf.placeholder(tf.float32, shape=(None))
+
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key:
+            image_id,
+        input_data_fields.groundtruth_boxes:
+            groundtruth_boxes,
+        input_data_fields.groundtruth_classes:
+            groundtruth_classes,
+        input_data_fields.groundtruth_labeled_classes:
+            groundtruth_labeled_classes,
+        detection_fields.detection_boxes:
+            detection_boxes,
+        detection_fields.detection_scores:
+            detection_scores,
+        detection_fields.detection_classes:
+            detection_classes
+    }
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id:
+                  'image1',
+              groundtruth_boxes:
+                  np.array([[100., 100., 200., 200.]]),
+              groundtruth_classes:
+                  np.array([1]),
+              # Only class 1 is exhaustively labeled for image1.
+              groundtruth_labeled_classes:
+                  np.array([1]),
+              detection_boxes:
+                  np.array([[100., 100., 200., 200.],
+                            [100., 100., 200., 200.]]),
+              detection_scores:
+                  np.array([.8, .95]),
+              detection_classes:
+                  np.array([1, 2])
+          })
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: 'image2',
+              groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
+              groundtruth_classes: np.array([3]),
+              groundtruth_labeled_classes: np.array([3]),
+              detection_boxes: np.array([[50., 50., 100., 100.]]),
+              detection_scores: np.array([.7]),
+              detection_classes: np.array([3])
+          })
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: 'image3',
+              groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
+              groundtruth_classes: np.array([2]),
+              groundtruth_labeled_classes: np.array([2]),
+              detection_boxes: np.array([[25., 25., 50., 50.]]),
+              detection_scores: np.array([.9]),
+              detection_classes: np.array([2])
+          })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.items():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'],
+                             1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._detection_boxes_list)
+    self.assertFalse(coco_evaluator._image_ids)
+
   def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
     coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
         _get_categories_list())
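Note on the expected values in the testGetMAPWithSkipUnmatchedPredictions hunk above: the only spurious detection (class 2 with score .95 on image1) belongs to a class outside image1's groundtruth_labeled_classes, so the evaluator drops it before COCO export and every metric comes out to 1.0. With skip_predictions_for_unlabeled_class left at its default of False, that box would be scored as a high-confidence false positive and precision would fall below 1.0.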
@@ -3,7 +3,7 @@ syntax = "proto2";
 package object_detection.protos;

 // Message for configuring DetectionModel evaluation jobs (eval.py).
-// Next id - 33
+// Next id - 34
 message EvalConfig {
   optional uint32 batch_size = 25 [default = 1];
   // Number of visualization images to generate.
@@ -103,6 +103,13 @@ message EvalConfig {
   // visualization. An example would be human pose estimation where certain
   // joints can be connected.
   repeated KeypointEdge keypoint_edge = 32;
+
+  // The "groundtruth_labeled_classes" input field indicates which classes
+  // have been labeled on an image. If skip_predictions_for_unlabeled_class
+  // is set, detector predictions that do not match the
+  // groundtruth_labeled_classes will be ignored. This is useful for
+  // evaluating on test data that is not exhaustively labeled.
+  optional bool skip_predictions_for_unlabeled_class = 33 [default = false];
 }

 // A message to configure parameterized evaluation metric.
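Enabling the behavior from a pipeline config only requires the new field on eval_config. A minimal sketch parsing a text-format proto (assuming the compiled eval_pb2 module is available):

from google.protobuf import text_format
from object_detection.protos import eval_pb2

eval_config = text_format.Parse(
    """
    metrics_set: 'coco_detection_metrics'
    skip_predictions_for_unlabeled_class: true
    """, eval_pb2.EvalConfig())
assert eval_config.skip_predictions_for_unlabeled_class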