Commit c814f04d authored by Huizhong Chen, committed by TF Object Detection Team

Partially labeled groundtruth online eval implementation.

PiperOrigin-RevId: 328477362
parent 4af7e0d8
@@ -1121,6 +1121,15 @@ def evaluator_options_from_eval_config(eval_config):
'include_metrics_per_category': (
eval_config.include_metrics_per_category)
}
# For coco detection eval, if the eval_config proto contains the
# "skip_predictions_for_unlabeled_class" field, include this field in
# evaluator_options.
if eval_metric_fn_key == 'coco_detection_metrics' and hasattr(
eval_config, 'skip_predictions_for_unlabeled_class'):
evaluator_options[eval_metric_fn_key].update({
'skip_predictions_for_unlabeled_class':
(eval_config.skip_predictions_for_unlabeled_class)
})
elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
evaluator_options[eval_metric_fn_key] = {
'recall_lower_bound': (eval_config.recall_lower_bound),
......
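For illustration, a minimal sketch (not part of this commit) of how the new flag flows from an EvalConfig into evaluator_options, assuming the usual object_detection import paths (eval_util, eval_pb2):

from object_detection import eval_util
from object_detection.protos import eval_pb2

# Build an eval config that requests COCO detection metrics and enables the
# new partially-labeled-groundtruth behavior.
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('coco_detection_metrics')
eval_config.skip_predictions_for_unlabeled_class = True

# The flag is copied into the per-metric options dict and later forwarded to
# the CocoDetectionEvaluator constructor.
evaluator_options = eval_util.evaluator_options_from_eval_config(eval_config)
assert evaluator_options['coco_detection_metrics'][
    'skip_predictions_for_unlabeled_class']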
@@ -239,6 +239,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
eval_config)
self.assertTrue(evaluator_options['coco_detection_metrics']
['include_metrics_per_category'])
self.assertFalse(evaluator_options['coco_detection_metrics']
['skip_predictions_for_unlabeled_class'])
self.assertTrue(
evaluator_options['coco_mask_metrics']['include_metrics_per_category'])
self.assertAlmostEqual(
@@ -253,6 +255,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
eval_config.metrics_set.extend(
['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
eval_config.include_metrics_per_category = True
eval_config.skip_predictions_for_unlabeled_class = True
eval_config.recall_lower_bound = 0.2
eval_config.recall_upper_bound = 0.6
categories = self._get_categories_list()
@@ -263,6 +266,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
evaluator_options)
self.assertTrue(evaluator[0]._include_metrics_per_category)
self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
eval_config.recall_lower_bound)
self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
......
@@ -34,7 +34,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
def __init__(self,
categories,
include_metrics_per_category=False,
all_metrics_per_category=False):
all_metrics_per_category=False,
skip_predictions_for_unlabeled_class=False):
"""Constructor.
Args:
@@ -46,6 +47,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
each category in per_category_ap. Be careful with setting it to true if
you have more than handful of categories, because it will pollute
your mldash.
skip_predictions_for_unlabeled_class: Skip predictions whose class does not
match any of the labeled classes for the image.
"""
super(CocoDetectionEvaluator, self).__init__(categories)
# _image_ids is a dictionary that maps unique image ids to Booleans which
@@ -58,6 +61,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
self._metrics = None
self._include_metrics_per_category = include_metrics_per_category
self._all_metrics_per_category = all_metrics_per_category
self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
self._groundtruth_labeled_classes = {}
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
@@ -92,6 +97,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
numpy array of keypoint visibilities with shape [num_gt_boxes,
num_keypoints]. Integer is treated as an enum with 0=not labeled,
1=labeled but not visible and 2=labeled and visible.
InputDataFields.groundtruth_labeled_classes (optional): a 1-indexed
integer numpy array indicating which classes have been annotated on
this image. Detections whose class is not in this array are skipped
when skip_predictions_for_unlabeled_class is True.
"""
if image_id in self._image_ids:
tf.logging.warning('Ignoring ground truth with image id %s since it was '
@@ -134,6 +143,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
self._annotation_id += groundtruth_dict[standard_fields.InputDataFields.
groundtruth_boxes].shape[0]
self._groundtruth_labeled_classes[image_id] = groundtruth_dict.get(
standard_fields.InputDataFields.groundtruth_labeled_classes)
# Boolean to indicate whether a detection has been added for this image.
self._image_ids[image_id] = False
@@ -173,6 +184,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
standard_fields.DetectionResultFields.detection_keypoints)
if detection_keypoints is not None and not detection_keypoints.shape[0]:
detection_keypoints = None
if self._skip_predictions_for_unlabeled_class:
det_classes = detections_dict[
standard_fields.DetectionResultFields.detection_classes]
num_det_boxes = det_classes.shape[0]
keep_box_ids = []
for box_id in range(num_det_boxes):
if det_classes[box_id] in self._groundtruth_labeled_classes[image_id]:
keep_box_ids.append(box_id)
self._detection_boxes_list.extend(
coco_tools.ExportSingleImageDetectionBoxesToCoco(
image_id=image_id,
category_id_set=self._category_id_set,
detection_boxes=detections_dict[
standard_fields.DetectionResultFields.detection_boxes]
[keep_box_ids],
detection_scores=detections_dict[
standard_fields.DetectionResultFields.detection_scores]
[keep_box_ids],
detection_classes=detections_dict[
standard_fields.DetectionResultFields.detection_classes]
[keep_box_ids],
detection_keypoints=detection_keypoints))
else:
self._detection_boxes_list.extend(
coco_tools.ExportSingleImageDetectionBoxesToCoco(
image_id=image_id,
@@ -271,24 +306,20 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
None when executing eagerly, or an update_op that can be used to update
the eval metrics in `tf.estimator.EstimatorSpec`.
"""
def update_op(
image_id_batched,
groundtruth_boxes_batched,
groundtruth_classes_batched,
groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
detection_boxes_batched,
detection_scores_batched,
detection_classes_batched,
num_det_boxes_per_image,
def update_op(image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
"""Update operation for adding batch of images to Coco evaluator."""
for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
det_score, det_class, num_det_box, is_annotated) in zip(
for (image_id, gt_box, gt_class, gt_is_crowd, gt_labeled_classes,
num_gt_box, det_box, det_score, det_class,
num_det_box, is_annotated) in zip(
image_id_batched, groundtruth_boxes_batched,
groundtruth_classes_batched, groundtruth_is_crowd_batched,
num_gt_boxes_per_image,
groundtruth_labeled_classes_batched, num_gt_boxes_per_image,
detection_boxes_batched, detection_scores_batched,
detection_classes_batched, num_det_boxes_per_image,
is_annotated_batched):
@@ -297,7 +328,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
image_id, {
'groundtruth_boxes': gt_box[:num_gt_box],
'groundtruth_classes': gt_class[:num_gt_box],
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
'groundtruth_is_crowd': gt_is_crowd[:num_gt_box],
'groundtruth_labeled_classes': gt_labeled_classes
})
self.add_single_detected_image_info(
image_id,
@@ -313,6 +345,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
groundtruth_is_crowd = eval_dict.get(
input_data_fields.groundtruth_is_crowd, None)
groundtruth_labeled_classes = eval_dict.get(
input_data_fields.groundtruth_labeled_classes, None)
detection_boxes = eval_dict[detection_fields.detection_boxes]
detection_scores = eval_dict[detection_fields.detection_scores]
detection_classes = eval_dict[detection_fields.detection_classes]
@@ -323,12 +357,21 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if groundtruth_is_crowd is None:
groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
# If groundtruth_labeled_classes is not provided, set it equal to
# detection_classes so that all predictions are kept when computing eval
# metrics.
if groundtruth_labeled_classes is None:
groundtruth_labeled_classes = detection_classes
if not image_id.shape.as_list():
# Apply a batch dimension to all tensors.
image_id = tf.expand_dims(image_id, 0)
groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
groundtruth_labeled_classes = tf.expand_dims(groundtruth_labeled_classes,
0)
detection_boxes = tf.expand_dims(detection_boxes, 0)
detection_scores = tf.expand_dims(detection_scores, 0)
detection_classes = tf.expand_dims(detection_classes, 0)
@@ -359,16 +402,12 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
if is_annotated is None:
is_annotated = tf.ones_like(image_id, dtype=tf.bool)
return tf.py_func(update_op, [image_id,
groundtruth_boxes,
groundtruth_classes,
groundtruth_is_crowd,
num_gt_boxes_per_image,
detection_boxes,
detection_scores,
detection_classes,
num_det_boxes_per_image,
is_annotated], [])
return tf.py_func(update_op, [
image_id, groundtruth_boxes, groundtruth_classes, groundtruth_is_crowd,
groundtruth_labeled_classes, num_gt_boxes_per_image, detection_boxes,
detection_scores, detection_classes, num_det_boxes_per_image,
is_annotated
], [])
def get_estimator_eval_metric_ops(self, eval_dict):
"""Returns a dictionary of eval metric ops.
......
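For reference, a minimal sketch (not part of this commit) of the evaluator-level behavior added above, assuming the usual object_detection import paths; the categories, boxes and scores are illustrative only:

import numpy as np

from object_detection.core import standard_fields
from object_detection.metrics import coco_evaluation

categories = [{'id': 1, 'name': 'person'}, {'id': 2, 'name': 'dog'}]
evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories, skip_predictions_for_unlabeled_class=True)

# Only class 1 is exhaustively labeled for this image, so the class-2
# detection below is dropped before the COCO metrics are computed.
evaluator.add_single_ground_truth_image_info(
    'image1', {
        standard_fields.InputDataFields.groundtruth_boxes:
            np.array([[100., 100., 200., 200.]]),
        standard_fields.InputDataFields.groundtruth_classes: np.array([1]),
        standard_fields.InputDataFields.groundtruth_labeled_classes:
            np.array([1]),
    })
evaluator.add_single_detected_image_info(
    'image1', {
        standard_fields.DetectionResultFields.detection_boxes:
            np.array([[100., 100., 200., 200.], [100., 100., 200., 200.]]),
        standard_fields.DetectionResultFields.detection_scores:
            np.array([.8, .95]),
        standard_fields.DetectionResultFields.detection_classes:
            np.array([1, 2]),
    })
metrics = evaluator.evaluate()  # e.g. metrics['DetectionBoxes_Precision/mAP']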
@@ -330,6 +330,107 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
# Test skipping unmatched detector predictions with
# groundtruth_labeled_classes.
def testGetMAPWithSkipUnmatchedPredictions(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list(), skip_predictions_for_unlabeled_class=True)
image_id = tf.placeholder(tf.string, shape=())
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
groundtruth_labeled_classes = tf.placeholder(tf.float32, shape=(None))
detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
detection_scores = tf.placeholder(tf.float32, shape=(None))
detection_classes = tf.placeholder(tf.float32, shape=(None))
input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields
eval_dict = {
input_data_fields.key:
image_id,
input_data_fields.groundtruth_boxes:
groundtruth_boxes,
input_data_fields.groundtruth_classes:
groundtruth_classes,
input_data_fields.groundtruth_labeled_classes:
groundtruth_labeled_classes,
detection_fields.detection_boxes:
detection_boxes,
detection_fields.detection_scores:
detection_scores,
detection_fields.detection_classes:
detection_classes
}
eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
_, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
with self.test_session() as sess:
sess.run(
update_op,
feed_dict={
image_id:
'image1',
groundtruth_boxes:
np.array([[100., 100., 200., 200.]]),
groundtruth_classes:
np.array([1]),
# Only class 1 is exhaustively labeled for image1.
groundtruth_labeled_classes:
np.array([1]),
detection_boxes:
np.array([[100., 100., 200., 200.], [100., 100., 200.,
200.]]),
detection_scores:
np.array([.8, .95]),
detection_classes:
np.array([1, 2])
})
sess.run(
update_op,
feed_dict={
image_id: 'image2',
groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
groundtruth_classes: np.array([3]),
groundtruth_labeled_classes: np.array([3]),
detection_boxes: np.array([[50., 50., 100., 100.]]),
detection_scores: np.array([.7]),
detection_classes: np.array([3])
})
sess.run(
update_op,
feed_dict={
image_id: 'image3',
groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
groundtruth_classes: np.array([2]),
groundtruth_labeled_classes: np.array([2]),
detection_boxes: np.array([[25., 25., 50., 50.]]),
detection_scores: np.array([.9]),
detection_classes: np.array([2])
})
metrics = {}
for key, (value_op, _) in eval_metric_ops.items():
metrics[key] = value_op
metrics = sess.run(metrics)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
1.0)
self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
self.assertFalse(coco_evaluator._groundtruth_list)
self.assertFalse(coco_evaluator._detection_boxes_list)
self.assertFalse(coco_evaluator._image_ids)
def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
_get_categories_list())
......
@@ -3,7 +3,7 @@ syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
// Next id - 33
// Next id - 34
message EvalConfig {
optional uint32 batch_size = 25 [default = 1];
// Number of visualization images to generate.
@@ -103,6 +103,13 @@ message EvalConfig {
// visualization. An example would be human pose estimation where certain
// joints can be connected.
repeated KeypointEdge keypoint_edge = 32;
// The "groundtruth_labeled_classes" field indicates which classes have been
// labeled on the images. If skip_predictions_for_unlabeled_class is set,
// detector predictions that do not match to the groundtruth_labeled_classes
// will be ignored. This is useful for evaluating on test data that are not
// exhaustively labeled.
optional bool skip_predictions_for_unlabeled_class = 33 [default = false];
}
// A message to configure parameterized evaluation metric.
......
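At the config level, the new field is set alongside the existing eval options; below is a hypothetical text-format fragment (not part of this commit), assuming eval_pb2 has been regenerated from the updated eval.proto and that the input pipeline provides per-image groundtruth_labeled_classes:

from google.protobuf import text_format

from object_detection.protos import eval_pb2

# Illustrative EvalConfig fragment; only the field names come from eval.proto.
eval_config = text_format.Parse(
    """
    metrics_set: 'coco_detection_metrics'
    skip_predictions_for_unlabeled_class: true
    """, eval_pb2.EvalConfig())
assert eval_config.skip_predictions_for_unlabeled_class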