Commit 3f3ecc47 authored by Jonathan Huang, committed by TF Object Detection Team

Adds option to aggregate COCO metrics across "supercategories".

PiperOrigin-RevId: 329650549
parent bf7b181f
@@ -1130,6 +1130,13 @@ def evaluator_options_from_eval_config(eval_config):
'skip_predictions_for_unlabeled_class':
(eval_config.skip_predictions_for_unlabeled_class)
})
for super_category in eval_config.super_categories:
if 'super_categories' not in evaluator_options[eval_metric_fn_key]:
evaluator_options[eval_metric_fn_key]['super_categories'] = {}
key = super_category
value = eval_config.super_categories[key].split(',')
evaluator_options[eval_metric_fn_key]['super_categories'][key] = value
elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
evaluator_options[eval_metric_fn_key] = {
'recall_lower_bound': (eval_config.recall_lower_bound),
......
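For readers following the change: below is a minimal sketch, not part of this commit, of how the new `super_categories` map flows from an `EvalConfig` into evaluator options. The metric key and category names are placeholders; the behavior mirrors the loop added in the hunk above (each comma-separated value is split into a list).

```python
# Illustrative sketch (not from the commit): populate the new
# eval_config.super_categories map programmatically and inspect the
# resulting evaluator options.
from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('coco_mask_metrics')
# Map values are comma-separated category names (placeholder names here).
eval_config.super_categories['animal'] = 'cat,dog'

evaluator_options = eval_util.evaluator_options_from_eval_config(eval_config)
# Each comma-separated value is split into a list of category names:
print(evaluator_options['coco_mask_metrics']['super_categories'])
# {'animal': ['cat', 'dog']}
```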
@@ -25,6 +25,7 @@ import numpy as np
import six
from six.moves import range
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection import eval_util
from object_detection.core import standard_fields as fields
@@ -406,6 +407,48 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
[[[0., 0.], [75., 150.], [150., 300.]]]],
detection_keypoints)
def test_evaluator_options_from_eval_config_no_super_categories(self):
eval_config_text_proto = """
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
"""
eval_config = eval_pb2.EvalConfig()
text_format.Merge(eval_config_text_proto, eval_config)
evaluator_options = eval_util.evaluator_options_from_eval_config(
eval_config)
self.assertNotIn('super_categories', evaluator_options['coco_mask_metrics'])
def test_evaluator_options_from_eval_config_with_super_categories(self):
eval_config_text_proto = """
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
super_categories {
key: "supercat1"
value: "a,b,c"
}
super_categories {
key: "supercat2"
value: "d,e,f"
}
"""
eval_config = eval_pb2.EvalConfig()
text_format.Merge(eval_config_text_proto, eval_config)
evaluator_options = eval_util.evaluator_options_from_eval_config(
eval_config)
self.assertIn('super_categories', evaluator_options['coco_mask_metrics'])
super_categories = evaluator_options[
'coco_mask_metrics']['super_categories']
self.assertIn('supercat1', super_categories)
self.assertIn('supercat2', super_categories)
self.assertAllEqual(super_categories['supercat1'], ['a', 'b', 'c'])
self.assertAllEqual(super_categories['supercat2'], ['d', 'e', 'f'])
if __name__ == '__main__':
tf.test.main()
@@ -35,7 +35,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
categories,
include_metrics_per_category=False,
all_metrics_per_category=False,
skip_predictions_for_unlabeled_class=False,
super_categories=None):
"""Constructor. """Constructor.
Args: Args:
...@@ -49,6 +50,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -49,6 +50,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
your mldash. your mldash.
skip_predictions_for_unlabeled_class: Skip predictions that do not match skip_predictions_for_unlabeled_class: Skip predictions that do not match
with the labeled classes for the image. with the labeled classes for the image.
super_categories: None or a Python dict mapping super-category names
(strings) to lists of category names (corresponding to category names
in the label_map). Metrics are aggregated across these super-categories,
added to `per_category_ap`, and reported under the name
`PerformanceBySuperCategory/<super-category-name>`.
""" """
super(CocoDetectionEvaluator, self).__init__(categories) super(CocoDetectionEvaluator, self).__init__(categories)
# _image_ids is a dictionary that maps unique image ids to Booleans which # _image_ids is a dictionary that maps unique image ids to Booleans which
...@@ -63,6 +69,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -63,6 +69,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
self._all_metrics_per_category = all_metrics_per_category self._all_metrics_per_category = all_metrics_per_category
self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
self._groundtruth_labeled_classes = {} self._groundtruth_labeled_classes = {}
self._super_categories = super_categories
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
@@ -268,6 +275,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
no supercategories exist). For backward compatibility
'PerformanceByCategory' is included in the output regardless of
all_metrics_per_category.
If super_categories are provided, then this additionally includes
metrics aggregated across the super-categories, with keys of the form
`PerformanceBySuperCategory/<super-category-name>`.
""" """
tf.logging.info('Performing evaluation on %d images.', len(self._image_ids)) tf.logging.info('Performing evaluation on %d images.', len(self._image_ids))
groundtruth_dict = { groundtruth_dict = {
...@@ -282,7 +292,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -282,7 +292,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False) coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False)
box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
include_metrics_per_category=self._include_metrics_per_category, include_metrics_per_category=self._include_metrics_per_category,
all_metrics_per_category=self._all_metrics_per_category,
super_categories=self._super_categories)
box_metrics.update(box_per_category_ap)
box_metrics = {'DetectionBoxes_'+ key: value
for key, value in iter(box_metrics.items())}
@@ -933,7 +944,9 @@ class CocoKeypointEvaluator(CocoDetectionEvaluator):
class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
"""Class to evaluate COCO detection metrics."""
def __init__(self, categories,
include_metrics_per_category=False,
super_categories=None):
"""Constructor. """Constructor.
Args: Args:
...@@ -941,6 +954,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -941,6 +954,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'id': (required) an integer id uniquely identifying this category. 'id': (required) an integer id uniquely identifying this category.
'name': (required) string representing category name e.g., 'cat', 'dog'. 'name': (required) string representing category name e.g., 'cat', 'dog'.
include_metrics_per_category: If True, include metrics for each category. include_metrics_per_category: If True, include metrics for each category.
super_categories: None or a Python dict mapping super-category names
(strings) to lists of category names (corresponding to category names
in the label_map). Metrics are aggregated across these super-categories,
added to `per_category_ap`, and reported under the name
`PerformanceBySuperCategory/<super-category-name>`.
""" """
super(CocoMaskEvaluator, self).__init__(categories) super(CocoMaskEvaluator, self).__init__(categories)
self._image_id_to_mask_shape_map = {} self._image_id_to_mask_shape_map = {}
...@@ -950,6 +968,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -950,6 +968,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
self._category_id_set = set([cat['id'] for cat in self._categories]) self._category_id_set = set([cat['id'] for cat in self._categories])
self._annotation_id = 1 self._annotation_id = 1
self._include_metrics_per_category = include_metrics_per_category self._include_metrics_per_category = include_metrics_per_category
self._super_categories = super_categories
def clear(self):
"""Clears the state to prepare for a fresh evaluation."""
@@ -1106,6 +1125,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
no supercategories exist). For backward compatibility
'PerformanceByCategory' is included in the output regardless of
all_metrics_per_category.
If super_categories are provided, then this additionally includes
metrics aggregated across the super-categories, with keys of the form
`PerformanceBySuperCategory/<super-category-name>`.
""" """
groundtruth_dict = { groundtruth_dict = {
'annotations': self._groundtruth_list, 'annotations': self._groundtruth_list,
...@@ -1122,7 +1144,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): ...@@ -1122,7 +1144,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
coco_wrapped_groundtruth, coco_wrapped_detection_masks, coco_wrapped_groundtruth, coco_wrapped_detection_masks,
agnostic_mode=False, iou_type='segm') agnostic_mode=False, iou_type='segm')
mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics( mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics(
include_metrics_per_category=self._include_metrics_per_category,
super_categories=self._super_categories)
mask_metrics.update(mask_per_category_ap)
mask_metrics = {'DetectionMasks_'+ key: value
for key, value in mask_metrics.items()}
......
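A hedged usage sketch of the new constructor argument (names below are placeholders, not from the commit). Per the diff, the aggregated values surface from `evaluate()` alongside the usual per-category keys, prefixed with `DetectionBoxes_` (or `DetectionMasks_` for `CocoMaskEvaluator`).

```python
# Hypothetical usage sketch: passing super_categories directly to the
# evaluator. Category and super-category names are placeholders.
from object_detection.metrics import coco_evaluation

categories = [{'id': 1, 'name': 'cat'},
              {'id': 2, 'name': 'dog'},
              {'id': 3, 'name': 'car'}]
super_categories = {'animal': ['cat', 'dog'], 'vehicle': ['car']}

evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories,
    # Per-category stats feed the aggregation, so enable per-category metrics.
    include_metrics_per_category=True,
    super_categories=super_categories)

# After the usual add_single_ground_truth_image_info(...) and
# add_single_detected_image_info(...) calls, evaluator.evaluate() should
# include keys such as 'DetectionBoxes_PerformanceBySuperCategory/animal'
# in addition to the 'DetectionBoxes_PerformanceByCategory/mAP/...' keys.
```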
@@ -202,7 +202,8 @@ class COCOEvalWrapper(cocoeval.COCOeval):
def ComputeMetrics(self,
include_metrics_per_category=False,
all_metrics_per_category=False,
super_categories=None):
"""Computes detection/keypoint metrics. """Computes detection/keypoint metrics.
Args: Args:
...@@ -211,6 +212,11 @@ class COCOEvalWrapper(cocoeval.COCOeval): ...@@ -211,6 +212,11 @@ class COCOEvalWrapper(cocoeval.COCOeval):
each category in per_category_ap. Be careful with setting it to true if each category in per_category_ap. Be careful with setting it to true if
you have more than handful of categories, because it will pollute you have more than handful of categories, because it will pollute
your mldash. your mldash.
super_categories: None or a Python dict mapping super-category names
(strings) to lists of category names (corresponding to category names
in the label_map). Metrics are aggregated across these super-categories,
added to `per_category_ap`, and reported under the name
`PerformanceBySuperCategory/<super-category-name>`.
Returns:
1. summary_metrics: a dictionary holding:
@@ -240,6 +246,9 @@ class COCOEvalWrapper(cocoeval.COCOeval):
output regardless of all_metrics_per_category.
If evaluating class-agnostic mode, per_category_ap is an empty
dictionary.
If super_categories are provided, then this additionally includes
metrics aggregated across the super-categories, with keys of the form
`PerformanceBySuperCategory/<super-category-name>`.
Raises:
ValueError: If category_stats does not exist.
@@ -291,6 +300,7 @@ class COCOEvalWrapper(cocoeval.COCOeval):
if not hasattr(self, 'category_stats'):
raise ValueError('Category stats do not exist')
per_category_ap = OrderedDict([])
super_category_ap = OrderedDict([])
if self.GetAgnosticMode():
return summary_metrics, per_category_ap
for category_index, category_id in enumerate(self.GetCategoryIdList()):
@@ -298,6 +308,14 @@ class COCOEvalWrapper(cocoeval.COCOeval):
# Kept for backward compatibility
per_category_ap['PerformanceByCategory/mAP/{}'.format(
category)] = self.category_stats[0][category_index]
if super_categories:
for key in super_categories:
if category in super_categories[key]:
metric_name = 'PerformanceBySuperCategory/{}'.format(key)
if metric_name not in super_category_ap:
super_category_ap[metric_name] = 0
super_category_ap[metric_name] += self.category_stats[0][
category_index]
if all_metrics_per_category:
per_category_ap['Precision mAP ByCategory/{}'.format(
category)] = self.category_stats[0][category_index]
@@ -323,7 +341,11 @@ class COCOEvalWrapper(cocoeval.COCOeval):
category)] = self.category_stats[10][category_index]
per_category_ap['Recall AR@100 (large) ByCategory/{}'.format(
category)] = self.category_stats[11][category_index]
if super_categories:
for key in super_categories:
metric_name = 'PerformanceBySuperCategory/{}'.format(key)
super_category_ap[metric_name] /= len(super_categories[key])
per_category_ap.update(super_category_ap)
return summary_metrics, per_category_ap
......
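The aggregation added above amounts to an unweighted mean: each super-category's AP is the sum of its member categories' APs divided by the number of members listed for that super-category. A standalone sketch with made-up AP values (not from the commit):

```python
# Standalone sketch of the aggregation logic above; AP values are invented.
from collections import OrderedDict

category_ap = {'cat': 0.60, 'dog': 0.40, 'car': 0.75}
super_categories = {'animal': ['cat', 'dog'], 'vehicle': ['car']}

super_category_ap = OrderedDict()
for name, members in super_categories.items():
  total = sum(category_ap.get(c, 0.0) for c in members)
  # As in the diff, the divisor is the full member count of the super-category.
  super_category_ap['PerformanceBySuperCategory/{}'.format(name)] = (
      total / len(members))

print(super_category_ap)
# OrderedDict([('PerformanceBySuperCategory/animal', 0.5),
#              ('PerformanceBySuperCategory/vehicle', 0.75)])
```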
@@ -3,7 +3,7 @@ syntax = "proto2";
package object_detection.protos;
// Message for configuring DetectionModel evaluation jobs (eval.py).
// Next id - 35
message EvalConfig {
optional uint32 batch_size = 25 [default = 1];
// Number of visualization images to generate.
@@ -82,6 +82,11 @@ message EvalConfig {
// If True, additionally include per-category metrics.
optional bool include_metrics_per_category = 24 [default = false];
// Optional super-category definitions: keys are super-category names;
// values are comma-separated lists of category names (assumed to
// correspond to `display_name` entries in the label map).
map<string, string> super_categories = 34;
// Recall range within which precision should be computed.
optional float recall_lower_bound = 26 [default = 0.0];
optional float recall_upper_bound = 27 [default = 1.0];
......