Commit 3f3ecc47 authored by Jonathan Huang, committed by TF Object Detection Team

Adds option to aggregate COCO metrics across "supercategories".

PiperOrigin-RevId: 329650549
parent bf7b181f
......@@ -1130,6 +1130,13 @@ def evaluator_options_from_eval_config(eval_config):
           'skip_predictions_for_unlabeled_class':
               (eval_config.skip_predictions_for_unlabeled_class)
       })
+      for super_category in eval_config.super_categories:
+        if 'super_categories' not in evaluator_options[eval_metric_fn_key]:
+          evaluator_options[eval_metric_fn_key]['super_categories'] = {}
+        key = super_category
+        value = eval_config.super_categories[key].split(',')
+        evaluator_options[eval_metric_fn_key]['super_categories'][key] = value
     elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
       evaluator_options[eval_metric_fn_key] = {
           'recall_lower_bound': (eval_config.recall_lower_bound),
......
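For orientation, here is a minimal sketch of how the new field flows through `evaluator_options_from_eval_config`, mirroring the unit tests added in the next file; the supercategory name "animal" and the category names "cat"/"dog" are placeholders rather than entries from a real label map:

```python
from google.protobuf import text_format
from object_detection import eval_util
from object_detection.protos import eval_pb2

eval_config = eval_pb2.EvalConfig()
text_format.Merge("""
  metrics_set: "coco_detection_metrics"
  metrics_set: "coco_mask_metrics"
  include_metrics_per_category: true
  super_categories {
    key: "animal"
    value: "cat,dog"
  }
""", eval_config)

evaluator_options = eval_util.evaluator_options_from_eval_config(eval_config)
# The comma-separated value string is split into a list of category names:
# evaluator_options['coco_mask_metrics']['super_categories']
#   == {'animal': ['cat', 'dog']}
```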
......@@ -25,6 +25,7 @@ import numpy as np
 import six
 from six.moves import range
 import tensorflow.compat.v1 as tf
+from google.protobuf import text_format
 from object_detection import eval_util
 from object_detection.core import standard_fields as fields
......@@ -406,6 +407,48 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
                            [[[0., 0.], [75., 150.], [150., 300.]]]],
                           detection_keypoints)
 
+  def test_evaluator_options_from_eval_config_no_super_categories(self):
+    eval_config_text_proto = """
+      metrics_set: "coco_detection_metrics"
+      metrics_set: "coco_mask_metrics"
+      include_metrics_per_category: true
+      use_moving_averages: false
+      batch_size: 1;
+    """
+    eval_config = eval_pb2.EvalConfig()
+    text_format.Merge(eval_config_text_proto, eval_config)
+    evaluator_options = eval_util.evaluator_options_from_eval_config(
+        eval_config)
+    self.assertNotIn('super_categories', evaluator_options['coco_mask_metrics'])
+
+  def test_evaluator_options_from_eval_config_with_super_categories(self):
+    eval_config_text_proto = """
+      metrics_set: "coco_detection_metrics"
+      metrics_set: "coco_mask_metrics"
+      include_metrics_per_category: true
+      use_moving_averages: false
+      batch_size: 1;
+      super_categories {
+        key: "supercat1"
+        value: "a,b,c"
+      }
+      super_categories {
+        key: "supercat2"
+        value: "d,e,f"
+      }
+    """
+    eval_config = eval_pb2.EvalConfig()
+    text_format.Merge(eval_config_text_proto, eval_config)
+    evaluator_options = eval_util.evaluator_options_from_eval_config(
+        eval_config)
+    self.assertIn('super_categories', evaluator_options['coco_mask_metrics'])
+    super_categories = evaluator_options[
+        'coco_mask_metrics']['super_categories']
+    self.assertIn('supercat1', super_categories)
+    self.assertIn('supercat2', super_categories)
+    self.assertAllEqual(super_categories['supercat1'], ['a', 'b', 'c'])
+    self.assertAllEqual(super_categories['supercat2'], ['d', 'e', 'f'])
 
 
 if __name__ == '__main__':
   tf.test.main()
......@@ -35,7 +35,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
                categories,
                include_metrics_per_category=False,
                all_metrics_per_category=False,
-               skip_predictions_for_unlabeled_class=False):
+               skip_predictions_for_unlabeled_class=False,
+               super_categories=None):
     """Constructor.
 
     Args:
......@@ -49,6 +50,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         your mldash.
       skip_predictions_for_unlabeled_class: Skip predictions that do not match
         with the labeled classes for the image.
+      super_categories: None or a Python dict mapping super-category names
+        (strings) to lists of categories (corresponding to category names
+        in the label_map). Metrics are aggregated along these super-categories
+        and added to `per_category_ap` under the key
+        `PerformanceBySuperCategory/<super-category-name>`.
     """
     super(CocoDetectionEvaluator, self).__init__(categories)
     # _image_ids is a dictionary that maps unique image ids to Booleans which
......@@ -63,6 +69,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
     self._all_metrics_per_category = all_metrics_per_category
     self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class
     self._groundtruth_labeled_classes = {}
+    self._super_categories = super_categories
 
   def clear(self):
     """Clears the state to prepare for a fresh evaluation."""
......@@ -268,6 +275,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         no supercategories exist). For backward compatibility
         'PerformanceByCategory' is included in the output regardless of
         all_metrics_per_category.
+      If super_categories are provided, then this will additionally include
+      metrics aggregated along the super_categories with keys of the form:
+      `PerformanceBySuperCategory/<super-category-name>`.
     """
     tf.logging.info('Performing evaluation on %d images.', len(self._image_ids))
     groundtruth_dict = {
......@@ -282,7 +292,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
         coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False)
     box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
         include_metrics_per_category=self._include_metrics_per_category,
-        all_metrics_per_category=self._all_metrics_per_category)
+        all_metrics_per_category=self._all_metrics_per_category,
+        super_categories=self._super_categories)
     box_metrics.update(box_per_category_ap)
     box_metrics = {'DetectionBoxes_'+ key: value
                    for key, value in iter(box_metrics.items())}
......@@ -933,7 +944,9 @@ class CocoKeypointEvaluator(CocoDetectionEvaluator):
 class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
   """Class to evaluate COCO detection metrics."""
 
-  def __init__(self, categories, include_metrics_per_category=False):
+  def __init__(self, categories,
+               include_metrics_per_category=False,
+               super_categories=None):
     """Constructor.
 
     Args:
......@@ -941,6 +954,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         'id': (required) an integer id uniquely identifying this category.
         'name': (required) string representing category name e.g., 'cat', 'dog'.
       include_metrics_per_category: If True, include metrics for each category.
+      super_categories: None or a Python dict mapping super-category names
+        (strings) to lists of categories (corresponding to category names
+        in the label_map). Metrics are aggregated along these super-categories
+        and added to `per_category_ap` under the key
+        `PerformanceBySuperCategory/<super-category-name>`.
     """
     super(CocoMaskEvaluator, self).__init__(categories)
     self._image_id_to_mask_shape_map = {}
......@@ -950,6 +968,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
     self._category_id_set = set([cat['id'] for cat in self._categories])
     self._annotation_id = 1
     self._include_metrics_per_category = include_metrics_per_category
+    self._super_categories = super_categories
 
   def clear(self):
     """Clears the state to prepare for a fresh evaluation."""
......@@ -1106,6 +1125,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         no supercategories exist). For backward compatibility
         'PerformanceByCategory' is included in the output regardless of
         all_metrics_per_category.
+      If super_categories are provided, then this will additionally include
+      metrics aggregated along the super_categories with keys of the form:
+      `PerformanceBySuperCategory/<super-category-name>`.
     """
     groundtruth_dict = {
         'annotations': self._groundtruth_list,
......@@ -1122,7 +1144,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
         coco_wrapped_groundtruth, coco_wrapped_detection_masks,
         agnostic_mode=False, iou_type='segm')
     mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics(
-        include_metrics_per_category=self._include_metrics_per_category)
+        include_metrics_per_category=self._include_metrics_per_category,
+        super_categories=self._super_categories)
     mask_metrics.update(mask_per_category_ap)
     mask_metrics = {'DetectionMasks_'+ key: value
                     for key, value in mask_metrics.items()}
......
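To make the new constructor argument concrete, here is a minimal sketch of direct use, assuming the usual evaluator workflow; the categories, the grouping, and the expected metric key are illustrative (the `DetectionBoxes_` prefix and `PerformanceBySuperCategory/` naming follow the diff above):

```python
from object_detection.metrics import coco_evaluation

# Placeholder label-map entries and supercategory grouping, for illustration only.
categories = [{'id': 1, 'name': 'cat'},
              {'id': 2, 'name': 'dog'},
              {'id': 3, 'name': 'car'}]
super_categories = {'animal': ['cat', 'dog'], 'vehicle': ['car']}

evaluator = coco_evaluation.CocoDetectionEvaluator(
    categories,
    include_metrics_per_category=True,  # per-category APs are needed for aggregation
    super_categories=super_categories)

# After the usual add_single_ground_truth_image_info() /
# add_single_detected_image_info() calls, evaluate() should also report keys
# such as 'DetectionBoxes_PerformanceBySuperCategory/animal'.
```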
......@@ -202,7 +202,8 @@ class COCOEvalWrapper(cocoeval.COCOeval):
   def ComputeMetrics(self,
                      include_metrics_per_category=False,
-                     all_metrics_per_category=False):
+                     all_metrics_per_category=False,
+                     super_categories=None):
     """Computes detection/keypoint metrics.
 
     Args:
......@@ -211,6 +212,11 @@ class COCOEvalWrapper(cocoeval.COCOeval):
         each category in per_category_ap. Be careful with setting it to true if
         you have more than a handful of categories, because it will pollute
         your mldash.
+      super_categories: None or a Python dict mapping super-category names
+        (strings) to lists of categories (corresponding to category names
+        in the label_map). Metrics are aggregated along these super-categories
+        and added to `per_category_ap` under the key
+        `PerformanceBySuperCategory/<super-category-name>`.
 
     Returns:
       1. summary_metrics: a dictionary holding:
......@@ -240,6 +246,9 @@ class COCOEvalWrapper(cocoeval.COCOeval):
       output regardless of all_metrics_per_category.
       If evaluating class-agnostic mode, per_category_ap is an empty
       dictionary.
+      If super_categories are provided, then this will additionally include
+      metrics aggregated along the super_categories with keys of the form:
+      `PerformanceBySuperCategory/<super-category-name>`.
 
     Raises:
       ValueError: If category_stats does not exist.
......@@ -291,6 +300,7 @@ class COCOEvalWrapper(cocoeval.COCOeval):
     if not hasattr(self, 'category_stats'):
       raise ValueError('Category stats do not exist')
     per_category_ap = OrderedDict([])
+    super_category_ap = OrderedDict([])
     if self.GetAgnosticMode():
       return summary_metrics, per_category_ap
     for category_index, category_id in enumerate(self.GetCategoryIdList()):
......@@ -298,6 +308,14 @@ class COCOEvalWrapper(cocoeval.COCOeval):
       # Kept for backward compatibility
       per_category_ap['PerformanceByCategory/mAP/{}'.format(
           category)] = self.category_stats[0][category_index]
+      if super_categories:
+        for key in super_categories:
+          if category in super_categories[key]:
+            metric_name = 'PerformanceBySuperCategory/{}'.format(key)
+            if metric_name not in super_category_ap:
+              super_category_ap[metric_name] = 0
+            super_category_ap[metric_name] += self.category_stats[0][
+                category_index]
       if all_metrics_per_category:
         per_category_ap['Precision mAP ByCategory/{}'.format(
             category)] = self.category_stats[0][category_index]
......@@ -323,7 +341,11 @@ class COCOEvalWrapper(cocoeval.COCOeval):
             category)] = self.category_stats[10][category_index]
         per_category_ap['Recall AR@100 (large) ByCategory/{}'.format(
             category)] = self.category_stats[11][category_index]
+    if super_categories:
+      for key in super_categories:
+        metric_name = 'PerformanceBySuperCategory/{}'.format(key)
+        super_category_ap[metric_name] /= len(super_categories[key])
+      per_category_ap.update(super_category_ap)
 
     return summary_metrics, per_category_ap
......
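The aggregation above is an unweighted mean: each member category's AP (`category_stats[0]`) is summed into its supercategory bucket, and the bucket is then divided by the number of members configured for that supercategory. A small numeric sketch with made-up AP values:

```python
# Hypothetical per-category APs; in the real code these come from
# self.category_stats[0] inside ComputeMetrics.
category_ap = {'cat': 0.60, 'dog': 0.40, 'car': 0.75}
super_categories = {'animal': ['cat', 'dog'], 'vehicle': ['car']}

super_category_ap = {}
for name, members in super_categories.items():
  metric_name = 'PerformanceBySuperCategory/{}'.format(name)
  super_category_ap[metric_name] = (
      sum(category_ap[m] for m in members) / len(members))

print(super_category_ap)
# PerformanceBySuperCategory/animal -> 0.5 (mean of 0.60 and 0.40)
# PerformanceBySuperCategory/vehicle -> 0.75
```

Note that the divisor is the full member list from the config, so a configured category name that never matches an evaluated category still counts in the denominator and pulls the supercategory average down.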
......@@ -3,7 +3,7 @@ syntax = "proto2";
 package object_detection.protos;
 
 // Message for configuring DetectionModel evaluation jobs (eval.py).
-// Next id - 34
+// Next id - 35
 message EvalConfig {
   optional uint32 batch_size = 25 [default = 1];
   // Number of visualization images to generate.
......@@ -82,6 +82,11 @@ message EvalConfig {
   // If True, additionally include per-category metrics.
   optional bool include_metrics_per_category = 24 [default = false];
 
+  // Optional super-category definitions: keys are super-category names;
+  // values are comma-separated lists of category names (assumed to correspond
+  // to `display_name` entries in the label map).
+  map<string, string> super_categories = 34;
+
   // Recall range within which precision should be computed.
   optional float recall_lower_bound = 26 [default = 0.0];
   optional float recall_upper_bound = 27 [default = 1.0];
......