# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for eval_util."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
from absl.testing import parameterized

import numpy as np
import six
from six.moves import range
import tensorflow.compat.v1 as tf
from google.protobuf import text_format

from object_detection import eval_util
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version


class EvalUtilTest(test_case.TestCase, parameterized.TestCase):

  def _get_categories_list(self):
    return [{'id': 1, 'name': 'person'},
            {'id': 2, 'name': 'dog'},
            {'id': 3, 'name': 'cat'}]

  def _get_categories_list_with_keypoints(self):
    return [{
        'id': 1,
        'name': 'person',
        'keypoints': {
            'left_eye': 0,
            'right_eye': 3
        }
    }, {
        'id': 2,
        'name': 'dog',
        'keypoints': {
            'tail_start': 1,
            'mouth': 2
        }
    }, {
        'id': 3,
        'name': 'cat'
    }]

  def _make_evaluation_dict(self,
                            resized_groundtruth_masks=False,
                            batch_size=1,
                            max_gt_boxes=None,
                            scale_to_absolute=False):
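    """Builds a fake evaluation dict for a batch of 20x20 images.

    Each image carries a single detection that exactly matches the single
    groundtruth box, so COCO metrics computed from this dict are perfect.
    """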
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
    if batch_size == 1:
      key = tf.constant('image1')
    else:
      key = tf.constant([str(i) for i in range(batch_size)])
    detection_boxes = tf.tile(tf.constant([[[0., 0., 1., 1.]]]),
                              multiples=[batch_size, 1, 1])
    detection_scores = tf.tile(tf.constant([[0.8]]), multiples=[batch_size, 1])
    detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
    detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
                              multiples=[batch_size, 1, 1, 1])
    num_detections = tf.ones([batch_size])
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    original_image_spatial_shapes = tf.constant([[20, 20]], dtype=tf.int32)

    groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

    if batch_size > 1:
      groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0),
                                  multiples=[batch_size, 1, 1])
      groundtruth_classes = tf.tile(tf.expand_dims(groundtruth_classes, 0),
                                    multiples=[batch_size, 1])
      groundtruth_instance_masks = tf.tile(
          tf.expand_dims(groundtruth_instance_masks, 0),
          multiples=[batch_size, 1, 1, 1])
      groundtruth_keypoints = tf.tile(
          tf.expand_dims(groundtruth_keypoints, 0),
          multiples=[batch_size, 1, 1])
      original_image_spatial_shapes = tf.tile(original_image_spatial_shapes,
                                              multiples=[batch_size, 1])

    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
        input_data_fields.groundtruth_instance_masks:
            groundtruth_instance_masks,
        input_data_fields.original_image_spatial_shape:
            original_image_spatial_shapes
    }
    if batch_size > 1:
      return eval_util.result_dict_for_batched_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute,
          max_gt_boxes=max_gt_boxes)
    else:
      return eval_util.result_dict_for_single_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute)

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
  def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
                                                   max_gt_boxes=None,
                                                   scale_to_absolute=False):
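    """Tests COCO detection metric ops when detections match groundtruth.

    mAP should be 1.0, and no mask metrics should be produced.
    """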
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(['coco_detection_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op = metric_ops['DetectionBoxes_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertNotIn('DetectionMasks_Precision/mAP', metrics)

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
  def test_get_eval_metric_ops_for_coco_detections_and_masks(
      self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
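    """Tests that box and mask metric ops both report a perfect mAP."""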
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'coco_mask_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
  def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
      self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
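    """Tests mask metrics when groundtruth masks are not image-sized."""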
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'coco_mask_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute,
                                           resized_groundtruth_masks=True)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])

  @unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
  def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
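    """Tests that an unknown entry in metrics_set raises a ValueError."""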
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(['unsupported_metric'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict()
    with self.assertRaises(ValueError):
      eval_util.get_eval_metric_ops_for_evaluators(
          eval_config, categories, eval_dict)

  def test_get_eval_metric_ops_for_evaluators(self):
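    """Tests extracting per-evaluator options from an EvalConfig."""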
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend([
        'coco_detection_metrics', 'coco_mask_metrics',
        'precision_at_recall_detection_metrics'
    ])
    eval_config.include_metrics_per_category = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6

    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    self.assertTrue(evaluator_options['coco_detection_metrics']
                    ['include_metrics_per_category'])
    self.assertFalse(evaluator_options['coco_detection_metrics']
                     ['skip_predictions_for_unlabeled_class'])
    self.assertTrue(
        evaluator_options['coco_mask_metrics']['include_metrics_per_category'])
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_lower_bound'], eval_config.recall_lower_bound)
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_upper_bound'], eval_config.recall_upper_bound)
    self.assertFalse(evaluator_options['precision_at_recall_detection_metrics']
                     ['skip_predictions_for_unlabeled_class'])

  def test_get_evaluator_with_evaluator_options(self):
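    """Tests that explicit evaluator options are forwarded to evaluators."""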
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
    eval_config.include_metrics_per_category = True
    eval_config.skip_predictions_for_unlabeled_class = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6
    categories = self._get_categories_list()

    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    evaluator = eval_util.get_evaluators(eval_config, categories,
                                         evaluator_options)

    self.assertTrue(evaluator[0]._include_metrics_per_category)
    self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class)
    self.assertTrue(evaluator[1]._skip_predictions_for_unlabeled_class)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
                           eval_config.recall_lower_bound)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
                           eval_config.recall_upper_bound)

  def test_get_evaluator_with_no_evaluator_options(self):
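    """Tests that defaults are used when evaluator_options is None."""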
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
    eval_config.include_metrics_per_category = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6
    categories = self._get_categories_list()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)

    # Even though we are setting eval_config.include_metrics_per_category = True
    # and bounds on recall, these options are never passed into the
    # DetectionEvaluator constructor (via `evaluator_options`).
    self.assertFalse(evaluator[0]._include_metrics_per_category)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound, 0.0)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound, 1.0)

  def test_get_evaluator_with_keypoint_metrics(self):
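    """Tests keypoint evaluators built with per-class keypoint sigmas."""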
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'person'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'left_eye'] = 0.1
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'right_eye'] = 0.2
    dog_keypoints_metric = eval_config.parameterized_metric.add()
    dog_keypoints_metric.coco_keypoint_metrics.class_label = 'dog'
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'tail_start'] = 0.3
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'mouth'] = 0.4
    categories = self._get_categories_list_with_keypoints()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)

    # Verify keypoint evaluator class variables.
    self.assertLen(evaluator, 3)
    self.assertFalse(evaluator[0]._include_metrics_per_category)
    self.assertEqual(evaluator[1]._category_name, 'person')
    self.assertEqual(evaluator[2]._category_name, 'dog')
    self.assertAllEqual(evaluator[1]._keypoint_ids, [0, 3])
    self.assertAllEqual(evaluator[2]._keypoint_ids, [1, 2])
    self.assertAllClose([0.1, 0.2], evaluator[1]._oks_sigmas)
    self.assertAllClose([0.3, 0.4], evaluator[2]._oks_sigmas)

  def test_get_evaluator_with_unmatched_label(self):
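    """Tests that no keypoint evaluator is built for an unknown class."""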
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'unmatched'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'kpt'] = 0.1
    categories = self._get_categories_list_with_keypoints()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)
    self.assertLen(evaluator, 1)
    self.assertNotIsInstance(
        evaluator[0], coco_evaluation.CocoKeypointEvaluator)

  def test_padded_image_result_dict(self):
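    """Tests result dicts for padded images with scale_to_absolute=True."""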

    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields
    key = tf.constant([str(i) for i in range(2)])

    detection_boxes = np.array([[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]],
                               dtype=np.float32)
    detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
                                   dtype=np.float32)
    def graph_fn():
      detections = {
          detection_fields.detection_boxes:
              tf.constant(detection_boxes),
          detection_fields.detection_scores:
              tf.constant([[1.], [1.]]),
          detection_fields.detection_classes:
              tf.constant([[1], [2]]),
          detection_fields.num_detections:
              tf.constant([1, 1]),
          detection_fields.detection_keypoints:
              tf.tile(
                  tf.reshape(
                      tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                  multiples=[2, 1, 1, 1])
      }

      gt_boxes = detection_boxes
      groundtruth = {
          input_data_fields.groundtruth_boxes:
              tf.constant(gt_boxes),
          input_data_fields.groundtruth_classes:
              tf.constant([[1.], [1.]]),
          input_data_fields.groundtruth_keypoints:
              tf.tile(
                  tf.reshape(
                      tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                  multiples=[2, 1, 1, 1])
      }

      image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)

      true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
      original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])

      result = eval_util.result_dict_for_batched_example(
          image, key, detections, groundtruth,
          scale_to_absolute=True,
          true_image_shapes=true_image_shapes,
          original_image_spatial_shapes=original_image_spatial_shapes,
          max_gt_boxes=tf.constant(1))
      return (result[input_data_fields.groundtruth_boxes],
              result[input_data_fields.groundtruth_keypoints],
              result[detection_fields.detection_boxes],
              result[detection_fields.detection_keypoints])
    (gt_boxes, gt_keypoints, detection_boxes,
     detection_keypoints) = self.execute_cpu(graph_fn, [])
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
        gt_boxes)
    self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                         [[[0., 0.], [150., 150.], [300., 300.]]]],
                        gt_keypoints)

    # Predictions from the model are not scaled.
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
        detection_boxes)
    self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                         [[[0., 0.], [75., 150.], [150., 300.]]]],
                        detection_keypoints)

  def test_evaluator_options_from_eval_config_no_super_categories(self):
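    """Tests that super_categories is absent unless set in the config."""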
    eval_config_text_proto = """
      metrics_set: "coco_detection_metrics"
      metrics_set: "coco_mask_metrics"
      include_metrics_per_category: true
      use_moving_averages: false
      batch_size: 1;
    """
    eval_config = eval_pb2.EvalConfig()
    text_format.Merge(eval_config_text_proto, eval_config)
    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    self.assertNotIn('super_categories', evaluator_options['coco_mask_metrics'])

  def test_evaluator_options_from_eval_config_with_super_categories(self):
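    """Tests parsing super_categories into per-key lists of categories."""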
    eval_config_text_proto = """
      metrics_set: "coco_detection_metrics"
      metrics_set: "coco_mask_metrics"
      include_metrics_per_category: true
      use_moving_averages: false
      batch_size: 1;
      super_categories {
        key: "supercat1"
        value: "a,b,c"
      }
      super_categories {
        key: "supercat2"
        value: "d,e,f"
      }
    """
    eval_config = eval_pb2.EvalConfig()
    text_format.Merge(eval_config_text_proto, eval_config)
    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    self.assertIn('super_categories', evaluator_options['coco_mask_metrics'])
    super_categories = evaluator_options[
        'coco_mask_metrics']['super_categories']
    self.assertIn('supercat1', super_categories)
    self.assertIn('supercat2', super_categories)
    self.assertAllEqual(super_categories['supercat1'], ['a', 'b', 'c'])
    self.assertAllEqual(super_categories['supercat2'], ['d', 'e', 'f'])


if __name__ == '__main__':
  tf.test.main()