# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for eval_util."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized

import numpy as np
import six
from six.moves import range
import tensorflow as tf

from object_detection import eval_util
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case


class EvalUtilTest(test_case.TestCase, parameterized.TestCase):

  def _get_categories_list(self):
    return [{'id': 1, 'name': 'person'},
            {'id': 2, 'name': 'dog'},
            {'id': 3, 'name': 'cat'}]

  def _get_categories_list_with_keypoints(self):
    return [{
        'id': 1,
        'name': 'person',
        'keypoints': {
            'left_eye': 0,
            'right_eye': 3
        }
    }, {
        'id': 2,
        'name': 'dog',
        'keypoints': {
            'tail_start': 1,
            'mouth': 2
        }
    }, {
        'id': 3,
        'name': 'cat'
    }]

  def _make_evaluation_dict(self,
                            resized_groundtruth_masks=False,
                            batch_size=1,
                            max_gt_boxes=None,
                            scale_to_absolute=False):
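    """Builds a fake evaluation dict in which detections match groundtruth.

    Each example contains a single full-image detection and an identical
    groundtruth box, so COCO detection/mask mAP over the result should be 1.0.

    Args:
      resized_groundtruth_masks: Whether to build 10x10 (rather than 20x20)
        groundtruth instance masks.
      batch_size: Number of examples; a batched result dict is built when
        batch_size > 1.
      max_gt_boxes: Maximum number of groundtruth boxes, forwarded to
        eval_util.result_dict_for_batched_example.
      scale_to_absolute: Whether to scale coordinates to absolute values.

    Returns:
      A result dict produced by eval_util.
    """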
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[batch_size, 20, 20, 3], dtype=tf.uint8)
    if batch_size == 1:
      key = tf.constant('image1')
    else:
      key = tf.constant([str(i) for i in range(batch_size)])
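    # One detection per example: a full-image box with score 0.8, class 0 and
    # an all-ones 20x20 mask.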
    detection_boxes = tf.tile(tf.constant([[[0., 0., 1., 1.]]]),
                              multiples=[batch_size, 1, 1])
    detection_scores = tf.tile(tf.constant([[0.8]]), multiples=[batch_size, 1])
    detection_classes = tf.tile(tf.constant([[0]]), multiples=[batch_size, 1])
    detection_masks = tf.tile(tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32),
                              multiples=[batch_size, 1, 1, 1])
    num_detections = tf.ones([batch_size])
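    # Groundtruth mirrors the detections above, so the detection and mask
    # metrics computed from this dict should be perfect.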
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)

    if batch_size > 1:
      groundtruth_boxes = tf.tile(tf.expand_dims(groundtruth_boxes, 0),
                                  multiples=[batch_size, 1, 1])
      groundtruth_classes = tf.tile(tf.expand_dims(groundtruth_classes, 0),
                                    multiples=[batch_size, 1])
      groundtruth_instance_masks = tf.tile(
          tf.expand_dims(groundtruth_instance_masks, 0),
          multiples=[batch_size, 1, 1, 1])
      groundtruth_keypoints = tf.tile(
          tf.expand_dims(groundtruth_keypoints, 0),
          multiples=[batch_size, 1, 1])

    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    if batch_size > 1:
      return eval_util.result_dict_for_batched_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute,
          max_gt_boxes=max_gt_boxes)
    else:
      return eval_util.result_dict_for_single_example(
          image, key, detections, groundtruth,
          scale_to_absolute=scale_to_absolute)

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
                                                   max_gt_boxes=None,
                                                   scale_to_absolute=False):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(['coco_detection_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op = metric_ops['DetectionBoxes_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertNotIn('DetectionMasks_Precision/mAP', metrics)

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  def test_get_eval_metric_ops_for_coco_detections_and_masks(
      self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'coco_mask_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])

  @parameterized.parameters(
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': True},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': True},
      {'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
      {'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
  )
  def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
      self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'coco_mask_metrics'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict(batch_size=batch_size,
                                           max_gt_boxes=max_gt_boxes,
                                           scale_to_absolute=scale_to_absolute,
                                           resized_groundtruth_masks=True)
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, categories, eval_dict)
    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])

  def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(['unsupported_metric'])
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict()
    with self.assertRaises(ValueError):
      eval_util.get_eval_metric_ops_for_evaluators(
          eval_config, categories, eval_dict)

  def test_get_eval_metric_ops_for_evaluators(self):
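    """Tests that evaluator options are populated from the eval config."""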
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend([
        'coco_detection_metrics', 'coco_mask_metrics',
        'precision_at_recall_detection_metrics'
    ])
    eval_config.include_metrics_per_category = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6

    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    self.assertTrue(evaluator_options['coco_detection_metrics']
                    ['include_metrics_per_category'])
    self.assertTrue(
        evaluator_options['coco_mask_metrics']['include_metrics_per_category'])
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_lower_bound'], eval_config.recall_lower_bound)
    self.assertAlmostEqual(
        evaluator_options['precision_at_recall_detection_metrics']
        ['recall_upper_bound'], eval_config.recall_upper_bound)

  def test_get_evaluator_with_evaluator_options(self):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
    eval_config.include_metrics_per_category = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6
    categories = self._get_categories_list()

    evaluator_options = eval_util.evaluator_options_from_eval_config(
        eval_config)
    evaluator = eval_util.get_evaluators(eval_config, categories,
                                         evaluator_options)

    self.assertTrue(evaluator[0]._include_metrics_per_category)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound,
                           eval_config.recall_lower_bound)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound,
                           eval_config.recall_upper_bound)

  def test_get_evaluator_with_no_evaluator_options(self):
    eval_config = eval_pb2.EvalConfig()
    eval_config.metrics_set.extend(
        ['coco_detection_metrics', 'precision_at_recall_detection_metrics'])
    eval_config.include_metrics_per_category = True
    eval_config.recall_lower_bound = 0.2
    eval_config.recall_upper_bound = 0.6
    categories = self._get_categories_list()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)

    # Even though include_metrics_per_category and the recall bounds are set
    # on eval_config, these options are never passed into the
    # DetectionEvaluator constructor (via `evaluator_options`).
    self.assertFalse(evaluator[0]._include_metrics_per_category)
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound, 0.0)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound, 1.0)

  def test_get_evaluator_with_keypoint_metrics(self):
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'person'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'left_eye'] = 0.1
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'right_eye'] = 0.2
    dog_keypoints_metric = eval_config.parameterized_metric.add()
    dog_keypoints_metric.coco_keypoint_metrics.class_label = 'dog'
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'tail_start'] = 0.3
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'mouth'] = 0.4
    categories = self._get_categories_list_with_keypoints()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)

    # Verify keypoint evaluator class variables.
    self.assertLen(evaluator, 3)
    self.assertFalse(evaluator[0]._include_metrics_per_category)
    self.assertEqual(evaluator[1]._category_name, 'person')
    self.assertEqual(evaluator[2]._category_name, 'dog')
    self.assertAllEqual(evaluator[1]._keypoint_ids, [0, 3])
    self.assertAllEqual(evaluator[2]._keypoint_ids, [1, 2])
    self.assertAllClose([0.1, 0.2], evaluator[1]._oks_sigmas)
    self.assertAllClose([0.3, 0.4], evaluator[2]._oks_sigmas)

  def test_get_evaluator_with_unmatched_label(self):
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'unmatched'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'kpt'] = 0.1
    categories = self._get_categories_list_with_keypoints()

    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)
    self.assertLen(evaluator, 1)
    self.assertNotIsInstance(
        evaluator[0], coco_evaluation.CocoKeypointEvaluator)

  def test_padded_image_result_dict(self):
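    """Tests a result dict built from a batch of padded images.

    Groundtruth boxes and keypoints are rescaled from the padded image to the
    true image shape before being converted to absolute coordinates, while
    detections are scaled to the original image sizes directly.
    """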

    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields
    key = tf.constant([str(i) for i in range(2)])

    detection_boxes = np.array([[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]],
                               dtype=np.float32)
    detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
                                   dtype=np.float32)
    detections = {
        detection_fields.detection_boxes:
            tf.constant(detection_boxes),
        detection_fields.detection_scores:
            tf.constant([[1.], [1.]]),
        detection_fields.detection_classes:
            tf.constant([[1], [2]]),
        detection_fields.num_detections:
            tf.constant([1, 1]),
        detection_fields.detection_keypoints:
            tf.tile(
                tf.reshape(
                    tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                multiples=[2, 1, 1, 1])
    }

    gt_boxes = detection_boxes
    groundtruth = {
        input_data_fields.groundtruth_boxes:
            tf.constant(gt_boxes),
        input_data_fields.groundtruth_classes:
            tf.constant([[1.], [1.]]),
        input_data_fields.groundtruth_keypoints:
            tf.tile(
                tf.reshape(
                    tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                multiples=[2, 1, 1, 1])
    }

    image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)

    true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
    original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
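    # Both images are padded to 100x100; true_image_shapes holds their
    # un-padded shapes and original_image_spatial_shapes the sizes that boxes
    # and keypoints are scaled to when scale_to_absolute=True.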

    result = eval_util.result_dict_for_batched_example(
        image, key, detections, groundtruth,
        scale_to_absolute=True,
        true_image_shapes=true_image_shapes,
        original_image_spatial_shapes=original_image_spatial_shapes,
        max_gt_boxes=tf.constant(1))

    with self.test_session() as sess:
      result = sess.run(result)
      self.assertAllEqual(
          [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
          result[input_data_fields.groundtruth_boxes])
      self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                           [[[0., 0.], [150., 150.], [300., 300.]]]],
                          result[input_data_fields.groundtruth_keypoints])

      # Predictions from the model are not scaled.
      self.assertAllEqual(
          [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
          result[detection_fields.detection_boxes])
      self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                           [[[0., 0.], [75., 150.], [150., 300.]]]],
                          result[detection_fields.detection_keypoints])


if __name__ == '__main__':
  tf.test.main()