test_prediction_count_evaluation.py
#!/usr/bin/env python3

import unittest
import torch

from d2go.evaluation.prediction_count_evaluation import PredictionCountEvaluator
from detectron2.structures.instances import Instances


class TestPredictionCountEvaluation(unittest.TestCase):

    def setUp(self):
        self.evaluator = PredictionCountEvaluator()
        image_size = (224, 224)
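        # Five mock model outputs with 3, 3, 2, 2, and 1 detections
        # respectively, i.e. 11 predictions in total.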
        self.mock_outputs = [
            {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8, 0.7]))},
            {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8, 0.7]))},
            {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8]))},
            {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8]))},
            {"instances": Instances(image_size, scores=torch.Tensor([0.9]))},
        ]
        # PredictionCountEvaluator does not depend on inputs
        self.mock_inputs = [None] * len(self.mock_outputs)

    def test_process_evaluate_reset(self):
        self.assertEqual(len(self.evaluator.prediction_counts), 0)
        self.assertEqual(len(self.evaluator.confidence_scores), 0)

        # Test that `process` registers the outputs.
        self.evaluator.process(self.mock_inputs, self.mock_outputs)
        self.assertListEqual(self.evaluator.prediction_counts, [3, 3, 2, 2, 1])
        self.assertEqual(len(self.evaluator.confidence_scores), 11)

        # Test that `evaluate` returns the correct metrics.
        output_metrics = self.evaluator.evaluate()
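        # `evaluate` reports the mean prediction count per image (11 / 5)
        # and the mean confidence over all 11 predictions, nested under
        # the "false_positives" key.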
        self.assertDictAlmostEqual(
            output_metrics,
            {
                "false_positives": {
                    "predictions_per_image": 11 / 5,
                    "confidence_per_prediction": (0.9 * 5 + 0.8 * 4 + 0.7 * 2) / 11,
                }
            }
        )

        # Test that `reset` clears the evaluator state.
        self.evaluator.reset()
        self.assertEqual(len(self.evaluator.prediction_counts), 0)
        self.assertEqual(len(self.evaluator.confidence_scores), 0)

    def assertDictAlmostEqual(self, dict1, dict2):
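        """Recursively assert that two (possibly nested) dicts are equal,
        comparing scalar values with `assertAlmostEqual`."""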
        keys1 = list(dict1.keys())
        keys2 = list(dict2.keys())
        # Both dicts must have the same keys, irrespective of ordering.
        self.assertCountEqual(keys1, keys2)

        for k, v1 in dict1.items():
            v2 = dict2[k]
            if isinstance(v2, list):
                self.assertListEqual(v1, v2)
            elif isinstance(v2, dict):
                self.assertDictAlmostEqual(v1, v2)
            else:
                self.assertAlmostEqual(v1, v2)
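

# Entry point so the file can be executed directly (via its shebang or
# `python test_prediction_count_evaluation.py`) in addition to
# `python -m unittest`.
if __name__ == "__main__":
    unittest.main()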