test_evaluation.py 12 KB
Newer Older
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
1
2
3
4
5
6
7
8
9
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import contextlib
import dataclasses
10
import itertools
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
11
12
13
14
15
16
import math
import os
import unittest

import lpips
import torch
17
18
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.implicitron_dataset import ImplicitronDataset
19
from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch
20
21
22
23
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.models.generic_model import GenericModel  # noqa
from pytorch3d.implicitron.models.model_dbir import ModelDBIR  # noqa
from pytorch3d.implicitron.tools.config import expand_args_fields, registry
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
24
25
26
from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth
from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_

Jeremy Reizenstein's avatar
logging  
Jeremy Reizenstein committed
27

Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
if os.environ.get("FB_TEST", False):
    from .common_resources import get_skateboard_data, provide_lpips_vgg
else:
    from common_resources import get_skateboard_data, provide_lpips_vgg


class TestEvaluation(unittest.TestCase):
    def setUp(self):
        # initialize evaluation dataset/dataloader
        torch.manual_seed(42)

        stack = contextlib.ExitStack()
        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
        self.addCleanup(stack.close)

        category = "skateboard"
        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
46
        self.image_size = 64
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
47
48
49
50
51
52
53
54
55
        self.dataset = ImplicitronDataset(
            frame_annotations_file=frame_file,
            sequence_annotations_file=sequence_file,
            dataset_root=dataset_root,
            image_height=self.image_size,
            image_width=self.image_size,
            box_crop=True,
            path_manager=path_manager,
        )
56
        self.bg_color = (0.0, 0.0, 0.0)
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
57
58
59

        # init the lpips model for eval
        provide_lpips_vgg()
60
        self.lpips_model = lpips.LPIPS(net="vgg").cuda()
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

    def test_eval_depth(self):
        """
        Check that eval_depth correctly masks errors and that, for get_best_scale=True,
        the error with scaled prediction equals the error without scaling the
        predicted depth. Finally, test that the error values are as expected
        for prediction and gt differing by a constant offset.
        """
        gt = (torch.randn(10, 1, 300, 400, device="cuda") * 5.0).clamp(0.0)
        mask = (torch.rand_like(gt) > 0.5).type_as(gt)

        for diff in 10 ** torch.linspace(-5, 0, 6):
            for crop in (0, 5):

                pred = gt + (torch.rand_like(gt) - 0.5) * 2 * diff

                # scaled prediction test
                mse_depth, abs_depth = eval_depth(
                    pred,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                mse_depth_scale, abs_depth_scale = eval_depth(
                    pred * 10.0,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth.sum()), float(mse_depth_scale.sum()), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth.sum()), float(abs_depth_scale.sum()), delta=1e-4
                )

                # error masking test
                pred_masked_err = gt + (torch.rand_like(gt) + diff) * (1 - mask)
                mse_depth_masked, abs_depth_masked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth_masked.sum()), float(0.0), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth_masked.sum()), float(0.0), delta=1e-4
                )
                mse_depth_unmasked, abs_depth_unmasked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=1 - mask,
                    get_best_scale=True,
                )
                self.assertGreater(
                    float(mse_depth_unmasked.sum()),
123
                    float(diff**2),
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
                )
                self.assertGreater(
                    float(abs_depth_unmasked.sum()),
                    float(diff),
                )

                # tests with constant error
                pred_fix_diff = gt + diff * mask
                for _mask_gt in (mask, None):
                    mse_depth_fix_diff, abs_depth_fix_diff = eval_depth(
                        pred_fix_diff,
                        gt,
                        crop=crop,
                        mask=_mask_gt,
                        get_best_scale=False,
                    )
                    if _mask_gt is not None:
                        expected_err_abs = diff
142
                        expected_err_mse = diff**2
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
                    else:
                        err_mask = (gt > 0.0).float() * mask
                        if crop > 0:
                            err_mask = err_mask[:, :, crop:-crop, crop:-crop]
                            gt_cropped = gt[:, :, crop:-crop, crop:-crop]
                        else:
                            gt_cropped = gt
                        gt_mass = (gt_cropped > 0.0).float().sum(dim=(1, 2, 3))
                        expected_err_abs = (
                            diff * err_mask.sum(dim=(1, 2, 3)) / (gt_mass)
                        )
                        expected_err_mse = diff * expected_err_abs
                    self.assertTrue(
                        torch.allclose(
                            abs_depth_fix_diff,
                            expected_err_abs * torch.ones_like(abs_depth_fix_diff),
                            atol=1e-4,
                        )
                    )
                    self.assertTrue(
                        torch.allclose(
                            mse_depth_fix_diff,
                            expected_err_mse * torch.ones_like(mse_depth_fix_diff),
                            atol=1e-4,
                        )
                    )

    def test_psnr(self):
        """
        Compare against opencv and check that the psnr is above
        the minimum possible value.
        """
        import cv2

        im1 = torch.rand(100, 3, 256, 256).cuda()
        im1_uint8 = (im1 * 255).to(torch.uint8)
        im1_rounded = im1_uint8.float() / 255
        for max_diff in 10 ** torch.linspace(-5, 0, 6):
            im2 = im1 + (torch.rand_like(im1) - 0.5) * 2 * max_diff
            im2 = im2.clamp(0.0, 1.0)
            im2_uint8 = (im2 * 255).to(torch.uint8)
            im2_rounded = im2_uint8.float() / 255
            # check that our psnr matches the output of opencv
            psnr = calc_psnr(im1_rounded, im2_rounded)
            # some versions of cv2 can only take uint8 input
            psnr_cv2 = cv2.PSNR(
                im1_uint8.cpu().numpy(),
                im2_uint8.cpu().numpy(),
            )
            self.assertAlmostEqual(float(psnr), float(psnr_cv2), delta=1e-4)
            # check that all PSNRs are bigger than the minimum possible PSNR
194
            max_mse = max_diff**2
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
195
196
197
198
199
200
201
202
            min_psnr = 10 * math.log10(1.0 / max_mse)
            for _im1, _im2 in zip(im1, im2):
                _psnr = calc_psnr(_im1, _im2)
                self.assertGreaterEqual(float(_psnr) + 1e-6, min_psnr)

    def _one_sequence_test(
        self,
        seq_dataset,
203
204
205
        model,
        batch_indices,
        check_metrics=False,
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
    ):
        loader = torch.utils.data.DataLoader(
            seq_dataset,
            shuffle=False,
            batch_sampler=batch_indices,
            collate_fn=FrameData.collate,
        )

        for frame_data in loader:
            self.assertIsNone(frame_data.frame_type)
            self.assertIsNotNone(frame_data.image_rgb)
            # override the frame_type
            frame_data.frame_type = [
                "train_unseen",
                *(["train_known"] * (len(frame_data.image_rgb) - 1)),
            ]

            frame_data = dataclass_to_cuda_(frame_data)
            preds = model(**dataclasses.asdict(frame_data))

            eval_result = eval_batch(
                frame_data,
228
                preds["implicitron_render"],
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
229
230
231
232
                bg_color=self.bg_color,
                lpips_model=self.lpips_model,
            )

233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
            if check_metrics:
                self._check_metrics(
                    frame_data, preds["implicitron_render"], eval_result
                )

    def _check_metrics(self, frame_data, implicitron_render, eval_result):
        # Make a terribly bad NVS prediction and check that this is worse
        # than the DBIR prediction.
        implicitron_render_bad = implicitron_render.clone()
        implicitron_render_bad.depth_render += (
            torch.randn_like(implicitron_render_bad.depth_render) * 100.0
        )
        implicitron_render_bad.image_render += (
            torch.randn_like(implicitron_render_bad.image_render) * 100.0
        )
        implicitron_render_bad.mask_render = (
            torch.randn_like(implicitron_render_bad.mask_render) > 0.0
        ).float()
        eval_result_bad = eval_batch(
            frame_data,
            implicitron_render_bad,
            bg_color=self.bg_color,
            lpips_model=self.lpips_model,
        )

        lower_better = {
            "psnr": False,
            "psnr_fg": False,
            "depth_abs_fg": True,
            "iou": False,
            "rgb_l1": True,
            "rgb_l1_fg": True,
        }

        for metric in lower_better:
            m_better = eval_result[metric]
            m_worse = eval_result_bad[metric]
            if m_better != m_better or m_worse != m_worse:
                continue  # metric is missing, i.e. NaN
            _assert = (
                self.assertLessEqual
                if lower_better[metric]
                else self.assertGreaterEqual
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
276
            )
277
278
279
280
281
282
283
284
285
            _assert(m_better, m_worse)

    def _get_random_batch_indices(
        self, seq_dataset, n_batches=2, min_batch_size=5, max_batch_size=10
    ):
        batch_indices = []
        for _ in range(n_batches):
            batch_size = torch.randint(
                low=min_batch_size, high=max_batch_size, size=(1,)
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
286
            )
287
            batch_indices.append(torch.randperm(len(seq_dataset))[:batch_size])
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
288

289
        return batch_indices
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
290
291
292

    def test_full_eval(self, n_sequences=5):
        """Test evaluation."""
293
294
295
296

        # caching batch indices first to preserve RNG state
        seq_datasets = {}
        batch_indices = {}
297
298
        for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
            idx = list(self.dataset.sequence_indices_in_order(seq))
Jeremy Reizenstein's avatar
Jeremy Reizenstein committed
299
            seq_dataset = torch.utils.data.Subset(self.dataset, idx)
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
            seq_datasets[seq] = seq_dataset
            batch_indices[seq] = self._get_random_batch_indices(seq_dataset)

        for model_class_type in ["ModelDBIR", "GenericModel"]:
            ModelClass = registry.get(ImplicitronModelBase, model_class_type)
            expand_args_fields(ModelClass)
            model = ModelClass(
                render_image_width=self.image_size,
                render_image_height=self.image_size,
                bg_color=self.bg_color,
            )
            model.eval()
            model.cuda()

            for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
                self._one_sequence_test(
                    seq_datasets[seq],
                    model,
                    batch_indices[seq],
                    check_metrics=(model_class_type == "ModelDBIR"),
                )