# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest
from typing import Callable

import torch
from common_testing import TestCaseMixin
from pytorch3d.common.compat import meshgrid_ij
from pytorch3d.ops import eyes
from pytorch3d.renderer import (
    MonteCarloRaysampler,
    MultinomialRaysampler,
    NDCGridRaysampler,
    NDCMultinomialRaysampler,
)
from pytorch3d.renderer.cameras import (
    FoVOrthographicCameras,
    FoVPerspectiveCameras,
    OrthographicCameras,
    PerspectiveCameras,
)
from pytorch3d.renderer.implicit.raysampling import (
    _jiggle_within_stratas,
    _safe_multinomial,
)
from pytorch3d.renderer.implicit.utils import (
    ray_bundle_to_ray_points,
    ray_bundle_variables_to_ray_points,
)
from pytorch3d.transforms import Rotate
from test_cameras import init_random_cameras


class TestNDCRaysamplerConvention(TestCaseMixin, unittest.TestCase):
    """
    Checks that points produced from an `NDCGridRaysampler` ray bundle project
    back onto the expected grid of NDC pixel-center coordinates.
    """

    def setUp(self) -> None:
        # Seed the global torch RNG so every test starts from the same state.
        torch.manual_seed(42)

    def test_ndc_convention(
        self,
        h=428,
        w=760,
    ):
        """
        Build one point per pixel at length 1 along the rays of an
        `NDCGridRaysampler`, project the points with the same camera and
        compare the resulting xy coordinates with the grid returned by
        `_get_ndc_grid`.

        Args:
            h: image height in pixels.
            w: image width in pixels.

        Requires a CUDA device: all tensors and the camera are moved to "cuda".
        """
        device = torch.device("cuda")

        camera = init_random_cameras(PerspectiveCameras, 1, random_z=True).to(device)

        depth_map = torch.ones((1, 1, h, w)).to(device)

        # Replace the sampled lengths by the all-ones depth map (one point per
        # ray) and flatten the spatial dimensions into a single point list.
        xyz = ray_bundle_to_ray_points(
            NDCGridRaysampler(
                image_width=w,
                image_height=h,
                n_pts_per_ray=1,
                min_depth=1.0,
                max_depth=1.0,
            )(camera)._replace(lengths=depth_map[:, 0, ..., None])
        ).view(1, -1, 3)

        # project pointcloud
        xy = camera.transform_points(xyz)[:, :, :2].squeeze()

        xy_grid = self._get_ndc_grid(h, w, device)

        self.assertClose(
            xy,
            xy_grid,
            atol=1e-4,
        )

    def _get_ndc_grid(self, h, w, device):
        """
        Return the expected xy coordinates of all pixel centers.

        The shorter image side spans [-1, 1]; the longer side is scaled by the
        aspect ratio. Coordinates run from the positive bound to the negative
        bound along both axes, inset by half a pixel on each side.

        Args:
            h: image height in pixels.
            w: image width in pixels.
            device: device the returned tensor is moved to.

        Returns:
            A float32 tensor with `h * w` rows of `(x, y)` pairs.
        """
        if w >= h:
            range_x = w / h
            range_y = 1.0
        else:
            range_x = 1.0
            range_y = h / w

        # An axis of extent 2 * range split into n pixels has pixels of width
        # 2 * range / n, i.e. a half-pixel of range / n.
        half_pix_width = range_x / w
        half_pix_height = range_y / h

        # NOTE: the "min" values are the positive ends and the "max" values
        # the negative ends; they are the first / last linspace samples.
        min_x = range_x - half_pix_width
        max_x = -range_x + half_pix_width
        min_y = range_y - half_pix_height
        max_y = -range_y + half_pix_height

        # presumably meshgrid_ij uses "ij" indexing, giving (h, w) grids --
        # TODO confirm against pytorch3d.common.compat
        y_grid, x_grid = meshgrid_ij(
            torch.linspace(min_y, max_y, h, dtype=torch.float32),
            torch.linspace(min_x, max_x, w, dtype=torch.float32),
        )

        x_points = x_grid.contiguous().view(-1).to(device)
        y_points = y_grid.contiguous().view(-1).to(device)
        xy = torch.stack((x_points, y_points), dim=1)
        return xy


class TestRaysampling(TestCaseMixin, unittest.TestCase):
    def setUp(self) -> None:
        """Reset the global torch RNG to a fixed seed before each test."""
        seed = 42
        torch.manual_seed(seed)

    @staticmethod
    def raysampler(
        raysampler_type,
        camera_type,
        n_pts_per_ray: int,
        batch_size: int,
        image_width: int,
        image_height: int,
    ) -> Callable[[], None]:
        """
        Used for benchmarks.

        Builds a raysampler and a batch of cameras on the CUDA device and
        returns a closure that runs the raysampler once on those cameras.

        Args:
            raysampler_type: raysampler class forwarded to `init_raysampler`.
            camera_type: camera class forwarded to `init_random_cameras`.
            n_pts_per_ray: number of points sampled along each ray.
            batch_size: number of cameras in the batch.
            image_width: width of the sampling grid in pixels.
            image_height: height of the sampling grid in pixels.

        Returns:
            A zero-argument callable that calls the raysampler and then
            synchronizes CUDA so the call is complete when it returns.
        """
        device = torch.device("cuda")

        # init raysamplers
        sampler = TestRaysampling.init_raysampler(
            raysampler_type=raysampler_type,
            min_x=-1.0,
            max_x=1.0,
            min_y=-1.0,
            max_y=1.0,
            image_width=image_width,
            image_height=image_height,
            min_depth=1.0,
            max_depth=10.0,
            n_pts_per_ray=n_pts_per_ray,
        ).to(device)

        # init a batch of random cameras
        cameras = init_random_cameras(camera_type, batch_size, random_z=True).to(device)

        def run_raysampler() -> None:
            sampler(cameras=cameras)
            torch.cuda.synchronize()

        return run_raysampler

    @staticmethod
    def init_raysampler(
        raysampler_type,
        min_x=-1.0,
        max_x=1.0,
        min_y=-1.0,
        max_y=1.0,
        image_width=10,
        image_height=20,
        min_depth=1.0,
        max_depth=10.0,
        n_pts_per_ray=10,
    ):
        """
        Instantiate `raysampler_type` with the keyword arguments it accepts.

        Args:
            raysampler_type: a subclass of `MultinomialRaysampler` or
                `MonteCarloRaysampler`.
            min_x, max_x, min_y, max_y: xy bounds of the sampled region; not
                passed to `NDCMultinomialRaysampler` subclasses.
            image_width, image_height: grid size for `MultinomialRaysampler`
                subclasses; for `MonteCarloRaysampler` subclasses their
                product is passed as `n_rays_per_image`.
            min_depth, max_depth: depth range passed to the raysampler.
            n_pts_per_ray: number of points sampled along each ray.

        Returns:
            The constructed raysampler instance.

        Raises:
            ValueError: if `raysampler_type` is neither a
                `MultinomialRaysampler` nor a `MonteCarloRaysampler` subclass.
        """
        raysampler_params = {
            "min_x": min_x,
            "max_x": max_x,
            "min_y": min_y,
            "max_y": max_y,
            "n_pts_per_ray": n_pts_per_ray,
            "min_depth": min_depth,
            "max_depth": max_depth,
        }

        if issubclass(raysampler_type, MultinomialRaysampler):
            raysampler_params.update(
                {"image_width": image_width, "image_height": image_height}
            )
        elif issubclass(raysampler_type, MonteCarloRaysampler):
            raysampler_params["n_rays_per_image"] = image_width * image_height
        else:
            raise ValueError(str(raysampler_type))

        if issubclass(raysampler_type, NDCMultinomialRaysampler):
            # NDC raysamplers do not take min/max_x/y, so drop those keys
            for k in ("min_x", "max_x", "min_y", "max_y"):
                del raysampler_params[k]

        # instantiate the raysampler
        raysampler = raysampler_type(**raysampler_params)

        return raysampler

    def test_raysamplers(
        self,
        batch_size=25,
        min_x=-1.0,
        max_x=1.0,
        min_y=-1.0,
        max_y=1.0,
        image_width=10,
        image_height=20,
        min_depth=1.0,
        max_depth=10.0,
    ):
        """
        Tests the shapes and outputs of the Monte Carlo and grid raysamplers
        for randomly generated cameras and different numbers of points per ray.

        Every combination of points-per-ray (100 and 1), raysampler type and
        camera type is run through the shape, ray-point and ray-direction
        checks. Requires a CUDA device.

        Args:
            batch_size: number of cameras in each batch.
            min_x, max_x, min_y, max_y: xy bounds given to the non-NDC
                raysamplers and used as their expected bounds.
            image_width, image_height: size of the sampling grid in pixels.
            min_depth, max_depth: depth range of the sampled points.
        """

        device = torch.device("cuda")

        for n_pts_per_ray in (100, 1):

            for raysampler_type in (
                MonteCarloRaysampler,
                MultinomialRaysampler,
                NDCMultinomialRaysampler,
            ):

                raysampler = TestRaysampling.init_raysampler(
                    raysampler_type=raysampler_type,
                    min_x=min_x,
                    max_x=max_x,
                    min_y=min_y,
                    max_y=max_y,
                    image_width=image_width,
                    image_height=image_height,
                    min_depth=min_depth,
                    max_depth=max_depth,
                    n_pts_per_ray=n_pts_per_ray,
                )

                if issubclass(raysampler_type, NDCMultinomialRaysampler):
                    # adjust the gt bounds for the NDC raysampler: the shorter
                    # image side spans [-1, 1], the longer one is scaled by
                    # the aspect ratio, and both are inset by half a pixel
                    if image_width >= image_height:
                        range_x = image_width / image_height
                        range_y = 1.0
                    else:
                        range_x = 1.0
                        range_y = image_height / image_width

                    half_pix_width = range_x / image_width
                    half_pix_height = range_y / image_height
                    # the expected grid runs from the positive end ("min")
                    # to the negative end ("max") along each axis
                    min_x_ = range_x - half_pix_width
                    max_x_ = -range_x + half_pix_width
                    min_y_ = range_y - half_pix_height
                    max_y_ = -range_y + half_pix_height
                else:
                    min_x_ = min_x
                    max_x_ = max_x
                    min_y_ = min_y
                    max_y_ = max_y

                # carry out the test over several camera types
                for cam_type in (
                    FoVPerspectiveCameras,
                    FoVOrthographicCameras,
                    OrthographicCameras,
                    PerspectiveCameras,
                ):

                    # init a batch of random cameras
                    cameras = init_random_cameras(
                        cam_type, batch_size, random_z=True
                    ).to(device)

                    # call the raysampler
                    ray_bundle = raysampler(cameras=cameras)

                    # check the shapes of the raysampler outputs
                    self._check_raysampler_output_shapes(
                        raysampler,
                        ray_bundle,
                        batch_size,
                        image_width,
                        image_height,
                        n_pts_per_ray,
                    )

                    # check the points sampled along each ray
                    self._check_raysampler_ray_points(
                        raysampler,
                        cameras,
                        ray_bundle,
                        min_x_,
                        max_x_,
                        min_y_,
                        max_y_,
                        image_width,
                        image_height,
                        min_depth,
                        max_depth,
                    )

                    # check the output direction vectors
                    self._check_raysampler_ray_directions(
                        cameras, raysampler, ray_bundle
                    )

    def _check_grid_shape(self, grid, batch_size, spatial_size, n_pts_per_ray, dim):
        """
        A helper for checking the desired size of a variable output by a raysampler.
        """
        tgt_shape = [
            x for x in [batch_size, *spatial_size, n_pts_per_ray, dim] if x > 0
        ]
        self.assertTrue(all(sz1 == sz2 for sz1, sz2 in zip(grid.shape, tgt_shape)))

    def _check_raysampler_output_shapes(
        self,
        raysampler,
        ray_bundle,
        batch_size,
        image_width,
        image_height,
        n_pts_per_ray,
    ):
        """
        Checks the shapes of raysampler outputs.

        Args:
            raysampler: the raysampler that produced `ray_bundle`; its type
                selects the expected spatial layout.
            ray_bundle: raysampler output with `xys`, `origins`, `directions`
                and `lengths` fields.
            batch_size: expected size of the leading dimension.
            image_width, image_height: grid size used to build the raysampler.
            n_pts_per_ray: expected size of the last dimension of `lengths`.

        Raises:
            ValueError: if `raysampler` is neither a `MultinomialRaysampler`
                nor a `MonteCarloRaysampler`.
        """

        if isinstance(raysampler, MultinomialRaysampler):
            # grid raysamplers keep height and width as separate dimensions
            spatial_size = [image_height, image_width]
        elif isinstance(raysampler, MonteCarloRaysampler):
            # Monte Carlo raysamplers have one flat dimension of rays
            spatial_size = [image_height * image_width]
        else:
            raise ValueError(str(type(raysampler)))

        # A 0 passed for n_pts_per_ray / dim drops that entry from the
        # expected shape (see _check_grid_shape).
        self._check_grid_shape(ray_bundle.xys, batch_size, spatial_size, 0, 2)
        self._check_grid_shape(ray_bundle.origins, batch_size, spatial_size, 0, 3)
        self._check_grid_shape(ray_bundle.directions, batch_size, spatial_size, 0, 3)
        self._check_grid_shape(
            ray_bundle.lengths, batch_size, spatial_size, n_pts_per_ray, 0
        )

    def _check_raysampler_ray_points(
        self,
        raysampler,
        cameras,
        ray_bundle,
        min_x,
        max_x,
        min_y,
        max_y,
        image_width,
        image_height,
        min_depth,
        max_depth,
    ):
        """
        Check the world-space ray points and the ray lengths output by a
        raysampler.

        Verifies that
          * both ray-bundle-to-points helpers return the same points,
          * the camera-space depth of the points and `ray_bundle.lengths` are
            linearly spaced between `min_depth` and `max_depth`,
          * the points project back onto `ray_bundle.xys`,
          * the projected xy follow the expected grid (grid raysamplers) or
            lie within the given bounds and are constant along each ray
            (Monte Carlo raysamplers).

        Args:
            raysampler: the raysampler that produced `ray_bundle`.
            cameras: the batch of cameras given to the raysampler.
            ray_bundle: raysampler output with `xys`, `origins`, `directions`
                and `lengths` fields.
            min_x, max_x, min_y, max_y: first / last expected grid coordinate
                per axis (grid raysamplers) or lower / upper bounds (Monte
                Carlo raysamplers).
            image_width, image_height: size of the sampling grid in pixels.
            min_depth, max_depth: expected depth range of the points.

        Raises:
            ValueError: if `raysampler` is neither a `MultinomialRaysampler`
                nor a `MonteCarloRaysampler`.
        """

        batch_size = cameras.R.shape[0]

        # convert to ray points
        rays_points_world = ray_bundle_variables_to_ray_points(
            ray_bundle.origins, ray_bundle.directions, ray_bundle.lengths
        )
        n_pts_per_ray = rays_points_world.shape[-2]

        # check that the outputs of ray_bundle_variables_to_ray_points and
        # ray_bundle_to_ray_points match
        rays_points_world_ = ray_bundle_to_ray_points(ray_bundle)
        self.assertClose(rays_points_world, rays_points_world_)

        # check that the depth of each ray point in camera coords
        # matches the expected linearly-spaced depth
        depth_expected = torch.linspace(
            min_depth,
            max_depth,
            n_pts_per_ray,
            dtype=torch.float32,
            device=rays_points_world.device,
        )
        ray_points_camera = (
            cameras.get_world_to_view_transform()
            .transform_points(rays_points_world.view(batch_size, -1, 3))
            .view(batch_size, -1, n_pts_per_ray, 3)
        )
        self.assertClose(
            ray_points_camera[..., 2],
            depth_expected[None, None, :].expand_as(ray_points_camera[..., 2]),
            atol=1e-4,
        )

        # check also that the ray lengths are consistent with depth_expected
        self.assertClose(
            ray_bundle.lengths.view(batch_size, -1, n_pts_per_ray),
            depth_expected[None, None, :].expand_as(ray_points_camera[..., 2]),
            atol=1e-6,
        )

        # project the world ray points back to screen space
        ray_points_projected = cameras.transform_points(
            rays_points_world.view(batch_size, -1, 3)
        ).view(rays_points_world.shape)

        # check that ray_xy matches rays_points_projected xy
        rays_xy_projected = ray_points_projected[..., :2].view(
            batch_size, -1, n_pts_per_ray, 2
        )
        self.assertClose(
            ray_bundle.xys.view(batch_size, -1, 1, 2).expand_as(rays_xy_projected),
            rays_xy_projected,
            atol=1e-4,
        )

        # check that projected world points' xy coordinates
        # range correctly between [min_x/y, max_x/y]
        if isinstance(raysampler, MultinomialRaysampler):
            # get the expected coordinates along each grid axis
            ys, xs = [
                torch.linspace(
                    low, high, sz, dtype=torch.float32, device=rays_points_world.device
                )
                for low, high, sz in (
                    (min_y, max_y, image_height),
                    (min_x, max_x, image_width),
                )
            ]
            # compare expected xy with the output xy; xs varies along the
            # third dimension of the projected points and ys along the second
            for dim, gt_axis in zip(
                (0, 1), (xs[None, None, :, None], ys[None, :, None, None])
            ):
                self.assertClose(
                    ray_points_projected[..., dim],
                    gt_axis.expand_as(ray_points_projected[..., dim]),
                    atol=1e-4,
                )

        elif isinstance(raysampler, MonteCarloRaysampler):
            # check that the randomly sampled locations
            # are within the allowed bounds for both x and y axes
            for dim, axis_bounds in zip((0, 1), ((min_x, max_x), (min_y, max_y))):
                self.assertTrue(
                    (
                        (ray_points_projected[..., dim] <= axis_bounds[1])
                        & (ray_points_projected[..., dim] >= axis_bounds[0])
                    ).all()
                )

            # also check that x,y along each ray is constant; this does not
            # depend on the axis, so it is done once outside the loop above
            if n_pts_per_ray > 1:
                self.assertClose(
                    ray_points_projected[..., :2].std(dim=-2),
                    torch.zeros_like(ray_points_projected[..., 0, :2]),
                    atol=1e-5,
                )

        else:
            raise ValueError(str(type(raysampler)))

    def _check_raysampler_ray_directions(self, cameras, raysampler, ray_bundle):
        """
        Validate the direction vectors stored in `ray_bundle`.

        Two properties are asserted:
          * the normalized directions agree with the normalized difference
            between the last unprojected point of each ray and every earlier
            point of that ray;
          * rotating the directions sampled with `cameras` by `cameras.R`
            gives the directions sampled with a copy of the cameras whose
            rotation is the identity and whose translation is zero.

        Args:
            cameras: the batch of cameras given to the raysampler.
            raysampler: the raysampler under test; it is called twice more.
            ray_bundle: output of `raysampler(cameras=cameras)`.
        """

        n_cams = cameras.R.shape[0]
        pts_per_ray = ray_bundle.lengths.shape[-1]
        # all dimensions between the batch and the trailing xy dimension
        rays_per_image = ray_bundle.xys.shape[1:-1].numel()

        # assemble an (x, y, depth) triplet for every point of every ray
        xy_per_point = ray_bundle.xys.view(n_cams, rays_per_image, 1, 2).expand(
            n_cams, rays_per_image, pts_per_ray, 2
        )
        depth_per_point = ray_bundle.lengths.view(
            n_cams, rays_per_image, pts_per_ray, 1
        )
        xy_depth = torch.cat((xy_per_point, depth_per_point), dim=-1)

        # unproject the triplets to obtain the ray points in world coords
        points_world = cameras.unproject_points(xy_depth.view(n_cams, -1, 3)).view(
            n_cams, -1, pts_per_ray, 3
        )

        # unit-length version of the directions output by the raysampler
        unit_directions = torch.nn.functional.normalize(
            ray_bundle.directions.view(n_cams, -1, 3), dim=-1
        )

        # unit vectors from every earlier point of a ray to its last point;
        # with a single point per ray this is empty
        last_point = points_world[:, :, -1:]
        earlier_points = points_world[:, :, :-1]
        directions_from_points = torch.nn.functional.normalize(
            last_point - earlier_points, dim=-1
        )
        self.assertClose(
            unit_directions[:, :, None].expand_as(directions_from_points),
            directions_from_points,
            atol=1e-4,
        )

        # a copy of the cameras with identity rotation and zero translation
        identity_cameras = cameras.clone()
        identity_cameras.R = eyes(
            N=n_cams, dim=3, dtype=cameras.R.dtype, device=cameras.device
        )
        identity_cameras.T = torch.zeros_like(cameras.T)

        # re-seed before each call so that a MonteCarloRaysampler draws the
        # same random rays both times; fork_rng restores the RNG state after
        cuda_devices = range(torch.cuda.device_count())
        with torch.random.fork_rng(devices=cuda_devices):
            torch.random.manual_seed(42)
            bundle_world = raysampler(cameras=cameras)
            torch.random.manual_seed(42)
            bundle_identity = raysampler(cameras=identity_cameras)

        # rotate the world-space directions with the camera rotation matrix
        world_directions = bundle_world.directions.view(n_cams, -1, 3)
        rotated_directions = Rotate(
            cameras.R, device=cameras.R.device
        ).transform_points(world_directions)

        self.assertClose(
            rotated_directions,
            bundle_identity.directions.view(n_cams, -1, 3),
            atol=1e-5,
        )
    @unittest.skipIf(
        torch.__version__[:4] == "1.5.", "non persistent buffer needs PyTorch 1.6"
    )
    def test_load_state_different_resolution(self):
        """
        Load the state dict of one `NDCGridRaysampler` into another one built
        with a different image size and number of points per ray.

        The test has no assertions: it passes when `load_state_dict` does not
        raise.
        """
        # check that we can load the state of one ray sampler into
        # another with different image size.
        module1 = NDCGridRaysampler(
            image_width=20,
            image_height=30,
            n_pts_per_ray=40,
            min_depth=1.2,
            max_depth=2.3,
        )
        module2 = NDCGridRaysampler(
            image_width=22,
            image_height=32,
            n_pts_per_ray=42,
            min_depth=1.2,
            max_depth=2.3,
        )
        state = module1.state_dict()
        module2.load_state_dict(state)

    def test_jiggle(self):
        """
        Check `_jiggle_within_stratas` on random ascending data.

        The output must keep the input shape, stay strictly ascending along
        the last dimension, interleave with the input (each output value lies
        below the next input value and above the previous one), and its
        offsets from the input must reach close to, but stay within,
        plus / minus half of `scale`.
        """
        # random data which is in ascending order along the last dimension
        scale = 180
        data = scale * torch.cumsum(torch.rand(8, 3, 4, 20), dim=-1)

        out = _jiggle_within_stratas(data)
        self.assertTupleEqual(out.shape, data.shape)

        # Check `out` is in ascending order
        self.assertGreater((out[..., 1:] - out[..., :-1]).min(), 0)

        # each jiggled value stays between its two neighbours in `data`
        self.assertConstant(out[..., :-1] < data[..., 1:], True)
        self.assertConstant(data[..., :-1] < out[..., 1:], True)

        jiggles = out - data
        # jiggles is random between -scale/2 and scale/2; the extremes over
        # all samples are expected to come within 0.1 * scale of those limits
        self.assertLess(jiggles.min(), -0.4 * scale)
        self.assertGreater(jiggles.min(), -0.5 * scale)
        self.assertGreater(jiggles.max(), 0.4 * scale)
        self.assertLess(jiggles.max(), 0.5 * scale)

    def test_safe_multinomial(self):
        """
        Draw 3 samples per row with `_safe_multinomial` from weight rows that
        have 1, 2, 3 and 4 non-zero entries, and check the drawn indices.
        """
        # row i has its first i + 1 entries set to one and the rest to zero
        weights = torch.tensor(
            [
                [1, 0, 0, 0, 0],
                [1, 1, 0, 0, 0],
                [1, 1, 1, 0, 0],
                [1, 1, 1, 1, 0],
            ],
            dtype=torch.float32,
        )
        n_draws = 3

        for _ in range(5):
            # a random positive rescaling of the weights
            factor = torch.rand(1)
            samples = _safe_multinomial(weights * factor, n_draws)
            self.assertTupleEqual(samples.shape, (4, 3))

            one_source = samples[0]
            two_sources = samples[1]
            three_sources = samples[2]
            four_sources = samples[3]

            # a single non-zero weight: every draw must be index 0
            self.assertConstant(one_source, 0)

            # two non-zero weights: draws are limited to indices 0 and 1
            self.assertGreaterEqual(two_sources.min(), 0)
            self.assertLessEqual(two_sources.max(), 1)

            # three non-zero weights: exactly the indices 0, 1 and 2
            self.assertSetEqual(set(three_sources.tolist()), {0, 1, 2})

            # four non-zero weights: enough sources for 3 distinct indices
            self.assertLessEqual(four_sources.max(), 3)
            self.assertEqual(len(set(four_sources.tolist())), 3)