Commit 3d3b2fdc authored by Nikhila Ravi's avatar Nikhila Ravi
Browse files

Re-sync with internal repository

parent 2480723a
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
from typing import Optional
import torch
from pytorch3d import _C
from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_ndc
# TODO(jcjohns): Support non-square images
def rasterize_points(
pointclouds,
image_size: int = 256,
radius: float = 0.01,
points_per_pixel: int = 8,
bin_size: Optional[int] = None,
max_points_per_bin: Optional[int] = None,
):
"""
Pointcloud rasterization
Args:
pointclouds: A Pointclouds object representing a batch of point clouds to be
rasterized. This is a batch of N pointclouds, where each point cloud
can have a different number of points; the coordinates of each point
are (x, y, z). The coordinates are expected to
be in normalized device coordinates (NDC): [-1, 1]^3 with the camera at
(0, 0, 0); the x-axis goes from left-to-right, the y-axis goes from
top-to-bottom, and the z-axis goes from back-to-front.
image_size: Integer giving the resolution of the rasterized image
radius (Optional): Float giving the radius (in NDC units) of the disk to
be rasterized for each point.
points_per_pixel (Optional): We will keep track of this many points per
pixel, returning the nearest points_per_pixel points along the z-axis
bin_size: Size of bins to use for coarse-to-fine rasterization. Setting
bin_size=0 uses naive rasterization; setting bin_size=None attempts to
set it heuristically based on the shape of the input. This should not
affect the output, but can affect the speed of the forward pass.
points_per_bin: Only applicable when using coarse-to-fine rasterization
(bin_size > 0); this is the maxiumum number of points allowed within each
bin. If more than this many points actually fall into a bin, an error
will be raised. This should not affect the output values, but can affect
the memory usage in the forward pass.
Returns:
3-element tuple containing
- **idx**: int32 Tensor of shape (N, image_size, image_size, points_per_pixel)
giving the indices of the nearest points at each pixel, in ascending
z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth
closest point (along the z-direction) to pixel (y, x) - note that points
represents the packed points of shape (P, 3).
Pixels that are hit by fewer than points_per_pixel are padded with -1.
- **zbuf**: Tensor of shape (N, image_size, image_size, points_per_pixel)
giving the z-coordinates of the nearest points at each pixel, sorted in
z-order. Concretely, if `idx[n, y, x, k] = p` then
`zbuf[n, y, x, k] = points[n, p, 2]`. Pixels hit by fewer than
points_per_pixel are padded with -1
- **dists2**: Tensor of shape (N, image_size, image_size, points_per_pixel)
giving the squared Euclidean distance (in NDC units) in the x/y plane
for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p`
then `dists[n, y, x, k]` is the squared distance between the pixel (y, x)
and the point `(points[n, p, 0], points[n, p, 1])`. Pixels hit with fewer
than points_per_pixel are padded with -1.
"""
points_packed = pointclouds.points_packed()
cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
num_points_per_cloud = pointclouds.num_points_per_cloud()
if bin_size is None:
if not points_packed.is_cuda:
# Binned CPU rasterization not fully implemented
bin_size = 0
else:
# TODO: These heuristics are not well-thought out!
if image_size <= 64:
bin_size = 8
elif image_size <= 256:
bin_size = 16
elif image_size <= 512:
bin_size = 32
elif image_size <= 1024:
bin_size = 64
if max_points_per_bin is None:
max_points_per_bin = int(max(10000, points_packed.shape[0] / 5))
# Function.apply cannot take keyword args, so we handle defaults in this
# wrapper and call apply with positional args only
return _RasterizePoints.apply(
points_packed,
cloud_to_packed_first_idx,
num_points_per_cloud,
image_size,
radius,
points_per_pixel,
bin_size,
max_points_per_bin,
)
class _RasterizePoints(torch.autograd.Function):
    """Autograd wrapper around the C++/CUDA point rasterization kernels.

    Positional-only by construction: Function.apply does not accept keyword
    arguments, so callers (see rasterize_points) pass everything positionally.
    """

    @staticmethod
    def forward(
        ctx,
        points,  # (P, 3)
        cloud_to_packed_first_idx,
        num_points_per_cloud,
        image_size: int = 256,
        radius: float = 0.01,
        points_per_pixel: int = 8,
        bin_size: int = 0,
        max_points_per_bin: int = 0,
    ):
        # TODO: Add better error handling for when there are more than
        # max_points_per_bin in any bin.
        idx, zbuf, dists = _C.rasterize_points(
            points,
            cloud_to_packed_first_idx,
            num_points_per_cloud,
            image_size,
            radius,
            points_per_pixel,
            bin_size,
            max_points_per_bin,
        )
        # Only `points` and the pixel->point index map are needed for backward.
        ctx.save_for_backward(points, idx)
        return idx, zbuf, dists

    @staticmethod
    def backward(ctx, grad_idx, grad_zbuf, grad_dists):
        points, idx = ctx.saved_tensors
        grad_points = _C.rasterize_points_backward(
            points, idx, grad_zbuf, grad_dists
        )
        # One gradient slot per forward input; only `points` is differentiable,
        # the remaining seven inputs (indices / config ints / floats) get None.
        return (grad_points,) + (None,) * 7
def rasterize_points_python(
    pointclouds,
    image_size: int = 256,
    radius: float = 0.01,
    points_per_pixel: int = 8,
):
    """
    Naive pure PyTorch implementation of pointcloud rasterization.

    Reference implementation used to validate the C++/CUDA kernels: for every
    pixel of every cloud it scans all points, keeps those whose disk of the
    given radius covers the pixel, and records the points_per_pixel nearest
    (smallest z) hits.

    Inputs / Outputs: Same as above
    """
    N = len(pointclouds)
    S, K = image_size, points_per_pixel
    device = pointclouds.device
    points_packed = pointclouds.points_packed()
    cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
    num_points_per_cloud = pointclouds.num_points_per_cloud()
    # Initialize output tensors; -1 marks "no point hit this pixel slot".
    point_idxs = torch.full(
        (N, S, S, K), fill_value=-1, dtype=torch.int32, device=device
    )
    zbuf = torch.full(
        (N, S, S, K), fill_value=-1, dtype=torch.float32, device=device
    )
    pix_dists = torch.full(
        (N, S, S, K), fill_value=-1, dtype=torch.float32, device=device
    )
    # NDC is from [-1, 1]. Get pixel size using specified image size.
    radius2 = radius * radius
    # Iterate through the batch of point clouds.
    for n in range(N):
        # This cloud's slice of the packed (P, 3) points tensor.
        point_start_idx = cloud_to_packed_first_idx[n]
        point_stop_idx = point_start_idx + num_points_per_cloud[n]
        # Iterate through the horizontal lines of the image from top to bottom.
        for yi in range(S):
            # Y coordinate of one end of the image. Reverse the ordering
            # of yi so that +Y is pointing up in the image.
            yfix = S - 1 - yi
            yf = pix_to_ndc(yfix, S)
            # Iterate through pixels on this horizontal line, left to right.
            for xi in range(S):
                # X coordinate of one end of the image. Reverse the ordering
                # of xi so that +X is pointing to the left in the image.
                xfix = S - 1 - xi
                xf = pix_to_ndc(xfix, S)
                top_k_points = []
                # Check whether each point in the batch affects this pixel.
                for p in range(point_start_idx, point_stop_idx):
                    px, py, pz = points_packed[p, :]
                    # Points behind the camera (negative z) are skipped.
                    if pz < 0:
                        continue
                    dx = px - xf
                    dy = py - yf
                    dist2 = dx * dx + dy * dy
                    if dist2 < radius2:
                        top_k_points.append((pz, p, dist2))
                # Sorting (pz, p, dist2) tuples orders hits by ascending z,
                # breaking ties by packed point index.
                top_k_points.sort()
                if len(top_k_points) > K:
                    top_k_points = top_k_points[:K]
                for k, (pz, p, dist2) in enumerate(top_k_points):
                    zbuf[n, yi, xi, k] = pz
                    point_idxs[n, yi, xi, k] = p
                    pix_dists[n, yi, xi, k] = dist2
    return point_idxs, zbuf, pix_dists
This diff is collapsed.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
from itertools import product
from fvcore.common.benchmark import benchmark
from test_pointclouds import TestPointclouds
def bm_compute_packed_padded_pointclouds() -> None:
    """Benchmark packed and padded computation over a grid of cloud sizes."""
    kwargs_list = [
        {"num_clouds": n, "max_p": p, "features": f}
        for n, p, f in product([32, 128], [100, 10000], [1, 10, 300])
    ]
    for fn, label in (
        (TestPointclouds.compute_packed_with_init, "COMPUTE_PACKED"),
        (TestPointclouds.compute_padded_with_init, "COMPUTE_PADDED"),
    ):
        benchmark(fn, label, kwargs_list, warmup_iters=1)
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import torch
from fvcore.common.benchmark import benchmark
from pytorch3d.renderer.points.rasterize_points import (
rasterize_points,
rasterize_points_python,
)
from pytorch3d.structures.pointclouds import Pointclouds
def _bm_python_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
    """Return a thunk running the pure-Python rasterizer on N random clouds."""
    torch.manual_seed(231)
    clouds = Pointclouds(points=torch.randn(N, P, 3))
    return lambda: rasterize_points_python(clouds, img_size, radius, pts_per_pxl)
def _bm_cpu_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
    """Return a thunk running the C++ CPU rasterizer on N random clouds."""
    torch.manual_seed(231)
    clouds = Pointclouds(points=torch.randn(N, P, 3))
    return lambda: rasterize_points(clouds, img_size, radius, pts_per_pxl)
def _bm_cuda_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
    """Return a thunk running the CUDA rasterizer on N random clouds."""
    torch.manual_seed(231)
    pts = torch.randn(N, P, 3, device=torch.device("cuda"))
    clouds = Pointclouds(points=pts)
    return lambda: rasterize_points(clouds, img_size, radius, pts_per_pxl)
def bm_python_vs_cpu() -> None:
    """Benchmark the Python, CPU and CUDA rasterization paths."""
    # Small cases only: the pure-Python path is far too slow for large inputs.
    small_cases = [
        {"N": 1, "P": 32, "img_size": 32, "radius": 0.1, "pts_per_pxl": 3},
        {"N": 2, "P": 32, "img_size": 32, "radius": 0.1, "pts_per_pxl": 3},
    ]
    benchmark(
        _bm_python_with_init, "RASTERIZE_PYTHON", small_cases, warmup_iters=1
    )
    benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", small_cases, warmup_iters=1)
    # Larger cases for the compiled paths.
    big_cases = [
        {"N": 2, "P": 32, "img_size": 32, "radius": 0.1, "pts_per_pxl": 3},
        {"N": 4, "P": 1024, "img_size": 128, "radius": 0.05, "pts_per_pxl": 5},
    ]
    benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", big_cases, warmup_iters=1)
    benchmark(_bm_cuda_with_init, "RASTERIZE_CUDA", big_cases, warmup_iters=1)
This diff is collapsed.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import numpy as np
import unittest
import torch
from pytorch3d import _C
from pytorch3d.renderer.points.rasterize_points import (
rasterize_points,
rasterize_points_python,
)
from pytorch3d.structures.pointclouds import Pointclouds
from common_testing import TestCaseMixin
class TestRasterizePoints(TestCaseMixin, unittest.TestCase):
    """Tests comparing the pure-Python, C++ CPU and CUDA point rasterizers.

    Convention used by the helpers below: bin_size=-1 selects the pure-Python
    path (which takes no bin_size argument), bin_size=0 the naive (non-binned)
    C++/CUDA path, and bin_size>0 the coarse-to-fine binned path.
    """

    def test_python_simple_cpu(self):
        self._simple_test_case(
            rasterize_points_python, torch.device("cpu"), bin_size=-1
        )

    def test_naive_simple_cpu(self):
        device = torch.device("cpu")
        self._simple_test_case(rasterize_points, device)

    def test_naive_simple_cuda(self):
        device = torch.device("cuda")
        self._simple_test_case(rasterize_points, device, bin_size=0)

    def test_python_behind_camera(self):
        self._test_behind_camera(
            rasterize_points_python, torch.device("cpu"), bin_size=-1
        )

    def test_cpu_behind_camera(self):
        self._test_behind_camera(rasterize_points, torch.device("cpu"))

    def test_cuda_behind_camera(self):
        self._test_behind_camera(
            rasterize_points, torch.device("cuda"), bin_size=0
        )

    def test_cpp_vs_naive_vs_binned(self):
        # Make sure that the backward pass runs for all pathways, and that
        # all three pathways agree on outputs and gradients.
        N = 2
        P = 1000
        image_size = 32
        radius = 0.1
        points_per_pixel = 3
        points1 = torch.randn(P, 3, requires_grad=True)
        points2 = torch.randn(int(P / 2), 3, requires_grad=True)
        pointclouds = Pointclouds(points=[points1, points2])
        grad_zbuf = torch.randn(N, image_size, image_size, points_per_pixel)
        grad_dists = torch.randn(N, image_size, image_size, points_per_pixel)
        # Option I: CPU, naive
        idx1, zbuf1, dists1 = rasterize_points(
            pointclouds, image_size, radius, points_per_pixel, bin_size=0
        )
        loss = (zbuf1 * grad_zbuf).sum() + (dists1 * grad_dists).sum()
        loss.backward()
        grad1 = points1.grad.data.clone()
        # Option II: CUDA, naive
        points1_cuda = points1.cuda().detach().clone().requires_grad_(True)
        points2_cuda = points2.cuda().detach().clone().requires_grad_(True)
        pointclouds = Pointclouds(points=[points1_cuda, points2_cuda])
        grad_zbuf = grad_zbuf.cuda()
        grad_dists = grad_dists.cuda()
        idx2, zbuf2, dists2 = rasterize_points(
            pointclouds, image_size, radius, points_per_pixel, bin_size=0
        )
        loss = (zbuf2 * grad_zbuf).sum() + (dists2 * grad_dists).sum()
        loss.backward()
        idx2 = idx2.data.cpu().clone()
        zbuf2 = zbuf2.data.cpu().clone()
        dists2 = dists2.data.cpu().clone()
        grad2 = points1_cuda.grad.data.cpu().clone()
        # Option III: CUDA, binned
        points1_cuda = points1.cuda().detach().clone().requires_grad_(True)
        points2_cuda = points2.cuda().detach().clone().requires_grad_(True)
        pointclouds = Pointclouds(points=[points1_cuda, points2_cuda])
        idx3, zbuf3, dists3 = rasterize_points(
            pointclouds, image_size, radius, points_per_pixel, bin_size=32
        )
        loss = (zbuf3 * grad_zbuf).sum() + (dists3 * grad_dists).sum()
        points1.grad.data.zero_()
        loss.backward()
        idx3 = idx3.data.cpu().clone()
        zbuf3 = zbuf3.data.cpu().clone()
        dists3 = dists3.data.cpu().clone()
        grad3 = points1_cuda.grad.data.cpu().clone()
        # Make sure everything was the same: indices and zbuf should match
        # exactly, distances and gradients up to float tolerance.
        idx12_same = (idx1 == idx2).all().item()
        idx13_same = (idx1 == idx3).all().item()
        zbuf12_same = (zbuf1 == zbuf2).all().item()
        zbuf13_same = (zbuf1 == zbuf3).all().item()
        dists12_diff = (dists1 - dists2).abs().max().item()
        dists13_diff = (dists1 - dists3).abs().max().item()
        self.assertTrue(idx12_same)
        self.assertTrue(idx13_same)
        self.assertTrue(zbuf12_same)
        self.assertTrue(zbuf13_same)
        self.assertTrue(dists12_diff < 1e-6)
        self.assertTrue(dists13_diff < 1e-6)
        diff12 = (grad1 - grad2).abs().max().item()
        diff13 = (grad1 - grad3).abs().max().item()
        diff23 = (grad2 - grad3).abs().max().item()
        self.assertTrue(diff12 < 5e-6)
        self.assertTrue(diff13 < 5e-6)
        self.assertTrue(diff23 < 5e-6)

    def test_python_vs_cpu_naive(self):
        torch.manual_seed(231)
        image_size = 32
        radius = 0.1
        points_per_pixel = 3
        # Test a batch of homogeneous point clouds.
        N = 2
        P = 17
        points = torch.randn(N, P, 3, requires_grad=True)
        pointclouds = Pointclouds(points=points)
        args = (pointclouds, image_size, radius, points_per_pixel)
        self._compare_impls(
            rasterize_points_python,
            rasterize_points,
            args,
            args,
            points,
            points,
            compare_grads=True,
        )
        # Test a batch of heterogeneous point clouds.
        P2 = 10
        points1 = torch.randn(P, 3, requires_grad=True)
        points2 = torch.randn(P2, 3)
        pointclouds = Pointclouds(points=[points1, points2])
        args = (pointclouds, image_size, radius, points_per_pixel)
        self._compare_impls(
            rasterize_points_python,
            rasterize_points,
            args,
            args,
            points1,  # check gradients for first element in batch
            points1,
            compare_grads=True,
        )

    def test_cpu_vs_cuda_naive(self):
        torch.manual_seed(231)
        image_size = 64
        radius = 0.1
        points_per_pixel = 5
        # Test homogeneous point cloud batch.
        N = 2
        P = 1000
        bin_size = 0
        points_cpu = torch.rand(N, P, 3, requires_grad=True)
        points_cuda = points_cpu.cuda().detach().requires_grad_(True)
        pointclouds_cpu = Pointclouds(points=points_cpu)
        pointclouds_cuda = Pointclouds(points=points_cuda)
        args_cpu = (
            pointclouds_cpu,
            image_size,
            radius,
            points_per_pixel,
            bin_size,
        )
        args_cuda = (
            pointclouds_cuda,
            image_size,
            radius,
            points_per_pixel,
            bin_size,
        )
        self._compare_impls(
            rasterize_points,
            rasterize_points,
            args_cpu,
            args_cuda,
            points_cpu,
            points_cuda,
            compare_grads=True,
        )

    def _compare_impls(
        self,
        fn1,
        fn2,
        args1,
        args2,
        grad_var1=None,
        grad_var2=None,
        compare_grads=False,
    ):
        # Run both rasterizer implementations on their respective args and
        # assert that outputs (and optionally gradients w.r.t. grad_var*)
        # agree. Results are moved to CPU before comparison so mixed
        # CPU/CUDA pairs can be checked.
        idx1, zbuf1, dist1 = fn1(*args1)
        torch.manual_seed(231)
        grad_zbuf = torch.randn_like(zbuf1)
        grad_dist = torch.randn_like(dist1)
        loss = (zbuf1 * grad_zbuf).sum() + (dist1 * grad_dist).sum()
        if compare_grads:
            loss.backward()
            grad_points1 = grad_var1.grad.data.clone().cpu()
        idx2, zbuf2, dist2 = fn2(*args2)
        grad_zbuf = grad_zbuf.to(zbuf2)
        grad_dist = grad_dist.to(dist2)
        loss = (zbuf2 * grad_zbuf).sum() + (dist2 * grad_dist).sum()
        if compare_grads:
            # clear points1.grad in case args1 and args2 reused the same tensor
            grad_var1.grad.data.zero_()
            loss.backward()
            grad_points2 = grad_var2.grad.data.clone().cpu()
        self.assertEqual((idx1.cpu() == idx2.cpu()).all().item(), 1)
        self.assertEqual((zbuf1.cpu() == zbuf2.cpu()).all().item(), 1)
        self.assertClose(dist1.cpu(), dist2.cpu())
        if compare_grads:
            self.assertTrue(
                torch.allclose(grad_points1, grad_points2, atol=2e-6)
            )

    def _test_behind_camera(self, rasterize_points_fn, device, bin_size=None):
        # Test case where all points are behind the camera -- nothing should
        # get rasterized
        N = 2
        P = 32
        xy = torch.randn(N, P, 2)
        z = torch.randn(N, P, 1).abs().mul(-1)  # Make them all negative
        points = torch.cat([xy, z], dim=2).to(device)
        image_size = 16
        points_per_pixel = 3
        radius = 0.2
        # All outputs should keep their -1 padding value everywhere.
        idx_expected = torch.full(
            (N, 16, 16, 3), fill_value=-1, dtype=torch.int32, device=device
        )
        zbuf_expected = torch.full(
            (N, 16, 16, 3), fill_value=-1, dtype=torch.float32, device=device
        )
        dists_expected = zbuf_expected.clone()
        pointclouds = Pointclouds(points=points)
        if bin_size == -1:
            # simple python case with no binning
            idx, zbuf, dists = rasterize_points_fn(
                pointclouds, image_size, radius, points_per_pixel
            )
        else:
            idx, zbuf, dists = rasterize_points_fn(
                pointclouds, image_size, radius, points_per_pixel, bin_size
            )
        idx_same = (idx == idx_expected).all().item() == 1
        zbuf_same = (zbuf == zbuf_expected).all().item() == 1
        self.assertTrue(idx_same)
        self.assertTrue(zbuf_same)
        self.assertTrue(torch.allclose(dists, dists_expected))

    def _simple_test_case(self, rasterize_points_fn, device, bin_size=0):
        # Create two pointclouds with different numbers of points.
        # fmt: off
        points1 = torch.tensor(
            [
                [0.0, 0.0,  0.0],  # noqa: E241
                [0.4, 0.0,  0.1],  # noqa: E241
                [0.0, 0.4,  0.2],  # noqa: E241
                [0.0, 0.0, -0.1],  # noqa: E241 Points with negative z should be skipped
            ],
            device=device,
        )
        points2 = torch.tensor(
            [
                [0.0, 0.0,  0.0],  # noqa: E241
                [0.4, 0.0,  0.1],  # noqa: E241
                [0.0, 0.4,  0.2],  # noqa: E241
                [0.0, 0.0, -0.1],  # noqa: E241 Points with negative z should be skipped
                [0.0, 0.0, -0.7],  # noqa: E241 Points with negative z should be skipped
            ],
            device=device,
        )
        # fmt: on
        pointclouds = Pointclouds(points=[points1, points2])
        image_size = 5
        points_per_pixel = 2
        radius = 0.5
        # The expected output values. Note that in the outputs, the world space
        # +Y is up, and the world space +X is left.
        idx1_expected = torch.full(
            (1, 5, 5, 2), fill_value=-1, dtype=torch.int32, device=device
        )
        # fmt: off
        idx1_expected[0, :, :, 0] = torch.tensor([
            [-1, -1,  2, -1, -1],  # noqa: E241
            [-1,  1,  0,  2, -1],  # noqa: E241
            [ 1,  0,  0,  0, -1],  # noqa: E241 E201
            [-1,  1,  0, -1, -1],  # noqa: E241
            [-1, -1, -1, -1, -1],  # noqa: E241
        ], device=device)
        idx1_expected[0, :, :, 1] = torch.tensor([
            [-1, -1, -1, -1, -1],  # noqa: E241
            [-1,  2,  2, -1, -1],  # noqa: E241
            [-1,  1,  1, -1, -1],  # noqa: E241
            [-1, -1, -1, -1, -1],  # noqa: E241
            [-1, -1, -1, -1, -1],  # noqa: E241
        ], device=device)
        # fmt: on
        zbuf1_expected = torch.full(
            (1, 5, 5, 2), fill_value=100, dtype=torch.float32, device=device
        )
        # fmt: off
        zbuf1_expected[0, :, :, 0] = torch.tensor([
            [-1.0, -1.0,  0.2, -1.0, -1.0],  # noqa: E241
            [-1.0,  0.1,  0.0,  0.2, -1.0],  # noqa: E241
            [ 0.1,  0.0,  0.0,  0.0, -1.0],  # noqa: E241 E201
            [-1.0,  0.1,  0.0, -1.0, -1.0],  # noqa: E241
            [-1.0, -1.0, -1.0, -1.0, -1.0]   # noqa: E241
        ], device=device)
        zbuf1_expected[0, :, :, 1] = torch.tensor([
            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
            [-1.0,  0.2,  0.2, -1.0, -1.0],  # noqa: E241
            [-1.0,  0.1,  0.1, -1.0, -1.0],  # noqa: E241
            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
            [-1.0, -1.0, -1.0, -1.0, -1.0],  # noqa: E241
        ], device=device)
        # fmt: on
        dists1_expected = torch.full(
            (1, 5, 5, 2), fill_value=0.0, dtype=torch.float32, device=device
        )
        # fmt: off
        dists1_expected[0, :, :, 0] = torch.tensor([
            [-1.00, -1.00,  0.16, -1.00, -1.00],  # noqa: E241
            [-1.00,  0.16,  0.16,  0.16, -1.00],  # noqa: E241
            [ 0.16,  0.16,  0.00,  0.16, -1.00],  # noqa: E241 E201
            [-1.00,  0.16,  0.16, -1.00, -1.00],  # noqa: E241
            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
        ], device=device)
        dists1_expected[0, :, :, 1] = torch.tensor([
            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
            [-1.00,  0.16,  0.00, -1.00, -1.00],  # noqa: E241
            [-1.00,  0.00,  0.16, -1.00, -1.00],  # noqa: E241
            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
            [-1.00, -1.00, -1.00, -1.00, -1.00],  # noqa: E241
        ], device=device)
        # fmt: on
        if bin_size == -1:
            # simple python case with no binning
            idx, zbuf, dists = rasterize_points_fn(
                pointclouds, image_size, radius, points_per_pixel
            )
        else:
            idx, zbuf, dists = rasterize_points_fn(
                pointclouds, image_size, radius, points_per_pixel, bin_size
            )
        # check first point cloud
        idx_same = (idx[0, ...] == idx1_expected).all().item() == 1
        if idx_same == 0:
            # Print the mismatching layers to aid debugging on failure.
            print(idx[0, :, :, 0])
            print(idx[0, :, :, 1])
        zbuf_same = (zbuf[0, ...] == zbuf1_expected).all().item() == 1
        dist_same = torch.allclose(dists[0, ...], dists1_expected)
        self.assertTrue(idx_same)
        self.assertTrue(zbuf_same)
        self.assertTrue(dist_same)
        # Check second point cloud - the indices in idx refer to points in the
        # pointclouds.points_packed() tensor. In the second point cloud,
        # two points are behind the screen - the expected indices are the same
        # the first pointcloud but offset by the number of points in the
        # first pointcloud.
        num_points_per_cloud = pointclouds.num_points_per_cloud()
        idx1_expected[idx1_expected >= 0] += num_points_per_cloud[0]
        idx_same = (idx[1, ...] == idx1_expected).all().item() == 1
        zbuf_same = (zbuf[1, ...] == zbuf1_expected).all().item() == 1
        self.assertTrue(idx_same)
        self.assertTrue(zbuf_same)
        self.assertTrue(torch.allclose(dists[1, ...], dists1_expected))

    def test_coarse_cpu(self):
        return self._test_coarse_rasterize(torch.device("cpu"))

    def test_coarse_cuda(self):
        return self._test_coarse_rasterize(torch.device("cuda"))

    def test_compare_coarse_cpu_vs_cuda(self):
        torch.manual_seed(231)
        N = 3
        max_P = 1000
        image_size = 64
        radius = 0.1
        bin_size = 16
        max_points_per_bin = 500
        # create heterogeneous point clouds
        points = []
        for _ in range(N):
            p = np.random.choice(max_P)
            points.append(torch.randn(p, 3))
        pointclouds = Pointclouds(points=points)
        points_packed = pointclouds.points_packed()
        cloud_to_packed_first_idx = pointclouds.cloud_to_packed_first_idx()
        num_points_per_cloud = pointclouds.num_points_per_cloud()
        args = (
            points_packed,
            cloud_to_packed_first_idx,
            num_points_per_cloud,
            image_size,
            radius,
            bin_size,
            max_points_per_bin,
        )
        bp_cpu = _C._rasterize_points_coarse(*args)
        pointclouds_cuda = pointclouds.to("cuda:0")
        points_packed = pointclouds_cuda.points_packed()
        cloud_to_packed_first_idx = pointclouds_cuda.cloud_to_packed_first_idx()
        num_points_per_cloud = pointclouds_cuda.num_points_per_cloud()
        args = (
            points_packed,
            cloud_to_packed_first_idx,
            num_points_per_cloud,
            image_size,
            radius,
            bin_size,
            max_points_per_bin,
        )
        bp_cuda = _C._rasterize_points_coarse(*args)
        # Bin points might not be the same: CUDA version might write them in
        # any order. But if we sort the non-(-1) elements of the CUDA output
        # then they should be the same.
        for n in range(N):
            for by in range(bp_cpu.shape[1]):
                for bx in range(bp_cpu.shape[2]):
                    K = (bp_cpu[n, by, bx] != -1).sum().item()
                    idxs_cpu = bp_cpu[n, by, bx].tolist()
                    idxs_cuda = bp_cuda[n, by, bx].tolist()
                    idxs_cuda[:K] = sorted(idxs_cuda[:K])
                    self.assertEqual(idxs_cpu, idxs_cuda)

    def _test_coarse_rasterize(self, device):
        #
        # Note that +Y is up and +X is left in the diagram below.
        #
        #   (4)            |2
        #                  |
        #                  |
        #                  |
        #                  |1
        #                  |
        #  (1)             |
        #                  |   (2)
        # ____________(0)__(5)___________________
        # 2        1       |      -1          -2
        #                  |
        #     (3)          |
        #                  |
        #                  |-1
        #                  |
        #
        # Locations of the points are shown by their index in parentheses.
        # The screen bounding box is between [-1, 1] in both the x and y
        # directions.
        #
        # These points are interesting because:
        # (0) Falls into two bins;
        # (1) and (2) fall into one bin;
        # (3) is out-of-bounds, but its disk is in-bounds;
        # (4) is out-of-bounds, and its entire disk is also out-of-bounds
        # (5) has a negative z-value, so it should be skipped
        # fmt: off
        points = torch.tensor(
            [
                [ 0.5,  0.0,  0.0],  # noqa: E241, E201
                [ 0.5,  0.5,  0.1],  # noqa: E241, E201
                [-0.3,  0.4,  0.0],  # noqa: E241
                [ 1.1, -0.5,  0.2],  # noqa: E241, E201
                [ 2.0,  2.0,  0.3],  # noqa: E241, E201
                [ 0.0,  0.0, -0.1],  # noqa: E241, E201
            ],
            device=device
        )
        # fmt: on
        image_size = 16
        radius = 0.2
        bin_size = 8
        max_points_per_bin = 5
        bin_points_expected = -1 * torch.ones(
            1, 2, 2, 5, dtype=torch.int32, device=device
        )
        # Note that the order is only deterministic here for CUDA if all points
        # fit in one chunk. This will be the case for this small example, but
        # to properly exercise coordinated writes among multiple chunks we need
        # to use a bigger test case.
        bin_points_expected[0, 1, 0, :2] = torch.tensor([0, 3])
        bin_points_expected[0, 0, 1, 0] = torch.tensor([2])
        bin_points_expected[0, 0, 0, :2] = torch.tensor([0, 1])
        pointclouds = Pointclouds(points=[points])
        args = (
            pointclouds.points_packed(),
            pointclouds.cloud_to_packed_first_idx(),
            pointclouds.num_points_per_cloud(),
            image_size,
            radius,
            bin_size,
            max_points_per_bin,
        )
        bin_points = _C._rasterize_points_coarse(*args)
        bin_points_same = (bin_points == bin_points_expected).all()
        self.assertTrue(bin_points_same.item() == 1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment