fix default settings for point rasterization and update benchmark

Summary: Fixes the default setting of `max_points_per_bin` in `rasterize_points.py`. For large batches of large pointclouds, the previous default was causing the rasterizer to be very slow. Expanded the pointcloud rendering benchmarks to include larger pointclouds and fixed a CUDA synchronization issue in the benchmark. Reviewed By: gkioxari Differential Revision: D22301185 fbshipit-source-id: 5077c1ba2c43d73efc1c659f0ec75959ceddf893

fix default settings for point rasterization and update benchmark
Summary: Fixes the default setting of `max_points_per_bin` in `rasterize_points.py`. For large batches of large pointclouds, the previous default was causing the rasterizer to be very slow. Expanded the pointcloud rendering benchmarks to include larger pointclouds and fixed a CUDA synchronization issue in the benchmark. Reviewed By: gkioxari Differential Revision: D22301185 fbshipit-source-id: 5077c1ba2c43d73efc1c659f0ec75959ceddf893
88f57938 · Nikhila Ravi · Facebook GitHub Bot · b636f295 · 88f57938 · 88f57938
Commit 88f57938 authored Jun 30, 2020 by Nikhila Ravi Committed by Facebook GitHub Bot Jun 30, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 3 deletions

pytorch3d/renderer/points/rasterize_points.py pytorch3d/renderer/points/rasterize_points.py +1 -1

tests/bm_rasterize_points.py tests/bm_rasterize_points.py +14 -2

No files found.
--- a/pytorch3d/renderer/points/rasterize_points.py
+++ b/pytorch3d/renderer/points/rasterize_points.py
@@ -98,7 +98,7 @@ def rasterize_points(
            )
    if max_points_per_bin is None:
-        max_points_per_bin = int(max(10000, points_packed.shape[0] / 5))
+        max_points_per_bin = int(max(10000, pointclouds._P / 5))
    # Function.apply cannot take keyword args, so we handle defaults in this
    # wrapper and call apply with positional args only

--- a/tests/bm_rasterize_points.py
+++ b/tests/bm_rasterize_points.py
@@ -28,10 +28,17 @@ def _bm_cpu_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
 def _bm_cuda_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
    torch.manual_seed(231)
-    points = torch.randn(N, P, 3, device=torch.device("cuda"))
+    device = torch.device("cuda:0")
+    points = torch.randn(N, P, 3, device=device)
    pointclouds = Pointclouds(points=points)
    args = (pointclouds, img_size, radius, pts_per_pxl)
-    return lambda: rasterize_points(*args)
+    torch.cuda.synchronize(device)
+    def fn():
+        rasterize_points(*args)
+        torch.cuda.synchronize(device)
+    return fn
 def bm_python_vs_cpu() -> None:
@@ -46,4 +53,9 @@ def bm_python_vs_cpu() -> None:
        {"N": 4, "P": 1024, "img_size": 128, "radius": 0.05, "pts_per_pxl": 5},
    ]
    benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", kwargs_list, warmup_iters=1)
+    kwargs_list += [
+        {"N": 32, "P": 10000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50},
+        {"N": 32, "P": 100000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50},
+        {"N": 8, "P": 200000, "img_size": 512, "radius": 0.01, "pts_per_pxl": 50},
+    ]
    benchmark(_bm_cuda_with_init, "RASTERIZE_CUDA", kwargs_list, warmup_iters=1)