"...text-generation-inference.git" did not exist on "e6d3eb5d5d257fb20caf1f86fb3cd2ef530fb555"
Commit 88f57938 authored by Nikhila Ravi's avatar Nikhila Ravi Committed by Facebook GitHub Bot
Browse files

fix default settings for point rasterization and update benchmark

Summary:
Fixes the default setting of `max_points_per_bin` in `rasterize_points.py`. For large batches with large pointclouds, this was causing the rasterizer to be very slow.

Expanded the pointcloud rendering benchmarks to include larger pointclouds and fixed a CUDA synchronization issue in the benchmark.

Reviewed By: gkioxari

Differential Revision: D22301185

fbshipit-source-id: 5077c1ba2c43d73efc1c659f0ec75959ceddf893
parent b636f295
...@@ -98,7 +98,7 @@ def rasterize_points( ...@@ -98,7 +98,7 @@ def rasterize_points(
) )
if max_points_per_bin is None: if max_points_per_bin is None:
max_points_per_bin = int(max(10000, points_packed.shape[0] / 5)) max_points_per_bin = int(max(10000, pointclouds._P / 5))
# Function.apply cannot take keyword args, so we handle defaults in this # Function.apply cannot take keyword args, so we handle defaults in this
# wrapper and call apply with positional args only # wrapper and call apply with positional args only
......
...@@ -28,10 +28,17 @@ def _bm_cpu_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3): ...@@ -28,10 +28,17 @@ def _bm_cpu_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
def _bm_cuda_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3): def _bm_cuda_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
torch.manual_seed(231) torch.manual_seed(231)
points = torch.randn(N, P, 3, device=torch.device("cuda")) device = torch.device("cuda:0")
points = torch.randn(N, P, 3, device=device)
pointclouds = Pointclouds(points=points) pointclouds = Pointclouds(points=points)
args = (pointclouds, img_size, radius, pts_per_pxl) args = (pointclouds, img_size, radius, pts_per_pxl)
return lambda: rasterize_points(*args) torch.cuda.synchronize(device)
def fn():
rasterize_points(*args)
torch.cuda.synchronize(device)
return fn
def bm_python_vs_cpu() -> None: def bm_python_vs_cpu() -> None:
...@@ -46,4 +53,9 @@ def bm_python_vs_cpu() -> None: ...@@ -46,4 +53,9 @@ def bm_python_vs_cpu() -> None:
{"N": 4, "P": 1024, "img_size": 128, "radius": 0.05, "pts_per_pxl": 5}, {"N": 4, "P": 1024, "img_size": 128, "radius": 0.05, "pts_per_pxl": 5},
] ]
benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", kwargs_list, warmup_iters=1) benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", kwargs_list, warmup_iters=1)
kwargs_list += [
{"N": 32, "P": 10000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50},
{"N": 32, "P": 100000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50},
{"N": 8, "P": 200000, "img_size": 512, "radius": 0.01, "pts_per_pxl": 50},
]
benchmark(_bm_cuda_with_init, "RASTERIZE_CUDA", kwargs_list, warmup_iters=1) benchmark(_bm_cuda_with_init, "RASTERIZE_CUDA", kwargs_list, warmup_iters=1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment