Commit cb495504 authored by Krzysztof Chalupka, committed by Facebook GitHub Bot

Add MeshRasterizerOpenGL

Summary:
Adding MeshRasterizerOpenGL, a faster alternative to MeshRasterizer. The new rasterizer follows the ideas from "Differentiable Surface Rendering via non-Differentiable Sampling".

The new rasterizer is 20x faster on a 2M-face mesh (try pose optimization on Nefertiti from https://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/!). The larger the mesh, the larger the speedup.

There are two main disadvantages:
* The new rasterizer uses an OpenGL backend, so it requires pycuda.gl and pyopengl to be installed (though we avoided writing any C++ code; everything is in Python!).
* The new rasterizer is non-differentiable. However, you can still differentiate the rendering function if you use it with the new SplatterPhongShader, which we recently added to PyTorch3D (see the original paper cited above); a minimal usage sketch follows below.
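
A minimal usage sketch (not part of this diff; the constructor arguments are assumptions based on the existing MeshRasterizer and shader APIs):

```python
# Sketch only: pair the non-differentiable MeshRasterizerOpenGL with
# SplatterPhongShader so gradients still flow to the mesh vertices.
import torch
from pytorch3d.renderer import (
    FoVPerspectiveCameras,
    MeshRenderer,
    PointLights,
    RasterizationSettings,
    SplatterPhongShader,
)
from pytorch3d.renderer.opengl import MeshRasterizerOpenGL

device = torch.device("cuda:0")
cameras = FoVPerspectiveCameras(device=device)
raster_settings = RasterizationSettings(image_size=512)
renderer = MeshRenderer(
    rasterizer=MeshRasterizerOpenGL(cameras=cameras, raster_settings=raster_settings),
    shader=SplatterPhongShader(
        device=device, cameras=cameras, lights=PointLights(device=device)
    ),
)
# images = renderer(meshes)  # meshes: a pytorch3d.structures.Meshes on the same device
```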

Reviewed By: patricklabatut, jcjohnson

Differential Revision: D37698816

fbshipit-source-id: 54d120639d3cb001f096237807e54aced0acda25
parent 36edf2b3
......@@ -66,7 +66,7 @@ from .mesh import (
)
try:
from .opengl import EGLContext, global_device_context_store
from .opengl import EGLContext, global_device_context_store, MeshRasterizerOpenGL
except (ImportError, ModuleNotFoundError):
pass # opengl or pycuda.gl not available, or pytorch3_opengl not in TARGETS.
......
......@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from .clip import (
clip_faces,
ClipFrustum,
......
......@@ -11,6 +11,8 @@ import numpy as np
import torch
from pytorch3d import _C
from ..utils import parse_image_size
from .clip import (
clip_faces,
ClipFrustum,
......@@ -149,20 +151,8 @@ def rasterize_meshes(
# If the ratio of H:W is large this might cause issues as the smaller
# dimension will have fewer bins.
# TODO: consider a better way of setting the bin size.
if isinstance(image_size, (tuple, list)):
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError(
"Image sizes must be greater than 0; got %d, %d" % image_size
)
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % image_size)
max_image_size = max(*image_size)
im_size = image_size
else:
im_size = (image_size, image_size)
max_image_size = image_size
im_size = parse_image_size(image_size)
max_image_size = max(*im_size)
clipped_faces_neighbor_idx = None
......
......@@ -57,14 +57,14 @@ class Fragments:
pix_to_face: torch.Tensor
zbuf: torch.Tensor
bary_coords: torch.Tensor
dists: torch.Tensor
dists: Optional[torch.Tensor]
def detach(self) -> "Fragments":
return Fragments(
pix_to_face=self.pix_to_face,
zbuf=self.zbuf.detach(),
bary_coords=self.bary_coords.detach(),
dists=self.dists.detach(),
dists=self.dists.detach() if self.dists is not None else self.dists,
)
......@@ -85,6 +85,8 @@ class RasterizationSettings:
bin_size=0 uses naive rasterization; setting bin_size=None attempts
to set it heuristically based on the shape of the input. This should
not affect the output, but can affect the speed of the forward pass.
max_faces_opengl: Max number of faces in any mesh we will rasterize. Used only by
MeshRasterizerOpenGL to pre-allocate OpenGL memory.
max_faces_per_bin: Only applicable when using coarse-to-fine
rasterization (bin_size != 0); this is the maximum number of faces
allowed within each bin. This should not affect the output values,
......@@ -122,6 +124,7 @@ class RasterizationSettings:
blur_radius: float = 0.0
faces_per_pixel: int = 1
bin_size: Optional[int] = None
max_faces_opengl: int = 10_000_000
max_faces_per_bin: Optional[int] = None
perspective_correct: Optional[bool] = None
clip_barycentric_coords: Optional[bool] = None
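
For illustration, a hedged sketch (not from this diff) of raising the new `max_faces_opengl` field for very large meshes; the other values are arbitrary:

```python
from pytorch3d.renderer import RasterizationSettings

# MeshRasterizerOpenGL pre-allocates OpenGL buffers sized by max_faces_opengl,
# so raise it above the default 10_000_000 when rasterizing larger meshes.
settings = RasterizationSettings(
    image_size=(1024, 1024),
    faces_per_pixel=1,
    max_faces_opengl=20_000_000,
)
```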
......@@ -237,6 +240,10 @@ class MeshRasterizer(nn.Module):
znear = znear.min().item()
z_clip = None if not perspective_correct or znear is None else znear / 2
# By default, turn on clip_barycentric_coords if blur_radius > 0.
# When blur_radius > 0, a face can be matched to a pixel that is outside the
# face, resulting in negative barycentric coordinates.
pix_to_face, zbuf, bary_coords, dists = rasterize_meshes(
meshes_proj,
image_size=raster_settings.image_size,
......@@ -250,6 +257,10 @@ class MeshRasterizer(nn.Module):
z_clip_value=z_clip,
cull_to_frustum=raster_settings.cull_to_frustum,
)
return Fragments(
pix_to_face=pix_to_face, zbuf=zbuf, bary_coords=bary_coords, dists=dists
pix_to_face=pix_to_face,
zbuf=zbuf,
bary_coords=bary_coords,
dists=dists,
)
......@@ -349,6 +349,9 @@ class SplatterPhongShader(ShaderBase):
N, H, W, K, _ = colors.shape
self.splatter_blender = SplatterBlender((N, H, W, K), colors.device)
blend_params = kwargs.get("blend_params", self.blend_params)
self.check_blend_params(blend_params)
images = self.splatter_blender(
colors,
pixel_coords_cameras,
......@@ -359,6 +362,14 @@ class SplatterPhongShader(ShaderBase):
return images
def check_blend_params(self, blend_params):
if blend_params.sigma != 0.5:
warnings.warn(
f"SplatterPhongShader received sigma={blend_params.sigma}. sigma is "
"defined in pixel units, and any value other than 0.5 is highly "
"unexpected. Only use other values if you know what you are doing. "
)
class HardDepthShader(ShaderBase):
"""
......
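
To avoid the new warning above, construct the blend parameters with sigma in pixel units. A sketch, assuming SplatterPhongShader accepts the usual ShaderBase constructor arguments:

```python
from pytorch3d.renderer import BlendParams, SplatterPhongShader

# sigma is interpreted in pixel units by the splatting blender; 0.5 is the
# expected value and keeps check_blend_params silent.
blend_params = BlendParams(sigma=0.5, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
shader = SplatterPhongShader(device="cuda:0", blend_params=blend_params)
```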
......@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import List, NamedTuple, Tuple
import torch
......
......@@ -32,5 +32,6 @@ def _can_import_egl_and_pycuda():
if _can_import_egl_and_pycuda():
from .opengl_utils import EGLContext, global_device_context_store
from .rasterizer_opengl import MeshRasterizerOpenGL
__all__ = [k for k in globals().keys() if not k.startswith("_")]
......@@ -224,7 +224,9 @@ class EGLContext:
"""
self.lock.acquire()
egl.eglMakeCurrent(self.dpy, self.surface, self.surface, self.context)
try:
yield
finally:
egl.eglMakeCurrent(
self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT
)
......@@ -418,5 +420,29 @@ def _init_cuda_context(device_id: int = 0):
return cuda_context
def _torch_to_opengl(torch_tensor, cuda_context, cuda_buffer):
# CUDA access to the OpenGL buffer is only allowed within a map-unmap block.
cuda_context.push()
mapping_obj = cuda_buffer.map()
# data_ptr points to the OpenGL shader storage buffer memory.
data_ptr, sz = mapping_obj.device_ptr_and_size()
# Copy the torch tensor to the OpenGL buffer directly on device.
cuda_copy = cuda.Memcpy2D()
cuda_copy.set_src_device(torch_tensor.data_ptr())
cuda_copy.set_dst_device(data_ptr)
cuda_copy.width_in_bytes = cuda_copy.src_pitch = cuda_copy.dst_pitch = (
torch_tensor.shape[1] * 4
)
cuda_copy.height = torch_tensor.shape[0]
cuda_copy(False)
# Unmap and pop the cuda context to make sure OpenGL won't interfere with
# PyTorch ops down the line.
mapping_obj.unmap()
cuda_context.pop()
# Initialize a global _DeviceContextStore. Almost always we will only need a single one.
global_device_context_store = _DeviceContextStore()
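
For context, a hypothetical setup sketch (not part of this commit): creating an OpenGL shader storage buffer and registering it with pycuda so that `_torch_to_opengl` can copy a float32 tensor into it. The helper name and buffer usage flags are assumptions, and a live EGL/OpenGL context plus a pushed CUDA context are required.

```python
import pycuda.gl
from OpenGL import GL as gl


def make_registered_ssbo(num_floats):
    # Hypothetical helper: allocate a GL shader storage buffer (4 bytes per
    # float32) and register it with CUDA; the cuda_buffer.map() call above
    # relies on this registration.
    buffer_id = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_SHADER_STORAGE_BUFFER, buffer_id)
    gl.glBufferData(
        gl.GL_SHADER_STORAGE_BUFFER, num_floats * 4, None, gl.GL_DYNAMIC_COPY
    )
    gl.glBindBuffer(gl.GL_SHADER_STORAGE_BUFFER, 0)
    return buffer_id, pycuda.gl.RegisteredBuffer(int(buffer_id))
```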
......@@ -11,6 +11,8 @@ import torch
from pytorch3d import _C
from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_non_square_ndc
from ..utils import parse_image_size
# Maximum number of faces per bin for
# coarse-to-fine rasterization
......@@ -102,20 +104,8 @@ def rasterize_points(
# If the ratio of H:W is large this might cause issues as the smaller
# dimension will have fewer bins.
# TODO: consider a better way of setting the bin size.
if isinstance(image_size, (tuple, list)):
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError(
"Image sizes must be greater than 0; got %d, %d" % image_size
)
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % image_size)
max_image_size = max(*image_size)
im_size = image_size
else:
im_size = (image_size, image_size)
max_image_size = image_size
im_size = parse_image_size(image_size)
max_image_size = max(*im_size)
if bin_size is None:
if not points_packed.is_cuda:
......
......@@ -8,7 +8,7 @@
import copy
import inspect
import warnings
from typing import Any, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union
import numpy as np
import torch
......@@ -432,3 +432,27 @@ def ndc_to_grid_sample_coords(
else:
xy_grid_sample[..., 0] *= aspect
return xy_grid_sample
def parse_image_size(
image_size: Union[List[int], Tuple[int, int], int]
) -> Tuple[int, int]:
"""
Args:
image_size: A single int (for square images) or a tuple/list of two ints.
Returns:
A tuple of two ints.
Raises:
ValueError if given more than two values, any non-positive values, or non-integers.
"""
if not isinstance(image_size, (tuple, list)):
return (image_size, image_size)
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError("Image sizes must be greater than 0; got %d, %d" % tuple(image_size))
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % tuple(image_size))
return tuple(image_size)
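
A quick illustrative check of the accepted inputs (the module path is an assumption based on the relative imports above):

```python
from pytorch3d.renderer.utils import parse_image_size

assert parse_image_size(512) == (512, 512)         # single int -> square image
assert parse_image_size((256, 128)) == (256, 128)  # (H, W) pair passed through
# parse_image_size((256, 128, 3))                  # would raise ValueError
```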