Commit cb495504 authored by Krzysztof Chalupka, committed by Facebook GitHub Bot

Add MeshRasterizerOpenGL

Summary:
Adding MeshRasterizerOpenGL, a faster alternative to MeshRasterizer. The new rasterizer follows the ideas from "Differentiable Surface Rendering via non-Differentiable Sampling".

The new rasterizer is 20x faster on a 2M-face mesh (try pose optimization on Nefertiti from https://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/!). The larger the mesh, the larger the speedup.

There are two main disadvantages:
* The new rasterizer uses an OpenGL backend, so it requires pycuda.gl and pyopengl to be installed (though we avoided writing any C++ code; everything is in Python!).
* The new rasterizer is non-differentiable. However, you can still differentiate the rendering function if you use it with the new SplatterPhongShader, which we recently added to PyTorch3D (see the original paper cited above); a minimal usage sketch follows below.
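
A minimal usage sketch (not part of this diff; the constructor arguments are assumptions based on the existing MeshRasterizer and shader APIs):

```python
# Sketch only: pair the non-differentiable MeshRasterizerOpenGL with
# SplatterPhongShader so gradients still flow to the mesh vertices.
import torch
from pytorch3d.renderer import (
    FoVPerspectiveCameras,
    MeshRenderer,
    PointLights,
    RasterizationSettings,
    SplatterPhongShader,
)
from pytorch3d.renderer.opengl import MeshRasterizerOpenGL

device = torch.device("cuda:0")
cameras = FoVPerspectiveCameras(device=device)
raster_settings = RasterizationSettings(image_size=512)
renderer = MeshRenderer(
    rasterizer=MeshRasterizerOpenGL(cameras=cameras, raster_settings=raster_settings),
    shader=SplatterPhongShader(
        device=device, cameras=cameras, lights=PointLights(device=device)
    ),
)
# images = renderer(meshes)  # meshes: a pytorch3d.structures.Meshes on the same device
```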

Reviewed By: patricklabatut, jcjohnson

Differential Revision: D37698816

fbshipit-source-id: 54d120639d3cb001f096237807e54aced0acda25
parent 36edf2b3
......@@ -66,7 +66,7 @@ from .mesh import (
)
try:
from .opengl import EGLContext, global_device_context_store
from .opengl import EGLContext, global_device_context_store, MeshRasterizerOpenGL
except (ImportError, ModuleNotFoundError):
pass # opengl or pycuda.gl not available, or pytorch3_opengl not in TARGETS.
......
......@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from .clip import (
clip_faces,
ClipFrustum,
......
......@@ -11,6 +11,8 @@ import numpy as np
import torch
from pytorch3d import _C
from ..utils import parse_image_size
from .clip import (
clip_faces,
ClipFrustum,
......@@ -149,20 +151,8 @@ def rasterize_meshes(
# If the ratio of H:W is large this might cause issues as the smaller
# dimension will have fewer bins.
# TODO: consider a better way of setting the bin size.
if isinstance(image_size, (tuple, list)):
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError(
"Image sizes must be greater than 0; got %d, %d" % image_size
)
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % image_size)
max_image_size = max(*image_size)
im_size = image_size
else:
im_size = (image_size, image_size)
max_image_size = image_size
im_size = parse_image_size(image_size)
max_image_size = max(*im_size)
clipped_faces_neighbor_idx = None
......
......@@ -57,14 +57,14 @@ class Fragments:
pix_to_face: torch.Tensor
zbuf: torch.Tensor
bary_coords: torch.Tensor
dists: torch.Tensor
dists: Optional[torch.Tensor]
def detach(self) -> "Fragments":
return Fragments(
pix_to_face=self.pix_to_face,
zbuf=self.zbuf.detach(),
bary_coords=self.bary_coords.detach(),
dists=self.dists.detach(),
dists=self.dists.detach() if self.dists is not None else self.dists,
)
......@@ -85,6 +85,8 @@ class RasterizationSettings:
bin_size=0 uses naive rasterization; setting bin_size=None attempts
to set it heuristically based on the shape of the input. This should
not affect the output, but can affect the speed of the forward pass.
max_faces_opengl: Max number of faces in any mesh we will rasterize. Used only by
MeshRasterizerOpenGL to pre-allocate OpenGL memory.
max_faces_per_bin: Only applicable when using coarse-to-fine
rasterization (bin_size != 0); this is the maximum number of faces
allowed within each bin. This should not affect the output values,
......@@ -122,6 +124,7 @@ class RasterizationSettings:
blur_radius: float = 0.0
faces_per_pixel: int = 1
bin_size: Optional[int] = None
max_faces_opengl: int = 10_000_000
max_faces_per_bin: Optional[int] = None
perspective_correct: Optional[bool] = None
clip_barycentric_coords: Optional[bool] = None
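
For illustration, a hedged sketch (not from this diff) of raising the new `max_faces_opengl` field for very large meshes; the other values are arbitrary:

```python
from pytorch3d.renderer import RasterizationSettings

# MeshRasterizerOpenGL pre-allocates OpenGL buffers sized by max_faces_opengl,
# so raise it above the default 10_000_000 when rasterizing larger meshes.
settings = RasterizationSettings(
    image_size=(1024, 1024),
    faces_per_pixel=1,
    max_faces_opengl=20_000_000,
)
```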
......@@ -237,6 +240,10 @@ class MeshRasterizer(nn.Module):
znear = znear.min().item()
z_clip = None if not perspective_correct or znear is None else znear / 2
# By default, turn on clip_barycentric_coords if blur_radius > 0.
# When blur_radius > 0, a face can be matched to a pixel that is outside the
# face, resulting in negative barycentric coordinates.
pix_to_face, zbuf, bary_coords, dists = rasterize_meshes(
meshes_proj,
image_size=raster_settings.image_size,
......@@ -250,6 +257,10 @@ class MeshRasterizer(nn.Module):
z_clip_value=z_clip,
cull_to_frustum=raster_settings.cull_to_frustum,
)
return Fragments(
pix_to_face=pix_to_face, zbuf=zbuf, bary_coords=bary_coords, dists=dists
pix_to_face=pix_to_face,
zbuf=zbuf,
bary_coords=bary_coords,
dists=dists,
)
......@@ -349,6 +349,9 @@ class SplatterPhongShader(ShaderBase):
N, H, W, K, _ = colors.shape
self.splatter_blender = SplatterBlender((N, H, W, K), colors.device)
blend_params = kwargs.get("blend_params", self.blend_params)
self.check_blend_params(blend_params)
images = self.splatter_blender(
colors,
pixel_coords_cameras,
......@@ -359,6 +362,14 @@ class SplatterPhongShader(ShaderBase):
return images
def check_blend_params(self, blend_params):
if blend_params.sigma != 0.5:
warnings.warn(
f"SplatterPhongShader received sigma={blend_params.sigma}. sigma is "
"defined in pixel units, and any value other than 0.5 is highly "
"unexpected. Only use other values if you know what you are doing. "
)
class HardDepthShader(ShaderBase):
"""
......
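
To avoid the new warning above, construct the blend parameters with sigma in pixel units. A sketch, assuming SplatterPhongShader accepts the usual ShaderBase constructor arguments:

```python
from pytorch3d.renderer import BlendParams, SplatterPhongShader

# sigma is interpreted in pixel units by the splatting blender; 0.5 is the
# expected value and keeps check_blend_params silent.
blend_params = BlendParams(sigma=0.5, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
shader = SplatterPhongShader(device="cuda:0", blend_params=blend_params)
```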
......@@ -4,7 +4,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import List, NamedTuple, Tuple
import torch
......
......@@ -32,5 +32,6 @@ def _can_import_egl_and_pycuda():
if _can_import_egl_and_pycuda():
from .opengl_utils import EGLContext, global_device_context_store
from .rasterizer_opengl import MeshRasterizerOpenGL
__all__ = [k for k in globals().keys() if not k.startswith("_")]
......@@ -224,7 +224,9 @@ class EGLContext:
"""
self.lock.acquire()
egl.eglMakeCurrent(self.dpy, self.surface, self.surface, self.context)
try:
yield
finally:
egl.eglMakeCurrent(
self.dpy, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT
)
......@@ -418,5 +420,29 @@ def _init_cuda_context(device_id: int = 0):
return cuda_context
def _torch_to_opengl(torch_tensor, cuda_context, cuda_buffer):
# CUDA access to the OpenGL buffer is only allowed within a map-unmap block.
cuda_context.push()
mapping_obj = cuda_buffer.map()
# data_ptr points to the OpenGL shader storage buffer memory.
data_ptr, sz = mapping_obj.device_ptr_and_size()
# Copy the torch tensor to the OpenGL buffer directly on device.
cuda_copy = cuda.Memcpy2D()
cuda_copy.set_src_device(torch_tensor.data_ptr())
cuda_copy.set_dst_device(data_ptr)
cuda_copy.width_in_bytes = cuda_copy.src_pitch = cuda_copy.dst_pitch = (
torch_tensor.shape[1] * 4
)
cuda_copy.height = torch_tensor.shape[0]
cuda_copy(False)
# Unmap and pop the cuda context to make sure OpenGL won't interfere with
# PyTorch ops down the line.
mapping_obj.unmap()
cuda_context.pop()
# Initialize a global _DeviceContextStore. Almost always we will only need a single one.
global_device_context_store = _DeviceContextStore()
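
For context, a hypothetical setup sketch (not part of this commit): creating an OpenGL shader storage buffer and registering it with pycuda so that `_torch_to_opengl` can copy a float32 tensor into it. The helper name and buffer usage flags are assumptions, and a live EGL/OpenGL context plus a pushed CUDA context are required.

```python
import pycuda.gl
from OpenGL import GL as gl


def make_registered_ssbo(num_floats):
    # Hypothetical helper: allocate a GL shader storage buffer (4 bytes per
    # float32) and register it with CUDA; the cuda_buffer.map() call above
    # relies on this registration.
    buffer_id = gl.glGenBuffers(1)
    gl.glBindBuffer(gl.GL_SHADER_STORAGE_BUFFER, buffer_id)
    gl.glBufferData(
        gl.GL_SHADER_STORAGE_BUFFER, num_floats * 4, None, gl.GL_DYNAMIC_COPY
    )
    gl.glBindBuffer(gl.GL_SHADER_STORAGE_BUFFER, 0)
    return buffer_id, pycuda.gl.RegisteredBuffer(int(buffer_id))
```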
......@@ -11,6 +11,8 @@ import torch
from pytorch3d import _C
from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_non_square_ndc
from ..utils import parse_image_size
# Maximum number of faces per bin for
# coarse-to-fine rasterization
......@@ -102,20 +104,8 @@ def rasterize_points(
# If the ratio of H:W is large this might cause issues as the smaller
# dimension will have fewer bins.
# TODO: consider a better way of setting the bin size.
if isinstance(image_size, (tuple, list)):
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError(
"Image sizes must be greater than 0; got %d, %d" % image_size
)
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % image_size)
max_image_size = max(*image_size)
im_size = image_size
else:
im_size = (image_size, image_size)
max_image_size = image_size
im_size = parse_image_size(image_size)
max_image_size = max(*im_size)
if bin_size is None:
if not points_packed.is_cuda:
......
......@@ -8,7 +8,7 @@
import copy
import inspect
import warnings
from typing import Any, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union
import numpy as np
import torch
......@@ -432,3 +432,27 @@ def ndc_to_grid_sample_coords(
else:
xy_grid_sample[..., 0] *= aspect
return xy_grid_sample
def parse_image_size(
image_size: Union[List[int], Tuple[int, int], int]
) -> Tuple[int, int]:
"""
Args:
image_size: A single int (for square images) or a tuple/list of two ints.
Returns:
A tuple of two ints.
Raises:
ValueError if given more than two values, any non-positive values, or non-integers.
"""
if not isinstance(image_size, (tuple, list)):
return (image_size, image_size)
if len(image_size) != 2:
raise ValueError("Image size can only be a tuple/list of (H, W)")
if not all(i > 0 for i in image_size):
raise ValueError("Image sizes must be greater than 0; got %d, %d" % tuple(image_size))
if not all(type(i) == int for i in image_size):
raise ValueError("Image sizes must be integers; got %f, %f" % tuple(image_size))
return tuple(image_size)
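
A quick illustrative check of the accepted inputs (the module path is an assumption based on the relative imports above):

```python
from pytorch3d.renderer.utils import parse_image_size

assert parse_image_size(512) == (512, 512)         # single int -> square image
assert parse_image_size((256, 128)) == (256, 128)  # (H, W) pair passed through
# parse_image_size((256, 128, 3))                  # would raise ValueError
```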