"test/vscode:/vscode.git/clone" did not exist on "05ae795ae51c5f72ef7747cd3c06fadccff97e5a"
Unverified Commit 8340e19d authored by Ruilong Li(李瑞龙)'s avatar Ruilong Li(李瑞龙) Committed by GitHub
Browse files

0.5.0: Rewrite all the underlying CUDA. Speedup and Benchmarking. (#182)

* importance_sampling with test

* package importance_sampling

* compute_intervals tested and packaged

* compute_intervals_v2

* bicycle is failing

* fix cut in compute_intervals_v2, test pass for rendering

* hacky way to get opaque_bkgd work

* reorg ING

* PackedRaySegmentsSpec

* chunk_ids -> ray_ids

* binary -> occupied

* test_traverse_grid_basic checked

* fix traverse_grid with step size, checked

* support max_step_size, not verified

* _cuda and cuda; upgrade ray_marching

* inclusive scan

* test_exclusive_sum but seems to have numeric error

* inclusive_sum_backward verified

* exclusive sum backward

* merge fwd and bwd for scan

* inclusive & exclusive prod verified

* support normal scan with torch funcs

* rendering and tests

* a bit clean up

* importance_sampling verified

* stratified for importance_sampling

* importance_sampling in pdf.py

* RaySegmentsSpec in data_specs; fix various bugs

* verified with _proposal_packed.py

* importance sampling support batch input/output. need to verify

* prop script with batch samples

* try to use cumsum  instead of cumprod

* searchsorted

* benchmarking prop

* ray_aabb_intersect untested

* update prop benchmark numbers

* minor fixes

* batched ray_aabb_intersect

* ray_aabb_intersect and traverse with grid(s)

* tiny optimize for traverse_grids kernels

* traverse_grids return intervals and samples

* cub not verified

* cleanup

* propnet and occgrid as estimators

* training print iters 10k

* prop is good now

* benchmark in google sheet.

* really cleanup: scan.py and test

* pack.py and test

* rendering and test

* data_specs.py and pdf.py docs

* data_specs.py and pdf.py docs

* init and headers

* grid.py and test for it

* occ grid docs

* generated docs

* example docs for pack and scan function.

* doc fix for volrend.py

* doc fix for pdf.py

* fix doc for rendering function

* docs

* propnet docs

* update scripts

* docs: index.rst

* methodology docs

* docs for examples

* mlp nerf script

* update t-nerf script

* rename dnerf to tnerf

* misc update

* bug fix: pdf_loss with test

* minor fix

* update readme with submodules

* fix format

* update gitignore file

* fix doc failure. teaser png to jpg

* docs in examples/
parent e547490c
#!/usr/bin/env python3
#
# File : prop_utils.py
# Author : Hang Gao
# Email : hangg.sv7@gmail.com
# Date : 02/19/2023
#
# Distributed under terms of the MIT license.
from typing import Callable, Literal, Optional, Sequence, Tuple
import torch
import torch.nn.functional as F
from .intersection import ray_aabb_intersect
from .pdf import pdf_outer, pdf_sampling
def sample_from_weighted(
    bins: torch.Tensor,
    weights: torch.Tensor,
    num_samples: int,
    stratified: bool = False,
    vmin: float = -torch.inf,
    vmax: float = torch.inf,
) -> torch.Tensor:
    """Draw ``num_samples + 1`` interval edges from a weighted histogram.

    Args:
        bins: Histogram edges, shape (..., B + 1).
        weights: Per-bin weights, shape (..., B).
        num_samples: Number S of samples; the output carries S + 1 edges.
        stratified: If True, jitter the sample positions (one jitter per ray).
        vmin: Lower clamp applied to the first output edge.
        vmax: Upper clamp applied to the last output edge.

    Returns:
        samples: New interval edges, shape (..., S + 1).
    """
    num_bins = weights.shape[-1]
    n = num_samples
    assert bins.shape[-1] == num_bins + 1
    dtype, device = bins.dtype, bins.device
    eps = torch.finfo(weights.dtype).eps

    # Normalize the weights into a pdf over the bins: (..., B).
    pdf = F.normalize(weights, p=1, dim=-1)
    # Cumulative distribution with explicit 0 / 1 endpoints: (..., B + 1).
    zero = torch.zeros_like(pdf[..., :1])
    one = torch.ones_like(pdf[..., :1])
    cdf = torch.cat([zero, torch.cumsum(pdf[..., :-1], dim=-1), one], dim=-1)

    # Sample positions u in [0, 1): (..., S).
    if stratified:
        # `u` can be zero, but it can never reach 1.
        u_max = eps + (1 - eps) / n
        max_jitter = (1 - u_max) / (n - 1) - eps
        base = torch.linspace(0, 1 - u_max, n, dtype=dtype, device=device)
        # Only one jitter per ray (`single_jitter` in the original
        # implementation).
        jitter = torch.rand(*bins.shape[:-1], 1, dtype=dtype, device=device)
        u = base + jitter * max_jitter
    else:
        # Deterministic: take the center of each of the S cdf bins.
        pad = 1 / (2 * n)
        u = torch.linspace(pad, 1 - pad - eps, n, dtype=dtype, device=device)
        u = u.broadcast_to(bins.shape[:-1] + (n,))

    # Invert the cdf: find the enclosing cdf interval per sample. (..., S).
    ceil = torch.searchsorted(cdf.contiguous(), u.contiguous(), side="right")
    floor = ceil - 1
    # Gather both interval ends at once: (..., S * 2).
    inds = torch.cat([floor, ceil], dim=-1)
    cdf0, cdf1 = cdf.gather(-1, inds).split(n, dim=-1)
    b0, b1 = bins.gather(-1, inds).split(n, dim=-1)
    # Linear interpolation in 1D within each interval: (..., S).
    t = (u - cdf0) / torch.clamp(cdf1 - cdf0, min=eps)
    centers = b0 + t * (b1 - b0)

    # Turn S sample centers into S + 1 edges, clamping the outermost ones.
    inner = (centers[..., 1:] + centers[..., :-1]) / 2
    first = (2 * centers[..., :1] - inner[..., :1]).clamp_min(vmin)
    last = (2 * centers[..., -1:] - inner[..., -1:]).clamp_max(vmax)
    return torch.cat([first, inner, last], dim=-1)
def render_weight_from_density(
    sigmas: torch.Tensor,
    t_starts: torch.Tensor,
    t_ends: torch.Tensor,
    opaque_bkgd: bool = False,
) -> torch.Tensor:
    """Compute volume-rendering weights from densities and sample intervals.

    Args:
        sigmas: Densities, shape (..., S, 1).
        t_starts: Interval start distances, shape (..., S).
        t_ends: Interval end distances, shape (..., S).
        opaque_bkgd: If True, treat the last interval as fully opaque.

    Return:
        weights: shape (..., S).
    """
    # Optical depth per interval, sigma * delta: (..., S).
    sigma_deltas = sigmas[..., 0] * (t_ends - t_starts)
    if opaque_bkgd:
        # Force the last interval to absorb all remaining transmittance.
        inf_tail = torch.full_like(sigma_deltas[..., -1:], torch.inf)
        sigma_deltas = torch.cat([sigma_deltas[..., :-1], inf_tail], dim=-1)
    # Per-interval alpha from the optical depth.
    alphas = 1 - torch.exp(-sigma_deltas)
    # Transmittance: exp of minus the exclusive cumulative optical depth.
    accum = torch.cat(
        [
            torch.zeros_like(sigma_deltas[..., :1]),
            torch.cumsum(sigma_deltas[..., :-1], dim=-1),
        ],
        dim=-1,
    )
    trans = torch.exp(-accum)
    return alphas * trans
def render_from_weighted(
    rgbs: torch.Tensor,
    t_vals: torch.Tensor,
    weights: torch.Tensor,
    render_bkgd: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Composite colors, opacities and depths from rendering weights.

    Args:
        rgbs: Per-sample colors, shape (..., S, 3).
        t_vals: Interval edges, shape (..., S + 1, 1).
        weights: Rendering weights, shape (..., S, 1).
        render_bkgd: Background color, shape (3,). Defaults to white.

    Return:
        colors: (..., 3).
        opacities: (..., 1).
        depths: (..., 1). The naming is a bit confusing since it is actually
            the expected marching *distance* along the ray.
    """
    if render_bkgd is None:
        # Use white instead of black background by default.
        render_bkgd = torch.ones(3, dtype=rgbs.dtype, device=rgbs.device)
    eps = torch.finfo(rgbs.dtype).eps
    # Total opacity per ray: (..., 1).
    opacities = weights.sum(dim=-2)
    # Leftover transmittance goes to the background: (..., 1).
    bkgd_weights = (1 - opacities).clamp_min(0)
    # Weighted colors plus background contribution: (..., 3).
    colors = (weights * rgbs).sum(dim=-2) + bkgd_weights * render_bkgd
    # Expected distance, computed at interval midpoints: (..., S, 1).
    t_mids = (t_vals[..., 1:, :] + t_vals[..., :-1, :]) / 2
    depths = (weights * t_mids).sum(dim=-2) / opacities.clamp_min(eps)
    return colors, opacities, depths
def transform_stot(
    transform_type: Literal["uniform", "lindisp"],
    s_vals: torch.Tensor,
    t_min: torch.Tensor,
    t_max: torch.Tensor,
) -> torch.Tensor:
    """Map normalized s-space samples in [0, 1] to t-space distances.

    "uniform" interpolates linearly in distance; "lindisp" interpolates
    linearly in disparity (1 / distance).

    Raises:
        ValueError: If `transform_type` is not one of the supported values.
    """
    if transform_type == "uniform":
        fwd = inv = lambda x: x
    elif transform_type == "lindisp":
        # The disparity map is its own inverse.
        fwd = inv = lambda x: 1 / x
    else:
        raise ValueError(f"Unknown transform_type: {transform_type}")
    s_min, s_max = fwd(t_min), fwd(t_max)
    # Lerp in contracted space, then map back to t-space.
    return inv(s_vals * s_max + (1 - s_vals) * s_min)
def rendering(
    # radiance field
    rgb_sigma_fn: Callable,
    num_samples: int,
    # proposals
    prop_sigma_fns: Sequence[Callable],
    num_samples_per_prop: Sequence[int],
    # rays
    rays_o: torch.Tensor,
    rays_d: torch.Tensor,
    t_min: Optional[torch.Tensor] = None,
    t_max: Optional[torch.Tensor] = None,
    # bounding box of the scene
    scene_aabb: Optional[torch.Tensor] = None,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    stratified: bool = False,
    sampling_type: Literal["uniform", "lindisp"] = "lindisp",
    opaque_bkgd: bool = False,
    render_bkgd: Optional[torch.Tensor] = None,
    # gradient options
    proposal_requires_grad: bool = False,
    proposal_annealing: float = 1.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Tuple]:
    """Render rays through a hierarchy of proposal networks.

    Each proposal level resamples the ray (in normalized s-space) from the
    previous level's rendering weights, then the final radiance field is
    evaluated on the last set of samples and composited.

    Args:
        rgb_sigma_fn: Final radiance field; takes (t_starts, t_ends) shaped
            (N, S, 1) and returns (rgbs, sigmas).
        num_samples: Sample count for the final radiance field level.
        prop_sigma_fns: One density function per proposal level; each takes
            (t_starts, t_ends) shaped (N, S, 1) and returns sigmas.
        num_samples_per_prop: Sample count per proposal level; must match
            `prop_sigma_fns` in length.
        rays_o: Ray origins, (N, 3).
        rays_d: Ray directions, (N, 3).
        t_min: Optional per-ray minimum distance, (N,).
        t_max: Optional per-ray maximum distance, (N,).
        scene_aabb: Optional scene bounding box used to derive t_min/t_max
            when they are not given.
        near_plane: Optional clamp applied to both t_min and t_max (min=).
        far_plane: Optional clamp applied to both t_min and t_max (max=).
        stratified: Whether to jitter the s-space samples.
        sampling_type: s-to-t mapping; see `transform_stot`.
        opaque_bkgd: Treat the last interval of each ray as opaque.
        render_bkgd: Optional background color, (3,).
        proposal_requires_grad: Whether proposal-level sigmas keep gradients.
        proposal_annealing: Exponent applied to weights before resampling.

    Returns:
        (colors, opacities, depths, (weights_per_level, s_vals_per_level));
        the last element is consumed by `compute_prop_loss`.
    """
    if len(prop_sigma_fns) != len(num_samples_per_prop):
        raise ValueError(
            "`sigma_fns` and `samples_per_level` must have the same length."
        )
    if not rays_o.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    # Resolve per-ray near/far distances: explicit t_min/t_max take priority,
    # then aabb intersection, then the unbounded default [0, 1e10].
    if t_min is None or t_max is None:
        if scene_aabb is not None:
            t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
        else:
            t_min = torch.zeros_like(rays_o[..., 0])
            t_max = torch.ones_like(rays_o[..., 0]) * 1e10
    if near_plane is not None:
        t_min = torch.clamp(t_min, min=near_plane)
        t_max = torch.clamp(t_max, min=near_plane)
    if far_plane is not None:
        t_min = torch.clamp(t_min, max=far_plane)
        t_max = torch.clamp(t_max, max=far_plane)
    # Start with a single s-space interval [0, 1] per ray: (N, 2).
    s_vals = torch.cat(
        [
            torch.zeros_like(rays_o[..., :1]),
            torch.ones_like(rays_o[..., :1]),
        ],
        dim=-1,
    )
    # Uniform initial weights so the first resampling is uniform.
    weights = torch.ones_like(rays_o[..., :1])
    rgbs = t_vals = None
    weights_per_level, s_vals_per_level = [], []
    # Walk the proposal levels in order, ending with the radiance field.
    for level, (level_fn, level_samples) in enumerate(
        zip(
            prop_sigma_fns + [rgb_sigma_fn],
            num_samples_per_prop + [num_samples],
        )
    ):
        is_prop = level < len(prop_sigma_fns)
        # Annealing (exponent < 1 flattens, > 1 sharpens the histogram).
        annealed_weights = torch.pow(weights, proposal_annealing)
        # (N, S + 1). Resample edges from the previous level's weights;
        # detached so sampling positions carry no gradient.
        s_vals = sample_from_weighted(
            s_vals,
            annealed_weights,
            level_samples,
            stratified=stratified,
            vmin=0.0,
            vmax=1.0,
        ).detach()
        # s_vals = pdf_sampling(
        #     s_vals,
        #     annealed_weights,
        #     level_samples,
        #     padding=0.0,
        #     stratified=stratified,
        # ).detach()
        # Map normalized s-space edges to metric distances.
        t_vals = transform_stot(
            sampling_type, s_vals, t_min[..., None], t_max[..., None]  # type: ignore
        )
        if is_prop:
            # Proposal gradients are gated (see proposal_requires_grad).
            with torch.set_grad_enabled(proposal_requires_grad):
                # (N, S, 1).
                sigmas = level_fn(t_vals[..., :-1, None], t_vals[..., 1:, None])
        else:
            # Final level also predicts colors: (N, S, *).
            rgbs, sigmas = level_fn(
                t_vals[..., :-1, None], t_vals[..., 1:, None]
            )
        # (N, S).
        weights = render_weight_from_density(
            sigmas,
            t_vals[..., :-1],
            t_vals[..., 1:],
            opaque_bkgd=opaque_bkgd,
        )
        # Keep every level's weights/edges for the proposal loss.
        weights_per_level.append(weights)
        s_vals_per_level.append(s_vals)
    assert rgbs is not None and t_vals is not None
    # Composite the final level into colors/opacities/depths.
    rgbs, opacities, depths = render_from_weighted(
        rgbs, t_vals[..., None], weights[..., None], render_bkgd
    )
    return (
        rgbs,
        opacities,
        depths,
        (weights_per_level, s_vals_per_level),
    )
def _outer(
t0_starts: torch.Tensor,
t0_ends: torch.Tensor,
t1_starts: torch.Tensor,
t1_ends: torch.Tensor,
y1: torch.Tensor,
) -> torch.Tensor:
"""
Args:
t0_starts: (..., S0).
t0_ends: (..., S0).
t1_starts: (..., S1).
t1_ends: (..., S1).
y1: (..., S1).
"""
cy1 = torch.cat(
[torch.zeros_like(y1[..., :1]), torch.cumsum(y1, dim=-1)], dim=-1
)
idx_lo = (
torch.searchsorted(
t1_starts.contiguous(), t0_starts.contiguous(), side="right"
)
- 1
)
idx_lo = torch.clamp(idx_lo, min=0, max=y1.shape[-1] - 1)
idx_hi = torch.searchsorted(
t1_ends.contiguous(), t0_ends.contiguous(), side="right"
)
idx_hi = torch.clamp(idx_hi, min=0, max=y1.shape[-1] - 1)
cy1_lo = torch.take_along_dim(cy1[..., :-1], idx_lo, dim=-1)
cy1_hi = torch.take_along_dim(cy1[..., 1:], idx_hi, dim=-1)
y0_outer = cy1_hi - cy1_lo
return y0_outer
def _lossfun_outer(
    t: torch.Tensor, w: torch.Tensor, t_env: torch.Tensor, w_env: torch.Tensor
):
    """Proposal loss: penalize where the envelope fails to bound (t, w).

    Args:
        t: interval edges, (..., S + 1).
        w: weights, (..., S).
        t_env: interval edges of the upper-bound enveloping histogram, (..., S + 1).
        w_env: weights that should upper bound the inner (t, w) histogram, (..., S).
    """
    eps = 1e-7  # torch.finfo(t.dtype).eps
    # Envelope mass overlapping each inner interval.
    w_outer = pdf_outer(t_env, w_env, None, t, None)
    # w_outer = _outer(
    #     t[..., :-1], t[..., 1:], t_env[..., :-1], t_env[..., 1:], w_env
    # )
    # Only the excess above the envelope is penalized, normalized by w.
    excess = torch.clip(w - w_outer, min=0)
    return excess**2 / (w + eps)
def compute_prop_loss(
    s_vals_per_level: Sequence[torch.Tensor],
    weights_per_level: Sequence[torch.Tensor],
) -> torch.Tensor:
    """Sum the proposal loss of every proposal level against the final level.

    The last entry of each sequence is the (detached) final radiance-field
    level; all earlier entries are proposal levels.
    """
    target_s = s_vals_per_level[-1].detach()
    target_w = weights_per_level[-1].detach()
    per_level = [
        torch.mean(_lossfun_outer(target_s, target_w, svals, weights))
        for svals, weights in zip(
            s_vals_per_level[:-1], weights_per_level[:-1]
        )
    ]
    return sum(per_level, start=0.0)
def get_proposal_requires_grad_fn(
    target: float = 5.0, num_steps: int = 1000
) -> Callable:
    """Build a scheduler that periodically enables proposal gradients.

    The returned callable takes the training step and returns True roughly
    once every `spacing` calls, where the spacing ramps linearly from 0 up
    to `target` over `num_steps` steps.
    """
    steps_since_last_grad = 0

    def proposal_requires_grad_fn(step: int) -> bool:
        nonlocal steps_since_last_grad
        # Desired call spacing grows linearly, saturating at `target`.
        spacing = min(step / num_steps, 1.0) * target
        requires_grad = steps_since_last_grad > spacing
        if requires_grad:
            steps_since_last_grad = 0
        steps_since_last_grad += 1
        return requires_grad

    return proposal_requires_grad_fn
def get_proposal_annealing_fn(
    slop: float = 10.0, num_steps: int = 1000
) -> Callable:
    """Build an annealing schedule for the proposal weights.

    Implements eq. 18 of mip-NeRF 360 (https://arxiv.org/pdf/2111.12077.pdf):
    the annealing factor ramps from 0 to 1 over `num_steps` training steps,
    shaped by the bias strength `slop`.

    Args:
        slop: Bias strength `b` in eq. 18. Default: 10.0.
        num_steps: Number of steps over which to anneal. Default: 1000.

    Returns:
        A function mapping the training step to the annealing factor in [0, 1].
    """

    def proposal_annealing_fn(step: int) -> float:
        # Training fraction clamped to [0, 1].
        # BUGFIX: the original `max(min(x, 0), 1)` had min/max inverted and
        # always evaluated to 1, making the annealing a constant.
        train_frac = min(max(float(step) / num_steps, 0.0), 1.0)
        # https://arxiv.org/pdf/2111.12077.pdf eq. 18
        bias = lambda x, b: (b * x) / ((b - 1) * x + 1)
        return bias(train_frac, slop)

    return proposal_annealing_fn
from typing import Callable, Optional, Tuple
import torch
import nerfacc.cuda as _C
from .contraction import ContractionType
from .grid import Grid
from .intersection import ray_aabb_intersect
from .vol_rendering import render_visibility
@torch.no_grad()
def ray_marching(
    # rays
    rays_o: torch.Tensor,
    rays_d: torch.Tensor,
    t_min: Optional[torch.Tensor] = None,
    t_max: Optional[torch.Tensor] = None,
    # bounding box of the scene
    scene_aabb: Optional[torch.Tensor] = None,
    # binarized grid for skipping empty space
    grid: Optional[Grid] = None,
    # sigma/alpha function for skipping invisible space
    sigma_fn: Optional[Callable] = None,
    alpha_fn: Optional[Callable] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
    # rendering options
    near_plane: Optional[float] = None,
    far_plane: Optional[float] = None,
    render_step_size: float = 1e-3,
    stratified: bool = False,
    cone_angle: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Ray marching with space skipping.

    Note:
        The logic for computing `t_min` and `t_max`:
        1. If `t_min` and `t_max` are given, use them with highest priority.
        2. If `t_min` and `t_max` are not given, but `scene_aabb` is given, use \
        :func:`ray_aabb_intersect` to compute `t_min` and `t_max`.
        3. If `t_min` and `t_max` are not given, and `scene_aabb` is not given, \
        set `t_min` to 0.0, and `t_max` to 1e10. (the case of unbounded scene)
        4. Always clip `t_min` with `near_plane` and `t_max` with `far_plane` if given.

    Warning:
        This function is not differentiable to any inputs.

    Args:
        rays_o: Ray origins of shape (n_rays, 3).
        rays_d: Normalized ray directions of shape (n_rays, 3).
        t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).
        t_max: Optional. Per-ray maximum distance. Tensor with shape (n_rays).
        scene_aabb: Optional. Scene bounding box for computing t_min and t_max.
            A tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.
            `scene_aabb` will be ignored if both `t_min` and `t_max` are provided.
        grid: Optional. Grid that indicates where to skip during marching.
            See :class:`nerfacc.Grid` for details.
        sigma_fn: Optional. If provided, the marching will skip the invisible space
            by evaluating the density along the ray with `sigma_fn`. It should be a
            function that takes in samples {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returns the post-activation density values (N, 1).
            You should only provide either `sigma_fn` or `alpha_fn`.
        alpha_fn: Optional. If provided, the marching will skip the invisible space
            by evaluating the density along the ray with `alpha_fn`. It should be a
            function that takes in samples {t_starts (N, 1), t_ends (N, 1),
            ray indices (N,)} and returns the post-activation opacity values (N, 1).
            You should only provide either `sigma_fn` or `alpha_fn`.
        early_stop_eps: Early stop threshold for skipping invisible space. Default: 1e-4.
        alpha_thre: Alpha threshold for skipping empty space. Default: 0.0.
        near_plane: Optional. Near plane distance. If provided, it will be used
            to clip t_min.
        far_plane: Optional. Far plane distance. If provided, it will be used
            to clip t_max.
        render_step_size: Step size for marching. Default: 1e-3.
        stratified: Whether to use stratified sampling. Default: False.
        cone_angle: Cone angle for linearly-increased step size. 0. means
            constant step size. Default: 0.0.

    Returns:
        A tuple of tensors.

        - **ray_indices**: Ray index of each sample. IntTensor with shape (n_samples).
        - **t_starts**: Per-sample start distance. Tensor with shape (n_samples, 1).
        - **t_ends**: Per-sample end distance. Tensor with shape (n_samples, 1).

    Examples:

    .. code-block:: python

        import torch
        from nerfacc import OccupancyGrid, ray_marching, unpack_info

        device = "cuda:0"
        batch_size = 128
        rays_o = torch.rand((batch_size, 3), device=device)
        rays_d = torch.randn((batch_size, 3), device=device)
        rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

        # Ray marching with near far plane.
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
        )

        # Ray marching with aabb.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-3
        )

        # Ray marching with per-ray t_min and t_max.
        t_min = torch.zeros((batch_size,), device=device)
        t_max = torch.ones((batch_size,), device=device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, t_min=t_min, t_max=t_max, render_step_size=1e-3
        )

        # Ray marching with aabb and skip areas based on occupancy grid.
        scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
        grid = OccupancyGrid(roi_aabb=[0.0, 0.0, 0.0, 0.5, 0.5, 0.5]).to(device)
        ray_indices, t_starts, t_ends = ray_marching(
            rays_o, rays_d, scene_aabb=scene_aabb, grid=grid, render_step_size=1e-3
        )

        # Convert t_starts and t_ends to sample locations.
        t_mid = (t_starts + t_ends) / 2.0
        sample_locs = rays_o[ray_indices] + t_mid * rays_d[ray_indices]

    """
    if not rays_o.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    if alpha_fn is not None and sigma_fn is not None:
        raise ValueError(
            "Only one of `alpha_fn` and `sigma_fn` should be provided."
        )
    # logic for t_min and t_max:
    # 1. if t_min and t_max are given, use them with highest priority.
    # 2. if t_min and t_max are not given, but scene_aabb is given, use
    # ray_aabb_intersect to compute t_min and t_max.
    # 3. if t_min and t_max are not given, and scene_aabb is not given,
    # set t_min to 0.0, and t_max to 1e10. (the case of unbounded scene)
    # 4. always clip t_min with near_plane and t_max with far_plane if given.
    if t_min is None or t_max is None:
        if scene_aabb is not None:
            t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
        else:
            t_min = torch.zeros_like(rays_o[..., 0])
            t_max = torch.ones_like(rays_o[..., 0]) * 1e10
    if near_plane is not None:
        t_min = torch.clamp(t_min, min=near_plane)
    if far_plane is not None:
        t_max = torch.clamp(t_max, max=far_plane)
    # stratified sampling: prevent overfitting during training
    if stratified:
        t_min = t_min + torch.rand_like(t_min) * render_step_size
    # use grid for skipping if given
    if grid is not None:
        grid_roi_aabb = grid.roi_aabb
        grid_binary = grid.binary
        contraction_type = grid.contraction_type.to_cpp_version()
    else:
        # No grid: march everywhere inside an effectively infinite aabb,
        # with a single always-occupied cell.
        grid_roi_aabb = torch.tensor(
            [-1e10, -1e10, -1e10, 1e10, 1e10, 1e10],
            dtype=torch.float32,
            device=rays_o.device,
        )
        grid_binary = torch.ones(
            [1, 1, 1, 1], dtype=torch.bool, device=rays_o.device
        )
        contraction_type = ContractionType.AABB.to_cpp_version()
    # marching with grid-based skipping
    packed_info, ray_indices, t_starts, t_ends = _C.ray_marching(
        # rays
        rays_o.contiguous(),
        rays_d.contiguous(),
        t_min.contiguous(),
        t_max.contiguous(),
        # contraction and grid
        grid_roi_aabb.contiguous(),
        grid_binary.contiguous(),
        contraction_type,
        # sampling
        render_step_size,
        cone_angle,
    )
    # skip invisible space
    if (alpha_thre > 0.0 or early_stop_eps > 0.0) and (
        sigma_fn is not None or alpha_fn is not None
    ):
        # Query sigma without gradients
        if sigma_fn is not None:
            sigmas = sigma_fn(t_starts, t_ends, ray_indices)
            assert (
                sigmas.shape == t_starts.shape
            ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
            alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
        elif alpha_fn is not None:
            alphas = alpha_fn(t_starts, t_ends, ray_indices)
            assert (
                alphas.shape == t_starts.shape
            ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)
        if grid is not None:
            # Cap the threshold by the mean occupancy so it never prunes
            # more aggressively than the grid itself.
            alpha_thre = min(alpha_thre, grid.occs.mean().item())
        # Compute visibility of the samples, and filter out invisible samples
        masks = render_visibility(
            alphas,
            ray_indices=ray_indices,
            packed_info=packed_info,
            early_stop_eps=early_stop_eps,
            alpha_thre=alpha_thre,
            n_rays=rays_o.shape[0],
        )
        ray_indices, t_starts, t_ends = (
            ray_indices[masks],
            t_starts[masks],
            t_ends[masks],
        )
    return ray_indices, t_starts, t_ends
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Optional
import torch
from torch import Tensor
from . import cuda as _C
def inclusive_sum(
    inputs: Tensor, packed_info: Optional[Tensor] = None
) -> Tensor:
    """Inclusive Sum that supports flattened tensor.

    Equivalent to `torch.cumsum(inputs, dim=-1)`, but also accepts a
    flattened tensor accompanied by a `packed_info` tensor that describes
    the chunk layout inside it.

    Args:
        inputs: The tensor to be summed. Either an N-D tensor, or a
            flattened tensor with `packed_info` specified.
        packed_info: A (n_rays, 2) tensor giving the start and count of each
            chunk in the flattened input, with in total n_rays chunks. If
            None, the sum runs along the last dimension of `inputs`.
            Default is None.

    Returns:
        The inclusive sum with the same shape as the input tensor.

    Example:

    .. code-block:: python

        >>> inputs = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], device="cuda")
        >>> packed_info = torch.tensor([[0, 2], [2, 3], [5, 4]], device="cuda")
        >>> inclusive_sum(inputs, packed_info)
        tensor([ 1.,  3.,  3.,  7., 12.,  6., 13., 21., 30.], device='cuda:0')

    """
    if packed_info is None:
        # Plain batched cumulative sum over the trailing dimension.
        return torch.cumsum(inputs, dim=-1)
    # Flattened path: validate the layout, then dispatch to the CUDA kernel.
    assert inputs.dim() == 1, "inputs must be flattened."
    assert (
        packed_info.dim() == 2 and packed_info.shape[-1] == 2
    ), "packed_info must be 2-D with shape (B, 2)."
    chunk_starts, chunk_cnts = packed_info.unbind(dim=-1)
    return _InclusiveSum.apply(chunk_starts, chunk_cnts, inputs, False)
def exclusive_sum(
    inputs: Tensor, packed_info: Optional[Tensor] = None
) -> Tensor:
    """Exclusive Sum that supports flattened tensor.

    Similar to :func:`nerfacc.inclusive_sum`, but computes the exclusive sum.

    Args:
        inputs: The tensor to be summed. Either an N-D tensor, or a
            flattened tensor with `packed_info` specified.
        packed_info: A (n_rays, 2) tensor giving the start and count of each
            chunk in the flattened input, with in total n_rays chunks. If
            None, the sum runs along the last dimension of `inputs`.
            Default is None.

    Returns:
        The exclusive sum with the same shape as the input tensor.

    Example:

    .. code-block:: python

        >>> inputs = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], device="cuda")
        >>> packed_info = torch.tensor([[0, 2], [2, 3], [5, 4]], device="cuda")
        >>> exclusive_sum(inputs, packed_info)
        tensor([ 0.,  1.,  0.,  3.,  7.,  0.,  6., 13., 21.], device='cuda:0')

    """
    if packed_info is None:
        # Shift right by one (prepend a zero, drop the last), then cumsum.
        shifted = torch.cat(
            [torch.zeros_like(inputs[..., :1]), inputs[..., :-1]], dim=-1
        )
        return torch.cumsum(shifted, dim=-1)
    # Flattened path: validate the layout, then dispatch to the CUDA kernel.
    assert inputs.dim() == 1, "inputs must be flattened."
    assert (
        packed_info.dim() == 2 and packed_info.shape[-1] == 2
    ), "packed_info must be 2-D with shape (B, 2)."
    chunk_starts, chunk_cnts = packed_info.unbind(dim=-1)
    return _ExclusiveSum.apply(chunk_starts, chunk_cnts, inputs, False)
def inclusive_prod(
    inputs: Tensor, packed_info: Optional[Tensor] = None
) -> Tensor:
    """Inclusive Product that supports flattened tensor.

    Equivalent to `torch.cumprod(inputs, dim=-1)`, but also accepts a
    flattened tensor accompanied by a `packed_info` tensor that describes
    the chunk layout inside it.

    Args:
        inputs: The tensor to be producted. Either an N-D tensor, or a
            flattened tensor with `packed_info` specified.
        packed_info: A (n_rays, 2) tensor giving the start and count of each
            chunk in the flattened input, with in total n_rays chunks. If
            None, the product runs along the last dimension of `inputs`.
            Default is None.

    Returns:
        The inclusive product with the same shape as the input tensor.

    Example:

    .. code-block:: python

        >>> inputs = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], device="cuda")
        >>> packed_info = torch.tensor([[0, 2], [2, 3], [5, 4]], device="cuda")
        >>> inclusive_prod(inputs, packed_info)
        tensor([1., 2., 3., 12., 60., 6., 42., 336., 3024.], device='cuda:0')

    """
    if packed_info is None:
        # Plain batched cumulative product over the trailing dimension.
        return torch.cumprod(inputs, dim=-1)
    # Flattened path: validate the layout, then dispatch to the CUDA kernel.
    assert inputs.dim() == 1, "inputs must be flattened."
    assert (
        packed_info.dim() == 2 and packed_info.shape[-1] == 2
    ), "packed_info must be 2-D with shape (B, 2)."
    chunk_starts, chunk_cnts = packed_info.unbind(dim=-1)
    return _InclusiveProd.apply(chunk_starts, chunk_cnts, inputs)
def exclusive_prod(
    inputs: Tensor, packed_info: Optional[Tensor] = None
) -> Tensor:
    """Exclusive Product that supports flattened tensor.

    Similar to :func:`nerfacc.inclusive_prod`, but computes the exclusive product.

    Args:
        inputs: The tensor to be producted. Either an N-D tensor, or a
            flattened tensor with `packed_info` specified.
        packed_info: A (n_rays, 2) tensor giving the start and count of each
            chunk in the flattened input, with in total n_rays chunks. If
            None, the product runs along the last dimension of `inputs`.
            Default is None.

    Returns:
        The exclusive product with the same shape as the input tensor.

    Example:

    .. code-block:: python

        >>> inputs = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], device="cuda")
        >>> packed_info = torch.tensor([[0, 2], [2, 3], [5, 4]], device="cuda")
        >>> exclusive_prod(inputs, packed_info)
        tensor([1., 1., 1., 3., 12., 1., 6., 42., 336.], device='cuda:0')

    """
    if packed_info is None:
        # Batched exclusive product on the last dimension: shift right
        # (prepend ones, drop the last), then cumprod.
        outputs = torch.cumprod(
            torch.cat(
                [torch.ones_like(inputs[..., :1]), inputs[..., :-1]], dim=-1
            ),
            dim=-1,
        )
    else:
        # Flattened exclusive product. Validate the layout like the other
        # scan functions in this module (these checks were missing here).
        assert inputs.dim() == 1, "inputs must be flattened."
        assert (
            packed_info.dim() == 2 and packed_info.shape[-1] == 2
        ), "packed_info must be 2-D with shape (B, 2)."
        chunk_starts, chunk_cnts = packed_info.unbind(dim=-1)
        outputs = _ExclusiveProd.apply(chunk_starts, chunk_cnts, inputs)
    return outputs
class _InclusiveSum(torch.autograd.Function):
    """Inclusive Sum on a Flattened Tensor."""

    @staticmethod
    def forward(ctx, chunk_starts, chunk_cnts, inputs, normalize: bool = False):
        # The CUDA kernel expects contiguous memory.
        chunk_starts = chunk_starts.contiguous()
        chunk_cnts = chunk_cnts.contiguous()
        inputs = inputs.contiguous()
        # Trailing flag is False here and True in backward — it presumably
        # selects the reverse-scan kernel variant; confirm against the
        # CUDA source.
        outputs = _C.inclusive_sum(
            chunk_starts, chunk_cnts, inputs, normalize, False
        )
        if ctx.needs_input_grad[2]:
            # Only stash state when `inputs` will actually need a gradient.
            ctx.normalize = normalize
            ctx.save_for_backward(chunk_starts, chunk_cnts)
        return outputs

    @staticmethod
    def backward(ctx, grad_outputs):
        grad_outputs = grad_outputs.contiguous()
        chunk_starts, chunk_cnts = ctx.saved_tensors
        normalize = ctx.normalize
        assert normalize == False, "Only support backward for normalize==False."
        grad_inputs = _C.inclusive_sum(
            chunk_starts, chunk_cnts, grad_outputs, normalize, True
        )
        # Gradient flows only to `inputs` (forward arg index 2).
        return None, None, grad_inputs, None
class _ExclusiveSum(torch.autograd.Function):
    """Exclusive Sum on a Flattened Tensor."""

    @staticmethod
    def forward(ctx, chunk_starts, chunk_cnts, inputs, normalize: bool = False):
        # The CUDA kernel expects contiguous memory.
        chunk_starts = chunk_starts.contiguous()
        chunk_cnts = chunk_cnts.contiguous()
        inputs = inputs.contiguous()
        # Trailing flag is False here and True in backward — it presumably
        # selects the reverse-scan kernel variant; confirm against the
        # CUDA source.
        outputs = _C.exclusive_sum(
            chunk_starts, chunk_cnts, inputs, normalize, False
        )
        if ctx.needs_input_grad[2]:
            # Only stash state when `inputs` will actually need a gradient.
            ctx.normalize = normalize
            ctx.save_for_backward(chunk_starts, chunk_cnts)
        return outputs

    @staticmethod
    def backward(ctx, grad_outputs):
        grad_outputs = grad_outputs.contiguous()
        chunk_starts, chunk_cnts = ctx.saved_tensors
        normalize = ctx.normalize
        assert normalize == False, "Only support backward for normalize==False."
        grad_inputs = _C.exclusive_sum(
            chunk_starts, chunk_cnts, grad_outputs, normalize, True
        )
        # Gradient flows only to `inputs` (forward arg index 2).
        return None, None, grad_inputs, None
class _InclusiveProd(torch.autograd.Function):
    """Inclusive Product on a Flattened Tensor."""

    @staticmethod
    def forward(ctx, chunk_starts, chunk_cnts, inputs):
        # The CUDA kernel expects contiguous memory.
        chunk_starts = chunk_starts.contiguous()
        chunk_cnts = chunk_cnts.contiguous()
        inputs = inputs.contiguous()
        outputs = _C.inclusive_prod_forward(chunk_starts, chunk_cnts, inputs)
        if ctx.needs_input_grad[2]:
            # The backward kernel consumes both the inputs and the forward
            # outputs, so save all of them.
            ctx.save_for_backward(chunk_starts, chunk_cnts, inputs, outputs)
        return outputs

    @staticmethod
    def backward(ctx, grad_outputs):
        grad_outputs = grad_outputs.contiguous()
        chunk_starts, chunk_cnts, inputs, outputs = ctx.saved_tensors
        grad_inputs = _C.inclusive_prod_backward(
            chunk_starts, chunk_cnts, inputs, outputs, grad_outputs
        )
        # Gradient flows only to `inputs` (forward arg index 2).
        return None, None, grad_inputs
class _ExclusiveProd(torch.autograd.Function):
    """Exclusive Product on a Flattened Tensor."""

    @staticmethod
    def forward(ctx, chunk_starts, chunk_cnts, inputs):
        # The CUDA kernel expects contiguous memory.
        chunk_starts = chunk_starts.contiguous()
        chunk_cnts = chunk_cnts.contiguous()
        inputs = inputs.contiguous()
        outputs = _C.exclusive_prod_forward(chunk_starts, chunk_cnts, inputs)
        if ctx.needs_input_grad[2]:
            # The backward kernel consumes both the inputs and the forward
            # outputs, so save all of them.
            ctx.save_for_backward(chunk_starts, chunk_cnts, inputs, outputs)
        return outputs

    @staticmethod
    def backward(ctx, grad_outputs):
        grad_outputs = grad_outputs.contiguous()
        chunk_starts, chunk_cnts, inputs, outputs = ctx.saved_tensors
        grad_inputs = _C.exclusive_prod_backward(
            chunk_starts, chunk_cnts, inputs, outputs, grad_outputs
        )
        # Gradient flows only to `inputs` (forward arg index 2).
        return None, None, grad_inputs
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
__version__ = "0.5.0"
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Callable, Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
from .pack import pack_info
def rendering(
    # ray marching results
    t_starts: torch.Tensor,
    t_ends: torch.Tensor,
    ray_indices: torch.Tensor,
    n_rays: int,
    # radiance field
    rgb_sigma_fn: Optional[Callable] = None,
    rgb_alpha_fn: Optional[Callable] = None,
    # rendering options
    render_bkgd: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Render the rays through the radiance field defined by `rgb_sigma_fn`.

    This function is differentiable to the outputs of `rgb_sigma_fn` so it can
    be used for gradient-based optimization.

    Note:
        Either `rgb_sigma_fn` or `rgb_alpha_fn` should be provided. If both
        are given, `rgb_sigma_fn` takes precedence.

    Warning:
        This function is not differentiable to `t_starts`, `t_ends` and `ray_indices`.

    Args:
        t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).
        t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).
        ray_indices: Ray index of each sample. IntTensor with shape (n_samples).
        n_rays: Total number of rays. This will decide the shape of the outputs.
        rgb_sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \
            ray indices (N,)} and returns the post-activation rgb (N, 3) and density \
            values (N, 1).
        rgb_alpha_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \
            ray indices (N,)} and returns the post-activation rgb (N, 3) and opacity \
            values (N, 1).
        render_bkgd: Optional. Background color. Tensor with shape (3,).

    Returns:
        Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).

    Examples:

    .. code-block:: python

        >>> rays_o = torch.rand((128, 3), device="cuda:0")
        >>> rays_d = torch.randn((128, 3), device="cuda:0")
        >>> rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
        >>> ray_indices, t_starts, t_ends = ray_marching(
        >>>     rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3)
        >>> def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        >>>     # This is a dummy function that returns random values.
        >>>     rgbs = torch.rand((t_starts.shape[0], 3), device="cuda:0")
        >>>     sigmas = torch.rand((t_starts.shape[0], 1), device="cuda:0")
        >>>     return rgbs, sigmas
        >>> colors, opacities, depths = rendering(
        >>>     t_starts, t_ends, ray_indices, n_rays=128, rgb_sigma_fn=rgb_sigma_fn)
        >>> print(colors.shape, opacities.shape, depths.shape)
        torch.Size([128, 3]) torch.Size([128, 1]) torch.Size([128, 1])

    """
    if rgb_sigma_fn is None and rgb_alpha_fn is None:
        raise ValueError(
            "At least one of `rgb_sigma_fn` and `rgb_alpha_fn` should be specified."
        )
    # Query sigma/alpha and color with gradients
    if rgb_sigma_fn is not None:
        rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
        # Rendering: compute weights.
        weights = render_weight_from_density(
            t_starts,
            t_ends,
            sigmas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
    elif rgb_alpha_fn is not None:
        rgbs, alphas = rgb_alpha_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            alphas.shape == t_starts.shape
        ), "alphas must have shape of (N, 1)! Got {}".format(alphas.shape)
        # Rendering: compute weights. Alphas need no interval lengths.
        weights = render_weight_from_alpha(
            alphas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
    # Rendering: accumulate rgbs, opacities, and depths along the rays.
    colors = accumulate_along_rays(
        weights, ray_indices, values=rgbs, n_rays=n_rays
    )
    opacities = accumulate_along_rays(
        weights, ray_indices, values=None, n_rays=n_rays
    )
    # Depth is the weighted sum of interval midpoints.
    depths = accumulate_along_rays(
        weights,
        ray_indices,
        values=(t_starts + t_ends) / 2.0,
        n_rays=n_rays,
    )
    # Background composition.
    if render_bkgd is not None:
        colors = colors + render_bkgd * (1.0 - opacities)
    return colors, opacities, depths
def accumulate_along_rays(
    weights: Tensor,
    ray_indices: Tensor,
    values: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along the ray.
    Note:
        This function is only differentiable to `weights` and `values`.
    Args:
        weights: Volumetric rendering weights for those samples. Tensor with shape \
            (n_samples,).
        ray_indices: Ray index of each sample. LongTensor with shape (n_samples).
        values: The values to be accmulated. Tensor with shape (n_samples, D). If \
            None, the accumulated values are just weights. Default is None.
        n_rays: Total number of rays. This will decide the shape of the ouputs. If \
            None, it will be inferred from `ray_indices.max() + 1`. If specified \
            it should be at least larger than `ray_indices.max()`. Default is None.
    Returns:
        Accumulated values with shape (n_rays, D). If `values` is not given then we return \
        the accumulated weights, in which case D == 1.
    Raises:
        NotImplementedError: If `weights` is not a CUDA tensor.
    Examples:
    .. code-block:: python
        # Rendering: accumulate rgbs, opacities, and depths along the rays.
        colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)
        opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)
        depths = accumulate_along_rays(
            weights,
            ray_indices,
            values=(t_starts + t_ends) / 2.0,
            n_rays=n_rays,
        )
        # (n_rays, 3), (n_rays, 1), (n_rays, 1)
        print(colors.shape, opacities.shape, depths.shape)
    """
    assert ray_indices.dim() == 1 and weights.dim() == 2
    if not weights.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    if values is not None:
        assert (
            values.dim() == 2 and values.shape[0] == weights.shape[0]
        ), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
        src = weights * values
    else:
        src = weights
    if ray_indices.numel() == 0:
        assert n_rays is not None
        # Bug fix: match the non-empty path's dtype so downstream math does
        # not silently get float32 when src is e.g. half/double.
        return torch.zeros(
            (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype
        )
    if n_rays is None:
        n_rays = int(ray_indices.max()) + 1
    # assert n_rays > ray_indices.max()
    outputs = torch.zeros(
        (n_rays, src.shape[-1]), device=src.device, dtype=src.dtype
    )
    # Scatter-add each sample's contribution into its ray's slot.
    outputs.index_add_(0, ray_indices, src)
    return outputs
def render_transmittance_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    r"""Compute transmittance :math:`T_i` from density :math:`\sigma_i`.
    .. math::
        T_i = exp(-\sum_{j=1}^{i-1}\sigma_j\delta_j)
    Note:
        Either `ray_indices` or `packed_info` should be provided. When
        `ray_indices` is given and CUB is available (CUDA >= 11.6) the CUB
        kernels are used; otherwise we fall back to the naive forloop
        implementation driven by `packed_info`.
    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \
            shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the same ray. \
            LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, used to build `packed_info` from \
            `ray_indices` on the naive path. Inferred (slower) when omitted.
    Returns:
        The rendering transmittance. Tensor with shape (n_sample, 1).
    """
    assert not (
        ray_indices is None and packed_info is None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        # Fast path: CUB-accelerated segmented scan keyed by ray index.
        return _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )
    # Naive path requires per-ray (start, count) chunks.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingTransmittanceFromDensityNaive.apply(
        packed_info, t_starts, t_ends, sigmas
    )
def render_transmittance_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    r"""Compute transmittance :math:`T_i` from alpha :math:`\alpha_i`.
    .. math::
        T_i = \prod_{j=1}^{i-1}(1-\alpha_j)
    Note:
        Either `ray_indices` or `packed_info` should be provided. When
        `ray_indices` is given and CUB is available (CUDA >= 11.6) the CUB
        kernels are used; otherwise we fall back to the naive forloop
        implementation driven by `packed_info`.
    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the same ray. \
            LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, used to build `packed_info` from \
            `ray_indices` on the naive path. Inferred (slower) when omitted.
    Returns:
        The rendering transmittance. Tensor with shape (n_sample, 1).
    """
    assert not (
        ray_indices is None and packed_info is None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        # Fast path: CUB-accelerated segmented scan keyed by ray index.
        return _RenderingTransmittanceFromAlphaCUB.apply(ray_indices, alphas)
    # Naive path requires per-ray (start, count) chunks.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingTransmittanceFromAlphaNaive.apply(packed_info, alphas)
def render_weight_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    r"""Compute rendering weights :math:`w_i` from density :math:`\sigma_i` and interval :math:`\delta_i`.
    .. math::
        w_i = T_i(1 - exp(-\sigma_i\delta_i)), \quad\textrm{where}\quad T_i = exp(-\sum_{j=1}^{i-1}\sigma_j\delta_j)
    Note:
        Either `ray_indices` or `packed_info` should be provided. When
        `ray_indices` is given and CUB is available (CUDA >= 11.6) the CUB
        kernels are used; otherwise we fall back to the naive forloop
        implementation driven by `packed_info`.
    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \
            shape (n_samples, 1).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \
            shape (n_samples, 1).
        sigmas: The density values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the same ray. \
            LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, used to build `packed_info` from \
            `ray_indices` on the naive path. Inferred (slower) when omitted.
    Returns:
        The rendering weights. Tensor with shape (n_sample, 1).
    """
    assert not (
        ray_indices is None and packed_info is None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        trans = _RenderingTransmittanceFromDensityCUB.apply(
            ray_indices, t_starts, t_ends, sigmas
        )
        # w_i = T_i * alpha_i, with alpha_i = 1 - exp(-sigma_i * delta_i).
        return trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts)))
    # Naive path computes the weights directly in one fused kernel.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromDensityNaive.apply(
        packed_info, t_starts, t_ends, sigmas
    )
def render_weight_from_alpha(
    alphas: Tensor,
    *,
    packed_info: Optional[torch.Tensor] = None,
    ray_indices: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
) -> torch.Tensor:
    r"""Compute rendering weights :math:`w_i` from opacity :math:`\alpha_i`.
    .. math::
        w_i = T_i\alpha_i, \quad\textrm{where}\quad T_i = \prod_{j=1}^{i-1}(1-\alpha_j)
    Note:
        Either `ray_indices` or `packed_info` should be provided. When
        `ray_indices` is given and CUB is available (CUDA >= 11.6) the CUB
        kernels are used; otherwise we fall back to the naive forloop
        implementation driven by `packed_info`.
    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the same ray. \
            LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, used to build `packed_info` from \
            `ray_indices` on the naive path. Inferred (slower) when omitted.
    Returns:
        The rendering weights. Tensor with shape (n_sample, 1).
    """
    assert not (
        ray_indices is None and packed_info is None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        # w_i = T_i * alpha_i with T_i from the CUB segmented scan.
        trans = _RenderingTransmittanceFromAlphaCUB.apply(ray_indices, alphas)
        return trans * alphas
    # Naive path computes the weights directly in one fused kernel.
    if packed_info is None:
        packed_info = pack_info(ray_indices, n_rays=n_rays)
    return _RenderingWeightFromAlphaNaive.apply(packed_info, alphas)
@torch.no_grad()
def render_visibility(
    alphas: torch.Tensor,
    *,
    ray_indices: Optional[torch.Tensor] = None,
    packed_info: Optional[torch.Tensor] = None,
    n_rays: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> torch.Tensor:
    """Filter out transparent and occluded samples.
    The transmittance computed from the sample opacities is used to mark
    occluded samples, and the opacities themselves mark transparent samples.
    A sample is kept when `transmittance >= early_stop_eps` and (if
    `alpha_thre > 0`) `alpha >= alpha_thre`.
    Note:
        Either `ray_indices` or `packed_info` should be provided. When
        `ray_indices` is given and CUB is available (CUDA >= 11.6) the CUB
        kernels are used; otherwise we fall back to the naive forloop
        implementation driven by `packed_info`.
    Args:
        alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
        packed_info: Optional. Stores information on which samples belong to the same ray. \
            LongTensor with shape (n_rays, 2).
        ray_indices: Optional. Ray index of each sample. LongTensor with shape (n_sample).
        n_rays: Optional. Number of rays, used to build `packed_info` from \
            `ray_indices` on the naive path. Inferred (slower) when omitted.
        early_stop_eps: The early stopping threshold on transmittance.
        alpha_thre: The threshold on opacity.
    Returns:
        The visibility of each sample. BoolTensor with shape (n_samples,).
    """
    assert not (
        ray_indices is None and packed_info is None
    ), "Either ray_indices or packed_info should be provided."
    if ray_indices is not None and _C.is_cub_available():
        trans = _RenderingTransmittanceFromAlphaCUB.apply(ray_indices, alphas)
    else:
        if packed_info is None:
            packed_info = pack_info(ray_indices, n_rays=n_rays)
        trans = _RenderingTransmittanceFromAlphaNaive.apply(
            packed_info, alphas
        )
    # Occlusion test on transmittance; optional transparency test on alpha.
    vis = trans >= early_stop_eps
    if alpha_thre > 0:
        vis = vis & (alphas >= alpha_thre)
    return vis.squeeze(-1)
class _RenderingTransmittanceFromDensityCUB(torch.autograd.Function):
    """Rendering transmittance from density, backed by the CUB kernels."""

    @staticmethod
    def forward(ctx, ray_indices, t_starts, t_ends, sigmas):
        # The CUDA kernels expect contiguous memory.
        ray_indices = ray_indices.contiguous()
        t_starts = t_starts.contiguous()
        t_ends = t_ends.contiguous()
        sigmas = sigmas.contiguous()
        trans = _C.transmittance_from_sigma_forward_cub(
            ray_indices, t_starts, t_ends, sigmas
        )
        # Only stash tensors when a sigma gradient is actually requested.
        if ctx.needs_input_grad[3]:
            ctx.save_for_backward(ray_indices, t_starts, t_ends, trans)
        return trans

    @staticmethod
    def backward(ctx, grads):
        ray_indices, t_starts, t_ends, trans = ctx.saved_tensors
        grad_sigmas = _C.transmittance_from_sigma_backward_cub(
            ray_indices, t_starts, t_ends, trans, grads.contiguous()
        )
        # Gradients only flow to sigmas (input index 3).
        return None, None, None, grad_sigmas
class _RenderingTransmittanceFromDensityNaive(torch.autograd.Function):
    """Rendering transmittance from density, naive forloop fallback."""

    @staticmethod
    def forward(ctx, packed_info, t_starts, t_ends, sigmas):
        # The CUDA kernels expect contiguous memory.
        packed_info = packed_info.contiguous()
        t_starts = t_starts.contiguous()
        t_ends = t_ends.contiguous()
        sigmas = sigmas.contiguous()
        trans = _C.transmittance_from_sigma_forward_naive(
            packed_info, t_starts, t_ends, sigmas
        )
        # Only stash tensors when a sigma gradient is actually requested.
        if ctx.needs_input_grad[3]:
            ctx.save_for_backward(packed_info, t_starts, t_ends, trans)
        return trans

    @staticmethod
    def backward(ctx, grads):
        packed_info, t_starts, t_ends, trans = ctx.saved_tensors
        grad_sigmas = _C.transmittance_from_sigma_backward_naive(
            packed_info, t_starts, t_ends, trans, grads.contiguous()
        )
        # Gradients only flow to sigmas (input index 3).
        return None, None, None, grad_sigmas
class _RenderingTransmittanceFromAlphaCUB(torch.autograd.Function):
    """Rendering transmittance from opacity, backed by the CUB kernels."""

    @staticmethod
    def forward(ctx, ray_indices, alphas):
        # The CUDA kernels expect contiguous memory.
        ray_indices = ray_indices.contiguous()
        alphas = alphas.contiguous()
        trans = _C.transmittance_from_alpha_forward_cub(ray_indices, alphas)
        # Only stash tensors when an alpha gradient is actually requested.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(ray_indices, trans, alphas)
        return trans

    @staticmethod
    def backward(ctx, grads):
        ray_indices, trans, alphas = ctx.saved_tensors
        grad_alphas = _C.transmittance_from_alpha_backward_cub(
            ray_indices, alphas, trans, grads.contiguous()
        )
        # Gradients only flow to alphas (input index 1).
        return None, grad_alphas
class _RenderingTransmittanceFromAlphaNaive(torch.autograd.Function):
    """Rendering transmittance from opacity, naive forloop fallback."""

    @staticmethod
    def forward(ctx, packed_info, alphas):
        # The CUDA kernels expect contiguous memory.
        packed_info = packed_info.contiguous()
        alphas = alphas.contiguous()
        trans = _C.transmittance_from_alpha_forward_naive(packed_info, alphas)
        # Only stash tensors when an alpha gradient is actually requested.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, trans, alphas)
        return trans

    @staticmethod
    def backward(ctx, grads):
        packed_info, trans, alphas = ctx.saved_tensors
        grad_alphas = _C.transmittance_from_alpha_backward_naive(
            packed_info, alphas, trans, grads.contiguous()
        )
        # Gradients only flow to alphas (input index 1).
        return None, grad_alphas
class _RenderingWeightFromDensityNaive(torch.autograd.Function):
    """Rendering weight from density, naive forloop fallback."""

    @staticmethod
    def forward(ctx, packed_info, t_starts, t_ends, sigmas):
        # The CUDA kernels expect contiguous memory.
        packed_info = packed_info.contiguous()
        t_starts = t_starts.contiguous()
        t_ends = t_ends.contiguous()
        sigmas = sigmas.contiguous()
        weights = _C.weight_from_sigma_forward_naive(
            packed_info, t_starts, t_ends, sigmas
        )
        # Only stash tensors when a sigma gradient is actually requested.
        if ctx.needs_input_grad[3]:
            ctx.save_for_backward(packed_info, t_starts, t_ends, sigmas, weights)
        return weights

    @staticmethod
    def backward(ctx, grads):
        packed_info, t_starts, t_ends, sigmas, weights = ctx.saved_tensors
        grad_sigmas = _C.weight_from_sigma_backward_naive(
            weights, grads.contiguous(), packed_info, t_starts, t_ends, sigmas
        )
        # Gradients only flow to sigmas (input index 3).
        return None, None, None, grad_sigmas
class _RenderingWeightFromAlphaNaive(torch.autograd.Function):
    """Rendering weight from opacity, naive forloop fallback."""

    @staticmethod
    def forward(ctx, packed_info, alphas):
        # The CUDA kernels expect contiguous memory.
        packed_info = packed_info.contiguous()
        alphas = alphas.contiguous()
        weights = _C.weight_from_alpha_forward_naive(packed_info, alphas)
        # Only stash tensors when an alpha gradient is actually requested.
        if ctx.needs_input_grad[1]:
            ctx.save_for_backward(packed_info, alphas, weights)
        return weights

    @staticmethod
    def backward(ctx, grads):
        packed_info, alphas, weights = ctx.saved_tensors
        grad_alphas = _C.weight_from_alpha_backward_naive(
            weights, grads.contiguous(), packed_info, alphas
        )
        # Gradients only flow to alphas (input index 1).
        return None, grad_alphas
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from typing import Callable, Dict, Optional, Tuple
import torch
from torch import Tensor
from .pack import pack_info
from .scan import exclusive_prod, exclusive_sum
def rendering(
    # ray marching results
    t_starts: Tensor,
    t_ends: Tensor,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
    # radiance field
    rgb_sigma_fn: Optional[Callable] = None,
    rgb_alpha_fn: Optional[Callable] = None,
    # rendering options
    render_bkgd: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor, Tensor, Dict]:
    """Render the rays through the radiance field defined by `rgb_sigma_fn`.
    This function is differentiable to the outputs of `rgb_sigma_fn` so it can
    be used for gradient-based optimization. It supports both batched and flattened input tensor.
    For flattened input tensor, both `ray_indices` and `n_rays` should be provided.
    Note:
        Either `rgb_sigma_fn` or `rgb_alpha_fn` should be provided.
    Warning:
        This function is not differentiable to `t_starts`, `t_ends` and `ray_indices`.
    Args:
        t_starts: Per-sample start distance. Tensor with shape (n_rays, n_samples) or (all_samples,).
        t_ends: Per-sample end distance. Tensor with shape (n_rays, n_samples) or (all_samples,).
        ray_indices: Ray indices of the flattened samples. LongTensor with shape (all_samples).
        n_rays: Number of rays. Only useful when `ray_indices` is provided.
        rgb_sigma_fn: A function that takes in samples {t_starts, t_ends,
            ray indices} and returns the post-activation rgb (..., 3) and density
            values (...,). The shape `...` is the same as the shape of `t_starts`.
        rgb_alpha_fn: A function that takes in samples {t_starts, t_ends,
            ray indices} and returns the post-activation rgb (..., 3) and opacity
            values (...,). The shape `...` is the same as the shape of `t_starts`.
        render_bkgd: Background color. Tensor with shape (3,).
    Returns:
        Ray colors (n_rays, 3), opacities (n_rays, 1), depths (n_rays, 1) and a dict
        containing extra intermediate results (e.g., "weights", "trans", "alphas")
    Examples:
    .. code-block:: python
        >>> t_starts = torch.tensor([0.1, 0.2, 0.1, 0.2, 0.3], device="cuda:0")
        >>> t_ends = torch.tensor([0.2, 0.3, 0.2, 0.3, 0.4], device="cuda:0")
        >>> ray_indices = torch.tensor([0, 0, 1, 1, 1], device="cuda:0")
        >>> def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        >>>     # This is a dummy function that returns random values.
        >>>     rgbs = torch.rand((t_starts.shape[0], 3), device="cuda:0")
        >>>     sigmas = torch.rand((t_starts.shape[0],), device="cuda:0")
        >>>     return rgbs, sigmas
        >>> colors, opacities, depths, extras = rendering(
        >>>     t_starts, t_ends, ray_indices, n_rays=2, rgb_sigma_fn=rgb_sigma_fn)
        >>> print(colors.shape, opacities.shape, depths.shape)
        torch.Size([2, 3]) torch.Size([2, 1]) torch.Size([2, 1])
        >>> extras.keys()
        dict_keys(['weights', 'alphas', 'trans'])
    """
    # For flattened (packed) inputs the three per-sample tensors must align
    # element-for-element.
    if ray_indices is not None:
        assert (
            t_starts.shape == t_ends.shape == ray_indices.shape
        ), "Since nerfacc 0.5.0, t_starts, t_ends and ray_indices must have the same shape (N,). "
    if rgb_sigma_fn is None and rgb_alpha_fn is None:
        raise ValueError(
            "At least one of `rgb_sigma_fn` and `rgb_alpha_fn` should be specified."
        )
    # Query sigma/alpha and color with gradients
    if rgb_sigma_fn is not None:
        rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            sigmas.shape == t_starts.shape
        ), "sigmas must have shape of (N,)! Got {}".format(sigmas.shape)
        # Rendering: compute weights.
        weights, trans, alphas = render_weight_from_density(
            t_starts,
            t_ends,
            sigmas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
        # Expose intermediate results so callers can compute extra losses.
        extras = {
            "weights": weights,
            "alphas": alphas,
            "trans": trans,
            "sigmas": sigmas,
            "rgbs": rgbs,
        }
    elif rgb_alpha_fn is not None:
        rgbs, alphas = rgb_alpha_fn(t_starts, t_ends, ray_indices)
        assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
            rgbs.shape
        )
        assert (
            alphas.shape == t_starts.shape
        ), "alphas must have shape of (N,)! Got {}".format(alphas.shape)
        # Rendering: compute weights.
        weights, trans = render_weight_from_alpha(
            alphas,
            ray_indices=ray_indices,
            n_rays=n_rays,
        )
        # Expose intermediate results so callers can compute extra losses.
        extras = {
            "weights": weights,
            "trans": trans,
            "rgbs": rgbs,
            "alphas": alphas,
        }
    # Rendering: accumulate rgbs, opacities, and depths along the rays.
    colors = accumulate_along_rays(
        weights, values=rgbs, ray_indices=ray_indices, n_rays=n_rays
    )
    opacities = accumulate_along_rays(
        weights, values=None, ray_indices=ray_indices, n_rays=n_rays
    )
    # Depth is accumulated over the sample midpoints (t_starts + t_ends) / 2.
    depths = accumulate_along_rays(
        weights,
        values=(t_starts + t_ends)[..., None] / 2.0,
        ray_indices=ray_indices,
        n_rays=n_rays,
    )
    # Normalize by accumulated opacity to obtain an opacity-weighted mean
    # depth; the clamp guards (near-)transparent rays from division by zero.
    depths = depths / opacities.clamp_min(torch.finfo(rgbs.dtype).eps)
    # Background composition.
    if render_bkgd is not None:
        colors = colors + render_bkgd * (1.0 - opacities)
    return colors, opacities, depths, extras
def render_transmittance_from_alpha(
    alphas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    r"""Compute transmittance :math:`T_i` from alpha :math:`\alpha_i`.
    .. math::
        T_i = \prod_{j=1}^{i-1}(1-\alpha_j)
    Supports both batched (n_rays, n_samples) and flattened (all_samples,)
    input. For flattened input, provide either `packed_info` or
    (`ray_indices` and `n_rays`).
    Args:
        alphas: The opacity values of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        packed_info: A tensor of shape (n_rays, 2) that specifies the start and count
            of each chunk in the flattened samples, with in total n_rays chunks.
            Useful for flattened input.
        ray_indices: Ray indices of the flattened samples. LongTensor with shape (all_samples).
        n_rays: Number of rays. Only useful when `ray_indices` is provided.
    Returns:
        The rendering transmittance with the same shape as `alphas`.
    """
    # FIXME Try not to use exclusive_prod because:
    # 1. torch.cumprod is much slower than torch.cumsum
    # 2. exclusive_prod gradient on input == 0 is not correct.
    if packed_info is None and ray_indices is not None:
        # Derive per-ray chunks from the sample-to-ray mapping.
        packed_info = pack_info(ray_indices, n_rays)
    return exclusive_prod(1 - alphas, packed_info)
def render_transmittance_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Compute transmittance :math:`T_i` from density :math:`\sigma_i`.
    .. math::
        T_i = exp(-\sum_{j=1}^{i-1}\sigma_j\delta_j)
    Supports both batched (n_rays, n_samples) and flattened (all_samples,)
    input. For flattened input, provide either `packed_info` or
    (`ray_indices` and `n_rays`).
    Args:
        t_starts: Where the frustum-shape sample starts along a ray. Tensor with \
            shape (all_samples,) or (n_rays, n_samples).
        t_ends: Where the frustum-shape sample ends along a ray. Tensor with \
            shape (all_samples,) or (n_rays, n_samples).
        sigmas: The density values of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        packed_info: A tensor of shape (n_rays, 2) that specifies the start and count
            of each chunk in the flattened samples, with in total n_rays chunks.
            Useful for flattened input.
        ray_indices: Ray indices of the flattened samples. LongTensor with shape (all_samples).
        n_rays: Number of rays. Only useful when `ray_indices` is provided.
    Returns:
        The rendering transmittance and opacities, both with the same shape as `sigmas`.
    """
    if packed_info is None and ray_indices is not None:
        # Derive per-ray chunks from the sample-to-ray mapping.
        packed_info = pack_info(ray_indices, n_rays)
    # Per-sample optical depth: sigma_i * delta_i.
    tau = sigmas * (t_ends - t_starts)
    alphas = 1.0 - torch.exp(-tau)
    # T_i = exp(-sum_{j<i} tau_j), computed via an exclusive prefix sum.
    trans = torch.exp(-exclusive_sum(tau, packed_info))
    return trans, alphas
def render_weight_from_alpha(
    alphas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Compute rendering weights :math:`w_i` from opacity :math:`\alpha_i`.
    .. math::
        w_i = T_i\alpha_i, \quad\textrm{where}\quad T_i = \prod_{j=1}^{i-1}(1-\alpha_j)
    Supports both batched (n_rays, n_samples) and flattened (all_samples,)
    input. For flattened input, provide either `packed_info` or
    (`ray_indices` and `n_rays`).
    Args:
        alphas: The opacity values of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        packed_info: A tensor of shape (n_rays, 2) that specifies the start and count
            of each chunk in the flattened samples, with in total n_rays chunks.
            Useful for flattened input.
        ray_indices: Ray indices of the flattened samples. LongTensor with shape (all_samples).
        n_rays: Number of rays. Only useful when `ray_indices` is provided.
    Returns:
        The rendering weights and transmittance, both with the same shape as `alphas`.
    """
    trans = render_transmittance_from_alpha(
        alphas, packed_info, ray_indices, n_rays
    )
    # w_i = T_i * alpha_i.
    weights = trans * alphas
    return weights, trans
def render_weight_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tuple[Tensor, Tensor, Tensor]:
    r"""Compute rendering weights :math:`w_i` from density :math:`\sigma_i` and interval :math:`\delta_i`.
    .. math::
        w_i = T_i(1 - exp(-\sigma_i\delta_i)), \quad\textrm{where}\quad T_i = exp(-\sum_{j=1}^{i-1}\sigma_j\delta_j)
    Supports both batched (n_rays, n_samples) and flattened (all_samples,)
    input. For flattened input, provide either `packed_info` or
    (`ray_indices` and `n_rays`).
    Args:
        t_starts: The start time of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        t_ends: The end time of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        sigmas: The density values of the samples. Tensor with shape (all_samples,) or (n_rays, n_samples).
        packed_info: A tensor of shape (n_rays, 2) that specifies the start and count
            of each chunk in the flattened samples, with in total n_rays chunks.
            Useful for flattened input.
        ray_indices: Ray indices of the flattened samples. LongTensor with shape (all_samples).
        n_rays: Number of rays. Only useful when `ray_indices` is provided.
    Returns:
        The rendering weights, transmittance and opacities, both with the same shape as `sigmas`.
    """
    trans, alphas = render_transmittance_from_density(
        t_starts, t_ends, sigmas, packed_info, ray_indices, n_rays
    )
    # w_i = T_i * alpha_i.
    weights = trans * alphas
    return weights, trans, alphas
@torch.no_grad()
def render_visibility_from_alpha(
    alphas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> Tensor:
    """Compute a per-sample visibility mask from opacity :math:`\\alpha_i`.

    The transmittance is first derived from the sample opacities; a sample is
    kept only if its transmittance is at least `early_stop_eps` (i.e. it is
    not fully occluded) and, when `alpha_thre > 0`, its opacity is at least
    `alpha_thre` (i.e. it is not effectively transparent).

    Supports both batched and flattened inputs. For flattened inputs, provide
    either `packed_info` or (`ray_indices` and `n_rays`).

    Args:
        alphas: Sample opacities, shape (all_samples,) or (n_rays, n_samples).
        packed_info: Tensor of shape (n_rays, 2) with the start and count of
            each ray's chunk in the flattened samples. For flattened input.
        ray_indices: Ray index of each flattened sample. LongTensor of shape
            (all_samples,).
        n_rays: Number of rays. Only used together with `ray_indices`.
        early_stop_eps: Transmittance threshold below which samples are culled.
        alpha_thre: Opacity threshold below which samples are culled.

    Returns:
        Boolean visibility mask with the same shape as `alphas`.

    Examples:

    .. code-block:: python

        >>> alphas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1, 0.0, 0.9], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> visibility = render_visibility_from_alpha(
        >>>     alphas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2)
        tensor([True, True, False, True, False, False, True])

    """
    transmittance = render_transmittance_from_alpha(
        alphas, packed_info, ray_indices, n_rays
    )
    visible = transmittance >= early_stop_eps
    if alpha_thre > 0:
        visible &= alphas >= alpha_thre
    return visible
@torch.no_grad()
def render_visibility_from_density(
    t_starts: Tensor,
    t_ends: Tensor,
    sigmas: Tensor,
    packed_info: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
    early_stop_eps: float = 1e-4,
    alpha_thre: float = 0.0,
) -> Tensor:
    """Compute a per-sample visibility mask from density :math:`\\sigma_i` and interval :math:`\\delta_i`.

    Transmittance and opacity are first derived from the sample densities; a
    sample is kept only if its transmittance is at least `early_stop_eps`
    (not fully occluded) and, when `alpha_thre > 0`, its opacity is at least
    `alpha_thre` (not effectively transparent).

    Supports both batched and flattened inputs. For flattened inputs, provide
    either `packed_info` or (`ray_indices` and `n_rays`).

    Args:
        t_starts: Sample start times, shape (all_samples,) or (n_rays, n_samples).
        t_ends: Sample end times, shape (all_samples,) or (n_rays, n_samples).
        sigmas: Sample densities, shape (all_samples,) or (n_rays, n_samples).
        packed_info: Tensor of shape (n_rays, 2) with the start and count of
            each ray's chunk in the flattened samples. For flattened input.
        ray_indices: Ray index of each flattened sample. LongTensor of shape
            (all_samples,).
        n_rays: Number of rays. Only used together with `ray_indices`.
        early_stop_eps: Transmittance threshold below which samples are culled.
        alpha_thre: Opacity threshold below which samples are culled.

    Returns:
        Boolean visibility mask with the same shape as `sigmas`.

    Examples:

    .. code-block:: python

        >>> t_starts = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0], device="cuda")
        >>> t_ends = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], device="cuda")
        >>> sigmas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1, 0.0, 0.9], device="cuda")
        >>> ray_indices = torch.tensor([0, 0, 0, 1, 1, 2, 2], device="cuda")
        >>> visibility = render_visibility_from_density(
        >>>     t_starts, t_ends, sigmas, ray_indices=ray_indices, early_stop_eps=0.3, alpha_thre=0.2)
        tensor([True, True, False, True, False, False, True])

    """
    transmittance, alphas = render_transmittance_from_density(
        t_starts, t_ends, sigmas, packed_info, ray_indices, n_rays
    )
    visible = transmittance >= early_stop_eps
    if alpha_thre > 0:
        visible &= alphas >= alpha_thre
    return visible
def accumulate_along_rays(
    weights: Tensor,
    values: Optional[Tensor] = None,
    ray_indices: Optional[Tensor] = None,
    n_rays: Optional[int] = None,
) -> Tensor:
    """Accumulate volumetric values along rays.

    Supports both batched inputs and flattened inputs (the latter with
    `ray_indices` and `n_rays` provided).

    Note:
        This function is differentiable w.r.t. `weights` and `values`.

    Args:
        weights: Weights to accumulate. Batched shape (n_rays, n_samples) when
            `ray_indices` is None, otherwise flattened shape (all_samples,).
        values: Values to accumulate. Batched shape (n_rays, n_samples, D) when
            `ray_indices` is None, otherwise flattened shape (all_samples, D).
            If None, the weights themselves are accumulated. Default: None.
        ray_indices: Ray index of each flattened sample, shape (all_samples,).
            Default: None.
        n_rays: Number of rays. Must accompany `ray_indices`. Default: None.

    Returns:
        Accumulated values with shape (n_rays, D); D == 1 when `values` is None.

    Examples:

    .. code-block:: python

        # Rendering: accumulate rgbs, opacities, and depths along the rays.
        colors = accumulate_along_rays(weights, rgbs, ray_indices, n_rays)
        opacities = accumulate_along_rays(weights, None, ray_indices, n_rays)
        depths = accumulate_along_rays(
            weights,
            (t_starts + t_ends)[:, None] / 2.0,
            ray_indices,
            n_rays,
        )
        # (n_rays, 3), (n_rays, 1), (n_rays, 1)
        print(colors.shape, opacities.shape, depths.shape)

    """
    # Build the per-sample contributions; accumulating bare weights is the
    # same as accumulating values of all-ones with an extra trailing dim.
    if values is None:
        src = weights.unsqueeze(-1)
    else:
        assert values.dim() == weights.dim() + 1
        assert weights.shape == values.shape[:-1]
        src = weights.unsqueeze(-1) * values
    if ray_indices is None:
        # Batched layout: reduce over the samples dimension.
        return src.sum(dim=-2)
    assert n_rays is not None, "n_rays must be provided"
    assert weights.dim() == 1, "weights must be flattened"
    # Flattened layout: scatter-add each sample into its ray's slot.
    outputs = src.new_zeros((n_rays, src.shape[-1]))
    outputs.index_add_(0, ray_indices, src)
    return outputs
...@@ -73,7 +73,7 @@ def main(): ...@@ -73,7 +73,7 @@ def main():
rays_d = torch.randn((batch_size, 3), device=device) rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
ray_indices, t_starts, t_ends = nerfacc.ray_marching( ray_indices, t_starts, t_ends = nerfacc._ray_marching(
rays_o, rays_o,
rays_d, rays_d,
near_plane=0.1, near_plane=0.1,
...@@ -99,7 +99,7 @@ def main(): ...@@ -99,7 +99,7 @@ def main():
packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size) packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size)
fn = ( fn = (
lambda: nerfacc.vol_rendering._RenderingDensity.apply( lambda: nerfacc._vol_rendering._RenderingDensity.apply(
packed_info, t_starts, t_ends, sigmas, 0 packed_info, t_starts, t_ends, sigmas, 0
) )
.sum() .sum()
......
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
multi_line_output = 3 multi_line_output = 3
line_length = 80 line_length = 80
include_trailing_comma = true include_trailing_comma = true
skip=./examples/pycolmap skip=./examples/pycolmap,./benchmarks
\ No newline at end of file \ No newline at end of file
...@@ -29,7 +29,9 @@ def get_extensions(): ...@@ -29,7 +29,9 @@ def get_extensions():
from torch.utils.cpp_extension import CUDAExtension from torch.utils.cpp_extension import CUDAExtension
extensions_dir = osp.join("nerfacc", "cuda", "csrc") extensions_dir = osp.join("nerfacc", "cuda", "csrc")
sources = glob.glob(osp.join(extensions_dir, "*.cu")) sources = glob.glob(osp.join(extensions_dir, "*.cu")) + glob.glob(
osp.join(extensions_dir, "*.cpp")
)
# remove generated 'hip' files, in case of rebuilds # remove generated 'hip' files, in case of rebuilds
sources = [path for path in sources if "hip" not in path] sources = [path for path in sources if "hip" not in path]
......
import pytest
import torch
import nerfacc.cuda as _C
from nerfacc import ContractionType, contract, contract_inv
device = "cuda:0"
batch_size = 32
eps = 1e-6
# Bug fix: `torch.cuda.is_available` without parentheses is a function object
# (always truthy), so `not ...` was always False and the skip never fired on
# CPU-only machines. It must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_ContractionType():
    """Check that Python ContractionType enums map to their C++ counterparts."""
    ctype = ContractionType.AABB.to_cpp_version()
    assert ctype == _C.ContractionTypeGetter(0)
    ctype = ContractionType.UN_BOUNDED_TANH.to_cpp_version()
    assert ctype == _C.ContractionTypeGetter(1)
    ctype = ContractionType.UN_BOUNDED_SPHERE.to_cpp_version()
    assert ctype == _C.ContractionTypeGetter(2)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_identity():
    """AABB contraction with roi == [0, 1]^3 must be the identity map, and invertible."""
    x = torch.rand([batch_size, 3], device=device)
    roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    x_out = contract(x, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_out, x, atol=eps)
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_inv, x, atol=eps)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_aabb():
    """AABB contraction over [-1, 1]^3 is the affine map x * 0.5 + 0.5, and invertible."""
    x = torch.rand([batch_size, 3], device=device)
    roi = torch.tensor(
        [-1, -1, -1, 1, 1, 1], dtype=torch.float32, device=device
    )
    x_out = contract(x, roi=roi, type=ContractionType.AABB)
    x_out_tgt = x * 0.5 + 0.5
    assert torch.allclose(x_out, x_out_tgt, atol=eps)
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.AABB)
    assert torch.allclose(x_inv, x, atol=eps)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_tanh():
    """UN_BOUNDED_TANH contraction matches the closed-form tanh mapping, and is invertible."""
    x = torch.randn([batch_size, 3], device=device)
    roi = torch.tensor(
        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
    )
    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    # Reference: normalize into the roi, center at 0, squash with tanh,
    # then map back into [0, 1].
    x_out_tgt = (
        torch.tanh((x - roi[:3]) / (roi[3:] - roi[:3]) - 0.5) * 0.5 + 0.5
    )
    assert torch.allclose(x_out, x_out_tgt, atol=eps)
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_TANH)
    assert torch.allclose(x_inv, x, atol=eps)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_sphere():
    """UN_BOUNDED_SPHERE contraction maps into the radius-0.5 ball around 0.5, and is invertible."""
    x = torch.randn([batch_size, 3], device=device)
    roi = torch.tensor(
        [-0.2, -0.3, -0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
    )
    x_out = contract(x, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)
    assert ((x_out - 0.5).norm(dim=-1) < 0.5).all()
    x_inv = contract_inv(x_out, roi=roi, type=ContractionType.UN_BOUNDED_SPHERE)
    assert torch.allclose(x_inv, x, atol=eps)
if __name__ == "__main__":
    # Allow running this test module directly as a script (outside pytest).
    test_ContractionType()
    test_identity()
    test_aabb()
    test_tanh()
    test_sphere()
import pytest import pytest
import torch import torch
from nerfacc import OccupancyGrid
device = "cuda:0" device = "cuda:0"
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def occ_eval_fn(x: torch.Tensor) -> torch.Tensor: def test_ray_aabb_intersect():
"""Pesudo occupancy function: (N, 3) -> (N, 1).""" from nerfacc.grid import _ray_aabb_intersect, ray_aabb_intersect
return ((x - 0.5).norm(dim=-1, keepdim=True) < 0.5).float()
torch.manual_seed(42)
n_rays = 1000
n_aabbs = 100
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") rays_o = torch.rand((n_rays, 3), device=device)
def test_occ_grid(): rays_d = torch.randn((n_rays, 3), device=device)
roi_aabb = [0, 0, 0, 1, 1, 1] rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device) aabb_min = torch.rand((n_aabbs, 3), device=device)
occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1) aabb_max = aabb_min + torch.rand_like(aabb_min)
assert occ_grid.roi_aabb.shape == (6,) aabbs = torch.cat([aabb_min, aabb_max], dim=-1)
assert occ_grid.binary.shape == (1, 128, 128, 128)
# [n_rays, n_aabbs]
tmins, tmaxs, hits = ray_aabb_intersect(rays_o, rays_d, aabbs)
_tmins, _tmaxs, _hits = _ray_aabb_intersect(rays_o, rays_d, aabbs)
assert torch.allclose(tmins, _tmins), (tmins - _tmins).abs().max()
assert torch.allclose(tmaxs, _tmaxs), (tmaxs - _tmaxs).abs().max()
assert (hits == _hits).all(), (hits == _hits).float().mean()
# whether mid points are inside aabbs
tmids = torch.clamp((tmins + tmaxs) / 2, min=0.0)
points = tmids[:, :, None] * rays_d[:, None, :] + rays_o[:, None, :]
_hits = (
(points >= aabb_min[None, ...]) & (points <= aabb_max[None, ...])
).all(dim=-1)
assert torch.allclose(hits, _hits)
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_query_grid(): def test_traverse_grids():
roi_aabb = [0, 0, 0, 1, 1, 1] from nerfacc.grid import _enlarge_aabb, _query, traverse_grids
occ_grid = OccupancyGrid(roi_aabb=roi_aabb, resolution=128).to(device)
occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1) torch.manual_seed(42)
samples = torch.rand((100, 3), device=device) n_rays = 10
occs = occ_grid.query_occ(samples) n_aabbs = 4
assert occs.shape == (100,)
rays_o = torch.randn((n_rays, 3), device=device)
rays_d = torch.randn((n_rays, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device)
aabbs = torch.stack(
[_enlarge_aabb(base_aabb, 2**i) for i in range(n_aabbs)]
)
binaries = torch.rand((n_aabbs, 32, 32, 32), device=device) > 0.5
intervals, samples = traverse_grids(rays_o, rays_d, binaries, aabbs)
ray_indices = samples.ray_indices
t_starts = intervals.vals[intervals.is_left]
t_ends = intervals.vals[intervals.is_right]
positions = (
rays_o[ray_indices]
+ rays_d[ray_indices] * (t_starts + t_ends)[:, None] / 2.0
)
occs, selector = _query(positions, binaries, base_aabb)
assert occs.all(), occs.float().mean()
assert selector.all(), selector.float().mean()
if __name__ == "__main__": if __name__ == "__main__":
test_occ_grid() test_ray_aabb_intersect()
test_query_grid() test_traverse_grids()
import pytest
import torch
from nerfacc import ray_aabb_intersect
device = "cuda:0"
batch_size = 32
eps = 1e-6
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_intersection():
    """Rays starting inside the unit AABB: t_min is 0 and sampled points stay inside."""
    rays_o = torch.rand([batch_size, 3], device=device)
    rays_d = torch.randn([batch_size, 3], device=device)
    aabb = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)
    # Origins are sampled inside the box, so the entry time must be zero.
    assert (t_min == 0).all()
    # Any point between t_min and t_max must lie within the box.
    t = torch.rand_like(t_min) * (t_max - t_min) + t_min
    x = rays_o + t.unsqueeze(-1) * rays_d
    assert (x >= 0).all() and (x <= 1).all()
if __name__ == "__main__":
    # Allow running this test module directly as a script (outside pytest).
    test_intersection()
import pytest import pytest
import torch import torch
from nerfacc import pack_data, pack_info, unpack_data, unpack_info
device = "cuda:0" device = "cuda:0"
batch_size = 32
eps = 1e-6
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_pack_data(): def test_pack_info():
n_rays = 2 from nerfacc.pack import pack_info
n_samples = 3
data = torch.rand((n_rays, n_samples, 2), device=device, requires_grad=True)
mask = torch.rand((n_rays, n_samples), device=device) > 0.5
packed_data, packed_info = pack_data(data, mask)
unpacked_data = unpack_data(packed_info, packed_data, n_samples)
unpacked_data.sum().backward()
assert (data.grad[mask] == 1).all()
assert torch.allclose(
unpacked_data.sum(dim=1), (data * mask[..., None]).sum(dim=1)
)
_packed_info = torch.tensor(
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_unpack_info():
packed_info = torch.tensor(
[[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
) )
ray_indices_tgt = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) )
ray_indices = unpack_info(packed_info, n_samples=5) packed_info = pack_info(ray_indices, n_rays=_packed_info.shape[0])
packed_info_2 = pack_info(ray_indices, n_rays=packed_info.shape[0]) assert (packed_info == _packed_info).all()
assert torch.allclose(packed_info.int(), packed_info_2.int())
assert torch.allclose(ray_indices, ray_indices_tgt)
if __name__ == "__main__": if __name__ == "__main__":
test_pack_data() test_pack_info()
test_unpack_info()
import pytest
import torch
device = "cuda:0"
def _create_intervals(n_rays, n_samples, flat=False):
    """Build a random `RayIntervals` fixture.

    With ``flat=False`` returns a batched spec holding only sorted edge
    values of shape (n_rays, n_samples + 1). With ``flat=True`` returns a
    flattened spec where a random subset of intervals is kept, together
    with packed_info and the is_left/is_right edge flags.
    """
    from nerfacc.data_specs import RayIntervals

    torch.manual_seed(42)
    # Sorted random interval edges per ray: (n_rays, n_samples + 1).
    edges = torch.sort(torch.rand((n_rays, n_samples + 1), device=device), -1)[0]
    # Randomly keep ~half of the intervals.
    keep = torch.rand((n_rays, n_samples), device=device) > 0.5
    pad = torch.zeros((n_rays, 1), device=device, dtype=torch.bool)
    # An edge is a left (right) boundary iff the interval after (before) it is kept.
    left_flags = torch.cat([keep, pad], dim=-1)
    right_flags = torch.cat([pad, keep], dim=-1)
    if not flat:
        return RayIntervals(vals=edges)
    edge_mask = left_flags | right_flags
    chunk_cnts = edge_mask.long().sum(-1)
    chunk_starts = torch.cumsum(chunk_cnts, 0) - chunk_cnts
    packed_info = torch.stack([chunk_starts, chunk_cnts], -1)
    return RayIntervals(
        edges[edge_mask],
        packed_info,
        is_left=left_flags[edge_mask],
        is_right=right_flags[edge_mask],
    )
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_searchsorted():
    """nerfacc searchsorted must agree with torch.searchsorted(right=True) on batched intervals."""
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import searchsorted

    torch.manual_seed(42)
    query: RayIntervals = _create_intervals(10, 100, flat=False)
    key: RayIntervals = _create_intervals(10, 100, flat=False)
    ids_left, ids_right = searchsorted(key, query)
    y = key.vals.gather(-1, ids_right)
    # Reference implementation via torch, clamped to valid indices.
    _ids_right = torch.searchsorted(key.vals, query.vals, right=True)
    _ids_right = torch.clamp(_ids_right, 0, key.vals.shape[-1] - 1)
    _y = key.vals.gather(-1, _ids_right)
    assert torch.allclose(ids_right, _ids_right)
    assert torch.allclose(y, _y)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_importance_sampling():
    """Batched importance_sampling must match the per-ray reference `_sample_from_weighted`."""
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import _sample_from_weighted, importance_sampling

    torch.manual_seed(42)
    intervals: RayIntervals = _create_intervals(5, 100, flat=False)
    cdfs = torch.rand_like(intervals.vals)
    cdfs = torch.sort(cdfs, -1)[0]
    n_intervels_per_ray = 100
    stratified = False
    _intervals, _samples = importance_sampling(
        intervals,
        cdfs,
        n_intervels_per_ray,
        stratified,
    )
    # Compare against the reference, one ray at a time.
    for i in range(intervals.vals.shape[0]):
        _vals, _mids = _sample_from_weighted(
            intervals.vals[i : i + 1],
            cdfs[i : i + 1, 1:] - cdfs[i : i + 1, :-1],
            n_intervels_per_ray,
            stratified,
            intervals.vals[i].min(),
            intervals.vals[i].max(),
        )
        assert torch.allclose(_intervals.vals[i : i + 1], _vals, atol=1e-4)
        assert torch.allclose(_samples.vals[i : i + 1], _mids, atol=1e-4)
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pdf_loss():
    """`_pdf_loss` on interval specs must match the weight-based `_lossfun_outer` reference."""
    from nerfacc.data_specs import RayIntervals
    from nerfacc.estimators.prop_net import _lossfun_outer, _pdf_loss
    from nerfacc.pdf import _sample_from_weighted, importance_sampling

    torch.manual_seed(42)
    intervals: RayIntervals = _create_intervals(5, 100, flat=False)
    cdfs = torch.rand_like(intervals.vals)
    cdfs = torch.sort(cdfs, -1)[0]
    n_intervels_per_ray = 10
    stratified = False
    _intervals, _samples = importance_sampling(
        intervals,
        cdfs,
        n_intervels_per_ray,
        stratified,
    )
    _cdfs = torch.rand_like(_intervals.vals)
    _cdfs = torch.sort(_cdfs, -1)[0]
    loss = _pdf_loss(intervals, cdfs, _intervals, _cdfs)
    # Reference operates on per-interval weights (CDF differences).
    loss2 = _lossfun_outer(
        intervals.vals,
        cdfs[:, 1:] - cdfs[:, :-1],
        _intervals.vals,
        _cdfs[:, 1:] - _cdfs[:, :-1],
    )
    assert torch.allclose(loss, loss2, atol=1e-4)
if __name__ == "__main__":
    # Allow running this test module directly as a script (outside pytest).
    test_importance_sampling()
    test_searchsorted()
    test_pdf_loss()
import pytest
import torch
from nerfacc import OccupancyGrid, ray_marching, unpack_info
device = "cuda:0"
batch_size = 128
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_near_far():
    """Smoke test: ray_marching with only near/far planes runs without error."""
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-3,
    )
# Bug fix: call `torch.cuda.is_available()` — the bare function object is
# always truthy, so the previous skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_grid():
    """Marching against a fully-occupied grid must yield samples inside the grid's roi AABB."""
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
    grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1]).to(device)
    # Mark every cell occupied so all samples must land inside the AABB.
    grid._binary[:] = True
    ray_indices, t_starts, t_ends = ray_marching(
        rays_o,
        rays_d,
        grid=grid,
        near_plane=0.0,
        far_plane=1.0,
        render_step_size=1e-2,
    )
    samples = (
        rays_o[ray_indices] + rays_d[ray_indices] * (t_starts + t_ends) / 2.0
    )
    assert (samples <= grid.roi_aabb[3:].unsqueeze(0)).all()
    assert (samples >= grid.roi_aabb[:3].unsqueeze(0)).all()
if __name__ == "__main__":
    # Allow running this test module directly as a script (outside pytest).
    test_marching_with_near_far()
    test_marching_with_grid()
import pytest import pytest
import torch import torch
from nerfacc import (
accumulate_along_rays,
render_transmittance_from_density,
render_visibility,
render_weight_from_alpha,
render_weight_from_density,
rendering,
)
device = "cuda:0" device = "cuda:0"
batch_size = 32
eps = 1e-6
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_render_visibility(): def test_render_visibility():
from nerfacc.volrend import render_visibility_from_alpha
ray_indices = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (samples,) ) # (all_samples,)
alphas = torch.tensor( alphas = torch.tensor(
[0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
).unsqueeze( ) # (all_samples,)
-1
) # (n_samples, 1)
# transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
vis = render_visibility( vis = render_visibility_from_alpha(
alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0 alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0
) )
vis_tgt = torch.tensor( vis_tgt = torch.tensor(
...@@ -36,7 +25,7 @@ def test_render_visibility(): ...@@ -36,7 +25,7 @@ def test_render_visibility():
assert torch.allclose(vis, vis_tgt) assert torch.allclose(vis, vis_tgt)
# transmittance: [1.0, 1.0, 1.0, 0.2, 0.04] # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04]
vis = render_visibility( vis = render_visibility_from_alpha(
alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35 alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35
) )
vis_tgt = torch.tensor( vis_tgt = torch.tensor(
...@@ -47,43 +36,48 @@ def test_render_visibility(): ...@@ -47,43 +36,48 @@ def test_render_visibility():
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_render_weight_from_alpha(): def test_render_weight_from_alpha():
from nerfacc.volrend import render_weight_from_alpha
ray_indices = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (samples,) ) # (all_samples,)
alphas = torch.tensor( alphas = torch.tensor(
[0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
).unsqueeze( ) # (all_samples,)
-1
) # (n_samples, 1)
# transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
weights = render_weight_from_alpha( weights, _ = render_weight_from_alpha(
alphas, ray_indices=ray_indices, n_rays=3 alphas, ray_indices=ray_indices, n_rays=3
) )
weights_tgt = torch.tensor( weights_tgt = torch.tensor(
[1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5], [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5],
dtype=torch.float32, dtype=torch.float32,
device=device, device=device,
).unsqueeze(-1) )
assert torch.allclose(weights, weights_tgt) assert torch.allclose(weights, weights_tgt)
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_render_weight_from_density(): def test_render_weight_from_density():
from nerfacc.volrend import (
render_weight_from_alpha,
render_weight_from_density,
)
ray_indices = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (samples,) ) # (all_samples,)
sigmas = torch.rand( sigmas = torch.rand(
(ray_indices.shape[0], 1), device=device (ray_indices.shape[0],), device=device
) # (n_samples, 1) ) # (all_samples,)
t_starts = torch.rand_like(sigmas) t_starts = torch.rand_like(sigmas)
t_ends = torch.rand_like(sigmas) + 1.0 t_ends = torch.rand_like(sigmas) + 1.0
alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
weights = render_weight_from_density( weights, _, _ = render_weight_from_density(
t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
) )
weights_tgt = render_weight_from_alpha( weights_tgt, _ = render_weight_from_alpha(
alphas, ray_indices=ray_indices, n_rays=3 alphas, ray_indices=ray_indices, n_rays=3
) )
assert torch.allclose(weights, weights_tgt) assert torch.allclose(weights, weights_tgt)
...@@ -91,70 +85,55 @@ def test_render_weight_from_density(): ...@@ -91,70 +85,55 @@ def test_render_weight_from_density():
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_accumulate_along_rays(): def test_accumulate_along_rays():
from nerfacc.volrend import accumulate_along_rays
ray_indices = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (n_rays,) ) # (all_samples,)
weights = torch.tensor( weights = torch.tensor(
[0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
).unsqueeze(-1) ) # (all_samples,)
values = torch.rand((5, 2), device=device) # (n_samples, 1) values = torch.rand((5, 2), device=device) # (all_samples, 2)
ray_values = accumulate_along_rays( ray_values = accumulate_along_rays(
weights, ray_indices, values=values, n_rays=3 weights, values=values, ray_indices=ray_indices, n_rays=3
) )
assert ray_values.shape == (3, 2) assert ray_values.shape == (3, 2)
assert torch.allclose(ray_values[0, :], weights[0, :] * values[0, :]) assert torch.allclose(ray_values[0, :], weights[0, None] * values[0, :])
assert (ray_values[1, :] == 0).all() assert (ray_values[1, :] == 0).all()
assert torch.allclose( assert torch.allclose(
ray_values[2, :], (weights[1:, :] * values[1:]).sum(dim=0) ray_values[2, :], (weights[1:, None] * values[1:]).sum(dim=0)
) )
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_rendering(): def test_grads():
def rgb_sigma_fn(t_starts, t_ends, ray_indices): from nerfacc.volrend import (
return torch.hstack([t_starts] * 3), t_starts render_transmittance_from_density,
render_weight_from_alpha,
ray_indices = torch.tensor( render_weight_from_density,
[0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (samples,)
sigmas = torch.rand(
(ray_indices.shape[0], 1), device=device
) # (n_samples, 1)
t_starts = torch.rand_like(sigmas)
t_ends = torch.rand_like(sigmas) + 1.0
_, _, _ = rendering(
t_starts,
t_ends,
ray_indices=ray_indices,
n_rays=3,
rgb_sigma_fn=rgb_sigma_fn,
) )
@pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device")
def test_grads():
ray_indices = torch.tensor( ray_indices = torch.tensor(
[0, 2, 2, 2, 2], dtype=torch.int64, device=device [0, 2, 2, 2, 2], dtype=torch.int64, device=device
) # (samples,) ) # (all_samples,)
packed_info = torch.tensor( packed_info = torch.tensor(
[[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device [[0, 1], [1, 0], [1, 4]], dtype=torch.long, device=device
) )
sigmas = torch.tensor([[0.4], [0.8], [0.1], [0.8], [0.1]], device="cuda") sigmas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1], device=device)
sigmas.requires_grad = True sigmas.requires_grad = True
t_starts = torch.rand_like(sigmas) t_starts = torch.rand_like(sigmas)
t_ends = t_starts + 1.0 t_ends = t_starts + 1.0
weights_ref = torch.tensor( weights_ref = torch.tensor(
[[0.3297], [0.5507], [0.0428], [0.2239], [0.0174]], device="cuda" [0.3297, 0.5507, 0.0428, 0.2239, 0.0174], device=device
) )
sigmas_grad_ref = torch.tensor( sigmas_grad_ref = torch.tensor(
[[0.6703], [0.1653], [0.1653], [0.1653], [0.1653]], device="cuda" [0.6703, 0.1653, 0.1653, 0.1653, 0.1653], device=device
) )
# naive impl. trans from sigma # naive impl. trans from sigma
trans = render_transmittance_from_density( trans, _ = render_transmittance_from_density(
t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
) )
weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts)))
...@@ -165,7 +144,7 @@ def test_grads(): ...@@ -165,7 +144,7 @@ def test_grads():
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
# naive impl. trans from alpha # naive impl. trans from alpha
trans = render_transmittance_from_density( trans, _ = render_transmittance_from_density(
t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3
) )
weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts)))
...@@ -175,7 +154,7 @@ def test_grads(): ...@@ -175,7 +154,7 @@ def test_grads():
assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(weights_ref, weights, atol=1e-4)
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
weights = render_weight_from_density( weights, _, _ = render_weight_from_density(
t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
) )
weights.sum().backward() weights.sum().backward()
...@@ -184,7 +163,7 @@ def test_grads(): ...@@ -184,7 +163,7 @@ def test_grads():
assert torch.allclose(weights_ref, weights, atol=1e-4) assert torch.allclose(weights_ref, weights, atol=1e-4)
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
weights = render_weight_from_density( weights, _, _ = render_weight_from_density(
t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3
) )
weights.sum().backward() weights.sum().backward()
...@@ -194,7 +173,7 @@ def test_grads(): ...@@ -194,7 +173,7 @@ def test_grads():
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
weights = render_weight_from_alpha( weights, _ = render_weight_from_alpha(
alphas, ray_indices=ray_indices, n_rays=3 alphas, ray_indices=ray_indices, n_rays=3
) )
weights.sum().backward() weights.sum().backward()
...@@ -204,7 +183,7 @@ def test_grads(): ...@@ -204,7 +183,7 @@ def test_grads():
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
weights = render_weight_from_alpha( weights, _ = render_weight_from_alpha(
alphas, packed_info=packed_info, n_rays=3 alphas, packed_info=packed_info, n_rays=3
) )
weights.sum().backward() weights.sum().backward()
...@@ -214,10 +193,35 @@ def test_grads(): ...@@ -214,10 +193,35 @@ def test_grads():
assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)
# BUG FIX: the original guard was `not torch.cuda.is_available` — the bare
# function object is always truthy, so the skip condition was always False
# and the test crashed on CPU-only machines instead of skipping.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_rendering():
    """Smoke-test ``volrend.rendering`` on a small packed sample batch.

    Only checks that the full rendering pipeline runs without error on
    packed (ray_indices-driven) inputs; the four outputs are discarded.
    """
    from nerfacc.volrend import rendering

    def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        # Dummy radiance field: rgb is t_starts broadcast to 3 channels,
        # density is t_starts itself.
        return torch.stack([t_starts] * 3, dim=-1), t_starts

    # 5 samples spread over 3 rays: one on ray 0, four on ray 2, none on ray 1.
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    sigmas = torch.rand(
        (ray_indices.shape[0],), device=device
    )  # (all_samples,)
    t_starts = torch.rand_like(sigmas)
    # +1.0 guarantees every interval has strictly positive length.
    t_ends = torch.rand_like(sigmas) + 1.0

    _, _, _, _ = rendering(
        t_starts,
        t_ends,
        ray_indices=ray_indices,
        n_rays=3,
        rgb_sigma_fn=rgb_sigma_fn,
    )
# Reconstructed from side-by-side diff residue (each line was duplicated);
# this is the new-side version, which adds the test_rendering() call.
if __name__ == "__main__":
    test_render_visibility()
    test_render_weight_from_alpha()
    test_render_weight_from_density()
    test_accumulate_along_rays()
    test_grads()
    test_rendering()
import pytest
import torch

# Every test in this file allocates tensors on this device and is guarded
# by a @pytest.mark.skipif CUDA check.
device = "cuda:0"
# BUG FIX: `not torch.cuda.is_available` tested the function object (always
# truthy -> condition always False), so the test was never skipped without
# CUDA. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_inclusive_sum():
    """Batched ``inclusive_sum`` must match the packed (flattened) variant.

    Runs the same data through the dense ``[n_rays, n_samples]`` path and
    the packed path driven by ``packed_info``, then checks that both the
    forward outputs and the input gradients agree.
    """
    from nerfacc.scan import inclusive_sum

    torch.manual_seed(42)
    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # Dense (batched) path.
    outputs1 = inclusive_sum(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # Packed path: each chunk starts at a multiple of n_samples and covers
    # exactly one row of the dense layout, so results must be identical.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = inclusive_sum(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()

    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)
# BUG FIX: `not torch.cuda.is_available` tested the function object (always
# truthy -> condition always False), so the test was never skipped without
# CUDA. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_exclusive_sum():
    """Batched ``exclusive_sum`` must match the packed (flattened) variant.

    Same dense-vs-packed comparison as ``test_inclusive_sum``; the forward
    comparison uses a loose ``atol`` because the two paths accumulate in a
    different order (see TODO below).
    """
    from nerfacc.scan import exclusive_sum

    torch.manual_seed(42)
    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # Dense (batched) path.
    outputs1 = exclusive_sum(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # Packed path: one chunk per dense row.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = exclusive_sum(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()

    # TODO: check exclusive sum. numeric error?
    # print((outputs1 - outputs2).abs().max())  # 0.0002
    assert torch.allclose(outputs1, outputs2, atol=3e-4)
    assert torch.allclose(grad1, grad2)
# BUG FIX: `not torch.cuda.is_available` tested the function object (always
# truthy -> condition always False), so the test was never skipped without
# CUDA. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_inclusive_prod():
    """Batched ``inclusive_prod`` must match the packed (flattened) variant.

    Dense-vs-packed comparison of both the forward outputs and the input
    gradients, mirroring ``test_inclusive_sum``.
    """
    from nerfacc.scan import inclusive_prod

    torch.manual_seed(42)
    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # Dense (batched) path.
    outputs1 = inclusive_prod(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # Packed path: one chunk per dense row.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = inclusive_prod(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()

    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)
# BUG FIX: `not torch.cuda.is_available` tested the function object (always
# truthy -> condition always False), so the test was never skipped without
# CUDA. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_exclusive_prod():
    """Batched ``exclusive_prod`` must match the packed (flattened) variant.

    Dense-vs-packed comparison of both the forward outputs and the input
    gradients, mirroring ``test_exclusive_sum``.
    """
    from nerfacc.scan import exclusive_prod

    torch.manual_seed(42)
    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # Dense (batched) path.
    outputs1 = exclusive_prod(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # Packed path: one chunk per dense row.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = exclusive_prod(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()

    # TODO: check exclusive prod. numeric error?  (comment previously said
    # "exclusive sum" — copy-paste from test_exclusive_sum)
    # print((outputs1 - outputs2).abs().max())
    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)
if __name__ == "__main__":
    # Run every scan test in order when invoked directly as a script.
    for _test_fn in (
        test_inclusive_sum,
        test_exclusive_sum,
        test_inclusive_prod,
        test_exclusive_prod,
    ):
        _test_fn()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment