Unverified Commit 8dcfbad9 authored by Ruilong Li (李瑞龙), committed by GitHub

Reformat (#31)



* seems working

* contraction func in cuda

* Update type

* More type updates

* disable DDA for contraction

* update contraction performance in readme

* 360 data: Garden

* eval at max_steps

* add performance of 360 to readme

* fix contraction scaling

* tiny hot fix

* new volrend

* cleanup ray_marching.cu

* cleanup backend

* tests

* cleaning up Grid

* fix doc for grid base class

* check and fix for contraction

* test grid

* rendering and marching

* transmittance_compress verified

* rendering is indeed faster

* pipeline is working

* lego example

* cleanup

* cuda folder is cleaned up! finally!

* cuda formatting

* contraction verify

* upgrade grid

* test for ray marching

* pipeline

* ngp with contraction

* train_ngp runs but slow

* transmittance separated into two. Now NGP is as fast as before

* verified faster than before

* bug fix for contraction

* ngp contraction fix

* tiny cleanup

* contraction works! yay!

* contraction with tanh seems working

* minor update

* support alpha rendering

* absorb visibility to ray marching

* tiny import update

* get rid of contraction temperature

* doc for ContractionType

* doc for Grid

* doc for grid.py is done

* doc for ray marching

* rendering function

* fix doc for rendering

* doc for vol rend

* autosummary for utils

* fix autosummary line break

* utils docs

* api doc is done

* starting work on examples

* contraction for ngp is in python now

* further clean up examples

* mlp nerf is running

* dnerf is in

* update readme command

* merge

* disable pylint error for now

* reformatting and skip tests without cuda

* fix the type issue for contractiontype

* fix cuda attribute issue

* bump to 0.1.0
Co-authored-by: Matt Tancik <tancik@berkeley.edu>
parent a7611603
#include "include/helpers_cuda.h"
#include "include/helpers_math.h"
#include "include/helpers_contraction.h"
inline __device__ __host__ float calc_dt(
const float t, const float cone_angle,
const float dt_min, const float dt_max)
{
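// Step size grows linearly with distance t (cone_angle = 0 gives a
// constant step), clamped to [dt_min, dt_max].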
return clamp(t * cone_angle, dt_min, dt_max);
}
inline __device__ __host__ int grid_idx_at(
const float3 xyz_unit, const int3 grid_res)
{
// xyz should always be in [0, 1]^3.
int3 ixyz = make_int3(xyz_unit * make_float3(grid_res));
ixyz = clamp(ixyz, make_int3(0, 0, 0), grid_res - 1);
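// Flatten (x, y, z) in row-major order: idx = x * res_y * res_z + y * res_z + z.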
int3 grid_offset = make_int3(grid_res.y * grid_res.z, grid_res.z, 1);
int idx = dot(ixyz, grid_offset);
return idx;
}
inline __device__ __host__ bool grid_occupied_at(
const float3 xyz,
const float3 roi_min, const float3 roi_max,
ContractionType type,
const int3 grid_res, const bool *grid_binary)
{
if (type == ContractionType::AABB &&
(xyz.x < roi_min.x || xyz.x > roi_max.x ||
xyz.y < roi_min.y || xyz.y > roi_max.y ||
xyz.z < roi_min.z || xyz.z > roi_max.z))
{
return false;
}
float3 xyz_unit = apply_contraction(
xyz, roi_min, roi_max, type);
int idx = grid_idx_at(xyz_unit, grid_res);
return grid_binary[idx];
}
// DDA-like step: distance along the ray to the next voxel boundary of the grid.
inline __device__ __host__ float distance_to_next_voxel(
const float3 xyz, const float3 dir, const float3 inv_dir,
const float3 roi_min, const float3 roi_max, const int3 grid_res)
{
float3 _occ_res = make_float3(grid_res);
float3 _xyz = roi_to_unit(xyz, roi_min, roi_max) * _occ_res;
float3 txyz = ((floorf(_xyz + 0.5f + 0.5f * sign(dir)) - _xyz) * inv_dir) / _occ_res * (roi_max - roi_min);
float t = min(min(txyz.x, txyz.y), txyz.z);
return fmaxf(t, 0.0f);
}
inline __device__ __host__ float advance_to_next_voxel(
const float t, const float dt_min,
const float3 xyz, const float3 dir, const float3 inv_dir,
const float3 roi_min, const float3 roi_max, const int3 grid_res)
{
// Regular stepping (may be slower but matches non-empty space)
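// Stepping in dt_min increments toward the voxel boundary (instead of
// jumping straight to it) keeps samples on the same marching schedule
// as in occupied space.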
float t_target = t + distance_to_next_voxel(
xyz, dir, inv_dir, roi_min, roi_max, grid_res);
float _t = t;
do
{
_t += dt_min;
} while (_t < t_target);
return _t;
}
// -------------------------------------------------------------------------------
// Raymarching
// -------------------------------------------------------------------------------
__global__ void ray_marching_kernel(
// rays info
const uint32_t n_rays,
const float *rays_o, // shape (n_rays, 3)
const float *rays_d, // shape (n_rays, 3)
const float *t_min, // shape (n_rays,)
const float *t_max, // shape (n_rays,)
// occupancy grid & contraction
const float *roi,
const int3 grid_res,
const bool *grid_binary, // shape (reso_x, reso_y, reso_z)
const ContractionType type,
// sampling
const float step_size,
const float cone_angle,
const int *packed_info,
// first round outputs
int *num_steps,
// second round outputs
float *t_starts,
float *t_ends)
{
CUDA_GET_THREAD_ID(i, n_rays);
bool is_first_round = (packed_info == nullptr);
// locate
rays_o += i * 3;
rays_d += i * 3;
t_min += i;
t_max += i;
if (is_first_round)
{
num_steps += i;
}
else
{
int base = packed_info[i * 2 + 0];
int steps = packed_info[i * 2 + 1];
t_starts += base;
t_ends += base;
}
const float3 origin = make_float3(rays_o[0], rays_o[1], rays_o[2]);
const float3 dir = make_float3(rays_d[0], rays_d[1], rays_d[2]);
const float3 inv_dir = 1.0f / dir;
const float near = t_min[0], far = t_max[0];
const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
// TODO: compute dt_max from occ resolution.
float dt_min = step_size;
float dt_max = 1e10f;
int j = 0;
float t0 = near;
float dt = calc_dt(t0, cone_angle, dt_min, dt_max);
float t1 = t0 + dt;
float t_mid = (t0 + t1) * 0.5f;
while (t_mid < far)
{
// current center
const float3 xyz = origin + t_mid * dir;
if (grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_binary))
{
if (!is_first_round)
{
t_starts[j] = t0;
t_ends[j] = t1;
}
++j;
// march to next sample
t0 = t1;
t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
t_mid = (t0 + t1) * 0.5f;
}
else
{
// march to next sample
switch (type)
{
case ContractionType::AABB:
// no contraction
t_mid = advance_to_next_voxel(
t_mid, dt_min, xyz, dir, inv_dir, roi_min, roi_max, grid_res);
dt = calc_dt(t_mid, cone_angle, dt_min, dt_max);
t0 = t_mid - dt * 0.5f;
t1 = t_mid + dt * 0.5f;
break;
default:
// any type of scene contraction does not work with DDA.
t0 = t1;
t1 = t0 + calc_dt(t0, cone_angle, dt_min, dt_max);
t_mid = (t0 + t1) * 0.5f;
break;
}
}
}
if (is_first_round)
{
*num_steps = j;
}
return;
}
std::vector<torch::Tensor> ray_marching(
// rays
const torch::Tensor rays_o,
const torch::Tensor rays_d,
const torch::Tensor t_min,
const torch::Tensor t_max,
// occupancy grid & contraction
const torch::Tensor roi,
const torch::Tensor grid_binary,
const ContractionType type,
// sampling
const float step_size,
const float cone_angle)
{
DEVICE_GUARD(rays_o);
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(t_min);
CHECK_INPUT(t_max);
CHECK_INPUT(roi);
CHECK_INPUT(grid_binary);
TORCH_CHECK(rays_o.ndimension() == 2 && rays_o.size(1) == 3);
TORCH_CHECK(rays_d.ndimension() == 2 && rays_d.size(1) == 3);
TORCH_CHECK(t_min.ndimension() == 1);
TORCH_CHECK(t_max.ndimension() == 1);
TORCH_CHECK(roi.ndimension() == 1 && roi.size(0) == 6);
TORCH_CHECK(grid_binary.ndimension() == 3);
const int n_rays = rays_o.size(0);
const int3 grid_res = make_int3(
grid_binary.size(0), grid_binary.size(1), grid_binary.size(2));
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// helper counter
torch::Tensor num_steps = torch::zeros(
{n_rays}, rays_o.options().dtype(torch::kInt32));
// count number of samples per ray
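// Pass 1 of 2: launched with packed_info == nullptr, so the kernel only
// counts the samples that survive the occupancy test (num_steps).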
ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// rays
n_rays,
rays_o.data_ptr<float>(),
rays_d.data_ptr<float>(),
t_min.data_ptr<float>(),
t_max.data_ptr<float>(),
// occupancy grid & contraction
roi.data_ptr<float>(),
grid_res,
grid_binary.data_ptr<bool>(),
type,
// sampling
step_size,
cone_angle,
nullptr, /* packed_info */
// outputs
num_steps.data_ptr<int>(),
nullptr, /* t_starts */
nullptr /* t_ends */);
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
// output samples starts and ends
int total_steps = cum_steps[cum_steps.size(0) - 1].item<int>();
torch::Tensor t_starts = torch::zeros({total_steps, 1}, rays_o.options());
torch::Tensor t_ends = torch::zeros({total_steps, 1}, rays_o.options());
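// Pass 2 of 2: relaunch with per-ray offsets (packed_info) so each ray
// writes its sample intervals into the packed t_starts/t_ends buffers.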
ray_marching_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// rays
n_rays,
rays_o.data_ptr<float>(),
rays_d.data_ptr<float>(),
t_min.data_ptr<float>(),
t_max.data_ptr<float>(),
// occupancy grid & contraction
roi.data_ptr<float>(),
grid_res,
grid_binary.data_ptr<bool>(),
type,
// sampling
step_size,
cone_angle,
packed_info.data_ptr<int>(),
// outputs
nullptr, /* num_steps */
t_starts.data_ptr<float>(),
t_ends.data_ptr<float>());
return {packed_info, t_starts, t_ends};
}
// -----------------------------------------------------------------------------
// Ray index for each sample
// -----------------------------------------------------------------------------
__global__ void ray_indices_kernel(
// input
const int n_rays,
const int *packed_info,
// output
int *ray_indices)
{
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0)
return;
ray_indices += base;
for (int j = 0; j < steps; ++j)
{
ray_indices[j] = i;
}
}
torch::Tensor unpack_to_ray_indices(const torch::Tensor packed_info)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
const int n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
int n_samples = packed_info[n_rays - 1].sum(0).item<int>();
torch::Tensor ray_indices = torch::zeros(
{n_samples}, packed_info.options().dtype(torch::kInt32));
ray_indices_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
packed_info.data_ptr<int>(),
ray_indices.data_ptr<int>());
return ray_indices;
}
// ----------------------------------------------------------------------------
// Query the occupancy grid
// ----------------------------------------------------------------------------
__global__ void query_occ_kernel(
// rays info
const uint32_t n_samples,
const float *samples, // shape (n_samples, 3)
// occupancy grid & contraction
const float *roi,
const int3 grid_res,
const bool *grid_binary, // shape (reso_x, reso_y, reso_z)
const ContractionType type,
// outputs
bool *occs)
{
CUDA_GET_THREAD_ID(i, n_samples);
// locate
samples += i * 3;
occs += i;
const float3 roi_min = make_float3(roi[0], roi[1], roi[2]);
const float3 roi_max = make_float3(roi[3], roi[4], roi[5]);
const float3 xyz = make_float3(samples[0], samples[1], samples[2]);
*occs = grid_occupied_at(xyz, roi_min, roi_max, type, grid_res, grid_binary);
return;
}
torch::Tensor query_occ(
const torch::Tensor samples,
// occupancy grid & contraction
const torch::Tensor roi,
const torch::Tensor grid_binary,
const ContractionType type)
{
DEVICE_GUARD(samples);
CHECK_INPUT(samples);
const int n_samples = samples.size(0);
const int3 grid_res = make_int3(
grid_binary.size(0), grid_binary.size(1), grid_binary.size(2));
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);
torch::Tensor occs = torch::zeros(
{n_samples}, samples.options().dtype(torch::kBool));
query_occ_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_samples,
samples.data_ptr<float>(),
// grid
roi.data_ptr<float>(),
grid_res,
grid_binary.data_ptr<bool>(),
type,
// outputs
occs.data_ptr<bool>());
return occs;
}
#include "include/helpers_cuda.h"
template <typename scalar_t>
__global__ void rendering_forward_kernel(
const uint32_t n_rays,
const int *packed_info, // input ray & point indices.
const scalar_t *starts, // input start t
const scalar_t *ends, // input end t
const scalar_t *sigmas, // input density after activation
const scalar_t *alphas, // input alpha (opacity) values.
const scalar_t early_stop_eps, // transmittance threshold for early stop
// outputs: should be all-zero initialized
int *num_steps, // the number of valid steps for each ray
scalar_t *weights, // the rendering weight for each sample
bool *compact_selector // the samples for which we need to compute gradients
)
{
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0)
return;
if (alphas != nullptr)
{
// rendering with alpha
alphas += base;
}
else
{
// rendering with density
starts += base;
ends += base;
sigmas += base;
}
if (num_steps != nullptr)
{
num_steps += i;
}
if (weights != nullptr)
{
weights += base;
}
if (compact_selector != nullptr)
{
compact_selector += base;
}
// accumulated rendering
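// Front-to-back compositing: per-sample opacity alpha_j comes either
// directly from `alphas`, or from density via alpha_j = 1 - exp(-sigma_j * delta_j).
// The weight is weight_j = alpha_j * T_j with transmittance
// T_j = prod_{k < j} (1 - alpha_k); once T falls below early_stop_eps the
// remaining samples are negligible and the loop exits early.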
scalar_t T = 1.f;
int j = 0;
for (; j < steps; ++j)
{
if (T < early_stop_eps)
{
break;
}
scalar_t alpha;
if (alphas != nullptr)
{
// rendering with alpha
alpha = alphas[j];
}
else
{
// rendering with density
scalar_t delta = ends[j] - starts[j];
alpha = 1.f - __expf(-sigmas[j] * delta);
}
const scalar_t weight = alpha * T;
T *= (1.f - alpha);
if (weights != nullptr)
{
weights[j] = weight;
}
if (compact_selector != nullptr)
{
compact_selector[j] = true;
}
}
if (num_steps != nullptr)
{
*num_steps = j;
}
return;
}
template <typename scalar_t>
__global__ void rendering_backward_kernel(
const uint32_t n_rays,
const int *packed_info, // input ray & point indices.
const scalar_t *starts, // input start t
const scalar_t *ends, // input end t
const scalar_t *sigmas, // input density after activation
const scalar_t *alphas, // input alpha (opacity) values.
const scalar_t early_stop_eps, // transmittance threshold for early stop
const scalar_t *weights, // forward output
const scalar_t *grad_weights, // input gradients
// if alphas was given, we compute the gradients for alphas.
// otherwise, we compute the gradients for sigmas.
scalar_t *grad_sigmas, // output gradients
scalar_t *grad_alphas // output gradients
)
{
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0)
return;
if (alphas != nullptr)
{
// rendering with alpha
alphas += base;
grad_alphas += base;
}
else
{
// rendering with density
starts += base;
ends += base;
sigmas += base;
grad_sigmas += base;
}
weights += base;
grad_weights += base;
scalar_t accum = 0;
for (int j = 0; j < steps; ++j)
{
accum += grad_weights[j] * weights[j];
}
// backward of accumulated rendering
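// With w_j = alpha_j * T_j, the density gradient reduces to
//   dL/d sigma_j = delta_j * (grad_w_j * T_j - sum_{k >= j} grad_w_k * w_k),
// where `accum` holds the suffix sum sum_{k >= j} grad_w_k * w_k and is
// shrunk by one term per iteration.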
scalar_t T = 1.f;
for (int j = 0; j < steps; ++j)
{
if (T < early_stop_eps)
{
break;
}
scalar_t alpha;
if (alphas != nullptr)
{
// rendering with alpha
alpha = alphas[j];
grad_alphas[j] = (grad_weights[j] * T - accum) / fmaxf(1.f - alpha, 1e-10f);
}
else
{
// rendering with density
scalar_t delta = ends[j] - starts[j];
alpha = 1.f - __expf(-sigmas[j] * delta);
grad_sigmas[j] = (grad_weights[j] * T - accum) * delta;
}
accum -= grad_weights[j] * weights[j];
T *= (1.f - alpha);
}
}
std::vector<torch::Tensor> rendering_forward(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
float early_stop_eps,
bool compression)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
if (compression)
{
// compress the samples to get rid of invisible ones.
torch::Tensor num_steps = torch::zeros({n_rays}, packed_info.options());
torch::Tensor compact_selector = torch::zeros(
{n_samples}, sigmas.options().dtype(torch::kBool));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"rendering_forward",
([&]
{ rendering_forward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
nullptr, // alphas
early_stop_eps,
// outputs
num_steps.data_ptr<int>(),
nullptr,
compact_selector.data_ptr<bool>()); }));
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor compact_packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
return {compact_packed_info, compact_selector};
}
else
{
// just do the forward rendering.
torch::Tensor weights = torch::zeros({n_samples}, sigmas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"rendering_forward",
([&]
{ rendering_forward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
nullptr, // alphas
early_stop_eps,
// outputs
nullptr,
weights.data_ptr<scalar_t>(),
nullptr); }));
return {weights};
}
}
torch::Tensor rendering_backward(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
float early_stop_eps)
{
DEVICE_GUARD(packed_info);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor grad_sigmas = torch::zeros(sigmas.sizes(), sigmas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"rendering_backward",
([&]
{ rendering_backward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
nullptr, // alphas
early_stop_eps,
weights.data_ptr<scalar_t>(),
grad_weights.data_ptr<scalar_t>(),
// outputs
grad_sigmas.data_ptr<scalar_t>(),
nullptr // alphas gradients
); }));
return grad_sigmas;
}
// -- rendering with alphas -- //
std::vector<torch::Tensor> rendering_alphas_forward(
torch::Tensor packed_info,
torch::Tensor alphas,
float early_stop_eps,
bool compression)
{
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(alphas);
TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
TORCH_CHECK(alphas.ndimension() == 2 && alphas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = alphas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
if (compression)
{
// compress the samples to get rid of invisible ones.
torch::Tensor num_steps = torch::zeros({n_rays}, packed_info.options());
torch::Tensor compact_selector = torch::zeros(
{n_samples}, alphas.options().dtype(torch::kBool));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
alphas.scalar_type(),
"rendering_alphas_forward",
([&]
{ rendering_forward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
nullptr, // starts
nullptr, // ends
nullptr, // sigmas
alphas.data_ptr<scalar_t>(),
early_stop_eps,
// outputs
num_steps.data_ptr<int>(),
nullptr,
compact_selector.data_ptr<bool>()); }));
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor compact_packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
return {compact_selector, compact_packed_info};
}
else
{
// just do the forward rendering.
torch::Tensor weights = torch::zeros({n_samples}, alphas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
alphas.scalar_type(),
"rendering_forward",
([&]
{ rendering_forward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
nullptr, // starts
nullptr, // ends
nullptr, // sigmas
alphas.data_ptr<scalar_t>(),
early_stop_eps,
// outputs
nullptr,
weights.data_ptr<scalar_t>(),
nullptr); }));
return {weights};
}
}
torch::Tensor rendering_alphas_backward(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor alphas,
float early_stop_eps)
{
DEVICE_GUARD(packed_info);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = alphas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor grad_alphas = torch::zeros(alphas.sizes(), alphas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
alphas.scalar_type(),
"rendering_alphas_backward",
([&]
{ rendering_backward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
// inputs
packed_info.data_ptr<int>(),
nullptr, // starts
nullptr, // ends
nullptr, // sigmas
alphas.data_ptr<scalar_t>(),
early_stop_eps,
weights.data_ptr<scalar_t>(),
grad_weights.data_ptr<scalar_t>(),
// outputs
nullptr, // sigma gradients
grad_alphas.data_ptr<scalar_t>()); }));
return grad_alphas;
}
#include <pybind11/pybind11.h>
#include "include/helpers_cuda.h"
inline __device__ int cascaded_grid_idx_at(
const float x, const float y, const float z,
const int resx, const int resy, const int resz,
const float* aabb
) {
int ix = (int)(((x - aabb[0]) / (aabb[3] - aabb[0])) * resx);
int iy = (int)(((y - aabb[1]) / (aabb[4] - aabb[1])) * resy);
int iz = (int)(((z - aabb[2]) / (aabb[5] - aabb[2])) * resz);
ix = __clamp(ix, 0, resx-1);
iy = __clamp(iy, 0, resy-1);
iz = __clamp(iz, 0, resz-1);
int idx = ix * resy * resz + iy * resz + iz;
return idx;
}
inline __device__ bool grid_occupied_at(
const float x, const float y, const float z,
const int resx, const int resy, const int resz,
const float* aabb, const bool* occ_binary
) {
if (x <= aabb[0] || x >= aabb[3] || y <= aabb[1] || y >= aabb[4] || z <= aabb[2] || z >= aabb[5]) {
return false;
}
int idx = cascaded_grid_idx_at(x, y, z, resx, resy, resz, aabb);
return occ_binary[idx];
}
inline __device__ float distance_to_next_voxel(
float x, float y, float z,
float dir_x, float dir_y, float dir_z,
float idir_x, float idir_y, float idir_z,
const int resx, const int resy, const int resz,
const float* aabb
) { // dda like step
// TODO: this is ugly -- optimize this.
float _x = ((x - aabb[0]) / (aabb[3] - aabb[0])) * resx;
float _y = ((y - aabb[1]) / (aabb[4] - aabb[1])) * resy;
float _z = ((z - aabb[2]) / (aabb[5] - aabb[2])) * resz;
float tx = ((floorf(_x + 0.5f + 0.5f * __sign(dir_x)) - _x) * idir_x) / resx * (aabb[3] - aabb[0]);
float ty = ((floorf(_y + 0.5f + 0.5f * __sign(dir_y)) - _y) * idir_y) / resy * (aabb[4] - aabb[1]);
float tz = ((floorf(_z + 0.5f + 0.5f * __sign(dir_z)) - _z) * idir_z) / resz * (aabb[5] - aabb[2]);
float t = min(min(tx, ty), tz);
return fmaxf(t, 0.0f);
}
inline __device__ float advance_to_next_voxel(
float t,
float x, float y, float z,
float dir_x, float dir_y, float dir_z,
float idir_x, float idir_y, float idir_z,
const int resx, const int resy, const int resz, const float* aabb,
float dt_min) {
// Regular stepping (may be slower but matches non-empty space)
float t_target = t + distance_to_next_voxel(
x, y, z,
dir_x, dir_y, dir_z,
idir_x, idir_y, idir_z,
resx, resy, resz, aabb
);
do {
t += dt_min;
} while (t < t_target);
return t;
}
__global__ void marching_steps_kernel(
// rays info
const uint32_t n_rays,
const float* rays_o, // shape (n_rays, 3)
const float* rays_d, // shape (n_rays, 3)
const float* t_min, // shape (n_rays,)
const float* t_max, // shape (n_rays,)
// density grid
const float* aabb, // [min_x, min_y, min_z, max_x, max_y, max_z]
const int resx,
const int resy,
const int resz,
const bool* occ_binary, // shape (reso_x, reso_y, reso_z)
// sampling
const float dt,
// outputs
int* num_steps
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
rays_o += i * 3;
rays_d += i * 3;
t_min += i;
t_max += i;
num_steps += i;
const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
const float near = t_min[0], far = t_max[0];
int j = 0;
float t0 = near; // TODO(ruilongli): perturb `near` as in ngp_pl?
float t1 = t0 + dt;
float t_mid = (t0 + t1) * 0.5f;
while (t_mid < far) {
// current center
const float x = ox + t_mid * dx;
const float y = oy + t_mid * dy;
const float z = oz + t_mid * dz;
if (grid_occupied_at(x, y, z, resx, resy, resz, aabb, occ_binary)) {
++j;
// march to next sample
t0 = t1;
t1 = t0 + dt;
t_mid = (t0 + t1) * 0.5f;
}
else {
// march to next sample
t_mid = advance_to_next_voxel(
t_mid, x, y, z, dx, dy, dz, rdx, rdy, rdz, resx, resy, resz, aabb, dt
);
t0 = t_mid - dt * 0.5f;
t1 = t_mid + dt * 0.5f;
}
}
if (j == 0) return;
num_steps[0] = j;
return;
}
__global__ void marching_forward_kernel(
// rays info
const uint32_t n_rays,
const float* rays_o, // shape (n_rays, 3)
const float* rays_d, // shape (n_rays, 3)
const float* t_min, // shape (n_rays,)
const float* t_max, // shape (n_rays,)
// density grid
const float* aabb, // [min_x, min_y, min_z, max_x, max_y, max_z]
const int resx,
const int resy,
const int resz,
const bool* occ_binary, // shape (reso_x, reso_y, reso_z)
// sampling
const float dt,
const int* packed_info,
// frustum outputs
float* frustum_starts,
float* frustum_ends
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
rays_o += i * 3;
rays_d += i * 3;
t_min += i;
t_max += i;
int base = packed_info[i * 2 + 0];
int steps = packed_info[i * 2 + 1];
const float ox = rays_o[0], oy = rays_o[1], oz = rays_o[2];
const float dx = rays_d[0], dy = rays_d[1], dz = rays_d[2];
const float rdx = 1 / dx, rdy = 1 / dy, rdz = 1 / dz;
const float near = t_min[0], far = t_max[0];
// locate
frustum_starts += base;
frustum_ends += base;
int j = 0;
float t0 = near;
float t1 = t0 + dt;
float t_mid = (t0 + t1) * 0.5f;
while (t_mid < far) {
// current center
const float x = ox + t_mid * dx;
const float y = oy + t_mid * dy;
const float z = oz + t_mid * dz;
if (grid_occupied_at(x, y, z, resx, resy, resz, aabb, occ_binary)) {
frustum_starts[j] = t0;
frustum_ends[j] = t1;
++j;
// march to next sample
t0 = t1;
t1 = t0 + dt;
t_mid = (t0 + t1) * 0.5f;
}
else {
// march to next sample
t_mid = advance_to_next_voxel(
t_mid, x, y, z, dx, dy, dz, rdx, rdy, rdz, resx, resy, resz, aabb, dt
);
t0 = t_mid - dt * 0.5f;
t1 = t_mid + dt * 0.5f;
}
}
if (j != steps) {
printf("Error: sample count mismatch in marching_forward_kernel: %d vs. %d\n", j, steps);
}
return;
}
__global__ void ray_indices_kernel(
// input
const int n_rays,
const int* packed_info,
// output
int* ray_indices
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0) return;
ray_indices += base;
for (int j = 0; j < steps; ++j) {
ray_indices[j] = i;
}
}
__global__ void occ_query_kernel(
// rays info
const uint32_t n_samples,
const float* samples, // shape (n_samples, 3)
// density grid
const float* aabb, // [min_x, min_y, min_z, max_x, max_y, max_z]
const int resx,
const int resy,
const int resz,
const bool* occ_binary, // shape (reso_x, reso_y, reso_z)
// outputs
bool* occs
) {
CUDA_GET_THREAD_ID(i, n_samples);
// locate
samples += i * 3;
occs += i;
occs[0] = grid_occupied_at(
samples[0], samples[1], samples[2],
resx, resy, resz, aabb, occ_binary
);
return;
}
std::vector<torch::Tensor> volumetric_marching(
// rays
const torch::Tensor rays_o,
const torch::Tensor rays_d,
const torch::Tensor t_min,
const torch::Tensor t_max,
// density grid
const torch::Tensor aabb,
const pybind11::list resolution,
const torch::Tensor occ_binary,
// sampling
const float dt
) {
DEVICE_GUARD(rays_o);
CHECK_INPUT(rays_o);
CHECK_INPUT(rays_d);
CHECK_INPUT(t_min);
CHECK_INPUT(t_max);
CHECK_INPUT(aabb);
CHECK_INPUT(occ_binary);
const int n_rays = rays_o.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// helper counter
torch::Tensor num_steps = torch::zeros(
{n_rays}, rays_o.options().dtype(torch::kInt32));
// count number of samples per ray
marching_steps_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// rays
n_rays,
rays_o.data_ptr<float>(),
rays_d.data_ptr<float>(),
t_min.data_ptr<float>(),
t_max.data_ptr<float>(),
// density grid
aabb.data_ptr<float>(),
resolution[0].cast<int>(),
resolution[1].cast<int>(),
resolution[2].cast<int>(),
occ_binary.data_ptr<bool>(),
// sampling
dt,
// outputs
num_steps.data_ptr<int>()
);
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
// output frustum samples
int total_steps = cum_steps[cum_steps.size(0) - 1].item<int>();
torch::Tensor frustum_starts = torch::zeros({total_steps, 1}, rays_o.options());
torch::Tensor frustum_ends = torch::zeros({total_steps, 1}, rays_o.options());
marching_forward_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
// rays
n_rays,
rays_o.data_ptr<float>(),
rays_d.data_ptr<float>(),
t_min.data_ptr<float>(),
t_max.data_ptr<float>(),
// density grid
aabb.data_ptr<float>(),
resolution[0].cast<int>(),
resolution[1].cast<int>(),
resolution[2].cast<int>(),
occ_binary.data_ptr<bool>(),
// sampling
dt,
packed_info.data_ptr<int>(),
// outputs
frustum_starts.data_ptr<float>(),
frustum_ends.data_ptr<float>()
);
return {packed_info, frustum_starts, frustum_ends};
}
torch::Tensor unpack_to_ray_indices(const torch::Tensor packed_info) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
const int n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
int n_samples = packed_info[n_rays - 1].sum(0).item<int>();
torch::Tensor ray_indices = torch::zeros(
{n_samples}, packed_info.options().dtype(torch::kInt32));
ray_indices_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
packed_info.data_ptr<int>(),
ray_indices.data_ptr<int>()
);
return ray_indices;
}
torch::Tensor query_occ(
const torch::Tensor samples,
// density grid
const torch::Tensor aabb,
const pybind11::list resolution,
const torch::Tensor occ_binary
) {
DEVICE_GUARD(samples);
CHECK_INPUT(samples);
const int n_samples = samples.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_samples, threads);
torch::Tensor occs = torch::zeros(
{n_samples}, samples.options().dtype(torch::kBool));
occ_query_kernel<<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_samples,
samples.data_ptr<float>(),
// density grid
aabb.data_ptr<float>(),
resolution[0].cast<int>(),
resolution[1].cast<int>(),
resolution[2].cast<int>(),
occ_binary.data_ptr<bool>(),
// outputs
occs.data_ptr<bool>()
);
return occs;
}
#include "include/helpers_cuda.h"
template <typename scalar_t>
__global__ void volumetric_rendering_steps_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
// output: should be all zero (false) initialized
int* num_steps,
bool* selector
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0) return;
starts += base;
ends += base;
sigmas += base;
num_steps += i;
selector += base;
// accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
int j = 0;
for (; j < steps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
const scalar_t weight = alpha * T;
T *= (1.f - alpha);
selector[j] = true;
}
num_steps[0] = j;
return;
}
template <typename scalar_t>
__global__ void volumetric_rendering_weights_forward_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
// should be all-zero initialized
scalar_t* weights // output
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0) return;
starts += base;
ends += base;
sigmas += base;
weights += base;
// accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
for (int j = 0; j < steps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
const scalar_t weight = alpha * T;
weights[j] = weight;
T *= (1.f - alpha);
}
}
template <typename scalar_t>
__global__ void volumetric_rendering_weights_backward_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
const scalar_t* weights, // forward output
const scalar_t* grad_weights, // input
scalar_t* grad_sigmas // output
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0) return;
starts += base;
ends += base;
sigmas += base;
weights += base;
grad_weights += base;
grad_sigmas += base;
scalar_t accum = 0;
for (int j = 0; j < steps; ++j) {
accum += grad_weights[j] * weights[j];
}
// backward of accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
for (int j = 0; j < steps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
grad_sigmas[j] = delta * (grad_weights[j] * T - accum);
accum -= grad_weights[j] * weights[j];
T *= (1.f - alpha);
}
}
std::vector<torch::Tensor> volumetric_rendering_steps(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
torch::Tensor num_steps = torch::zeros({n_rays}, packed_info.options());
torch::Tensor selector = torch::zeros({n_samples}, packed_info.options().dtype(torch::kBool));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_marching_steps",
([&]
{ volumetric_rendering_steps_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
num_steps.data_ptr<int>(),
selector.data_ptr<bool>()
);
}));
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor compact_packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
return {compact_packed_info, selector};
}
torch::Tensor volumetric_rendering_weights_forward(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor weights = torch::zeros({n_samples}, sigmas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_weights_forward",
([&]
{ volumetric_rendering_weights_forward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
weights.data_ptr<scalar_t>()
);
}));
return weights;
}
torch::Tensor volumetric_rendering_weights_backward(
torch::Tensor weights,
torch::Tensor grad_weights,
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
) {
DEVICE_GUARD(packed_info);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor grad_sigmas = torch::zeros(sigmas.sizes(), sigmas.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_weights_backward",
([&]
{ volumetric_rendering_weights_backward_kernel<scalar_t><<<blocks, threads, 0, at::cuda::getCurrentCUDAStream()>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
weights.data_ptr<scalar_t>(),
grad_weights.data_ptr<scalar_t>(),
grad_sigmas.data_ptr<scalar_t>()
);
}));
return grad_sigmas;
}
""" Occupancy field for accelerating volumetric rendering. """
from typing import Callable, List, Tuple, Union
from typing import Callable, List, Union
import torch
from torch import nn
import torch.nn as nn
from .contraction import ContractionType, contract_inv
# TODO: add this to the dependency
# from torch_scatter import scatter_max
class Grid(nn.Module):
    """An abstract Grid class.

    The grid is used as a cache of the 3D space to indicate whether each voxel
    area is important or not for the differentiable rendering process. The
    ray marching function (see :func:`nerfacc.ray_marching`) would use the
    grid to skip the unimportant voxel areas.

    To work with :func:`nerfacc.ray_marching`, three attributes must exist:

        - :attr:`roi_aabb`: The axis-aligned bounding box of the region of interest.
        - :attr:`binary`: A 3D binarized tensor of shape {resx, resy, resz}, \
          with torch.bool data type.
        - :attr:`contraction_type`: The contraction type of the grid, indicating how \
          the 3D space is mapped to the grid.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self._dummy = torch.nn.Parameter(torch.empty(0))

    @property
    def device(self) -> torch.device:
        return self._dummy.device

    @property
    def roi_aabb(self) -> torch.Tensor:
        """The axis-aligned bounding box of the region of interest.

        It is a shape (6,) tensor in the format of {minx, miny, minz, maxx, maxy, maxz}.
        """
        if hasattr(self, "_roi_aabb"):
            return getattr(self, "_roi_aabb")
        else:
            raise NotImplementedError("please set an attribute named _roi_aabb")

    @property
    def binary(self) -> torch.Tensor:
        """A 3D binarized tensor with torch.bool data type.

        The tensor is of shape (resx, resy, resz), in which each boolean value
        represents whether the corresponding voxel should be kept or not.
        """
        if hasattr(self, "_binary"):
            return getattr(self, "_binary")
        else:
            raise NotImplementedError("please set an attribute named _binary")

    @property
    def contraction_type(self) -> ContractionType:
        """The contraction type of the grid.

        The contraction type is an indicator of how the 3D space is contracted
        to this voxel grid. See :class:`nerfacc.ContractionType` for more details.
        """
        if hasattr(self, "_contraction_type"):
            return getattr(self, "_contraction_type")
        else:
            raise NotImplementedError(
                "please set an attribute named _contraction_type"
            )
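# A minimal sketch (illustrative, not part of this commit) of a custom Grid
# subclass that satisfies the three required attributes documented above.
# The class name and resolution below are hypothetical.
#
#   class DenseGrid(Grid):
#       def __init__(self, roi_aabb: torch.Tensor, res: int = 64):
#           super().__init__()
#           self.register_buffer("_roi_aabb", roi_aabb)
#           self.register_buffer(
#               "_binary", torch.ones(res, res, res, dtype=torch.bool)
#           )
#           self._contraction_type = ContractionType.AABB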
class OccupancyGrid(Grid):
    """Occupancy grid: whether each voxel area is occupied or not.

    Args:
        roi_aabb: The axis-aligned bounding box of the region of interest. Useful for mapping
            the 3D space to the grid.
        resolution: The resolution of the grid. If an integer is given, the grid is assumed to
            be a cube. Otherwise, a list or a tensor of shape (3,) is expected. Default: 128.
        contraction_type: The contraction type of the grid. See :class:`nerfacc.ContractionType`
            for more details. Default: :attr:`nerfacc.ContractionType.AABB`.
    """

    NUM_DIM: int = 3

    def __init__(
        self,
        roi_aabb: Union[List[int], torch.Tensor],
        resolution: Union[int, List[int], torch.Tensor] = 128,
        contraction_type: ContractionType = ContractionType.AABB,
    ) -> None:
        super().__init__()
        if isinstance(resolution, int):
            resolution = [resolution] * self.NUM_DIM
        if isinstance(resolution, (list, tuple)):
            resolution = torch.tensor(resolution, dtype=torch.int32)
        assert isinstance(
            resolution, torch.Tensor
        ), f"Invalid type: {type(resolution)}"
        assert resolution.shape == (
            self.NUM_DIM,
        ), f"Invalid shape: {resolution.shape}"

        if isinstance(roi_aabb, (list, tuple)):
            roi_aabb = torch.tensor(roi_aabb, dtype=torch.float32)
        assert isinstance(
            roi_aabb, torch.Tensor
        ), f"Invalid type: {type(roi_aabb)}"
        assert roi_aabb.shape == torch.Size(
            [self.NUM_DIM * 2]
        ), f"Invalid shape: {roi_aabb.shape}"

        # total number of voxels
        self.num_cells = int(resolution.prod().item())

        # required attributes
        self.register_buffer("_roi_aabb", roi_aabb)
        self.register_buffer(
            "_binary", torch.zeros(resolution.tolist(), dtype=torch.bool)
        )
        self._contraction_type = contraction_type

        # helper attributes
        self.register_buffer("resolution", resolution)
        self.register_buffer("occs", torch.zeros(self.num_cells))

        # Grid coords & indices
        grid_coords = _meshgrid3d(resolution).reshape(
            self.num_cells, self.NUM_DIM
        )
        self.register_buffer("grid_coords", grid_coords)
        grid_indices = torch.arange(self.num_cells)
@@ -116,13 +143,14 @@ class OccupancyField(nn.Module):
    @torch.no_grad()
    def _sample_uniform_and_occupied_cells(self, n: int) -> torch.Tensor:
        """Samples both n uniform and occupied cells."""
        uniform_indices = torch.randint(
            self.num_cells, (n,), device=self.device
        )
        occupied_indices = torch.nonzero(self._binary.flatten())[:, 0]
        if n < len(occupied_indices):
            selector = torch.randint(
                len(occupied_indices), (n,), device=self.device
            )
            occupied_indices = occupied_indices[selector]
        indices = torch.cat([uniform_indices, occupied_indices], dim=0)
        return indices
@@ -131,6 +159,7 @@ class OccupancyField(nn.Module):
    def _update(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 0.01,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
@@ -147,92 +176,47 @@ class OccupancyField(nn.Module):
        grid_coords = self.grid_coords[indices]
        x = (
            grid_coords + torch.rand_like(grid_coords, dtype=torch.float32)
        ) / self.resolution
        # voxel coordinates [0, 1]^3 -> world
        x = contract_inv(
            x,
            roi=self._roi_aabb,
            type=self._contraction_type,
        )
        occ = occ_eval_fn(x).squeeze(-1)
        # ema update
        self.occs[indices] = torch.maximum(self.occs[indices] * ema_decay, occ)
        # supposed to use scatter_max, but empirically it is almost the same.
        # self.occs, _ = scatter_max(
        #     occ, indices, dim=0, out=self.occs * ema_decay
        # )
        self._binary = (
            self.occs > torch.clamp(self.occs.mean(), max=occ_thre)
        ).reshape(self._binary.shape)
    @torch.no_grad()
    def every_n_step(
        self,
        step: int,
        occ_eval_fn: Callable,
        occ_thre: float = 1e-2,
        ema_decay: float = 0.95,
        warmup_steps: int = 256,
        n: int = 16,
    ) -> None:
        """Update the grid every n steps during training.

        Args:
            step: Current training step.
            occ_eval_fn: A function that takes in sample locations :math:`(N, 3)` and
                returns the occupancy values :math:`(N, 1)` at those locations.
            occ_thre: Threshold used to binarize the occupancy grid. Default: 1e-2.
            ema_decay: The decay rate for EMA updates. Default: 0.95.
            warmup_steps: Sample all cells during the warmup stage. After the warmup
                stage we change the sampling strategy to 1/4 uniformly sampled cells
                together with 1/4 occupied cells. Default: 256.
            n: Update the grid every n steps. Default: 16.
        """
        if not self.training:
            raise RuntimeError(
@@ -243,18 +227,31 @@ class OccupancyField(nn.Module):
        if step % n == 0 and self.training:
            self._update(
                step=step,
                occ_eval_fn=occ_eval_fn,
                occ_thre=occ_thre,
                ema_decay=ema_decay,
                warmup_steps=warmup_steps,
            )


def _meshgrid3d(
    res: torch.Tensor, device: Union[torch.device, str] = "cpu"
) -> torch.Tensor:
    """Create 3D grid coordinates."""
    assert len(res) == 3
    res = res.tolist()
    return (
        torch.stack(
            torch.meshgrid(
                [
                    torch.arange(res[0]),
                    torch.arange(res[1]),
                    torch.arange(res[2]),
                ],
                indexing="ij",
            ),
            dim=-1,
        )
        .long()
        .to(device)
    )
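# Usage sketch (assumed, not from this commit): update the grid inside a
# training loop. `radiance_field.query_density` and `max_steps` are
# hypothetical; any callable mapping (N, 3) points to (N, 1) densities works.
#
#   grid = OccupancyGrid(roi_aabb=[0., 0., 0., 1., 1., 1.], resolution=128)
#   for step in range(max_steps):
#       grid.every_n_step(
#           step=step,
#           occ_eval_fn=lambda x: radiance_field.query_density(x),
#       )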
from typing import Callable, Optional, Tuple
import torch
from .grid import Grid
from .ray_marching import ray_marching, unpack_to_ray_indices
from .vol_rendering import accumulate_along_rays, render_weight_from_density
def rendering(
# radiance field
rgb_sigma_fn: Callable,
# ray marching results
packed_info: torch.Tensor,
t_starts: torch.Tensor,
t_ends: torch.Tensor,
# rendering options
early_stop_eps: float = 1e-4,
render_bkgd: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""Render the rays through the radience field defined by `rgb_sigma_fn`.
This function is differentiable to the outputs of `rgb_sigma_fn` so it can be used for
gradient-based optimization.
Warning:
This function is not differentiable to `t_starts`, `t_ends`.
Args:
rgb_sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1), \
ray indices (N,)} and returns the post-activation rgb (N, 3) and density \
values (N, 1).
packed_info: Packed ray marching info. See :func:`ray_marching` for details.
t_starts: Per-sample start distance. Tensor with shape (n_samples, 1).
t_ends: Per-sample end distance. Tensor with shape (n_samples, 1).
early_stop_eps: Early stop threshold during transmittance accumulation. Default: 1e-4.
render_bkgd: Optional. Background color. Tensor with shape (3,).
Returns:
Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).
Examples:
.. code-block:: python
import torch
from nerfacc import OccupancyGrid, ray_marching, rendering
device = "cuda:0"
batch_size = 128
rays_o = torch.rand((batch_size, 3), device=device)
rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# Rendering.
def rgb_sigma_fn(t_starts, t_ends, ray_indices):
# This is a dummy function that returns random values.
rgbs = torch.rand((t_starts.shape[0], 3), device=device)
sigmas = torch.rand((t_starts.shape[0], 1), device=device)
return rgbs, sigmas
colors, opacities, depths = rendering(rgb_sigma_fn, packed_info, t_starts, t_ends)
# torch.Size([128, 3]) torch.Size([128, 1]) torch.Size([128, 1])
print(colors.shape, opacities.shape, depths.shape)
"""
n_rays = packed_info.shape[0]
ray_indices = unpack_to_ray_indices(packed_info)
# Query sigma and color with gradients
rgbs, sigmas = rgb_sigma_fn(t_starts, t_ends, ray_indices)
assert rgbs.shape[-1] == 3, "rgbs must have 3 channels, got {}".format(
rgbs.shape
)
assert (
sigmas.shape == t_starts.shape
), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
# Rendering: compute weights and ray indices.
weights = render_weight_from_density(
packed_info, t_starts, t_ends, sigmas, early_stop_eps
)
# Rendering: accumulate rgbs, opacities, and depths along the rays.
colors = accumulate_along_rays(
weights, ray_indices, values=rgbs, n_rays=n_rays
)
opacities = accumulate_along_rays(
weights, ray_indices, values=None, n_rays=n_rays
)
depths = accumulate_along_rays(
weights,
ray_indices,
values=(t_starts + t_ends) / 2.0,
n_rays=n_rays,
)
# Background composition.
if render_bkgd is not None:
colors = colors + render_bkgd * (1.0 - opacities)
return colors, opacities, depths
def volumetric_rendering(
# radiance field
sigma_fn: Callable,
rgb_sigma_fn: Callable,
# rays
rays_o: torch.Tensor,
rays_d: torch.Tensor,
t_min: Optional[torch.Tensor] = None,
t_max: Optional[torch.Tensor] = None,
# bounding box of the scene
scene_aabb: Optional[torch.Tensor] = None,
# grid for skipping samples
grid: Optional[Grid] = None,
# rendering options
near_plane: Optional[float] = None,
far_plane: Optional[float] = None,
render_step_size: float = 1e-3,
stratified: bool = False,
cone_angle: float = 0.0,
early_stop_eps: float = 1e-4,
render_bkgd: Optional[torch.Tensor] = None,
return_extra_info: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""Differentiable volumetric rendering pipeline.
This function integrates the following individual functions:
- ray_aabb_intersect: ray AABB intersection.
- ray_marching: ray marching with grid-based skipping.
- compute_weights: compute transmittance and compress samples.
- accumulate_along_rays: accumulate samples along rays to get final per-ray RGB etc.
Args:
sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1),
ray indices (N,)} and returns the post-activation density values (N, 1).
rgb_sigma_fn: A function that takes in samples {t_starts (N, 1), t_ends (N, 1),
ray indices (N,)} and returns the post-activation rgb (N, 3) and density
values (N, 1).
rays_o: Ray origins. Tensor with shape (n_rays, 3).
rays_d: Normalized ray directions. Tensor with shape (n_rays, 3).
t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).
t_max: Optional. Per-ray maximum distance. Tensor with shape (n_rays).
scene_aabb: Optional. Scene bounding box for computing t_min and t_max.
A tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.
scene_aabb will be ignored if both t_min and t_max are provided.
grid: Optional. Grid that indicates where to skip during marching.
See :class:`nerfacc.Grid` for details.
near_plane: Optional. Near plane distance. If provided, it will be used
to clip t_min.
far_plane: Optional. Far plane distance. If provided, it will be used
to clip t_max.
render_step_size: Step size for marching. Default: 1e-3.
stratified: Whether to use stratified sampling. Default: False.
cone_angle: Cone angle for linearly-increased step size. 0. means
constant step size. Default: 0.0.
early_stop_eps: Early stop threshold for marching. Default: 1e-4.
render_bkgd: Optional. Background color. If provided, it will be used
to fill the background. Default: None.
return_extra_info: Whether to return extra info. Default: False.
Returns:
Ray colors (n_rays, 3), opacities (n_rays, 1) and depths (n_rays, 1).
If return_extra_info is True, it will also return a dictionary of extra info,
including:
- "n_marching_samples": Total number of samples kept after marching.
- "n_rendering_samples": Total number of samples used for actual rendering.
"""
assert rays_o.shape == rays_d.shape and rays_o.dim() == 2, "Invalid rays."
n_rays = rays_o.shape[0]
rays_o = rays_o.contiguous()
rays_d = rays_d.contiguous()
extra_info = {}
with torch.no_grad():
# Ray marching with skipping.
packed_info, t_starts, t_ends = ray_marching(
rays_o,
rays_d,
t_min=t_min,
t_max=t_max,
scene_aabb=scene_aabb,
grid=grid,
sigma_fn=sigma_fn,
early_stop_eps=early_stop_eps,
near_plane=near_plane,
far_plane=far_plane,
render_step_size=render_step_size,
stratified=stratified,
cone_angle=cone_angle,
)
extra_info["n_rendering_samples"] = len(t_starts)
colors, opacities, depths = rendering(
rgb_sigma_fn,
packed_info=packed_info,
t_starts=t_starts,
t_ends=t_ends,
early_stop_eps=early_stop_eps,
render_bkgd=render_bkgd,
)
if return_extra_info:
return colors, opacities, depths, extra_info
else:
return colors, opacities, depths
from typing import Callable, Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
from .grid import Grid
from .vol_rendering import render_visibility
@torch.no_grad()
def ray_aabb_intersect(
rays_o: Tensor, rays_d: Tensor, aabb: Tensor
) -> Tuple[Tensor, Tensor]:
"""Ray AABB Test.
Note:
this function is not differentiable to any inputs.
Args:
rays_o: Ray origins of shape (n_rays, 3).
rays_d: Normalized ray directions of shape (n_rays, 3).
aabb: Scene bounding box {xmin, ymin, zmin, xmax, ymax, zmax}. \
Tensor with shape (6)
Returns:
Ray AABB intersection {t_min, t_max} with shape (n_rays) respectively. \
Note that t_min is clipped to a minimum of zero. A value of 1e10 means no intersection.
Examples:
.. code-block:: python
aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device="cuda:0")
rays_o = torch.rand((128, 3), device="cuda:0")
rays_d = torch.randn((128, 3), device="cuda:0")
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)
"""
if rays_o.is_cuda and rays_d.is_cuda and aabb.is_cuda:
rays_o = rays_o.contiguous()
rays_d = rays_d.contiguous()
aabb = aabb.contiguous()
t_min, t_max = _C.ray_aabb_intersect(rays_o, rays_d, aabb)
else:
raise NotImplementedError("Only support cuda inputs.")
return t_min, t_max
@torch.no_grad()
def unpack_to_ray_indices(packed_info: Tensor) -> Tensor:
"""Unpack `packed_info` to `ray_indices`. Useful for converting per ray data to per sample data.
Note:
this function is not differentiable to any inputs.
Args:
packed_info: Stores information on which samples belong to the same ray. \
See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).
Returns:
Ray index of each sample. LongTensor with shape (n_samples).
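For example, a packed_info of [[0, 2], [2, 3]] means ray 0 owns the 2 \
samples starting at index 0 and ray 1 owns the 3 samples starting at \
index 2, so it unpacks to ray_indices [0, 0, 1, 1, 1].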
Examples:
.. code-block:: python
rays_o = torch.rand((128, 3), device="cuda:0")
rays_d = torch.randn((128, 3), device="cuda:0")
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching with near far plane.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# torch.Size([128, 2]) torch.Size([115200, 1]) torch.Size([115200, 1])
print(packed_info.shape, t_starts.shape, t_ends.shape)
# Unpack per-ray info to per-sample info.
ray_indices = unpack_to_ray_indices(packed_info)
# torch.Size([115200]) torch.int64
print(ray_indices.shape, ray_indices.dtype)
"""
if packed_info.is_cuda:
ray_indices = _C.unpack_to_ray_indices(packed_info.contiguous())
else:
raise NotImplementedError("Only support cuda inputs.")
return ray_indices.long()
@torch.no_grad()
def ray_marching(
# rays
rays_o: Tensor,
rays_d: Tensor,
t_min: Optional[Tensor] = None,
t_max: Optional[Tensor] = None,
# bounding box of the scene
scene_aabb: Optional[Tensor] = None,
# binarized grid for skipping empty space
grid: Optional[Grid] = None,
# sigma function for skipping invisible space
sigma_fn: Optional[Callable] = None,
early_stop_eps: float = 1e-4,
# rendering options
near_plane: Optional[float] = None,
far_plane: Optional[float] = None,
render_step_size: float = 1e-3,
stratified: bool = False,
cone_angle: float = 0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""Ray marching with space skipping.
Note:
The logic for computing `t_min` and `t_max`:
1. If `t_min` and `t_max` are given, use them with highest priority.
2. If `t_min` and `t_max` are not given, but `scene_aabb` is given, use \
:func:`ray_aabb_intersect` to compute `t_min` and `t_max`.
3. If `t_min` and `t_max` are not given, and `scene_aabb` is not given, \
set `t_min` to 0.0, and `t_max` to 1e10. (the case of unbounded scene)
4. Always clip `t_min` with `near_plane` and `t_max` with `far_plane` if given.
Warning:
This function is not differentiable to any inputs.
Args:
rays_o: Ray origins of shape (n_rays, 3).
rays_d: Normalized ray directions of shape (n_rays, 3).
t_min: Optional. Per-ray minimum distance. Tensor with shape (n_rays).
t_max: Optional. Per-ray maximum distance. Tensor with shape (n_rays).
scene_aabb: Optional. Scene bounding box for computing t_min and t_max.
A tensor with shape (6,) {xmin, ymin, zmin, xmax, ymax, zmax}.
`scene_aabb` will be ignored if both `t_min` and `t_max` are provided.
grid: Optional. Grid that indicates where to skip during marching.
See :class:`nerfacc.Grid` for details.
sigma_fn: Optional. If provided, the marching will skip the invisible space
by evaluating the density along the ray with `sigma_fn`. It should be a
function that takes in samples {t_starts (N, 1), t_ends (N, 1),
ray indices (N,)} and returns the post-activation density values (N, 1).
early_stop_eps: Early stop threshold for skipping invisible space. Default: 1e-4.
near_plane: Optional. Near plane distance. If provided, it will be used
to clip t_min.
far_plane: Optional. Far plane distance. If provided, it will be used
to clip t_max.
render_step_size: Step size for marching. Default: 1e-3.
stratified: Whether to use stratified sampling. Default: False.
cone_angle: Cone angle for linearly-increased step size. 0. means
constant step size. Default: 0.0.
Returns:
A tuple of tensors.
- **packed_info**: Stores information on which samples belong to the same ray. \
Tensor with shape (n_rays, 2). The first column stores the index of the \
first sample of each ray. The second column stores the number of samples \
of each ray.
- **t_starts**: Per-sample start distance. Tensor with shape (n_samples, 1).
- **t_ends**: Per-sample end distance. Tensor with shape (n_samples, 1).
Examples:
.. code-block:: python
import torch
from nerfacc import OccupancyGrid, ray_marching, unpack_to_ray_indices
device = "cuda:0"
batch_size = 128
rays_o = torch.rand((batch_size, 3), device=device)
rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching with near far plane.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# Ray marching with aabb.
scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, scene_aabb=scene_aabb, render_step_size=1e-3
)
# Ray marching with per-ray t_min and t_max.
t_min = torch.zeros((batch_size,), device=device)
t_max = torch.ones((batch_size,), device=device)
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, t_min=t_min, t_max=t_max, render_step_size=1e-3
)
# Ray marching with aabb and skip areas based on occupancy grid.
scene_aabb = torch.tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0], device=device)
grid = OccupancyGrid(roi_aabb=[0.0, 0.0, 0.0, 0.5, 0.5, 0.5]).to(device)
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, scene_aabb=scene_aabb, grid=grid, render_step_size=1e-3
)
# Convert t_starts and t_ends to sample locations.
ray_indices = unpack_to_ray_indices(packed_info)
t_mid = (t_starts + t_ends) / 2.0
sample_locs = rays_o[ray_indices] + t_mid * rays_d[ray_indices]
"""
if not rays_o.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
# logic for t_min and t_max:
# 1. if t_min and t_max are given, use them with highest priority.
# 2. if t_min and t_max are not given, but scene_aabb is given, use
# ray_aabb_intersect to compute t_min and t_max.
# 3. if t_min and t_max are not given, and scene_aabb is not given,
# set t_min to 0.0, and t_max to 1e10. (the case of unbounded scene)
# 4. always clip t_min with near_plane and t_max with far_plane if given.
if t_min is None or t_max is None:
if scene_aabb is not None:
t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
else:
t_min = torch.zeros_like(rays_o[..., 0])
t_max = torch.ones_like(rays_o[..., 0]) * 1e10
if near_plane is not None:
t_min = torch.clamp(t_min, min=near_plane)
if far_plane is not None:
t_max = torch.clamp(t_max, max=far_plane)
# stratified sampling: jitter each ray's start by up to one step so sample
# locations vary across iterations (prevents overfitting during training)
if stratified:
t_min = t_min + torch.rand_like(t_min) * render_step_size
# use grid for skipping if given
if grid is not None:
grid_roi_aabb = grid.roi_aabb
grid_binary = grid.binary
contraction_type = _C.ContractionType(grid.contraction_type.value)
else:
grid_roi_aabb = torch.tensor(
[-1e10, -1e10, -1e10, 1e10, 1e10, 1e10],
dtype=torch.float32,
device=rays_o.device,
)
grid_binary = torch.ones(
[1, 1, 1], dtype=torch.bool, device=rays_o.device
)
contraction_type = _C.ContractionType.AABB
# marching with grid-based skipping
packed_info, t_starts, t_ends = _C.ray_marching(
# rays
rays_o.contiguous(),
rays_d.contiguous(),
t_min.contiguous(),
t_max.contiguous(),
# contraction and grid
grid_roi_aabb.contiguous(),
grid_binary.contiguous(),
contraction_type,
# sampling
render_step_size,
cone_angle,
)
# skip invisible space
if sigma_fn is not None:
# Query sigma without gradients
ray_indices = unpack_to_ray_indices(packed_info)
sigmas = sigma_fn(t_starts, t_ends, ray_indices)
assert (
sigmas.shape == t_starts.shape
), "sigmas must have shape of (N, 1)! Got {}".format(sigmas.shape)
alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
# Compute visibility of the samples, and filter out invisible samples
visibility, packed_info_visible = render_visibility(
packed_info, alphas, early_stop_eps
)
t_starts, t_ends = t_starts[visibility], t_ends[visibility]
packed_info = packed_info_visible
return packed_info, t_starts, t_ends
""" Volumetric rendering utilities. """
from typing import Any, List, Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as nerfacc_cuda
@torch.no_grad()
def ray_aabb_intersect(
rays_o: Tensor, rays_d: Tensor, aabb: Tensor
) -> Tuple[Tensor, Tensor]:
"""Ray AABB Test.
Note: this function is not differentiable to inputs.
Args:
rays_o: Ray origins. Tensor with shape (n_rays, 3).
rays_d: Normalized ray directions. Tensor with shape (n_rays, 3).
aabb: Scene bounding box {xmin, ymin, zmin, xmax, ymax, zmax}. \
Tensor with shape (6)
Returns:
Ray AABB intersection {t_min, t_max} with shape (n_rays) respectively. \
Note that t_min is clipped to a minimum of zero. A value of 1e10 means no intersection.
"""
if rays_o.is_cuda and rays_d.is_cuda and aabb.is_cuda:
rays_o = rays_o.contiguous()
rays_d = rays_d.contiguous()
aabb = aabb.contiguous()
t_min, t_max = nerfacc_cuda.ray_aabb_intersect(rays_o, rays_d, aabb)
else:
raise NotImplementedError("Only support cuda inputs.")
return t_min, t_max
@torch.no_grad()
def volumetric_marching(
rays_o: Tensor,
rays_d: Tensor,
aabb: Tensor,
scene_resolution: List[int],
scene_occ_binary: Tensor,
t_min: Optional[Tensor] = None,
t_max: Optional[Tensor] = None,
render_step_size: float = 1e-3,
near_plane: float = 0.0,
stratified: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""Volumetric marching with occupancy test.
Note: this function is not differentiable to inputs.
Args:
rays_o: Ray origins. Tensor with shape (n_rays, 3).
rays_d: Normalized ray directions. Tensor with shape (n_rays, 3).
aabb: Scene bounding box {xmin, ymin, zmin, xmax, ymax, zmax}. \
Tensor with shape (6)
scene_resolution: Shape of the `scene_occ_binary`. {resx, resy, resz}.
scene_occ_binary: Scene occupancy binary field. BoolTensor with \
shape (resx * resy * resz)
t_min: Optional. Ray near planes. Tensor with shape (n_ray,). \
If not given it will be calculated using aabb test. Default is None.
t_max: Optional. Ray far planes. Tensor with shape (n_ray,). \
If not given it will be calculated using aabb test. Default is None.
render_step_size: Marching step size. Default is 1e-3.
near_plane: Near plane of the camera. Default is 0.0.
stratified: Whether to use stratified sampling. Default is False.
Returns:
A tuple of tensors containing
- **packed_info**: Stores information on which samples belong to the same ray. \
It is a tensor with shape (n_rays, 2). For each ray, the two values \
indicate the start index and the number of samples for this ray, \
respectively.
- **frustum_starts**: Where each frustum sample starts along the ray. Tensor with shape (n_samples, 1).
- **frustum_ends**: Where each frustum sample ends along the ray. Tensor with shape (n_samples, 1).
"""
if not rays_o.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
if t_min is None or t_max is None:
t_min, t_max = ray_aabb_intersect(rays_o, rays_d, aabb)
if near_plane > 0.0:
t_min = torch.clamp(t_min, min=near_plane)
assert (
scene_occ_binary.numel()
== scene_resolution[0] * scene_resolution[1] * scene_resolution[2]
), f"Shape {scene_occ_binary.shape} is not right!"
if stratified:
t_min = t_min + torch.rand_like(t_min) * render_step_size
(
packed_info,
frustum_starts,
frustum_ends,
) = nerfacc_cuda.volumetric_marching(
# rays
rays_o.contiguous(),
rays_d.contiguous(),
t_min.contiguous(),
t_max.contiguous(),
# density grid
aabb.contiguous(),
scene_resolution,
scene_occ_binary.contiguous(),
# sampling
render_step_size,
)
return packed_info, frustum_starts, frustum_ends
@torch.no_grad()
def volumetric_rendering_steps(
packed_info: Tensor,
sigmas: Tensor,
frustum_starts: Tensor,
frustum_ends: Tensor,
*args,
) -> Tuple[Tensor, ...]:
"""Compute rendering marching steps.
This function will compact the samples by terminating the marching once the \
accumulated opacity reaches 0.9999. It is recommended that, before running your \
network with gradients enabled, you first run this function without gradients \
(torch.no_grad()) to quickly filter out some samples.
Note: this function is not differentiable to inputs.
Args:
packed_info: Stores information on which samples belong to the same ray. \
See volumetric_marching for details. Tensor with shape (n_rays, 2).
sigmas: Densities at those samples. Tensor with shape (n_samples, 1).
frustum_starts: Where the frustum-shape sample starts along a ray. Tensor with \
shape (n_samples, 1).
frustum_ends: Where the frustum-shape sample ends along a ray. Tensor with \
shape (n_samples, 1).
Returns:
A tuple of tensors containing
- **compact_packed_info**: Compacted version of input packed_info.
- **compact_frustum_starts**: Compacted version of input frustum_starts.
- **compact_frustum_ends**: Compacted version of input frustum_ends.
"""
if (
packed_info.is_cuda
and frustum_starts.is_cuda
and frustum_ends.is_cuda
and sigmas.is_cuda
):
packed_info = packed_info.contiguous()
frustum_starts = frustum_starts.contiguous()
frustum_ends = frustum_ends.contiguous()
sigmas = sigmas.contiguous()
(
compact_packed_info,
compact_selector,
) = nerfacc_cuda.volumetric_rendering_steps(
packed_info, frustum_starts, frustum_ends, sigmas
)
compact_frustum_starts = frustum_starts[compact_selector]
compact_frustum_ends = frustum_ends[compact_selector]
extras = (arg[compact_selector] for arg in args)
else:
raise NotImplementedError("Only support cuda inputs.")
return (
compact_packed_info,
compact_frustum_starts,
compact_frustum_ends,
*extras,
)
def volumetric_rendering_weights(
packed_info: Tensor,
sigmas: Tensor,
frustum_starts: Tensor,
frustum_ends: Tensor,
) -> Tensor:
"""Compute weights for volumetric rendering.
Note: this function is only differentiable to `sigmas`.
Args:
packed_info: Stores information on which samples belong to the same ray. \
See ``volumetric_marching`` for details. Tensor with shape (n_rays, 2).
sigmas: Densities at those samples. Tensor with shape (n_samples, 1).
frustum_starts: Where the frustum-shape sample starts along a ray. Tensor with \
shape (n_samples, 1).
frustum_ends: Where the frustum-shape sample ends along a ray. Tensor with \
shape (n_samples, 1).
Returns:
Volumetric rendering weights for those samples. Tensor with shape (n_samples).
"""
if (
packed_info.is_cuda
and frustum_starts.is_cuda
and frustum_ends.is_cuda
and sigmas.is_cuda
):
packed_info = packed_info.contiguous()
frustum_starts = frustum_starts.contiguous()
frustum_ends = frustum_ends.contiguous()
sigmas = sigmas.contiguous()
weights = _VolumetricRenderingWeights.apply(
packed_info, frustum_starts, frustum_ends, sigmas
)
else:
raise NotImplementedError("Only support cuda inputs.")
return weights
def volumetric_rendering_accumulate(
weights: Tensor,
ray_indices: Tensor,
values: Optional[Tensor] = None,
n_rays: Optional[int] = None,
) -> Tensor:
"""Accumulate volumetric values along the ray.
Note: this function is only differentiable to weights and values.
Args:
weights: Volumetric rendering weights for those samples. Tensor with shape \
(n_samples).
ray_indices: Ray index of each sample. IntTensor with shape (n_samples).
values: The values to be accumulated. Tensor with shape (n_samples, D). If \
None, the accumulated values are just weights. Default is None.
n_rays: Total number of rays. This will decide the shape of the outputs. If \
None, it will be inferred from `ray_indices.max() + 1`. If specified, \
it must be larger than `ray_indices.max()`. Default is None.
Returns:
Accumulated values with shape (n_rays, D). If `values` is not given then we return \
the accumulated weights, in which case D == 1.
"""
assert ray_indices.dim() == 1 and weights.dim() == 1
if not weights.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
if values is not None:
assert values.dim() == 2 and values.shape[0] == weights.shape[0]
src = weights[:, None] * values
else:
src = weights[:, None]
if ray_indices.numel() == 0:
assert n_rays is not None
return torch.zeros((n_rays, src.shape[-1]), device=weights.device)
if n_rays is None:
n_rays = int(ray_indices.max()) + 1
else:
assert n_rays > ray_indices.max()
index = ray_indices[:, None].long().expand(-1, src.shape[-1])
outputs = torch.zeros((n_rays, src.shape[-1]), device=weights.device)
outputs.scatter_add_(0, index, src)
return outputs
@torch.no_grad()
def unpack_to_ray_indices(packed_info: Tensor) -> Tensor:
"""Unpack `packed_info` to ray indices. Useful for converting per ray data to per sample data.
Note: this function is not differentiable to inputs.
Args:
packed_info: Stores information on which samples belong to the same ray. \
See ``volumetric_marching`` for details. Tensor with shape (n_rays, 2).
Returns:
Ray index of each sample. IntTensor with shape (n_samples).
"""
if packed_info.is_cuda:
packed_info = packed_info.contiguous()
ray_indices = nerfacc_cuda.unpack_to_ray_indices(packed_info)
else:
raise NotImplementedError("Only support cuda inputs.")
return ray_indices
class _VolumetricRenderingWeights(torch.autograd.Function):
@staticmethod
def forward(
ctx, packed_info, frustum_starts, frustum_ends, sigmas
): # pylint: disable=arguments-differ
weights = nerfacc_cuda.volumetric_rendering_weights_forward(
packed_info, frustum_starts, frustum_ends, sigmas
)
ctx.save_for_backward(
packed_info,
frustum_starts,
frustum_ends,
sigmas,
weights,
)
return weights
@staticmethod
def backward(ctx, grad_weights): # pylint: disable=arguments-differ
(
packed_info,
frustum_starts,
frustum_ends,
sigmas,
weights,
) = ctx.saved_tensors
grad_sigmas = nerfacc_cuda.volumetric_rendering_weights_backward(
weights,
grad_weights,
packed_info,
frustum_starts,
frustum_ends,
sigmas,
)
return None, None, None, grad_sigmas
@staticmethod
def jvp(ctx: Any, *grad_inputs: Any) -> Any:
raise NotImplementedError("Not implemented.")
from typing import Optional, Tuple
import torch
from torch import Tensor
import nerfacc.cuda as _C
def accumulate_along_rays(
weights: Tensor,
ray_indices: Tensor,
values: Optional[Tensor] = None,
n_rays: Optional[int] = None,
) -> Tensor:
"""Accumulate volumetric values along the ray.
Note:
This function is only differentiable to `weights` and `values`.
Args:
weights: Volumetric rendering weights for those samples. Tensor with shape \
(n_samples,).
ray_indices: Ray index of each sample. IntTensor with shape (n_samples). \
It can be obtained from `unpack_to_ray_indices(packed_info)`.
values: The values to be accumulated. Tensor with shape (n_samples, D). If \
None, the accumulated values are just weights. Default is None.
n_rays: Total number of rays. This will decide the shape of the outputs. If \
None, it will be inferred from `ray_indices.max() + 1`. If specified, \
it must be larger than `ray_indices.max()`. Default is None.
Returns:
Accumulated values with shape (n_rays, D). If `values` is not given then we return \
the accumulated weights, in which case D == 1.
Examples:
.. code-block:: python
# Rendering: accumulate rgbs, opacities, and depths along the rays.
colors = accumulate_along_rays(weights, ray_indices, values=rgbs, n_rays=n_rays)
opacities = accumulate_along_rays(weights, ray_indices, values=None, n_rays=n_rays)
depths = accumulate_along_rays(
weights,
ray_indices,
values=(t_starts + t_ends) / 2.0,
n_rays=n_rays,
)
# (n_rays, 3), (n_rays, 1), (n_rays, 1)
print(colors.shape, opacities.shape, depths.shape)
"""
assert ray_indices.dim() == 1 and weights.dim() == 1
if not weights.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
if values is not None:
assert (
values.dim() == 2 and values.shape[0] == weights.shape[0]
), "Invalid shapes: {} vs {}".format(values.shape, weights.shape)
src = weights[:, None] * values
else:
src = weights[:, None]
if ray_indices.numel() == 0:
assert n_rays is not None
return torch.zeros((n_rays, src.shape[-1]), device=weights.device)
if n_rays is None:
n_rays = int(ray_indices.max()) + 1
else:
assert n_rays > ray_indices.max()
ray_indices = ray_indices.int()
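# Scatter-add each weighted sample into the output slot of its ray.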
index = ray_indices[:, None].long().expand(-1, src.shape[-1])
outputs = torch.zeros((n_rays, src.shape[-1]), device=weights.device)
outputs.scatter_add_(0, index, src)
return outputs
def render_weight_from_density(
packed_info,
t_starts,
t_ends,
sigmas,
early_stop_eps: float = 1e-4,
) -> torch.Tensor:
"""Compute transmittance weights from density.
Args:
packed_info: Stores information on which samples belong to the same ray. \
See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).
t_starts: Where the frustum-shape sample starts along a ray. Tensor with \
shape (n_samples, 1).
t_ends: Where the frustum-shape sample ends along a ray. Tensor with \
shape (n_samples, 1).
sigmas: The density values of the samples. Tensor with shape (n_samples, 1).
early_stop_eps: The epsilon value for early stopping. Default is 1e-4.
Returns:
transmittance weights with shape (n_samples,).
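Each weight follows standard volume rendering: \
`w_i = T_i * (1 - exp(-sigmas_i * (t_ends_i - t_starts_i)))`, where \
`T_i = exp(-sum_{j < i} sigmas_j * (t_ends_j - t_starts_j))` is the \
transmittance accumulated along the ray up to sample `i`.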
Examples:
.. code-block:: python
rays_o = torch.rand((128, 3), device="cuda:0")
rays_d = torch.randn((128, 3), device="cuda:0")
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching with near far plane.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# pseudo density
sigmas = torch.rand((t_starts.shape[0], 1), device="cuda:0")
# Rendering: compute weights and ray indices.
weights = render_weight_from_density(
packed_info, t_starts, t_ends, sigmas, early_stop_eps=1e-4
)
# torch.Size([115200, 1]) torch.Size([115200])
print(sigmas.shape, weights.shape)
"""
if not sigmas.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
weights = _RenderingDensity.apply(
packed_info, t_starts, t_ends, sigmas, early_stop_eps
)
return weights
def render_weight_from_alpha(
packed_info,
alphas,
early_stop_eps: float = 1e-4,
) -> torch.Tensor:
"""Compute transmittance weights from alpha (opacity).
Args:
packed_info: Stores information on which samples belong to the same ray. \
See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).
alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
early_stop_eps: The epsilon value for early stopping. Default is 1e-4.
Returns:
transmittance weights with shape (n_samples,).
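Each weight is `w_i = T_i * alphas_i`, where \
`T_i = prod_{j < i} (1 - alphas_j)` is the transmittance accumulated \
along the ray up to sample `i`.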
Examples:
.. code-block:: python
rays_o = torch.rand((128, 3), device="cuda:0")
rays_d = torch.randn((128, 3), device="cuda:0")
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching with near far plane.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# pseudo opacity
alphas = torch.rand((t_starts.shape[0], 1), device="cuda:0")
# Rendering: compute weights and ray indices.
weights = render_weight_from_alpha(
packed_info, alphas, early_stop_eps=1e-4
)
# torch.Size([115200, 1]) torch.Size([115200])
print(alphas.shape, weights.shape)
"""
if not alphas.is_cuda:
raise NotImplementedError("Only support cuda inputs.")
weights = _RenderingAlpha.apply(packed_info, alphas, early_stop_eps)
return weights
@torch.no_grad()
def render_visibility(
packed_info: torch.Tensor,
alphas: torch.Tensor,
early_stop_eps: float = 1e-4,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""Filter out invisible samples given alpha (opacity).
Args:
packed_info: Stores information on which samples belong to the same ray. \
See :func:`nerfacc.ray_marching` for details. Tensor with shape (n_rays, 2).
alphas: The opacity values of the samples. Tensor with shape (n_samples, 1).
early_stop_eps: The epsilon value for early stopping. Default is 1e-4.
Returns:
A tuple of tensors.
- **visibility**: The visibility mask for samples. Boolean tensor of shape \
(n_samples,).
- **packed_info_visible**: The new packed_info for visible samples. \
Tensor shape (n_rays, 2). It should be used if you use the visibility \
mask to filter out invisible samples.
Examples:
.. code-block:: python
rays_o = torch.rand((128, 3), device="cuda:0")
rays_d = torch.randn((128, 3), device="cuda:0")
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
# Ray marching with near far plane.
packed_info, t_starts, t_ends = ray_marching(
rays_o, rays_d, near_plane=0.1, far_plane=1.0, render_step_size=1e-3
)
# pesudo opacity
alphas = torch.rand((t_starts.shape[0], 1), device="cuda:0")
# Rendering but only for computing visibility of each sample.
visibility, packed_info_visible = render_visibility(
packed_info, alphas, early_stop_eps=1e-4
)
t_starts_visible = t_starts[visibility]
t_ends_visible = t_ends[visibility]
# torch.Size([115200, 1]) torch.Size([1283, 1])
print(t_starts.shape, t_starts_visible.shape)
"""
visibility, packed_info_visible = _C.rendering_alphas_forward(
packed_info.contiguous(),
alphas.contiguous(),
early_stop_eps,
True, # compute visibility instead of weights
)
return visibility, packed_info_visible
class _RenderingDensity(torch.autograd.Function):
"""Rendering transmittance weights from density."""
@staticmethod
def forward(
ctx,
packed_info,
t_starts,
t_ends,
sigmas,
early_stop_eps: float = 1e-4,
):
packed_info = packed_info.contiguous()
t_starts = t_starts.contiguous()
t_ends = t_ends.contiguous()
sigmas = sigmas.contiguous()
weights = _C.rendering_forward(
packed_info,
t_starts,
t_ends,
sigmas,
early_stop_eps,
False, # not doing filtering
)[0]
if ctx.needs_input_grad[3]: # sigmas
ctx.save_for_backward(
packed_info,
t_starts,
t_ends,
sigmas,
weights,
)
ctx.early_stop_eps = early_stop_eps
return weights
@staticmethod
def backward(ctx, grad_weights):
grad_weights = grad_weights.contiguous()
early_stop_eps = ctx.early_stop_eps
(
packed_info,
t_starts,
t_ends,
sigmas,
weights,
) = ctx.saved_tensors
grad_sigmas = _C.rendering_backward(
weights,
grad_weights,
packed_info,
t_starts,
t_ends,
sigmas,
early_stop_eps,
)
return None, None, None, grad_sigmas, None
class _RenderingAlpha(torch.autograd.Function):
"""Rendering transmittance weights from alpha."""
@staticmethod
def forward(
ctx,
packed_info,
alphas,
early_stop_eps: float = 1e-4,
):
packed_info = packed_info.contiguous()
alphas = alphas.contiguous()
weights = _C.rendering_alphas_forward(
packed_info,
alphas,
early_stop_eps,
False, # not doing filtering
)[0]
if ctx.needs_input_grad[1]: # alphas
ctx.save_for_backward(
packed_info,
alphas,
weights,
)
ctx.early_stop_eps = early_stop_eps
return weights
@staticmethod
def backward(ctx, grad_weights):
grad_weights = grad_weights.contiguous()
early_stop_eps = ctx.early_stop_eps
(
packed_info,
alphas,
weights,
) = ctx.saved_tensors
# Note: assumes the backend exposes `rendering_alphas_backward` as the
# counterpart of the `rendering_alphas_forward` call used in forward().
grad_alphas = _C.rendering_alphas_backward(
weights,
grad_weights,
packed_info,
alphas,
early_stop_eps,
)
return None, grad_alphas, None
""" Full volumetric rendering pipeline. """
from typing import Callable, List, Optional, Tuple
import torch
from .utils import (
unpack_to_ray_indices,
volumetric_marching,
volumetric_rendering_accumulate,
volumetric_rendering_steps,
volumetric_rendering_weights,
)
def volumetric_rendering_pipeline(
sigma_fn: Callable,
rgb_sigma_fn: Callable,
rays_o: torch.Tensor,
rays_d: torch.Tensor,
scene_aabb: torch.Tensor,
scene_resolution: Optional[List[int]] = None,
scene_occ_binary: Optional[torch.Tensor] = None,
render_bkgd: Optional[torch.Tensor] = None,
render_step_size: float = 1e-3,
near_plane: float = 0.0,
stratified: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, int, int]:
"""Differentiable volumetric rendering pipeline.
This function is the integration of those individual functions:
- ray_aabb_intersect
- volumetric_marching
- volumetric_rendering_steps
- volumetric_rendering_weights
- volumetric_rendering_accumulate
Args:
sigma_fn: A function that takes in the {frustum starts (N, 1), frustum ends (N, 1), and
ray indices (N,)} and returns the post-activation sigma values (N, 1).
rgb_sigma_fn: A function that takes in the {frustum starts (N, 1), frustum ends (N, 1), and
ray indices (N,)} and returns the post-activation rgb values (N, 3) and sigma values (N, 1).
rays_o: The origin of the rays (n_rays, 3).
rays_d: The normalized direction of the rays (n_rays, 3).
scene_aabb: The scene axis-aligned bounding box {xmin, ymin, zmin, xmax, ymax, zmax}.
scene_resolution: The scene resolution (3,). Defaults to None.
scene_occ_binary: The scene occupancy binary tensor used to skip samples (n_cells,). Defaults to None.
render_bkgd: The background color (3,). Default: None.
render_step_size: The step size for the volumetric rendering. Default: 1e-3.
near_plane: The near plane for the volumetric rendering. Default: 0.0.
stratified: Whether to use stratified sampling. Default: False.
Returns:
Ray colors (n_rays, 3), and opacities (n_rays, 1), the number of marching steps, and the number of rendering steps.
"""
n_rays = rays_o.shape[0]
if scene_occ_binary is None:
scene_occ_binary = torch.ones(
(1),
dtype=torch.bool,
device=rays_o.device,
)
scene_resolution = [1, 1, 1]
if scene_resolution is None:
assert scene_occ_binary is not None and scene_occ_binary.dim() == 3
scene_resolution = scene_occ_binary.shape
rays_o = rays_o.contiguous()
rays_d = rays_d.contiguous()
scene_aabb = scene_aabb.contiguous()
scene_occ_binary = scene_occ_binary.contiguous()
with torch.no_grad():
# Ray marching and occupancy check.
assert scene_resolution is not None
packed_info, frustum_starts, frustum_ends = volumetric_marching(
rays_o,
rays_d,
aabb=scene_aabb,
scene_resolution=scene_resolution,
scene_occ_binary=scene_occ_binary,
render_step_size=render_step_size,
near_plane=near_plane,
stratified=stratified,
)
n_marching_samples = frustum_starts.shape[0]
ray_indices = unpack_to_ray_indices(packed_info)
# Query sigma without gradients
sigmas = sigma_fn(frustum_starts, frustum_ends, ray_indices)
# Ray marching and rendering check.
packed_info, frustum_starts, frustum_ends = volumetric_rendering_steps(
packed_info,
sigmas,
frustum_starts,
frustum_ends,
)
n_rendering_samples = frustum_starts.shape[0]
ray_indices = unpack_to_ray_indices(packed_info)
# Query sigma and color with gradients
rgbs, sigmas = rgb_sigma_fn(frustum_starts, frustum_ends, ray_indices)
assert rgbs.shape[-1] == 3, f"rgbs must have 3 channels, got {rgbs.shape}"
assert (
sigmas.shape[-1] == 1
), f"sigmas must have 1 channel, got {sigmas.shape}"
# Rendering: compute weights and ray indices.
weights = volumetric_rendering_weights(
packed_info, sigmas, frustum_starts, frustum_ends
)
# Rendering: accumulate rgbs and opacities along the rays.
colors = volumetric_rendering_accumulate(
weights, ray_indices, values=rgbs, n_rays=n_rays
)
opacities = volumetric_rendering_accumulate(
weights, ray_indices, values=None, n_rays=n_rays
)
# depths = volumetric_rendering_accumulate(
# weights,
# ray_indices,
# values=(frustum_starts + frustum_ends) / 2.0,
# n_rays=n_rays,
# )
if render_bkgd is not None:
render_bkgd = render_bkgd.contiguous()
colors = colors + render_bkgd * (1.0 - opacities)
return colors, opacities, n_marching_samples, n_rendering_samples
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "nerfacc"
version = "0.0.9"
version = "0.1.0"
authors = [{name = "Ruilong", email = "ruilongli94@gmail.com"}]
license = { text="MIT" }
requires-python = ">=3.8"
@@ -35,6 +35,11 @@ dev = [
[tool.black]
line-length = 80
[tool.isort]
multi_line_output = 3
line_length = 80
include_trailing_comma = true
# pylint
[tool.pylint.messages_control]
max-line-length = 80
import torch
import tqdm
from nerfacc import volumetric_rendering_pipeline
device = "cuda:0"
def sigma_fn(frustum_starts, frustum_ends, ray_indices):
return torch.rand_like(frustum_ends[:, :1])
def rgb_sigma_fn(frustum_starts, frustum_ends, ray_indices):
return torch.rand(
(frustum_ends.shape[0], 3), device=device
), torch.rand_like(frustum_ends)
def test_rendering():
scene_aabb = torch.tensor([0, 0, 0, 1, 1, 1], device=device).float()
scene_resolution = [128, 128, 128]
scene_occ_binary = torch.ones((128 * 128 * 128), device=device).bool()
rays_o = torch.rand((10000, 3), device=device)
rays_d = torch.randn((10000, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
render_bkgd = torch.ones(3, device=device)
for step in tqdm.tqdm(range(1000)):
volumetric_rendering_pipeline(
sigma_fn,
rgb_sigma_fn,
rays_o,
rays_d,
scene_aabb,
scene_resolution,
scene_occ_binary,
render_bkgd,
render_step_size=1e-3,
near_plane=0.0,
stratified=False,
)
if __name__ == "__main__":
test_rendering()
import pytest
import torch
from nerfacc.contraction import ContractionType, contract, contract_inv
device = "cuda:0"
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_identity():
samples = torch.rand([128, 3], device=device)
roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
samples_out = contract(samples, roi=roi)
assert torch.allclose(samples_out, samples)
samples_inv = contract_inv(samples_out, roi=roi)
assert torch.allclose(samples_inv, samples)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_normalization():
samples = torch.rand([128, 3], device=device)
roi = torch.tensor(
[-1, -1, -1, 1, 1, 1], dtype=torch.float32, device=device
)
samples_out = contract(samples, roi=roi)
assert torch.allclose(samples_out, samples * 0.5 + 0.5)
samples_inv = contract_inv(samples_out, roi=roi)
assert torch.allclose(samples_inv, samples, atol=1e-6)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_contract():
x = torch.rand([128, 3], device=device)
roi = torch.tensor(
[0.2, 0.3, 0.4, 0.7, 0.8, 0.6], dtype=torch.float32, device=device
)
for type in [
ContractionType.UN_BOUNDED_SPHERE,
ContractionType.UN_BOUNDED_TANH,
]:
x_unit = contract(x, roi=roi, type=type)
assert x_unit.max() <= 1 and x_unit.min() >= 0
x_inv = contract_inv(x_unit, roi=roi, type=type)
assert torch.allclose(x_inv, x, atol=1e-3)
if __name__ == "__main__":
test_identity()
test_normalization()
test_contract()
import pytest
import torch
from nerfacc.contraction import ContractionType
from nerfacc.grid import OccupancyGrid
device = "cuda:0"
def occ_eval_fn(x: torch.Tensor) -> torch.Tensor:
"""Pseudo occupancy function: (N, 3) -> (N, 1)."""
return torch.rand_like(x[:, :1])
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_occ_grid():
occ_grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1], resolution=128).to(
device
)
occ_grid.every_n_step(0, occ_eval_fn, occ_thre=0.1)
assert occ_grid.roi_aabb.shape == (6,)
assert occ_grid.binary.shape == (128, 128, 128)
if __name__ == "__main__":
test_occ_grid()
import torch
import tqdm
from nerfacc import volumetric_marching
device = "cuda:0"
def test_marching():
torch.manual_seed(42)
scene_aabb = torch.tensor([0, 0, 0, 1, 1, 1], device=device).float()
scene_occ_binary = torch.rand((128 * 128 * 128), device=device) > 0.5
rays_o = torch.rand((10000, 3), device=device)
rays_d = torch.randn((10000, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
for step in tqdm.tqdm(range(5000)):
volumetric_marching(
rays_o,
rays_d,
aabb=scene_aabb,
scene_resolution=[128, 128, 128],
scene_occ_binary=scene_occ_binary,
)
if __name__ == "__main__":
test_marching()
import torch
import tqdm
from nerfacc import OccupancyField
device = "cuda:0"
def occ_eval_fn(positions: torch.Tensor) -> torch.Tensor:
return torch.rand_like(positions[:, :1])
def test_occ_field():
occ_field = OccupancyField(occ_eval_fn, aabb=[0, 0, 0, 1, 1, 1]).to(device)
for step in tqdm.tqdm(range(50000)):
occ_field.every_n_step(step, occ_thre=0.1)
if __name__ == "__main__":
test_occ_field()
import pytest
import torch
from nerfacc.grid import OccupancyGrid
from nerfacc.ray_marching import ray_marching, unpack_to_ray_indices
device = "cuda:0"
batch_size = 128
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_near_far():
rays_o = torch.rand((batch_size, 3), device=device)
rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
packed_info, t_starts, t_ends = ray_marching(
rays_o,
rays_d,
near_plane=0.1,
far_plane=1.0,
render_step_size=1e-3,
)
return
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_marching_with_grid():
rays_o = torch.rand((batch_size, 3), device=device)
rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
grid = OccupancyGrid(roi_aabb=[0, 0, 0, 1, 1, 1]).to(device)
grid._binary[:] = True
packed_info, t_starts, t_ends = ray_marching(
rays_o,
rays_d,
grid=grid,
near_plane=0.0,
far_plane=1.0,
render_step_size=1e-2,
)
ray_indices = unpack_to_ray_indices(packed_info).long()
samples = (
rays_o[ray_indices] + rays_d[ray_indices] * (t_starts + t_ends) / 2.0
)
assert (samples <= grid.roi_aabb[3:].unsqueeze(0)).all()
assert (samples >= grid.roi_aabb[:3].unsqueeze(0)).all()
return
if __name__ == "__main__":
test_marching_with_near_far()
test_marching_with_grid()
import pytest
import torch
import tqdm
from nerfacc import (
unpack_to_ray_indices,
volumetric_marching,
volumetric_rendering_accumulate,
volumetric_rendering_steps,
volumetric_rendering_weights,
)
from nerfacc.ray_marching import ray_marching
from nerfacc.vol_rendering import render_weight_from_density
device = "cuda:0"
batch_size = 128
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_transmittance_compress():
scene_aabb = torch.tensor([0, 0, 0, 1, 1, 1], device=device).float()
scene_occ_binary = torch.ones((128 * 128 * 128), device=device).bool()
rays_o = torch.rand((batch_size, 3), device=device)
rays_d = torch.randn((batch_size, 3), device=device)
rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)
for step in tqdm.tqdm(range(1000)):
(packed_info, frustum_starts, frustum_ends,) = volumetric_marching(
rays_o,
rays_d,
aabb=scene_aabb,
scene_resolution=[128, 128, 128],
scene_occ_binary=scene_occ_binary,
)
sigmas = torch.rand_like(frustum_ends[:, :1], requires_grad=True) * 100
(
packed_info,
frustum_starts,
frustum_ends,
) = volumetric_rendering_steps(
packed_info,
sigmas,
frustum_starts,
frustum_ends,
)
ray_indices = unpack_to_ray_indices(packed_info)
sigmas = torch.rand_like(frustum_ends[:, :1], requires_grad=True) * 100
values = torch.rand_like(frustum_starts, requires_grad=True)
weights = volumetric_rendering_weights(
packed_info,
sigmas,
frustum_starts,
frustum_ends,
)
accum_values = volumetric_rendering_accumulate(
weights,
ray_indices,
values,
n_rays=rays_o.shape[0],
)
accum_values.sum().backward()
packed_info, t_starts, t_ends = ray_marching(
rays_o,
rays_d,
near_plane=0.1,
far_plane=1.0,
render_step_size=1e-2,
)
sigmas = torch.rand_like(t_starts, requires_grad=True)
weights = render_weight_from_density(
packed_info,
t_starts,
t_ends,
sigmas * 1e2,
)
weights.sum().backward()
assert sigmas.grad is not None
if __name__ == "__main__":
test_transmittance_compress()