Commit 4a4bbaba authored by Ruilong Li's avatar Ruilong Li

cleanup

parent 224ba1a7
@@ -12,9 +12,9 @@ python examples/trainval.py

| trainval (35k, 1<<16) | Lego | Mic | Materials |
| - | - | - | - |
| Time | 370s | 357s | 360s |
| PSNR | 36.20 | 36.55 | 29.59 |
| FPS | 11.94 | 25.54 | 9.65 |

Tested with the default settings on the Lego test set.
......
import torch
from torch.cuda.amp import custom_bwd, custom_fwd
from ._backend import _C

# ray_aabb_intersect = _C.ray_aabb_intersect
ray_marching = _C.ray_marching
volumetric_rendering_steps = _C.volumetric_rendering_steps
volumetric_rendering_forward = _C.volumetric_rendering_forward
volumetric_rendering_backward = _C.volumetric_rendering_backward
volumetric_rendering_inference = _C.volumetric_rendering_inference
# volumetric_weights_forward = _C.volumetric_weights_forward
class VolumeRenderer(torch.autograd.Function):
"""CUDA Volumetirc Renderer"""
@staticmethod
@custom_fwd(cast_inputs=torch.float32)
def forward(ctx, packed_info, starts, ends, sigmas, rgbs):
(
accumulated_weight,
accumulated_depth,
accumulated_color,
mask,
steps_counter,
) = volumetric_rendering_forward(packed_info, starts, ends, sigmas, rgbs)
ctx.save_for_backward(
accumulated_weight,
accumulated_depth,
accumulated_color,
packed_info,
starts,
ends,
sigmas,
rgbs,
)
return (
accumulated_weight,
accumulated_depth,
accumulated_color,
mask,
steps_counter,
)
@staticmethod
@custom_bwd
def backward(
ctx, grad_weight, grad_depth, grad_color, _grad_mask, _grad_steps_counter
):
(
accumulated_weight,
accumulated_depth,
accumulated_color,
packed_info,
starts,
ends,
sigmas,
rgbs,
) = ctx.saved_tensors
grad_sigmas, grad_rgbs = volumetric_rendering_backward(
accumulated_weight,
accumulated_depth,
accumulated_color,
grad_weight,
grad_depth,
grad_color,
packed_info,
starts,
ends,
sigmas,
rgbs,
)
# corresponds to the input argument list of forward()
return None, None, None, grad_sigmas, grad_rgbs
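For context, here is a minimal usage sketch of this autograd wrapper. The tensor values and shapes below are illustrative assumptions, not part of this commit:

```python
import torch

# Two rays with three packed samples in total. Each packed_info row is
# (ray index, sample start offset, number of samples), as consumed by
# volumetric_rendering_forward above.
packed_info = torch.tensor([[0, 0, 2], [1, 2, 1]], dtype=torch.int32, device="cuda")
starts = torch.rand((3, 1), device="cuda")
ends = starts + 0.01
sigmas = torch.rand((3, 1), device="cuda", requires_grad=True)
rgbs = torch.rand((3, 3), device="cuda", requires_grad=True)

weight, depth, color, mask, steps_counter = VolumeRenderer.apply(
    packed_info, starts, ends, sigmas, rgbs
)
color.sum().backward()  # gradients flow only into sigmas and rgbs
```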
@@ -7,52 +7,13 @@ std::vector<torch::Tensor> ray_aabb_intersect(
    const torch::Tensor aabb
);
std::vector<torch::Tensor> volumetric_rendering_steps(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas
);

std::vector<torch::Tensor> volumetric_rendering_inference(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas
);
std::vector<torch::Tensor> volumetric_rendering_forward(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
torch::Tensor rgbs
);
std::vector<torch::Tensor> volumetric_rendering_backward(
torch::Tensor accumulated_weight,
torch::Tensor accumulated_depth,
torch::Tensor accumulated_color,
torch::Tensor grad_weight,
torch::Tensor grad_depth,
torch::Tensor grad_color,
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
torch::Tensor rgbs
);
std::vector<torch::Tensor> volumetric_weights_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
@@ -87,9 +48,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    m.def("ray_aabb_intersect", &ray_aabb_intersect);
    m.def("ray_marching", &ray_marching);
    m.def("volumetric_rendering_steps", &volumetric_rendering_steps);
    m.def("volumetric_rendering_inference", &volumetric_rendering_inference);
    m.def("volumetric_rendering_forward", &volumetric_rendering_forward);
    m.def("volumetric_rendering_backward", &volumetric_rendering_backward);
    m.def("volumetric_weights_forward", &volumetric_weights_forward);
    m.def("volumetric_weights_backward", &volumetric_weights_backward);
}
@@ -61,7 +61,6 @@ inline __device__ float advance_to_next_voxel(
}

__global__ void marching_steps_kernel(
    // rays info
    const uint32_t n_rays,
......
#include "include/helpers_cuda.h"
template <typename scalar_t>
__global__ void volumetric_rendering_inference_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
int* compact_packed_info, // output: should be all zero initialized
int* compact_selector, // output: should be all zero initialized
// writable helpers
int* steps_counter
) {
CUDA_GET_THREAD_ID(thread_id, n_rays);
// locate
const int base = packed_info[thread_id * 2 + 0]; // point idx start.
const int steps = packed_info[thread_id * 2 + 1]; // point idx shift.
if (steps == 0) return;
starts += base;
ends += base;
sigmas += base;
// accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
int j = 0;
for (; j < steps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
T *= (1.f - alpha);
}
int compact_base = atomicAdd(steps_counter, j);
compact_selector += compact_base;
for (int k = 0; k < j; ++k) {
compact_selector[k] = base + k;
}
compact_packed_info[thread_id * 2 + 0] = compact_base; // compact point idx start.
compact_packed_info[thread_id * 2 + 1] = j; // compact point idx shift.
}
template <typename scalar_t>
__global__ void volumetric_rendering_forward_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
const scalar_t* rgbs, // input rgb after activation
// should be all-zero initialized
scalar_t* accumulated_weight, // output
scalar_t* accumulated_depth, // output
scalar_t* accumulated_color, // output
bool* mask, // output
// writable helpers
int* steps_counter
) {
CUDA_GET_THREAD_ID(thread_id, n_rays);
// locate
const int i = packed_info[thread_id * 3 + 0]; // ray idx in {rays_o, rays_d}
const int base = packed_info[thread_id * 3 + 1]; // point idx start.
const int numsteps = packed_info[thread_id * 3 + 2]; // point idx shift.
if (numsteps == 0) return;
starts += base;
ends += base;
sigmas += base;
rgbs += base * 3;
accumulated_weight += i;
accumulated_depth += i;
accumulated_color += i * 3;
mask += i;
// accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
int j = 0;
for (; j < numsteps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t t = (ends[j] + starts[j]) * 0.5f;
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
const scalar_t weight = alpha * T;
accumulated_weight[0] += weight;
accumulated_depth[0] += weight * t;
accumulated_color[0] += weight * rgbs[j * 3 + 0];
accumulated_color[1] += weight * rgbs[j * 3 + 1];
accumulated_color[2] += weight * rgbs[j * 3 + 2];
T *= (1.f - alpha);
}
mask[0] = true;
atomicAdd(steps_counter, j);
}
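The loop above implements the standard discrete volume-rendering quadrature, with early termination once the transmittance drops below `EPSILON`. For reference, in math form (the usual NeRF-style accumulation, not new to this commit):

```latex
\alpha_j = 1 - e^{-\sigma_j \delta_j}, \qquad
T_j = \prod_{k < j} (1 - \alpha_k), \qquad
w_j = \alpha_j T_j
```

so the kernel accumulates the ray opacity \(W = \sum_j w_j\), the depth \(D = \sum_j w_j t_j\) with \(t_j\) the segment midpoint, and the color \(C = \sum_j w_j \mathbf{c}_j\).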
template <typename scalar_t>
__global__ void volumetric_rendering_backward_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
const scalar_t* rgbs, // input rgb after activation
const scalar_t* accumulated_weight, // forward output
const scalar_t* accumulated_depth, // forward output
const scalar_t* accumulated_color, // forward output
const scalar_t* grad_weight, // input
const scalar_t* grad_depth, // input
const scalar_t* grad_color, // input
scalar_t* grad_sigmas, // output
scalar_t* grad_rgbs // output
) {
CUDA_GET_THREAD_ID(thread_id, n_rays);
// locate
const int i = packed_info[thread_id * 3 + 0]; // ray idx in {rays_o, rays_d}
const int base = packed_info[thread_id * 3 + 1]; // point idx start.
const int numsteps = packed_info[thread_id * 3 + 2]; // point idx shift.
if (numsteps == 0) return;
starts += base;
ends += base;
sigmas += base;
rgbs += base * 3;
grad_sigmas += base;
grad_rgbs += base * 3;
accumulated_weight += i;
accumulated_depth += i;
accumulated_color += i * 3;
grad_weight += i;
grad_depth += i;
grad_color += i * 3;
// backward of accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
int j = 0;
scalar_t r = 0, g = 0, b = 0, d = 0;
for (; j < numsteps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t t = (ends[j] + starts[j]) * 0.5f;
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
const scalar_t weight = alpha * T;
r += weight * rgbs[j * 3 + 0];
g += weight * rgbs[j * 3 + 1];
b += weight * rgbs[j * 3 + 2];
d += weight * t;
T *= (1.f - alpha);
grad_rgbs[j * 3 + 0] = grad_color[0] * weight;
grad_rgbs[j * 3 + 1] = grad_color[1] * weight;
grad_rgbs[j * 3 + 2] = grad_color[2] * weight;
grad_sigmas[j] = delta * (
grad_color[0] * (T * rgbs[j * 3 + 0] - (accumulated_color[0] - r)) +
grad_color[1] * (T * rgbs[j * 3 + 1] - (accumulated_color[1] - g)) +
grad_color[2] * (T * rgbs[j * 3 + 2] - (accumulated_color[2] - b)) +
grad_weight[0] * (1.f - accumulated_weight[0]) +
grad_depth[0] * (t * T - (accumulated_depth[0] - d))
);
}
}
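For reference, the `grad_sigmas` expression in the loop is the analytic derivative of the accumulated outputs, with suffix sums rewritten in terms of the forward outputs (this is why `r`, `g`, `b`, `d` track running prefix sums, and why `T` is read *after* the update, i.e. as \(T_{j+1}\)):

```latex
\frac{\partial C}{\partial \sigma_j}
  = \delta_j \Big( T_{j+1}\,\mathbf{c}_j - \sum_{k>j} w_k \mathbf{c}_k \Big), \qquad
\frac{\partial W}{\partial \sigma_j} = \delta_j \,(1 - W), \qquad
\frac{\partial D}{\partial \sigma_j}
  = \delta_j \Big( T_{j+1}\, t_j - \sum_{k>j} w_k t_k \Big)
```

where \(\sum_{k>j} w_k \mathbf{c}_k = C - \sum_{k \le j} w_k \mathbf{c}_k\) is exactly `accumulated_color[.] - r/g/b`. The kernel then chains these partials with the incoming `grad_color`, `grad_weight`, and `grad_depth`.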
std::vector<torch::Tensor> volumetric_rendering_inference(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// helper counter
torch::Tensor steps_counter = torch::zeros(
{1}, packed_info.options().dtype(torch::kInt32));
// outputs
torch::Tensor compact_packed_info = torch::zeros({n_rays, 2}, packed_info.options());
    torch::Tensor compact_selector = -torch::ones({n_samples}, packed_info.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_inference",
([&]
{ volumetric_rendering_inference_kernel<scalar_t><<<blocks, threads>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
compact_packed_info.data_ptr<int>(),
compact_selector.data_ptr<int>(),
steps_counter.data_ptr<int>()
);
}));
return {compact_packed_info, compact_selector, steps_counter};
}
/**
* @brief Volumetric Rendering: Accumulating samples in the forward pass.
 * The inputs, except for `sigmas` and `rgbs`, are the outputs of our
* cuda ray marching function in `raymarching.cu`
*
* @param packed_info Stores how to index the ray samples from the returned values.
* Shape of [n_rays, 3]. First value is the ray index. Second value is the sample
* start index in the results for this ray. Third value is the number of samples for
 * this ray. Note that rays with zero samples are simply skipped, so `packed_info`
 * may contain some zero padding at the end.
 * @param starts: Where the frustum-shaped sample starts along a ray. [total_samples, 1]
 * @param ends: Where the frustum-shaped sample ends along a ray. [total_samples, 1]
* @param sigmas Densities at those samples. [total_samples, 1]
* @param rgbs RGBs at those samples. [total_samples, 3]
* @return std::vector<torch::Tensor>
* - accumulated_weight: Ray opacity. [n_rays, 1]
* - accumulated_depth: Ray depth. [n_rays, 1]
* - accumulated_color: Ray color. [n_rays, 3]
 * - mask: Boolean value storing whether this ray has valid samples in `packed_info`. [n_rays]
*/
std::vector<torch::Tensor> volumetric_rendering_forward(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
torch::Tensor rgbs
) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
CHECK_INPUT(rgbs);
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 3);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
    TORCH_CHECK(rgbs.ndimension() == 2 && rgbs.size(1) == 3);
const uint32_t n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// helper counter
torch::Tensor steps_counter = torch::zeros(
{1}, rgbs.options().dtype(torch::kInt32));
// outputs
torch::Tensor accumulated_weight = torch::zeros({n_rays, 1}, sigmas.options());
torch::Tensor accumulated_depth = torch::zeros({n_rays, 1}, sigmas.options());
torch::Tensor accumulated_color = torch::zeros({n_rays, 3}, sigmas.options());
// The rays that are not skipped during sampling.
torch::Tensor mask = torch::zeros({n_rays}, sigmas.options().dtype(torch::kBool));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_forward",
([&]
{ volumetric_rendering_forward_kernel<scalar_t><<<blocks, threads>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
rgbs.data_ptr<scalar_t>(),
accumulated_weight.data_ptr<scalar_t>(),
accumulated_depth.data_ptr<scalar_t>(),
accumulated_color.data_ptr<scalar_t>(),
mask.data_ptr<bool>(),
steps_counter.data_ptr<int>()
);
}));
return {accumulated_weight, accumulated_depth, accumulated_color, mask, steps_counter};
}
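The `packed_info` layout documented above is easiest to see in a toy example. Here is a minimal pure-Python sketch (illustrative only; the `unpack` helper is hypothetical and not part of this codebase):

```python
import torch

def unpack(packed_info, samples):
    """Hypothetical helper: yield per-ray slices of a packed sample tensor."""
    for ray_idx, base, steps in packed_info.tolist():
        if steps == 0:
            continue  # zero-padded entry for a skipped ray
        yield ray_idx, samples[base : base + steps]

# Ray 0 owns samples [0, 3), ray 2 owns sample [3, 4); the last row is padding.
packed_info = torch.tensor([[0, 0, 3], [2, 3, 1], [0, 0, 0]], dtype=torch.int32)
starts = torch.arange(4.0)[:, None]  # [total_samples, 1]
for ray_idx, ray_starts in unpack(packed_info, starts):
    print(ray_idx, ray_starts.squeeze(-1).tolist())
# 0 [0.0, 1.0, 2.0]
# 2 [3.0]
```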
/**
* @brief Volumetric Rendering: Accumulating samples in the backward pass.
*/
std::vector<torch::Tensor> volumetric_rendering_backward(
torch::Tensor accumulated_weight,
torch::Tensor accumulated_depth,
torch::Tensor accumulated_color,
torch::Tensor grad_weight,
torch::Tensor grad_depth,
torch::Tensor grad_color,
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas,
torch::Tensor rgbs
) {
DEVICE_GUARD(packed_info);
const uint32_t n_rays = packed_info.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
// outputs
torch::Tensor grad_sigmas = torch::zeros(sigmas.sizes(), sigmas.options());
torch::Tensor grad_rgbs = torch::zeros(rgbs.sizes(), rgbs.options());
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_backward",
([&]
{ volumetric_rendering_backward_kernel<scalar_t><<<blocks, threads>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
rgbs.data_ptr<scalar_t>(),
accumulated_weight.data_ptr<scalar_t>(),
accumulated_depth.data_ptr<scalar_t>(),
accumulated_color.data_ptr<scalar_t>(),
grad_weight.data_ptr<scalar_t>(),
grad_depth.data_ptr<scalar_t>(),
grad_color.data_ptr<scalar_t>(),
grad_sigmas.data_ptr<scalar_t>(),
grad_rgbs.data_ptr<scalar_t>()
);
}));
return {grad_sigmas, grad_rgbs};
}
#include "include/helpers_cuda.h" #include "include/helpers_cuda.h"
template <typename scalar_t>
__global__ void volumetric_rendering_steps_kernel(
const uint32_t n_rays,
const int* packed_info, // input ray & point indices.
const scalar_t* starts, // input start t
const scalar_t* ends, // input end t
const scalar_t* sigmas, // input density after activation
// output: should be all zero (false) initialized
int* num_steps,
bool* selector
) {
CUDA_GET_THREAD_ID(i, n_rays);
// locate
const int base = packed_info[i * 2 + 0]; // point idx start.
const int steps = packed_info[i * 2 + 1]; // point idx shift.
if (steps == 0) return;
starts += base;
ends += base;
sigmas += base;
num_steps += i;
selector += base;
// accumulated rendering
scalar_t T = 1.f;
scalar_t EPSILON = 1e-4f;
int j = 0;
for (; j < steps; ++j) {
if (T < EPSILON) {
break;
}
const scalar_t delta = ends[j] - starts[j];
const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
T *= (1.f - alpha);
selector[j] = true;
}
num_steps[0] = j;
return;
}
template <typename scalar_t>
__global__ void volumetric_weights_forward_kernel(
    const uint32_t n_rays,
@@ -95,6 +138,55 @@ __global__ void volumetric_weights_backward_kernel(
}
std::vector<torch::Tensor> volumetric_rendering_steps(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
) {
DEVICE_GUARD(packed_info);
CHECK_INPUT(packed_info);
CHECK_INPUT(starts);
CHECK_INPUT(ends);
CHECK_INPUT(sigmas);
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);
const uint32_t n_rays = packed_info.size(0);
const uint32_t n_samples = sigmas.size(0);
const int threads = 256;
const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);
torch::Tensor num_steps = torch::zeros({n_rays}, packed_info.options());
torch::Tensor selector = torch::zeros({n_samples}, packed_info.options().dtype(torch::kBool));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
sigmas.scalar_type(),
"volumetric_rendering_inference",
([&]
{ volumetric_rendering_steps_kernel<scalar_t><<<blocks, threads>>>(
n_rays,
packed_info.data_ptr<int>(),
starts.data_ptr<scalar_t>(),
ends.data_ptr<scalar_t>(),
sigmas.data_ptr<scalar_t>(),
num_steps.data_ptr<int>(),
selector.data_ptr<bool>()
);
}));
torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
torch::Tensor compact_packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
return {compact_packed_info, selector};
}
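Compared to `volumetric_rendering_inference_kernel` above, which compacts through a global `atomicAdd` counter, this version only writes a per-ray step count and a boolean selector, then repacks deterministically with an exclusive prefix sum on the host. A small PyTorch sketch of that repacking step (illustrative values):

```python
import torch

# Suppose three rays keep 2, 0 and 3 samples after early termination.
num_steps = torch.tensor([2, 0, 3], dtype=torch.int32)
cum_steps = num_steps.cumsum(0, dtype=torch.int32)
# Row i is (start offset, count) of ray i in the compacted sample buffer.
compact_packed_info = torch.stack([cum_steps - num_steps, num_steps], dim=1)
print(compact_packed_info)  # [[0, 2], [2, 0], [2, 3]]
# The boolean `selector` then gathers exactly those surviving samples.
```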
std::vector<torch::Tensor> volumetric_weights_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
......
@@ -78,7 +78,7 @@ def volumetric_accumulate(
    values: torch.Tensor = None,
    n_rays: int = None,
) -> torch.Tensor:
    """Accumulate volumetric values along the ray.

    Note: this function is only differentiable with respect to `weights` and `values`.
......
@@ -5,7 +5,7 @@ import torch
from .cuda import (  # ComputeWeight,; VolumeRenderer,; ray_aabb_intersect,
    ray_marching,
    volumetric_rendering_steps,
)
from .utils import ray_aabb_intersect, volumetric_accumulate, volumetric_weights
@@ -35,16 +35,13 @@ def volumetric_rendering(
    render_bkgd = render_bkgd.contiguous()
    n_rays = rays_o.shape[0]
    if render_step_size is None:
        # Note: this CPU<->GPU sync is not ideal; try to pre-define it outside this function.
        render_step_size = (
            (scene_aabb[3:] - scene_aabb[:3]).max() * math.sqrt(3) / render_n_samples
        )
# get packed samples from ray marching & occupancy check.
    with torch.no_grad():
        t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
@@ -73,50 +70,30 @@ def volumetric_rendering(
        )
        steps_counter = packed_info[:, -1].sum(0, keepdim=True)
    # compact the samples through volumetric rendering
    with torch.no_grad():
        densities = query_fn(
            frustum_positions, frustum_dirs, only_density=True, **kwargs
        )
        compact_packed_info, compact_selector = volumetric_rendering_steps(
            packed_info.contiguous(),
            frustum_starts.contiguous(),
            frustum_ends.contiguous(),
            densities.contiguous(),
        )
        compact_frustum_positions = frustum_positions[compact_selector]
        compact_frustum_dirs = frustum_dirs[compact_selector]
        compact_frustum_starts = frustum_starts[compact_selector]
        compact_frustum_ends = frustum_ends[compact_selector]
        compact_steps_counter = compact_packed_info[:, -1].sum(0, keepdim=True)
# network
    compact_query_results = query_fn(
        compact_frustum_positions, compact_frustum_dirs, **kwargs
    )
    compact_rgbs, compact_densities = compact_query_results[0], compact_query_results[1]
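Taken together, this is a two-pass evaluation: a cheap density-only query (under `no_grad`) decides which samples still matter, and the full radiance query with gradients runs only on the compacted set. A self-contained toy sketch of the pattern, with a stand-in `query_fn` (all names and thresholds here are illustrative assumptions):

```python
import torch

def query_fn(x, d, only_density=False):
    # Stand-in for the radiance field network (illustrative only).
    sigma = x.norm(dim=-1, keepdim=True)
    return sigma if only_density else (torch.sigmoid(x), sigma)

positions, dirs = torch.randn(6, 3), torch.randn(6, 3)
# Pass 1: density only, no gradients, to find the samples that survive.
with torch.no_grad():
    densities = query_fn(positions, dirs, only_density=True)
    selector = densities.squeeze(-1) > 0.5  # stands in for volumetric_rendering_steps
# Pass 2: the full network runs only on the compacted samples.
rgbs, sigmas = query_fn(positions[selector], dirs[selector])
```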
    # accumulation
    compact_weights, compact_ray_indices, alive_ray_mask = volumetric_weights(
        compact_packed_info,
        compact_frustum_starts,
@@ -136,46 +113,6 @@ def volumetric_rendering(
        n_rays,
    )
    accumulated_depth = torch.clip(accumulated_depth, t_min[:, None], t_max[:, None])
    accumulated_color = accumulated_color + render_bkgd * (1.0 - accumulated_weight)
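The last line is the standard over-compositing of the accumulated (premultiplied) color onto the background, weighted by the leftover transmittance:

```latex
C_{\text{out}} = C + (1 - W)\, C_{\text{bkgd}}
```

where \(W\) is `accumulated_weight`, i.e. the ray opacity.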
......