Commit 64ad63cd authored by Ruilong Li's avatar Ruilong Li
Browse files

inference first for vol render

parent 16324602
...@@ -14,7 +14,7 @@ python examples/trainval.py ...@@ -14,7 +14,7 @@ python examples/trainval.py
| - | - | - | - | | - | - | - | - |
| Time | 377s | 357s | 354s | | Time | 377s | 357s | 354s |
| PSNR | 36.08 | 36.58 | 29.63 | | PSNR | 36.08 | 36.58 | 29.63 |
35.37?
Tested with the default settings on the Lego test set. Tested with the default settings on the Lego test set.
...@@ -23,10 +23,7 @@ Tested with the default settings on the Lego test set. ...@@ -23,10 +23,7 @@ Tested with the default settings on the Lego test set.
| instant-ngp (paper) | trainval? | 36.39 | - | - | 3090 | | instant-ngp (paper) | trainval? | 36.39 | - | - | 3090 |
| instant-ngp (code) | train (35k steps) | 36.08 | 308 sec | 55.32 fps | TITAN RTX | 1734MB | | instant-ngp (code) | train (35k steps) | 36.08 | 308 sec | 55.32 fps | TITAN RTX | 1734MB |
| torch-ngp (`-O`) | train (30K steps) | 34.15 | 310 sec | 7.8 fps | V100 | | torch-ngp (`-O`) | train (30K steps) | 34.15 | 310 sec | 7.8 fps | V100 |
| ours | train (30K steps) | 34.40 | 296 sec | 6.2 fps | TITAN RTX | | ours | trainval (35K steps) | 36.08 | 343 sec | 9.6 fps | TITAN RTX |
| ours | trainval (30K steps) | 35.42 | 291 sec | 6.4 fps | TITAN RTX |
| ours (2**16 samples w preload) | trainval (35K steps) | 36.18 | 385 sec | 8.3 fps | TITAN RTX |
| ours (2**16 samples w preload) | train (35K steps) | 35.03 | 383 sec | 8.0 fps | TITAN RTX |
## Tips: ## Tips:
......
...@@ -58,8 +58,15 @@ class NGPradianceField(BaseRadianceField): ...@@ -58,8 +58,15 @@ class NGPradianceField(BaseRadianceField):
self.direction_encoding = tcnn.Encoding( self.direction_encoding = tcnn.Encoding(
n_input_dims=num_dim, n_input_dims=num_dim,
encoding_config={ encoding_config={
"otype": "SphericalHarmonics", "otype": "Composite",
"degree": 4, "nested": [
{
"n_dims_to_encode": 3,
"otype": "SphericalHarmonics",
"degree": 4,
},
# {"otype": "Identity", "n_bins": 4, "degree": 4},
],
}, },
) )
...@@ -134,6 +141,7 @@ class NGPradianceField(BaseRadianceField): ...@@ -134,6 +141,7 @@ class NGPradianceField(BaseRadianceField):
positions: torch.Tensor, positions: torch.Tensor,
directions: torch.Tensor = None, directions: torch.Tensor = None,
mask: torch.Tensor = None, mask: torch.Tensor = None,
only_density: bool = False,
): ):
if self.use_viewdirs and (directions is not None): if self.use_viewdirs and (directions is not None):
assert ( assert (
...@@ -143,12 +151,18 @@ class NGPradianceField(BaseRadianceField): ...@@ -143,12 +151,18 @@ class NGPradianceField(BaseRadianceField):
density = torch.zeros_like(positions[..., :1]) density = torch.zeros_like(positions[..., :1])
rgb = torch.zeros(list(positions.shape[:-1]) + [3], device=positions.device) rgb = torch.zeros(list(positions.shape[:-1]) + [3], device=positions.device)
density[mask], embedding = self.query_density(positions[mask]) density[mask], embedding = self.query_density(positions[mask])
if only_density:
return density
rgb[mask] = self.query_rgb( rgb[mask] = self.query_rgb(
directions[mask] if directions is not None else None, directions[mask] if directions is not None else None,
embedding=embedding, embedding=embedding,
) )
else: else:
density, embedding = self.query_density(positions, return_feat=True) density, embedding = self.query_density(positions, return_feat=True)
if only_density:
return density
rgb = self._query_rgb(directions, embedding=embedding) rgb = self._query_rgb(directions, embedding=embedding)
return rgb, density return rgb, density
...@@ -12,6 +12,27 @@ from nerfacc import OccupancyField, volumetric_rendering ...@@ -12,6 +12,27 @@ from nerfacc import OccupancyField, volumetric_rendering
TARGET_SAMPLE_BATCH_SIZE = 1 << 16 TARGET_SAMPLE_BATCH_SIZE = 1 << 16
# import tqdm
# device = "cuda:0"
# radiance_field = NGPradianceField(aabb=[0, 0, 0, 1, 1, 1]).to(device)
# positions = torch.rand((TARGET_SAMPLE_BATCH_SIZE, 3), device=device)
# directions = torch.rand(positions.shape, device=device)
# optimizer = torch.optim.Adam(
# radiance_field.parameters(),
# lr=1e-10,
# # betas=(0.9, 0.99),
# eps=1e-15,
# # weight_decay=1e-6,
# )
# for _ in tqdm.tqdm(range(1000)):
# rgbs, sigmas = radiance_field(positions, directions)
# loss = rgbs.mean()
# optimizer.zero_grad()
# loss.backward()
# optimizer.step()
# exit()
def render_image(radiance_field, rays, render_bkgd, render_step_size): def render_image(radiance_field, rays, render_bkgd, render_step_size):
"""Render the pixels of an image. """Render the pixels of an image.
......
...@@ -7,6 +7,7 @@ ray_aabb_intersect = _C.ray_aabb_intersect ...@@ -7,6 +7,7 @@ ray_aabb_intersect = _C.ray_aabb_intersect
ray_marching = _C.ray_marching ray_marching = _C.ray_marching
volumetric_rendering_forward = _C.volumetric_rendering_forward volumetric_rendering_forward = _C.volumetric_rendering_forward
volumetric_rendering_backward = _C.volumetric_rendering_backward volumetric_rendering_backward = _C.volumetric_rendering_backward
volumetric_rendering_inference = _C.volumetric_rendering_inference
class VolumeRenderer(torch.autograd.Function): class VolumeRenderer(torch.autograd.Function):
......
...@@ -24,6 +24,13 @@ std::vector<torch::Tensor> ray_marching( ...@@ -24,6 +24,13 @@ std::vector<torch::Tensor> ray_marching(
const float dt const float dt
); );
std::vector<torch::Tensor> volumetric_rendering_inference(
torch::Tensor packed_info,
torch::Tensor starts,
torch::Tensor ends,
torch::Tensor sigmas
);
std::vector<torch::Tensor> volumetric_rendering_forward( std::vector<torch::Tensor> volumetric_rendering_forward(
torch::Tensor packed_info, torch::Tensor packed_info,
torch::Tensor starts, torch::Tensor starts,
...@@ -51,6 +58,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) ...@@ -51,6 +58,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{ {
m.def("ray_aabb_intersect", &ray_aabb_intersect); m.def("ray_aabb_intersect", &ray_aabb_intersect);
m.def("ray_marching", &ray_marching); m.def("ray_marching", &ray_marching);
m.def("volumetric_rendering_inference", &volumetric_rendering_inference);
m.def("volumetric_rendering_forward", &volumetric_rendering_forward); m.def("volumetric_rendering_forward", &volumetric_rendering_forward);
m.def("volumetric_rendering_backward", &volumetric_rendering_backward); m.def("volumetric_rendering_backward", &volumetric_rendering_backward);
} }
\ No newline at end of file
#include "include/helpers_cuda.h" #include "include/helpers_cuda.h"
// Dry-run compaction pass for inference: walk each ray's packed samples with
// the same early-termination rule as the forward renderer (stop once the
// accumulated transmittance T drops below EPSILON) and record which samples
// survive, so the expensive rgb query can be run on the survivors only.
template <typename scalar_t>
__global__ void volumetric_rendering_inference_kernel(
    const uint32_t n_rays,
    const int* packed_info,    // input: per-ray (ray idx, sample base, n samples)
    const scalar_t* starts,    // input: per-sample segment start t
    const scalar_t* ends,      // input: per-sample segment end t
    const scalar_t* sigmas,    // input: per-sample density after activation
    int* compact_packed_info,  // output: must be zero-initialized by caller
    int* compact_selector,     // output: must be (-1)-initialized by caller;
                               // entries left at -1 mark unused slots
    // writable helpers
    int* steps_counter         // global running total of surviving samples
) {
    CUDA_GET_THREAD_ID(thread_id, n_rays);

    // Locate this ray's slice of the packed sample arrays.
    const int i = packed_info[thread_id * 3 + 0];         // ray idx in {rays_o, rays_d}
    const int base = packed_info[thread_id * 3 + 1];      // first sample idx
    const int numsteps = packed_info[thread_id * 3 + 2];  // number of samples
    if (numsteps == 0) return;

    starts += base;
    ends += base;
    sigmas += base;

    // Accumulate transmittance only (no color here); count how many samples
    // contribute before the ray saturates.
    scalar_t T = 1.f;
    const scalar_t EPSILON = 1e-4f;
    int j = 0;
    for (; j < numsteps; ++j) {
        if (T < EPSILON) {
            break;
        }
        const scalar_t delta = ends[j] - starts[j];
        const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
        T *= (1.f - alpha);
    }

    // Reserve a contiguous span in the compacted layout and map each
    // surviving sample back to its original (uncompacted) index.
    int compact_base = atomicAdd(steps_counter, j);
    compact_selector += compact_base;
    for (int k = 0; k < j; ++k) {
        compact_selector[k] = base + k;
    }
    compact_packed_info += thread_id * 3;
    compact_packed_info[0] = i;             // ray idx in {rays_o, rays_d}
    compact_packed_info[1] = compact_base;  // compact sample idx start
    compact_packed_info[2] = j;             // number of surviving samples
}
template <typename scalar_t> template <typename scalar_t>
__global__ void volumetric_rendering_forward_kernel( __global__ void volumetric_rendering_forward_kernel(
const uint32_t n_rays, const uint32_t n_rays,
...@@ -137,6 +189,57 @@ __global__ void volumetric_rendering_backward_kernel( ...@@ -137,6 +189,57 @@ __global__ void volumetric_rendering_backward_kernel(
} }
} }
// Host wrapper for the inference compaction pass.
//
// Inputs (all CUDA, contiguous):
//   packed_info: (n_rays, 3) int32 — per-ray (ray idx, sample base, n samples)
//   starts/ends: (n_samples, 1)   — per-sample segment [t0, t1]
//   sigmas:      (n_samples, 1)   — per-sample density after activation
//
// Returns:
//   {compact_packed_info, compact_selector, steps_counter} where
//   compact_packed_info is (n_rays, 3) int32 with per-ray
//   (ray idx, compact base, n surviving samples); compact_selector is
//   (n_samples,) int32 holding the original index of each surviving sample
//   (unused tail entries stay -1); steps_counter is a (1,) int32 total of
//   surviving samples.
std::vector<torch::Tensor> volumetric_rendering_inference(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas
) {
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    // Logical && (not bitwise &) so the shape checks read and short-circuit
    // as intended.
    TORCH_CHECK(packed_info.ndimension() == 2 && packed_info.size(1) == 3);
    TORCH_CHECK(starts.ndimension() == 2 && starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 && ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 && sigmas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);
    const uint32_t n_samples = sigmas.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // helper counter: total surviving samples, bumped via atomicAdd.
    torch::Tensor steps_counter = torch::zeros(
        {1}, packed_info.options().dtype(torch::kInt32));
    // outputs; selector is -1-filled so untouched slots are distinguishable
    // from valid index 0.
    torch::Tensor compact_packed_info =
        torch::zeros({n_rays, 3}, packed_info.options());
    torch::Tensor compact_selector =
        -torch::ones({n_samples}, packed_info.options());

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        sigmas.scalar_type(),
        "volumetric_rendering_inference",
        ([&]
         { volumetric_rendering_inference_kernel<scalar_t><<<blocks, threads>>>(
               n_rays,
               packed_info.data_ptr<int>(),
               starts.data_ptr<scalar_t>(),
               ends.data_ptr<scalar_t>(),
               sigmas.data_ptr<scalar_t>(),
               compact_packed_info.data_ptr<int>(),
               compact_selector.data_ptr<int>(),
               steps_counter.data_ptr<int>());
         }));

    return {compact_packed_info, compact_selector, steps_counter};
}
/** /**
* @brief Volumetric Rendering: Accumulating samples in the forward pass. * @brief Volumetric Rendering: Accumulating samples in the forward pass.
* The inputs, excepct for `sigmas` and `rgbs`, are the outputs of our * The inputs, excepct for `sigmas` and `rgbs`, are the outputs of our
......
...@@ -3,7 +3,12 @@ from typing import Callable, Tuple ...@@ -3,7 +3,12 @@ from typing import Callable, Tuple
import torch import torch
from .cuda import VolumeRenderer, ray_aabb_intersect, ray_marching from .cuda import (
VolumeRenderer,
ray_aabb_intersect,
ray_marching,
volumetric_rendering_inference,
)
def volumetric_rendering( def volumetric_rendering(
...@@ -69,6 +74,7 @@ def volumetric_rendering( ...@@ -69,6 +74,7 @@ def volumetric_rendering(
# squeeze valid samples # squeeze valid samples
total_samples = max(packed_info[:, -1].sum(), 1) total_samples = max(packed_info[:, -1].sum(), 1)
total_samples = int(math.ceil(total_samples / 256.0)) * 256
frustum_origins = frustum_origins[:total_samples] frustum_origins = frustum_origins[:total_samples]
frustum_dirs = frustum_dirs[:total_samples] frustum_dirs = frustum_dirs[:total_samples]
frustum_starts = frustum_starts[:total_samples] frustum_starts = frustum_starts[:total_samples]
...@@ -78,8 +84,36 @@ def volumetric_rendering( ...@@ -78,8 +84,36 @@ def volumetric_rendering(
frustum_origins + frustum_dirs * (frustum_starts + frustum_ends) / 2.0 frustum_origins + frustum_dirs * (frustum_starts + frustum_ends) / 2.0
) )
query_results = query_fn(frustum_positions, frustum_dirs, **kwargs) with torch.no_grad():
rgbs, densities = query_results[0], query_results[1] densities = query_fn(
frustum_positions, frustum_dirs, only_density=True, **kwargs
)
(
compact_packed_info,
compact_selector,
compact_steps_counter,
) = volumetric_rendering_inference(
packed_info.contiguous(),
frustum_starts.contiguous(),
frustum_ends.contiguous(),
densities.contiguous(),
)
compact_selector = compact_selector[compact_selector >= 0].long()
compact_pad = int(math.ceil(len(compact_selector) / 256.0)) * 256 - len(
compact_selector
)
compact_selector = torch.nn.functional.pad(compact_selector, (0, compact_pad))
compact_frustum_positions = frustum_positions[compact_selector]
compact_frustum_dirs = frustum_dirs[compact_selector]
compact_frustum_starts = frustum_starts[compact_selector]
compact_frustum_ends = frustum_ends[compact_selector]
# print(compact_selector.float().mean(), compact_steps_counter, steps_counter)
compact_query_results = query_fn(
compact_frustum_positions, compact_frustum_dirs, **kwargs
)
compact_rgbs, compact_densities = compact_query_results[0], compact_query_results[1]
( (
accumulated_weight, accumulated_weight,
accumulated_depth, accumulated_depth,
...@@ -87,13 +121,30 @@ def volumetric_rendering( ...@@ -87,13 +121,30 @@ def volumetric_rendering(
alive_ray_mask, alive_ray_mask,
compact_steps_counter, compact_steps_counter,
) = VolumeRenderer.apply( ) = VolumeRenderer.apply(
packed_info, compact_packed_info.contiguous(),
frustum_starts, compact_frustum_starts.contiguous(),
frustum_ends, compact_frustum_ends.contiguous(),
densities.contiguous(), compact_densities.contiguous(),
rgbs.contiguous(), compact_rgbs.contiguous(),
) )
# query_results = query_fn(frustum_positions, frustum_dirs, **kwargs)
# rgbs, densities = query_results[0], query_results[1]
# (
# accumulated_weight,
# accumulated_depth,
# accumulated_color,
# alive_ray_mask,
# compact_steps_counter,
# ) = VolumeRenderer.apply(
# packed_info.contiguous(),
# frustum_starts.contiguous(),
# frustum_ends.contiguous(),
# densities.contiguous(),
# rgbs.contiguous(),
# )
accumulated_depth = torch.clip(accumulated_depth, t_min[:, None], t_max[:, None]) accumulated_depth = torch.clip(accumulated_depth, t_min[:, None], t_max[:, None])
accumulated_color = accumulated_color + render_bkgd * (1.0 - accumulated_weight) accumulated_color = accumulated_color + render_bkgd * (1.0 - accumulated_weight)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment