OpenDAS / nerfacc

Commit 4a4bbaba, authored Sep 12, 2022 by Ruilong Li
Parent: 224ba1a7

    cleanup

Showing 8 changed files with 105 additions and 557 deletions.
Changed files:

- README.md (+2, -2)
- nerfacc/cuda/__init__.py (+1, -72)
- nerfacc/cuda/csrc/pybind.cu (+2, -43)
- nerfacc/cuda/csrc/ray_marching.cu (+0, -1)
- nerfacc/cuda/csrc/vol_rendering.cu (+0, -368)
- nerfacc/cuda/csrc/volumetric_weights.cu (+92, -0)
- nerfacc/utils.py (+1, -1)
- nerfacc/volumetric_rendering.py (+7, -70)
README.md  (view file @ 4a4bbaba)

```diff
@@ -12,9 +12,9 @@ python examples/trainval.py

 | trainval (35k, 1<<16) | Lego  | Mic   | Materials |
 | -                     | -     | -     | -         |
-| Time                  | 325s  | 357s  | 360s      |
+| Time                  | 370s  | 357s  | 360s      |
 | PSNR                  | 36.20 | 36.55 | 29.59     |
-| FPS                   | 12.56 | 25.54 | 9.65      |
+| FPS                   | 11.94 | 25.54 | 9.65      |

 Tested with the default settings on the Lego test set.
```
nerfacc/cuda/__init__.py  (view file @ 4a4bbaba)

```python
import torch
from torch.cuda.amp import custom_bwd, custom_fwd

from ._backend import _C

# ray_aabb_intersect = _C.ray_aabb_intersect
ray_marching = _C.ray_marching
volumetric_rendering_forward = _C.volumetric_rendering_forward
volumetric_rendering_backward = _C.volumetric_rendering_backward
volumetric_rendering_inference = _C.volumetric_rendering_inference
# volumetric_weights_forward = _C.volumetric_weights_forward


class VolumeRenderer(torch.autograd.Function):
    """CUDA Volumetric Renderer"""

    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)
    def forward(ctx, packed_info, starts, ends, sigmas, rgbs):
        (
            accumulated_weight,
            accumulated_depth,
            accumulated_color,
            mask,
            steps_counter,
        ) = volumetric_rendering_forward(packed_info, starts, ends, sigmas, rgbs)
        ctx.save_for_backward(
            accumulated_weight,
            accumulated_depth,
            accumulated_color,
            packed_info,
            starts,
            ends,
            sigmas,
            rgbs,
        )
        return (
            accumulated_weight,
            accumulated_depth,
            accumulated_color,
            mask,
            steps_counter,
        )

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_weight, grad_depth, grad_color, _grad_mask, _grad_steps_counter):
        (
            accumulated_weight,
            accumulated_depth,
            accumulated_color,
            packed_info,
            starts,
            ends,
            sigmas,
            rgbs,
        ) = ctx.saved_tensors
        grad_sigmas, grad_rgbs = volumetric_rendering_backward(
            accumulated_weight,
            accumulated_depth,
            accumulated_color,
            grad_weight,
            grad_depth,
            grad_color,
            packed_info,
            starts,
            ends,
            sigmas,
            rgbs,
        )
        # corresponds to the input argument list of forward()
        return None, None, None, grad_sigmas, grad_rgbs


volumetric_rendering_steps = _C.volumetric_rendering_steps
```
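For orientation, here is a minimal sketch of how this autograd wrapper would be driven. It assumes the compiled `_C` backend is built and a GPU is available; the toy tensors are invented for illustration and follow the `packed_info` convention documented in `vol_rendering.cu` below (one row per ray: ray index, sample start offset, sample count).

```python
import torch
from nerfacc.cuda import VolumeRenderer  # assumes the CUDA extension is built

# Two rays, three samples each, flattened into shared sample arrays.
packed_info = torch.tensor([[0, 0, 3], [1, 3, 3]], dtype=torch.int32, device="cuda")
starts = torch.linspace(0.0, 1.0, 6, device="cuda")[:, None]   # [6, 1]
ends = starts + 0.1                                             # [6, 1]
sigmas = torch.rand(6, 1, device="cuda", requires_grad=True)    # [6, 1]
rgbs = torch.rand(6, 3, device="cuda", requires_grad=True)      # [6, 3]

weight, depth, color, mask, steps = VolumeRenderer.apply(
    packed_info, starts, ends, sigmas, rgbs
)
color.sum().backward()  # gradients flow back to sigmas and rgbs only
```

Only `sigmas` and `rgbs` receive gradients; `backward` returns `None` for the packed indices and the sample bounds.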
nerfacc/cuda/csrc/pybind.cu  (view file @ 4a4bbaba)

```diff
@@ -7,52 +7,13 @@ std::vector<torch::Tensor> ray_aabb_intersect(
    const torch::Tensor aabb);

// std::vector<torch::Tensor> ray_marching(
//     // rays
//     const torch::Tensor rays_o,
//     const torch::Tensor rays_d,
//     const torch::Tensor t_min,
//     const torch::Tensor t_max,
//     // density grid
//     const torch::Tensor aabb,
//     const pybind11::list resolution,
//     const torch::Tensor occ_binary,
//     // sampling
//     const int max_total_samples,
//     const int max_per_ray_samples,
//     const float dt
// );

-std::vector<torch::Tensor> volumetric_rendering_inference(
+std::vector<torch::Tensor> volumetric_rendering_steps(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas);

std::vector<torch::Tensor> volumetric_rendering_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas,
    torch::Tensor rgbs);

std::vector<torch::Tensor> volumetric_rendering_backward(
    torch::Tensor accumulated_weight,
    torch::Tensor accumulated_depth,
    torch::Tensor accumulated_color,
    torch::Tensor grad_weight,
    torch::Tensor grad_depth,
    torch::Tensor grad_color,
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas,
    torch::Tensor rgbs);

std::vector<torch::Tensor> volumetric_weights_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
...

@@ -87,9 +48,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    m.def("ray_aabb_intersect", &ray_aabb_intersect);
    m.def("ray_marching", &ray_marching);
    m.def("volumetric_rendering_inference", &volumetric_rendering_inference);
    m.def("volumetric_rendering_forward", &volumetric_rendering_forward);
    m.def("volumetric_rendering_backward", &volumetric_rendering_backward);
    m.def("volumetric_rendering_steps", &volumetric_rendering_steps);
    m.def("volumetric_weights_forward", &volumetric_weights_forward);
    m.def("volumetric_weights_backward", &volumetric_weights_backward);
}
```
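As a side note (not part of this commit), bindings like these are what end up in the `_C` module imported by `nerfacc/cuda/__init__.py`. A hedged sketch of JIT-compiling them with PyTorch's generic extension loader; the module name and source list here are assumptions, since the package's real build setup is not shown in this diff.

```python
from torch.utils.cpp_extension import load

# Hypothetical JIT build; the package normally ships a prebuilt `_C` backend.
_C = load(
    name="nerfacc_cuda",  # assumed name
    sources=[
        "nerfacc/cuda/csrc/pybind.cu",
        "nerfacc/cuda/csrc/ray_marching.cu",
        "nerfacc/cuda/csrc/volumetric_weights.cu",
    ],
    verbose=True,
)
print(hasattr(_C, "volumetric_rendering_steps"))  # True once compiled
```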
nerfacc/cuda/csrc/ray_marching.cu  (view file @ 4a4bbaba)

```cpp
@@ -61,7 +61,6 @@ inline __device__ float advance_to_next_voxel(
}

__global__ void marching_steps_kernel(
    // rays info
    const uint32_t n_rays,
...
```
nerfacc/cuda/csrc/vol_rendering.cu  (deleted, 100644 → 0; view file @ 224ba1a7)

```cpp
#include "include/helpers_cuda.h"

template <typename scalar_t>
__global__ void volumetric_rendering_inference_kernel(
    const uint32_t n_rays,
    const int *packed_info,   // input ray & point indices.
    const scalar_t *starts,   // input start t
    const scalar_t *ends,     // input end t
    const scalar_t *sigmas,   // input density after activation
    int *compact_packed_info, // output: should be all zero initialized
    int *compact_selector,    // output: should be all zero initialized
    // writable helpers
    int *steps_counter)
{
    CUDA_GET_THREAD_ID(thread_id, n_rays);

    // locate
    const int base = packed_info[thread_id * 2 + 0];  // point idx start.
    const int steps = packed_info[thread_id * 2 + 1]; // point idx shift.
    if (steps == 0)
        return;

    starts += base;
    ends += base;
    sigmas += base;

    // accumulated rendering
    scalar_t T = 1.f;
    scalar_t EPSILON = 1e-4f;

    int j = 0;
    for (; j < steps; ++j)
    {
        if (T < EPSILON)
        {
            break;
        }
        const scalar_t delta = ends[j] - starts[j];
        const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
        const scalar_t weight = alpha * T;
        T *= (1.f - alpha);
    }

    int compact_base = atomicAdd(steps_counter, j);
    compact_selector += compact_base;
    for (int k = 0; k < j; ++k)
    {
        compact_selector[k] = base + k;
    }
    compact_packed_info[thread_id * 2 + 0] = compact_base; // compact point idx start.
    compact_packed_info[thread_id * 2 + 1] = j;            // compact point idx shift.
}

template <typename scalar_t>
__global__ void volumetric_rendering_forward_kernel(
    const uint32_t n_rays,
    const int *packed_info, // input ray & point indices.
    const scalar_t *starts, // input start t
    const scalar_t *ends,   // input end t
    const scalar_t *sigmas, // input density after activation
    const scalar_t *rgbs,   // input rgb after activation
    // should be all-zero initialized
    scalar_t *accumulated_weight, // output
    scalar_t *accumulated_depth,  // output
    scalar_t *accumulated_color,  // output
    bool *mask,                   // output
    // writable helpers
    int *steps_counter)
{
    CUDA_GET_THREAD_ID(thread_id, n_rays);

    // locate
    const int i = packed_info[thread_id * 3 + 0];        // ray idx in {rays_o, rays_d}
    const int base = packed_info[thread_id * 3 + 1];     // point idx start.
    const int numsteps = packed_info[thread_id * 3 + 2]; // point idx shift.
    if (numsteps == 0)
        return;

    starts += base;
    ends += base;
    sigmas += base;
    rgbs += base * 3;
    accumulated_weight += i;
    accumulated_depth += i;
    accumulated_color += i * 3;
    mask += i;

    // accumulated rendering
    scalar_t T = 1.f;
    scalar_t EPSILON = 1e-4f;

    int j = 0;
    for (; j < numsteps; ++j)
    {
        if (T < EPSILON)
        {
            break;
        }
        const scalar_t delta = ends[j] - starts[j];
        const scalar_t t = (ends[j] + starts[j]) * 0.5f;
        const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
        const scalar_t weight = alpha * T;
        accumulated_weight[0] += weight;
        accumulated_depth[0] += weight * t;
        accumulated_color[0] += weight * rgbs[j * 3 + 0];
        accumulated_color[1] += weight * rgbs[j * 3 + 1];
        accumulated_color[2] += weight * rgbs[j * 3 + 2];
        T *= (1.f - alpha);
    }
    mask[0] = true;
    atomicAdd(steps_counter, j);
}

template <typename scalar_t>
__global__ void volumetric_rendering_backward_kernel(
    const uint32_t n_rays,
    const int *packed_info,             // input ray & point indices.
    const scalar_t *starts,             // input start t
    const scalar_t *ends,               // input end t
    const scalar_t *sigmas,             // input density after activation
    const scalar_t *rgbs,               // input rgb after activation
    const scalar_t *accumulated_weight, // forward output
    const scalar_t *accumulated_depth,  // forward output
    const scalar_t *accumulated_color,  // forward output
    const scalar_t *grad_weight,        // input
    const scalar_t *grad_depth,         // input
    const scalar_t *grad_color,         // input
    scalar_t *grad_sigmas,              // output
    scalar_t *grad_rgbs                 // output
)
{
    CUDA_GET_THREAD_ID(thread_id, n_rays);

    // locate
    const int i = packed_info[thread_id * 3 + 0];        // ray idx in {rays_o, rays_d}
    const int base = packed_info[thread_id * 3 + 1];     // point idx start.
    const int numsteps = packed_info[thread_id * 3 + 2]; // point idx shift.
    if (numsteps == 0)
        return;

    starts += base;
    ends += base;
    sigmas += base;
    rgbs += base * 3;
    grad_sigmas += base;
    grad_rgbs += base * 3;
    accumulated_weight += i;
    accumulated_depth += i;
    accumulated_color += i * 3;
    grad_weight += i;
    grad_depth += i;
    grad_color += i * 3;

    // backward of accumulated rendering
    scalar_t T = 1.f;
    scalar_t EPSILON = 1e-4f;

    int j = 0;
    scalar_t r = 0, g = 0, b = 0, d = 0;
    for (; j < numsteps; ++j)
    {
        if (T < EPSILON)
        {
            break;
        }
        const scalar_t delta = ends[j] - starts[j];
        const scalar_t t = (ends[j] + starts[j]) * 0.5f;
        const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
        const scalar_t weight = alpha * T;
        r += weight * rgbs[j * 3 + 0];
        g += weight * rgbs[j * 3 + 1];
        b += weight * rgbs[j * 3 + 2];
        d += weight * t;
        T *= (1.f - alpha);

        grad_rgbs[j * 3 + 0] = grad_color[0] * weight;
        grad_rgbs[j * 3 + 1] = grad_color[1] * weight;
        grad_rgbs[j * 3 + 2] = grad_color[2] * weight;
        grad_sigmas[j] = delta * (
            grad_color[0] * (T * rgbs[j * 3 + 0] - (accumulated_color[0] - r)) +
            grad_color[1] * (T * rgbs[j * 3 + 1] - (accumulated_color[1] - g)) +
            grad_color[2] * (T * rgbs[j * 3 + 2] - (accumulated_color[2] - b)) +
            grad_weight[0] * (1.f - accumulated_weight[0]) +
            grad_depth[0] * (t * T - (accumulated_depth[0] - d)));
    }
}

std::vector<torch::Tensor> volumetric_rendering_inference(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    TORCH_CHECK(packed_info.ndimension() == 2 & packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);
    const uint32_t n_samples = sigmas.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // helper counter
    torch::Tensor steps_counter = torch::zeros(
        {1}, packed_info.options().dtype(torch::kInt32));

    // outputs
    torch::Tensor compact_packed_info = torch::zeros({n_rays, 2}, packed_info.options());
    torch::Tensor compact_selector = -torch::ones({n_samples}, packed_info.options());

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        sigmas.scalar_type(), "volumetric_rendering_inference", ([&]
        {
            volumetric_rendering_inference_kernel<scalar_t><<<blocks, threads>>>(
                n_rays,
                packed_info.data_ptr<int>(),
                starts.data_ptr<scalar_t>(),
                ends.data_ptr<scalar_t>(),
                sigmas.data_ptr<scalar_t>(),
                compact_packed_info.data_ptr<int>(),
                compact_selector.data_ptr<int>(),
                steps_counter.data_ptr<int>());
        }));

    return {compact_packed_info, compact_selector, steps_counter};
}

/**
 * @brief Volumetric Rendering: Accumulating samples in the forward pass.
 * The inputs, except for `sigmas` and `rgbs`, are the outputs of our
 * cuda ray marching function in `raymarching.cu`
 *
 * @param packed_info Stores how to index the ray samples from the returned values.
 * Shape of [n_rays, 3]. First value is the ray index. Second value is the sample
 * start index in the results for this ray. Third value is the number of samples for
 * this ray. Note for rays that have zero samples, we simply skip them so the `packed_info`
 * has some zero padding in the end.
 * @param starts Where the frustum-shape sample starts along a ray. [total_samples, 1]
 * @param ends Where the frustum-shape sample ends along a ray. [total_samples, 1]
 * @param sigmas Densities at those samples. [total_samples, 1]
 * @param rgbs RGBs at those samples. [total_samples, 3]
 * @return std::vector<torch::Tensor>
 * - accumulated_weight: Ray opacity. [n_rays, 1]
 * - accumulated_depth: Ray depth. [n_rays, 1]
 * - accumulated_color: Ray color. [n_rays, 3]
 * - mask: Boolean value storing whether this ray has valid samples in packed_info. [n_rays]
 */
std::vector<torch::Tensor> volumetric_rendering_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas,
    torch::Tensor rgbs)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    CHECK_INPUT(rgbs);
    TORCH_CHECK(packed_info.ndimension() == 2 & packed_info.size(1) == 3);
    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);
    TORCH_CHECK(rgbs.ndimension() == 2 & rgbs.size(1) == 3);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // helper counter
    torch::Tensor steps_counter = torch::zeros({1}, rgbs.options().dtype(torch::kInt32));

    // outputs
    torch::Tensor accumulated_weight = torch::zeros({n_rays, 1}, sigmas.options());
    torch::Tensor accumulated_depth = torch::zeros({n_rays, 1}, sigmas.options());
    torch::Tensor accumulated_color = torch::zeros({n_rays, 3}, sigmas.options());
    // The rays that are not skipped during sampling.
    torch::Tensor mask = torch::zeros({n_rays}, sigmas.options().dtype(torch::kBool));

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        sigmas.scalar_type(), "volumetric_rendering_forward", ([&]
        {
            volumetric_rendering_forward_kernel<scalar_t><<<blocks, threads>>>(
                n_rays,
                packed_info.data_ptr<int>(),
                starts.data_ptr<scalar_t>(),
                ends.data_ptr<scalar_t>(),
                sigmas.data_ptr<scalar_t>(),
                rgbs.data_ptr<scalar_t>(),
                accumulated_weight.data_ptr<scalar_t>(),
                accumulated_depth.data_ptr<scalar_t>(),
                accumulated_color.data_ptr<scalar_t>(),
                mask.data_ptr<bool>(),
                steps_counter.data_ptr<int>());
        }));

    return {accumulated_weight, accumulated_depth, accumulated_color, mask, steps_counter};
}

/**
 * @brief Volumetric Rendering: Accumulating samples in the backward pass.
 */
std::vector<torch::Tensor> volumetric_rendering_backward(
    torch::Tensor accumulated_weight,
    torch::Tensor accumulated_depth,
    torch::Tensor accumulated_color,
    torch::Tensor grad_weight,
    torch::Tensor grad_depth,
    torch::Tensor grad_color,
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas,
    torch::Tensor rgbs)
{
    DEVICE_GUARD(packed_info);

    const uint32_t n_rays = packed_info.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    // outputs
    torch::Tensor grad_sigmas = torch::zeros(sigmas.sizes(), sigmas.options());
    torch::Tensor grad_rgbs = torch::zeros(rgbs.sizes(), rgbs.options());

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        sigmas.scalar_type(), "volumetric_rendering_backward", ([&]
        {
            volumetric_rendering_backward_kernel<scalar_t><<<blocks, threads>>>(
                n_rays,
                packed_info.data_ptr<int>(),
                starts.data_ptr<scalar_t>(),
                ends.data_ptr<scalar_t>(),
                sigmas.data_ptr<scalar_t>(),
                rgbs.data_ptr<scalar_t>(),
                accumulated_weight.data_ptr<scalar_t>(),
                accumulated_depth.data_ptr<scalar_t>(),
                accumulated_color.data_ptr<scalar_t>(),
                grad_weight.data_ptr<scalar_t>(),
                grad_depth.data_ptr<scalar_t>(),
                grad_color.data_ptr<scalar_t>(),
                grad_sigmas.data_ptr<scalar_t>(),
                grad_rgbs.data_ptr<scalar_t>());
        }));

    return {grad_sigmas, grad_rgbs};
}
```
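The kernels in this deleted file are a packed form of standard alpha compositing: per sample, alpha_j = 1 - exp(-sigma_j * delta_j), weight_j = T_j * alpha_j, and the transmittance update T *= (1 - alpha_j), with early termination once T < 1e-4. Below is a slow pure-PyTorch reference for a single ray's forward pass; it is my own sketch for sanity-checking the CUDA loop, not repository code.

```python
import torch


def composite_one_ray(starts, ends, sigmas, rgbs, eps=1e-4):
    """Reference alpha compositing for one ray's samples.

    starts, ends, sigmas: [n, 1]; rgbs: [n, 3]. Mirrors the CUDA forward loop.
    """
    T = 1.0
    weight_sum = torch.zeros(1)
    depth = torch.zeros(1)
    color = torch.zeros(3)
    for j in range(starts.shape[0]):
        if T < eps:  # early termination once the ray is effectively opaque
            break
        delta = (ends[j] - starts[j]).item()
        t_mid = 0.5 * (ends[j] + starts[j]).item()
        alpha = 1.0 - float(torch.exp(-sigmas[j] * delta))
        w = alpha * T
        weight_sum += w
        depth += w * t_mid
        color += w * rgbs[j]
        T *= 1.0 - alpha
    return weight_sum, depth, color
```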
nerfacc/cuda/csrc/volumetric_weights.cu  (view file @ 4a4bbaba)

```cpp
#include "include/helpers_cuda.h"

template <typename scalar_t>
__global__ void volumetric_rendering_steps_kernel(
    const uint32_t n_rays,
    const int *packed_info, // input ray & point indices.
    const scalar_t *starts, // input start t
    const scalar_t *ends,   // input end t
    const scalar_t *sigmas, // input density after activation
    // output: should be all zero (false) initialized
    int *num_steps,
    bool *selector)
{
    CUDA_GET_THREAD_ID(i, n_rays);

    // locate
    const int base = packed_info[i * 2 + 0];  // point idx start.
    const int steps = packed_info[i * 2 + 1]; // point idx shift.
    if (steps == 0)
        return;

    starts += base;
    ends += base;
    sigmas += base;
    num_steps += i;
    selector += base;

    // accumulated rendering
    scalar_t T = 1.f;
    scalar_t EPSILON = 1e-4f;

    int j = 0;
    for (; j < steps; ++j)
    {
        if (T < EPSILON)
        {
            break;
        }
        const scalar_t delta = ends[j] - starts[j];
        const scalar_t alpha = 1.f - __expf(-sigmas[j] * delta);
        const scalar_t weight = alpha * T;
        T *= (1.f - alpha);
        selector[j] = true;
    }
    num_steps[0] = j;
    return;
}

template <typename scalar_t>
__global__ void volumetric_weights_forward_kernel(
    const uint32_t n_rays,
...

@@ -95,6 +138,55 @@ __global__ void volumetric_weights_backward_kernel(
}

std::vector<torch::Tensor> volumetric_rendering_steps(
    torch::Tensor packed_info,
    torch::Tensor starts,
    torch::Tensor ends,
    torch::Tensor sigmas)
{
    DEVICE_GUARD(packed_info);
    CHECK_INPUT(packed_info);
    CHECK_INPUT(starts);
    CHECK_INPUT(ends);
    CHECK_INPUT(sigmas);
    TORCH_CHECK(packed_info.ndimension() == 2 & packed_info.size(1) == 2);
    TORCH_CHECK(starts.ndimension() == 2 & starts.size(1) == 1);
    TORCH_CHECK(ends.ndimension() == 2 & ends.size(1) == 1);
    TORCH_CHECK(sigmas.ndimension() == 2 & sigmas.size(1) == 1);

    const uint32_t n_rays = packed_info.size(0);
    const uint32_t n_samples = sigmas.size(0);

    const int threads = 256;
    const int blocks = CUDA_N_BLOCKS_NEEDED(n_rays, threads);

    torch::Tensor num_steps = torch::zeros({n_rays}, packed_info.options());
    torch::Tensor selector = torch::zeros({n_samples}, packed_info.options().dtype(torch::kBool));

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(
        sigmas.scalar_type(), "volumetric_rendering_inference", ([&]
        {
            volumetric_rendering_steps_kernel<scalar_t><<<blocks, threads>>>(
                n_rays,
                packed_info.data_ptr<int>(),
                starts.data_ptr<scalar_t>(),
                ends.data_ptr<scalar_t>(),
                sigmas.data_ptr<scalar_t>(),
                num_steps.data_ptr<int>(),
                selector.data_ptr<bool>());
        }));

    torch::Tensor cum_steps = num_steps.cumsum(0, torch::kInt32);
    torch::Tensor compact_packed_info = torch::stack({cum_steps - num_steps, num_steps}, 1);
    return {compact_packed_info, selector};
}

std::vector<torch::Tensor> volumetric_weights_forward(
    torch::Tensor packed_info,
    torch::Tensor starts,
...
```
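In effect, `volumetric_rendering_steps` truncates each ray's sample list at the point where transmittance drops below 1e-4 and repacks the survivors with a cumulative sum. A rough per-ray PyTorch equivalent is sketched below; it is illustrative only (the real kernel does this in parallel on the GPU), but the outputs have the same meaning.

```python
import torch


def rendering_steps_reference(packed_info, starts, ends, sigmas, eps=1e-4):
    """packed_info: [n_rays, 2] rows of (start offset, num samples) per ray."""
    n_rays = packed_info.shape[0]
    num_steps = torch.zeros(n_rays, dtype=torch.int32)
    selector = torch.zeros(sigmas.shape[0], dtype=torch.bool)
    for i in range(n_rays):
        base, steps = packed_info[i].tolist()
        T = 1.0
        j = 0
        while j < steps and T >= eps:
            delta = (ends[base + j] - starts[base + j]).item()
            alpha = 1.0 - float(torch.exp(-sigmas[base + j] * delta))
            T *= 1.0 - alpha
            selector[base + j] = True  # this sample still contributes
            j += 1
        num_steps[i] = j
    cum_steps = torch.cumsum(num_steps, 0, dtype=torch.int32)
    compact_packed_info = torch.stack([cum_steps - num_steps, num_steps], dim=1)
    return compact_packed_info, selector
```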
nerfacc/utils.py  (view file @ 4a4bbaba)

```diff
@@ -78,7 +78,7 @@ def volumetric_accumulate(
     values: torch.Tensor = None,
     n_rays: int = None,
 ) -> torch.Tensor:
-    """Accumulate values along the ray.
+    """Accumulate volumetric values along the ray.

     Note: this function is only differentiable to `weights` and `values`.
```
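The accumulation that `volumetric_accumulate` describes can be pictured as a scatter-add of `weights * values` into per-ray buckets, which is also what the commented-out fallback in `volumetric_rendering.py` below used to do. The sketch uses an invented name and an approximate signature; the real helper's interface may differ.

```python
import torch


def accumulate_along_rays(weights, ray_indices, values=None, n_rays=None):
    """Sum weights (optionally weights * values) per ray.

    weights: [n_samples], ray_indices: [n_samples] int64, values: [n_samples, D].
    """
    if n_rays is None:
        n_rays = int(ray_indices.max().item()) + 1
    src = weights[:, None] if values is None else weights[:, None] * values
    out = torch.zeros(n_rays, src.shape[-1], device=src.device, dtype=src.dtype)
    out.index_add_(0, ray_indices, src)  # bucket each sample into its ray
    return out
```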
nerfacc/volumetric_rendering.py  (view file @ 4a4bbaba)

```diff
@@ -5,7 +5,7 @@ import torch
from .cuda import (
    # ComputeWeight,; VolumeRenderer,; ray_aabb_intersect,
    ray_marching,
-    volumetric_rendering_inference,
+    volumetric_rendering_steps,
)
from .utils import ray_aabb_intersect, volumetric_accumulate, volumetric_weights
...

@@ -35,16 +35,13 @@ def volumetric_rendering(
    render_bkgd = render_bkgd.contiguous()
    n_rays = rays_o.shape[0]
    if render_est_n_samples is None:
        render_total_samples = n_rays * render_n_samples
    else:
        render_total_samples = render_est_n_samples
    if render_step_size is None:
        # Note: CPU<->GPU is not ideal, try to pre-define it outside this function.
        render_step_size = (
            (scene_aabb[3:] - scene_aabb[:3]).max() * math.sqrt(3) / render_n_samples
        )
    # get packed samples from ray marching & occupancy check.
    with torch.no_grad():
        t_min, t_max = ray_aabb_intersect(rays_o, rays_d, scene_aabb)
...

@@ -73,50 +70,30 @@ def volumetric_rendering(
    )
    steps_counter = packed_info[:, -1].sum(0, keepdim=True)

    # compact the samples through volumetric rendering
    with torch.no_grad():
        densities = query_fn(frustum_positions, frustum_dirs, only_density=True, **kwargs)
-        (
-            compact_packed_info,
-            compact_selector,
-            compact_steps_counter,
-        ) = volumetric_rendering_inference(
+        compact_packed_info, compact_selector = volumetric_rendering_steps(
            packed_info.contiguous(),
            frustum_starts.contiguous(),
            frustum_ends.contiguous(),
            densities.contiguous(),
        )
        compact_selector = compact_selector[compact_selector >= 0].long()
        compact_pad = (
            int(math.ceil(len(compact_selector) / 256.0)) * 256 - len(compact_selector)
        )
        compact_selector = torch.nn.functional.pad(compact_selector, (0, compact_pad))
        compact_frustum_positions = frustum_positions[compact_selector]
        compact_frustum_dirs = frustum_dirs[compact_selector]
        compact_frustum_starts = frustum_starts[compact_selector]
        compact_frustum_ends = frustum_ends[compact_selector]
        # print(compact_selector.float().mean(), compact_steps_counter, steps_counter)
        compact_steps_counter = compact_packed_info[:, -1].sum(0, keepdim=True)

    # network
    compact_query_results = query_fn(compact_frustum_positions, compact_frustum_dirs, **kwargs)
    compact_rgbs, compact_densities = compact_query_results[0], compact_query_results[1]

    # (
    #     accumulated_weight,
    #     accumulated_depth,
    #     accumulated_color,
    #     alive_ray_mask,
    #     compact_steps_counter,
    # ) = VolumeRenderer.apply(
    #     compact_packed_info.contiguous(),
    #     compact_frustum_starts.contiguous(),
    #     compact_frustum_ends.contiguous(),
    #     compact_densities.contiguous(),
    #     compact_rgbs.contiguous(),
    # )

    # accumulation
    compact_weights, compact_ray_indices, alive_ray_mask = volumetric_weights(
        compact_packed_info,
        compact_frustum_starts,
...

@@ -136,46 +113,6 @@ def volumetric_rendering(
        n_rays,
    )

    # index = compact_ray_indices[:, None].long()
    # accumulated_color = torch.zeros((n_rays, 3), device=device)
    # accumulated_color.scatter_add_(
    #     dim=0,
    #     index=index.expand(-1, 3),
    #     src=compact_weights[:, None] * compact_rgbs,
    # )
    # accumulated_weight = torch.zeros((n_rays, 1), device=device)
    # accumulated_weight.scatter_add_(
    #     dim=0,
    #     index=index.expand(-1, 1),
    #     src=compact_weights[:, None],
    # )
    # accumulated_depth = torch.zeros((n_rays, 1), device=device)
    # accumulated_depth.scatter_add_(
    #     dim=0,
    #     index=index.expand(-1, 1),
    #     src=compact_weights[:, None]
    #     * (compact_frustum_starts + compact_frustum_ends)
    #     / 2.0,
    # )

    # query_results = query_fn(frustum_positions, frustum_dirs, **kwargs)
    # rgbs, densities = query_results[0], query_results[1]
    # (
    #     accumulated_weight,
    #     accumulated_depth,
    #     accumulated_color,
    #     alive_ray_mask,
    #     compact_steps_counter,
    # ) = VolumeRenderer.apply(
    #     packed_info.contiguous(),
    #     frustum_starts.contiguous(),
    #     frustum_ends.contiguous(),
    #     densities.contiguous(),
    #     rgbs.contiguous(),
    # )

    accumulated_depth = torch.clip(accumulated_depth, t_min[:, None], t_max[:, None])
    accumulated_color = accumulated_color + render_bkgd * (1.0 - accumulated_weight)
...
```
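Putting the pieces of this commit together, the render path now runs: ray marching, a cheap density-only query, `volumetric_rendering_steps` to drop samples hidden behind nearly opaque regions, then a full query plus weight accumulation on the compacted samples. A condensed, hedged outline of that flow as a helper function follows; the function name and exact argument handling are mine, not the repository's.

```python
import torch

from nerfacc.cuda import volumetric_rendering_steps  # assumes the built extension


def render_compact(query_fn, packed_info, positions, dirs, starts, ends, **kwargs):
    """Hedged outline of the post-cleanup sampling path; names mirror the diff."""
    with torch.no_grad():
        # 1. Density-only query, then drop samples behind opaque regions.
        densities = query_fn(positions, dirs, only_density=True, **kwargs)
        compact_packed_info, compact_selector = volumetric_rendering_steps(
            packed_info.contiguous(),
            starts.contiguous(),
            ends.contiguous(),
            densities.contiguous(),
        )
    # 2. Full, differentiable query on the surviving samples only.
    compact_rgbs, compact_densities = query_fn(
        positions[compact_selector], dirs[compact_selector], **kwargs
    )[:2]
    return compact_packed_info, compact_selector, compact_rgbs, compact_densities
```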