lishj6 / Flashocc / Commits / 3b8d508a

Commit 3b8d508a, authored Sep 05, 2025 by lishj6

init_0905

parent e968ab0f
Pipeline #2906 canceled with stages
Showing 20 changed files with 2132 additions and 0 deletions
projects/mmdet3d_plugin/models/necks/fpn.py                            +203  -0
projects/mmdet3d_plugin/models/necks/lss_fpn.py                        +125  -0
projects/mmdet3d_plugin/models/necks/view_transformer.py               +620  -0
projects/mmdet3d_plugin/ops/__init__.py                                +5    -0
projects/mmdet3d_plugin/ops/bev_pool/__init__.py                       +1    -0
projects/mmdet3d_plugin/ops/bev_pool/bev_pool.py                       +126  -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.cpp              +83   -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.h                +32   -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.cu          +109  -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp               +17   -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp              +80   -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h                +32   -0
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu          +101  -0
projects/mmdet3d_plugin/ops/bev_pool_v2/__init__.py                    +2    -0
projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py                    +194  -0
projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp               +111  -0
projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu           +142  -0
projects/mmdet3d_plugin/ops/nearest_assign/__init__.py                 +2    -0
projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign.py           +89   -0
projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp      +58   -0
projects/mmdet3d_plugin/models/necks/fpn.py    0 → 100644

# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import BaseModule, auto_fp16

from mmdet3d.models.builder import NECKS


@NECKS.register_module()
class CustomFPN(BaseModule):
    r"""Feature Pyramid Network.

    This is an implementation of paper `Feature Pyramid Networks for Object
    Detection <https://arxiv.org/abs/1612.03144>`_.

    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale).
        num_outs (int): Number of output scales.
        start_level (int): Index of the start input backbone level used to
            build the feature pyramid. Default: 0.
        end_level (int): Index of the end input backbone level (exclusive) to
            build the feature pyramid. Default: -1, which means the last level.
        add_extra_convs (bool | str): If bool, it decides whether to add conv
            layers on top of the original feature maps. Default to False.
            If True, it is equivalent to `add_extra_convs='on_input'`.
            If str, it specifies the source feature map of the extra convs.
            Only the following options are allowed

            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
            - 'on_lateral': Last feature map after lateral convs.
            - 'on_output': The last output feature map after fpn convs.
        relu_before_extra_convs (bool): Whether to apply relu before the extra
            conv. Default: False.
        no_norm_on_lateral (bool): Whether to apply norm on lateral.
            Default: False.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (str): Config dict for activation layer in ConvModule.
            Default: None.
        upsample_cfg (dict): Config dict for interpolate layer.
            Default: `dict(mode='nearest')`
        init_cfg (dict or list[dict], optional): Initialization config dict.

    Example:
        >>> import torch
        >>> in_channels = [2, 3, 5, 7]
        >>> scales = [340, 170, 84, 43]
        >>> inputs = [torch.rand(1, c, s, s)
        ...           for c, s in zip(in_channels, scales)]
        >>> self = FPN(in_channels, 11, len(in_channels)).eval()
        >>> outputs = self.forward(inputs)
        >>> for i in range(len(outputs)):
        ...     print(f'outputs[{i}].shape = {outputs[i].shape}')
        outputs[0].shape = torch.Size([1, 11, 340, 340])
        outputs[1].shape = torch.Size([1, 11, 170, 170])
        outputs[2].shape = torch.Size([1, 11, 84, 84])
        outputs[3].shape = torch.Size([1, 11, 43, 43])
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 out_ids=[],
                 add_extra_convs=False,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest'),
                 init_cfg=dict(
                     type='Xavier', layer='Conv2d', distribution='uniform')):
        super(CustomFPN, self).__init__(init_cfg)
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.relu_before_extra_convs = relu_before_extra_convs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()
        self.out_ids = out_ids
        if end_level == -1:
            self.backbone_end_level = self.num_ins
            # assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs
        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
        elif add_extra_convs:   # True
            self.add_extra_convs = 'on_input'

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            self.lateral_convs.append(l_conv)
            if i in self.out_ids:
                fpn_conv = ConvModule(
                    out_channels,
                    out_channels,
                    3,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    inplace=False)
                self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(
                    in_channels,
                    out_channels,
                    3,
                    stride=2,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    inplace=False)
                self.fpn_convs.append(extra_fpn_conv)

    @auto_fp16()
    def forward(self, inputs):
        """Forward function."""
        assert len(inputs) == len(self.in_channels)

        # build laterals
        laterals = [
            lateral_conv(inputs[i + self.start_level])
            for i, lateral_conv in enumerate(self.lateral_convs)
        ]

        # build top-down path
        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            # In some cases, fixing `scale factor` (e.g. 2) is preferred, but
            # it cannot co-exist with `size` in `F.interpolate`.
            if 'scale_factor' in self.upsample_cfg:
                laterals[i - 1] += F.interpolate(laterals[i],
                                                 **self.upsample_cfg)
            else:
                prev_shape = laterals[i - 1].shape[2:]
                laterals[i - 1] += F.interpolate(
                    laterals[i], size=prev_shape, **self.upsample_cfg)

        # build outputs
        # part 1: from original levels
        outs = [self.fpn_convs[i](laterals[i]) for i in self.out_ids]
        # part 2: add extra levels
        if self.num_outs > len(outs):
            # use max pool to get more levels on top of outputs
            # (e.g., Faster R-CNN, Mask R-CNN)
            if not self.add_extra_convs:
                for i in range(self.num_outs - used_backbone_levels):
                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))
            # add conv layers on top of original feature maps (RetinaNet)
            else:
                if self.add_extra_convs == 'on_input':
                    extra_source = inputs[self.backbone_end_level - 1]
                elif self.add_extra_convs == 'on_lateral':
                    extra_source = laterals[-1]
                elif self.add_extra_convs == 'on_output':
                    extra_source = outs[-1]
                else:
                    raise NotImplementedError
                outs.append(self.fpn_convs[used_backbone_levels](extra_source))
                for i in range(used_backbone_levels + 1, self.num_outs):
                    if self.relu_before_extra_convs:
                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))
                    else:
                        outs.append(self.fpn_convs[i](outs[-1]))
        return outs
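A minimal usage sketch (not part of this commit; channel sizes are assumed ResNet-50 stage widths) showing what `out_ids` changes versus the stock FPN whose docstring example appears above: only the levels listed in `out_ids` get a 3x3 fpn conv and appear in the output list.

import torch

neck = CustomFPN(
    in_channels=[256, 512, 1024, 2048],   # assumed backbone stage channels
    out_channels=256,
    num_outs=1,
    start_level=0,
    out_ids=[0])                          # keep only the highest-resolution level
inputs = [torch.rand(1, c, s, s)
          for c, s in zip([256, 512, 1024, 2048], [64, 32, 16, 8])]
outs = neck(inputs)                       # list with a single (1, 256, 64, 64) tensor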
projects/mmdet3d_plugin/models/necks/lss_fpn.py    0 → 100644

# Copyright (c) Phigent Robotics. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import build_norm_layer
from torch.utils.checkpoint import checkpoint
from mmcv.cnn.bricks import ConvModule
from mmdet.models import NECKS


@NECKS.register_module()
class FPN_LSS(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 scale_factor=4,
                 input_feature_index=(0, 2),
                 norm_cfg=dict(type='BN'),
                 extra_upsample=2,
                 lateral=None,
                 use_input_conv=False):
        super(FPN_LSS, self).__init__()
        self.input_feature_index = input_feature_index
        self.extra_upsample = extra_upsample is not None
        self.out_channels = out_channels
        # Used to upsample the high-level feature map.
        self.up = nn.Upsample(
            scale_factor=scale_factor, mode='bilinear', align_corners=True)
        channels_factor = 2 if self.extra_upsample else 1
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels * channels_factor,
                      kernel_size=3, padding=1, bias=False),
            build_norm_layer(norm_cfg, out_channels * channels_factor)[1],
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels * channels_factor,
                      out_channels * channels_factor,
                      kernel_size=3, padding=1, bias=False),
            build_norm_layer(norm_cfg, out_channels * channels_factor)[1],
            nn.ReLU(inplace=True),
        )
        if self.extra_upsample:
            self.up2 = nn.Sequential(
                nn.Upsample(scale_factor=extra_upsample, mode='bilinear',
                            align_corners=True),
                nn.Conv2d(out_channels * channels_factor, out_channels,
                          kernel_size=3, padding=1, bias=False),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=1,
                          padding=0),
            )
        self.lateral = lateral is not None
        if self.lateral:
            self.lateral_conv = nn.Sequential(
                nn.Conv2d(lateral, lateral, kernel_size=1, padding=0,
                          bias=False),
                build_norm_layer(norm_cfg, lateral)[1],
                nn.ReLU(inplace=True),
            )

    # @torch.compile
    def forward(self, feats):
        """
        Args:
            feats: List[Tensor,] multi-level features
                List[(B, C1, H, W), (B, C2, H/2, W/2), (B, C3, H/4, W/4)]
        Returns:
            x: (B, C_out, 2*H, 2*W)
        """
        x2, x1 = feats[self.input_feature_index[0]], \
            feats[self.input_feature_index[1]]
        if self.lateral:
            x2 = self.lateral_conv(x2)
        x1 = self.up(x1)                        # (B, C3, H, W)
        x1 = torch.cat([x2, x1], dim=1)         # (B, C1+C3, H, W)
        x = self.conv(x1)                       # (B, C', H, W)
        if self.extra_upsample:
            x = self.up2(x)                     # (B, C_out, 2*H, 2*W)
        return x


@NECKS.register_module()
class LSSFPN3D(nn.Module):

    def __init__(self, in_channels, out_channels, with_cp=False):
        super().__init__()
        self.up1 = nn.Upsample(
            scale_factor=2, mode='trilinear', align_corners=True)
        self.up2 = nn.Upsample(
            scale_factor=4, mode='trilinear', align_corners=True)
        self.conv = ConvModule(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            conv_cfg=dict(type='Conv3d'),
            norm_cfg=dict(type='BN3d', ),
            act_cfg=dict(type='ReLU', inplace=True))
        self.with_cp = with_cp

    def forward(self, feats):
        """
        Args:
            feats: List[
                (B, C, Dz, Dy, Dx),
                (B, 2C, Dz/2, Dy/2, Dx/2),
                (B, 4C, Dz/4, Dy/4, Dx/4)
            ]
        Returns:
            x: (B, C, Dz, Dy, Dx)
        """
        x_8, x_16, x_32 = feats
        x_16 = self.up1(x_16)                       # (B, 2C, Dz, Dy, Dx)
        x_32 = self.up2(x_32)                       # (B, 4C, Dz, Dy, Dx)
        x = torch.cat([x_8, x_16, x_32], dim=1)     # (B, 7C, Dz, Dy, Dx)
        if self.with_cp:
            x = checkpoint(self.conv, x)
        else:
            x = self.conv(x)                        # (B, C, Dz, Dy, Dx)
        return x
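A minimal sketch (not part of this commit; channel sizes assumed) of how FPN_LSS fuses two backbone levels: the deeper map (index 2 by default) is upsampled 4x and concatenated onto the shallower map (index 0), so `in_channels` must equal C1 + C3.

import torch

neck = FPN_LSS(in_channels=512 + 2048, out_channels=256)
feats = [torch.rand(2, 512, 64, 64),    # (B, C1, H, W)
         torch.rand(2, 1024, 32, 32),   # middle level, unused by default
         torch.rand(2, 2048, 16, 16)]   # (B, C3, H/4, W/4)
out = neck(feats)                       # (2, 256, 128, 128) after the extra 2x upsample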
projects/mmdet3d_plugin/models/necks/view_transformer.py    0 → 100644

# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.distributed as dist
from mmcv.runner import BaseModule, force_fp32
from mmdet3d.models.builder import NECKS
from ...ops import bev_pool_v2
from ..model_utils import DepthNet
from torch.cuda.amp.autocast_mode import autocast
import torch.nn.functional as F


@NECKS.register_module(force=True)
class LSSViewTransformer(BaseModule):
    r"""Lift-Splat-Shoot view transformer with BEVPoolv2 implementation.

    Please refer to the `paper <https://arxiv.org/abs/2008.05711>`_ and
    `paper <https://arxiv.org/abs/2211.17111>`_.

    Args:
        grid_config (dict): Config of grid along each axis in format of
            (lower_bound, upper_bound, interval). axis in {x, y, z, depth}.
        input_size (tuple(int)): Size of input images in format of (height,
            width).
        downsample (int): Down sample factor from the input size to the
            feature size.
        in_channels (int): Channels of input feature.
        out_channels (int): Channels of transformed feature.
        accelerate (bool): Whether the view transformation is conducted with
            acceleration. Note: the intrinsic and extrinsic of cameras should
            be constant when 'accelerate' is set true.
        sid (bool): Whether to use Spacing Increasing Discretization (SID)
            depth distribution as `STS: Surround-view Temporal Stereo for
            Multi-view 3D Detection`.
        collapse_z (bool): Whether to collapse in z direction.
    """

    def __init__(
        self,
        grid_config,
        input_size,
        downsample=16,
        in_channels=512,
        out_channels=64,
        accelerate=False,
        sid=False,
        collapse_z=True,
    ):
        super(LSSViewTransformer, self).__init__()
        self.grid_config = grid_config
        self.downsample = downsample
        self.create_grid_infos(**grid_config)
        self.sid = sid
        self.frustum = self.create_frustum(
            grid_config['depth'], input_size, downsample)   # (D, fH, fW, 3)  3:(u, v, d)
        self.out_channels = out_channels
        self.in_channels = in_channels
        self.depth_net = nn.Conv2d(
            in_channels, self.D + self.out_channels, kernel_size=1, padding=0)
        self.accelerate = accelerate
        self.initial_flag = True
        self.collapse_z = collapse_z

    def create_grid_infos(self, x, y, z, **kwargs):
        """Generate the grid information including the lower bound, interval,
        and size.

        Args:
            x (tuple(float)): Config of grid along x axis in format of
                (lower_bound, upper_bound, interval).
            y (tuple(float)): Config of grid along y axis in format of
                (lower_bound, upper_bound, interval).
            z (tuple(float)): Config of grid along z axis in format of
                (lower_bound, upper_bound, interval).
            **kwargs: Container for other potential parameters.
        """
        self.grid_lower_bound = torch.Tensor(
            [cfg[0] for cfg in [x, y, z]])      # (min_x, min_y, min_z)
        self.grid_interval = torch.Tensor(
            [cfg[2] for cfg in [x, y, z]])      # (dx, dy, dz)
        self.grid_size = torch.Tensor(
            [(cfg[1] - cfg[0]) / cfg[2] for cfg in [x, y, z]])  # (Dx, Dy, Dz)

    def create_frustum(self, depth_cfg, input_size, downsample):
        """Generate the frustum template for each image.

        Args:
            depth_cfg (tuple(float)): Config of grid along depth axis in
                format of (lower_bound, upper_bound, interval).
            input_size (tuple(int)): Size of input images in format of
                (height, width).
            downsample (int): Down sample scale factor from the input size to
                the feature size.
        Returns:
            frustum: (D, fH, fW, 3)  3:(u, v, d)
        """
        H_in, W_in = input_size
        H_feat, W_feat = H_in // downsample, W_in // downsample
        d = torch.arange(*depth_cfg, dtype=torch.float)\
            .view(-1, 1, 1).expand(-1, H_feat, W_feat)      # (D, fH, fW)
        self.D = d.shape[0]
        if self.sid:
            d_sid = torch.arange(self.D).float()
            depth_cfg_t = torch.tensor(depth_cfg).float()
            d_sid = torch.exp(
                torch.log(depth_cfg_t[0]) + d_sid / (self.D - 1) *
                torch.log((depth_cfg_t[1] - 1) / depth_cfg_t[0]))
            d = d_sid.view(-1, 1, 1).expand(-1, H_feat, W_feat)

        x = torch.linspace(0, W_in - 1, W_feat, dtype=torch.float)\
            .view(1, 1, W_feat).expand(self.D, H_feat, W_feat)      # (D, fH, fW)
        y = torch.linspace(0, H_in - 1, H_feat, dtype=torch.float)\
            .view(1, H_feat, 1).expand(self.D, H_feat, W_feat)      # (D, fH, fW)

        o = torch.stack((x, y, d), -1)      # (D, fH, fW, 3)  3:(u, v, d)
        if dist.is_initialized():
            return o.to(f"cuda:{dist.get_rank()}", non_blocking=True)
        else:
            return o
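For reference, a short sketch (with an assumed nuScenes-style grid_config; the real values live in the training configs, which are not part of this commit) of what create_grid_infos and create_frustum derive:

# Hypothetical grid_config; each axis is (lower_bound, upper_bound, interval).
grid_config = {
    'x': [-40.0, 40.0, 0.4],
    'y': [-40.0, 40.0, 0.4],
    'z': [-1.0, 5.4, 6.4],
    'depth': [1.0, 60.0, 0.5],
}
# create_grid_infos then yields:
#   grid_lower_bound = (-40.0, -40.0, -1.0)
#   grid_interval    = (0.4, 0.4, 6.4)
#   grid_size        = ((40+40)/0.4, (40+40)/0.4, (5.4+1.0)/6.4) = (200, 200, 1)
# and create_frustum gives D = (60.0 - 1.0) / 0.5 = 118 depth bins per pixel.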
    def get_lidar_coor(self, sensor2ego, ego2global, cam2imgs, post_rots,
                       post_trans, bda):
        """Calculate the locations of the frustum points in the lidar
        coordinate system.

        Args:
            rots (torch.Tensor): Rotation from camera coordinate system to
                lidar coordinate system in shape (B, N_cams, 3, 3).
            trans (torch.Tensor): Translation from camera coordinate system to
                lidar coordinate system in shape (B, N_cams, 3).
            cam2imgs (torch.Tensor): Camera intrinsic matrixes in shape
                (B, N_cams, 3, 3).
            post_rots (torch.Tensor): Rotation in camera coordinate system in
                shape (B, N_cams, 3, 3). It is derived from the image view
                augmentation.
            post_trans (torch.Tensor): Translation in camera coordinate system
                derived from image view augmentation in shape (B, N_cams, 3).

        Returns:
            torch.tensor: Point coordinates in shape
                (B, N_cams, D, fH, fW, 3)
        """
        B, N, _, _ = sensor2ego.shape

        # post-transformation
        # B x N x D x H x W x 3
        # points = self.frustum.to(sensor2ego) - post_trans.view(B, N, 1, 1, 1, 3)
        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\
            .matmul(points.unsqueeze(-1))

        # cam_to_ego
        points = torch.cat(
            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)
        combine = sensor2ego[:, :, :3, :3].matmul(torch.inverse(cam2imgs))
        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
        points += sensor2ego[:, :, :3, 3].view(B, N, 1, 1, 1, 3)
        points = bda.view(B, 1, 1, 1, 1, 3, 3).matmul(
            points.unsqueeze(-1)).squeeze(-1)
        return points

    # @torch.compile
    def get_ego_coor(self, sensor2ego, ego2global, cam2imgs, post_rots,
                     post_trans, bda):
        """Calculate the locations of the frustum points in the ego
        coordinate system.

        Args:
            sensor2ego (torch.Tensor): Transformation from camera coordinate
                system to ego coordinate system in shape (B, N_cams, 4, 4).
            ego2global (torch.Tensor): Translation from ego coordinate system
                to global coordinate system in shape (B, N_cams, 4, 4).
            cam2imgs (torch.Tensor): Camera intrinsic matrixes in shape
                (B, N_cams, 3, 3).
            post_rots (torch.Tensor): Rotation in camera coordinate system in
                shape (B, N_cams, 3, 3). It is derived from the image view
                augmentation.
            post_trans (torch.Tensor): Translation in camera coordinate system
                derived from image view augmentation in shape (B, N_cams, 3).
            bda (torch.Tensor): Transformation in bev. (B, 3, 3)

        Returns:
            torch.tensor: Point coordinates in shape (B, N, D, fH, fW, 3)
        """
        B, N, _, _ = sensor2ego.shape

        # post-transformation
        # (D, fH, fW, 3) - (B, N, 1, 1, 1, 3) --> (B, N, D, fH, fW, 3)
        # points = self.frustum.to(sensor2ego) - post_trans.view(B, N, 1, 1, 1, 3)
        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
        # (B, N, 1, 1, 1, 3, 3) @ (B, N, D, fH, fW, 3, 1) --> (B, N, D, fH, fW, 3, 1)
        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3)\
            .matmul(points.unsqueeze(-1))

        # cam_to_ego
        # (B, N, D, fH, fW, 3, 1)  3: (du, dv, d)
        points = torch.cat(
            (points[..., :2, :] * points[..., 2:3, :], points[..., 2:3, :]), 5)
        # R_{c->e} @ K^-1
        combine = sensor2ego[:, :, :3, :3].matmul(torch.inverse(cam2imgs))
        # (B, N, 1, 1, 1, 3, 3) @ (B, N, D, fH, fW, 3, 1) --> (B, N, D, fH, fW, 3, 1)
        # --> (B, N, D, fH, fW, 3)
        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
        # (B, N, D, fH, fW, 3) + (B, N, 1, 1, 1, 3) --> (B, N, D, fH, fW, 3)
        points += sensor2ego[:, :, :3, 3].view(B, N, 1, 1, 1, 3)
        # (B, 1, 1, 1, 3, 3) @ (B, N, D, fH, fW, 3, 1) --> (B, N, D, fH, fW, 3, 1)
        # --> (B, N, D, fH, fW, 3)
        points = bda.view(B, 1, 1, 1, 1, 3, 3).matmul(
            points.unsqueeze(-1)).squeeze(-1)
        return points
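A scalar sketch of what the batched matmuls in get_ego_coor compute for one frustum point (the helper below is hypothetical, not part of the commit): undo the image-view augmentation, lift pixel coordinates to a camera-space point, back-project through the inverse intrinsics into the ego frame, then apply the BEV augmentation.

import torch

def project_one_point(uvd, post_rot, post_tran, cam2img, sensor2ego, bda):
    p = torch.inverse(post_rot) @ (uvd - post_tran)     # undo image augmentation
    p = torch.cat([p[:2] * p[2], p[2:3]])               # (u*d, v*d, d)
    p = sensor2ego[:3, :3] @ torch.inverse(cam2img) @ p \
        + sensor2ego[:3, 3]                             # R_{c->e} @ K^-1 @ p + t
    return bda @ p                                      # BEV augmentation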
    def init_acceleration_v2(self, coor):
        """Pre-compute the necessary information in acceleration including the
        index of points in the final feature.

        Args:
            coor (torch.tensor): Coordinate of points in lidar space in shape
                (B, N, D, H, W, 3).
            x (torch.tensor): Feature of points in shape
                (B, N_cams, D, H, W, C).
        """
        ranks_bev, ranks_depth, ranks_feat, \
            interval_starts, interval_lengths = \
            self.voxel_pooling_prepare_v2(coor)
        # ranks_bev: (N_points, ),
        # ranks_depth: (N_points, ),
        # ranks_feat: (N_points, ),
        # interval_starts: (N_pillar, )
        # interval_lengths: (N_pillar, )

        self.ranks_bev = ranks_bev.int().contiguous()
        self.ranks_feat = ranks_feat.int().contiguous()
        self.ranks_depth = ranks_depth.int().contiguous()
        self.interval_starts = interval_starts.int().contiguous()
        self.interval_lengths = interval_lengths.int().contiguous()

    def voxel_pooling_v2(self, coor, depth, feat):
        """
        Args:
            coor: (B, N, D, fH, fW, 3)
            depth: (B, N, D, fH, fW)
            feat: (B, N, C, fH, fW)
        Returns:
            bev_feat: (B, C*Dz(=1), Dy, Dx)
        """
        ranks_bev, ranks_depth, ranks_feat, \
            interval_starts, interval_lengths = \
            self.voxel_pooling_prepare_v2(coor)
        # ranks_bev: (N_points, ),
        # ranks_depth: (N_points, ),
        # ranks_feat: (N_points, ),
        # interval_starts: (N_pillar, )
        # interval_lengths: (N_pillar, )
        if ranks_feat is None:
            print('warning ---> no points within the predefined '
                  'bev receptive field')
            dummy = torch.zeros(size=[
                feat.shape[0], feat.shape[2],
                int(self.grid_size[2]),
                int(self.grid_size[1]),
                int(self.grid_size[0])
            ]).to(feat)     # (B, C, Dz, Dy, Dx)
            dummy = torch.cat(dummy.unbind(dim=2), 1)   # (B, C*Dz, Dy, Dx)
            return dummy
        feat = feat.permute(0, 1, 3, 4, 2)      # (B, N, fH, fW, C)
        bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),
                          int(self.grid_size[1]), int(self.grid_size[0]),
                          feat.shape[-1])       # (B, Dz, Dy, Dx, C)
        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat,
                               ranks_bev, bev_feat_shape, interval_starts,
                               interval_lengths)    # (B, C, Dz, Dy, Dx)
        # collapse Z
        if self.collapse_z:
            bev_feat = torch.cat(bev_feat.unbind(dim=2), 1)     # (B, C*Dz, Dy, Dx)
        return bev_feat

    # @torch.compile(options={"triton.cudagraphs":True})
    def voxel_pooling_prepare_v2(self, coor):
        """Data preparation for voxel pooling.

        Args:
            coor (torch.tensor): Coordinate of points in the lidar space in
                shape (B, N, D, H, W, 3).
        Returns:
            tuple[torch.tensor]:
                ranks_bev: Rank of the voxel that a point belongs to in shape
                    (N_points, ), rank in range [0, B*Dx*Dy*Dz-1].
                ranks_depth: Reserved index of points in the depth space in
                    shape (N_points, ), rank in range [0, B*N*D*fH*fW-1].
                ranks_feat: Reserved index of points in the feature space in
                    shape (N_points, ), rank in range [0, B*N*fH*fW-1].
                interval_starts: (N_pillar, )
                interval_lengths: (N_pillar, )
        """
        # with torch.cuda.device(coor.device):
        # zero_t = torch.scalar_tensor(0).to(coor.device)
        if dist.is_initialized():
            dv = f"cuda:{dist.get_rank()}"
        else:
            dv = "cuda"
        assert dv == str(coor.device)
        with torch.cuda.device(dv):
            B, N, D, H, W, _ = coor.shape
            num_points = B * N * D * H * W
            # record the index of selected points for acceleration purpose
            ranks_depth = torch.range(
                0, num_points - 1, dtype=torch.int,
                device=coor.device)     # (B*N*D*H*W, ), [0, 1, ..., B*N*D*fH*fW-1]
            ranks_feat = torch.range(
                0, num_points // D - 1, dtype=torch.int,
                device=coor.device)     # [0, 1, ..., B*N*fH*fW-1]
            ranks_feat = ranks_feat.reshape(B, N, 1, H, W)
            ranks_feat = ranks_feat.expand(B, N, D, H, W).flatten()     # (B*N*D*fH*fW, )

            # convert coordinate into the voxel space
            # ((B, N, D, fH, fW, 3) - (3, )) / (3, ) --> (B, N, D, fH, fW, 3)   3:(x, y, z)  grid coords.
            coor = ((coor - self.grid_lower_bound.to(coor)) /
                    self.grid_interval.to(coor))
            coor = coor.long().view(num_points, 3)      # (B, N, D, fH, fW, 3) --> (B*N*D*fH*fW, 3)
            # (B, N*D*fH*fW) --> (B*N*D*fH*fW, 1)
            batch_idx = torch.range(0, B - 1).reshape(B, 1).\
                expand(B, num_points // B).reshape(num_points, 1).to(coor)
            coor = torch.cat((coor, batch_idx), 1)      # (B*N*D*fH*fW, 4)  4: (x, y, z, batch_id)

            # filter out points that are outside box
            kept = (coor[:, 0] >= 0) & (coor[:, 0] < self.grid_size[0]) & \
                   (coor[:, 1] >= 0) & (coor[:, 1] < self.grid_size[1]) & \
                   (coor[:, 2] >= 0) & (coor[:, 2] < self.grid_size[2])
            if len(kept) == 0:
                return None, None, None, None, None
            # (N_points, 4), (N_points, ), (N_points, )
            coor, ranks_depth, ranks_feat = \
                coor[kept], ranks_depth[kept], ranks_feat[kept]

            # get tensors from the same voxel next to each other
            ranks_bev = coor[:, 3] * (
                self.grid_size[2] * self.grid_size[1] * self.grid_size[0])
            ranks_bev += coor[:, 2] * (self.grid_size[1] * self.grid_size[0])
            ranks_bev += coor[:, 1] * self.grid_size[0] + coor[:, 0]
            order = ranks_bev.argsort()
            # (N_points, ), (N_points, ), (N_points, )
            ranks_bev, ranks_depth, ranks_feat = \
                ranks_bev[order], ranks_depth[order], ranks_feat[order]

            kept = torch.ones(
                ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
            kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
            interval_starts = torch.where(kept)[0].int()
            if len(interval_starts) == 0:
                return None, None, None, None, None
            interval_lengths = torch.zeros_like(interval_starts)
            interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
            interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
            return ranks_bev.int().contiguous(), ranks_depth.int().contiguous(
            ), ranks_feat.int().contiguous(), interval_starts.int().contiguous(
            ), interval_lengths.int().contiguous()
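A toy illustration (hypothetical values) of the interval bookkeeping voxel_pooling_prepare_v2 returns: after sorting by ranks_bev, points that land in the same BEV pillar form one contiguous run, and each run becomes one pooling interval.

import torch

ranks_bev = torch.tensor([7, 7, 7, 9, 12, 12])           # already sorted
kept = torch.ones(6, dtype=torch.bool)
kept[1:] = ranks_bev[1:] != ranks_bev[:-1]               # (1, 0, 0, 1, 1, 0)
interval_starts = torch.where(kept)[0].int()             # (0, 3, 4)
interval_lengths = torch.zeros_like(interval_starts)
interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
# interval_lengths == (3, 1, 2): pillar 7 pools 3 points, 9 pools 1, 12 pools 2.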
    def pre_compute(self, input):
        if self.initial_flag:
            coor = self.get_ego_coor(*input[1:7])   # (B, N, D, fH, fW, 3)
            self.init_acceleration_v2(coor)
            self.initial_flag = False

    def view_transform_core(self, input, depth, tran_feat):
        """
        Args:
            input (list(torch.tensor)):
                imgs: (B, N, 3, H, W)       # N_views = 6 * (N_history + 1)
                sensor2egos: (B, N, 4, 4)
                ego2globals: (B, N, 4, 4)
                intrins: (B, N, 3, 3)
                post_rots: (B, N, 3, 3)
                post_trans: (B, N, 3)
                bda_rot: (B, 3, 3)
            depth: (B*N, D, fH, fW)
            tran_feat: (B*N, C, fH, fW)
        Returns:
            bev_feat: (B, C*Dz(=1), Dy, Dx)
            depth: (B*N, D, fH, fW)
        """
        B, N, C, H, W = input[0].shape

        # Lift-Splat
        if self.accelerate:
            feat = tran_feat.view(B, N, self.out_channels, H, W)   # (B, N, C, fH, fW)
            feat = feat.permute(0, 1, 3, 4, 2)      # (B, N, fH, fW, C)
            depth = depth.view(B, N, self.D, H, W)  # (B, N, D, fH, fW)
            bev_feat_shape = (depth.shape[0], int(self.grid_size[2]),
                              int(self.grid_size[1]), int(self.grid_size[0]),
                              feat.shape[-1])       # (B, Dz, Dy, Dx, C)
            bev_feat = bev_pool_v2(depth, feat, self.ranks_depth,
                                   self.ranks_feat, self.ranks_bev,
                                   bev_feat_shape, self.interval_starts,
                                   self.interval_lengths)   # (B, C, Dz, Dy, Dx)
            bev_feat = bev_feat.squeeze(2)      # (B, C, Dy, Dx)
        else:
            coor = self.get_ego_coor(*input[1:7])   # (B, N, D, fH, fW, 3)
            bev_feat = self.voxel_pooling_v2(
                coor, depth.view(B, N, self.D, H, W),
                tran_feat.view(B, N, self.out_channels, H, W))  # (B, C*Dz(=1), Dy, Dx)
        return bev_feat, depth

    def view_transform(self, input, depth, tran_feat):
        """
        Args:
            input (list(torch.tensor)):
                imgs: (B, N, C, H, W)       # N_views = 6 * (N_history + 1)
                sensor2egos: (B, N, 4, 4)
                ego2globals: (B, N, 4, 4)
                intrins: (B, N, 3, 3)
                post_rots: (B, N, 3, 3)
                post_trans: (B, N, 3)
                bda_rot: (B, 3, 3)
            depth: (B*N, D, fH, fW)
            tran_feat: (B*N, C, fH, fW)
        Returns:
            bev_feat: (B, C, Dy, Dx)
            depth: (B*N, D, fH, fW)
        """
        if self.accelerate:
            self.pre_compute(input)
        return self.view_transform_core(input, depth, tran_feat)

    # @torch.compile
    def forward(self, input):
        """Transform image-view feature into bird-eye-view feature.

        Args:
            input (list(torch.tensor)):
                imgs: (B, N_views, 3, H, W)     # N_views = 6 * (N_history + 1)
                sensor2egos: (B, N_views, 4, 4)
                ego2globals: (B, N_views, 4, 4)
                intrins: (B, N_views, 3, 3)
                post_rots: (B, N_views, 3, 3)
                post_trans: (B, N_views, 3)
                bda_rot: (B, 3, 3)
        Returns:
            bev_feat: (B, C, Dy, Dx)
            depth: (B*N, D, fH, fW)
        """
        x = input[0]    # (B, N, C_in, fH, fW)
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)      # (B*N, C_in, fH, fW)
        # (B*N, C_in, fH, fW) --> (B*N, D+C, fH, fW)
        x = self.depth_net(x)

        depth_digit = x[:, :self.D, ...]    # (B*N, D, fH, fW)
        tran_feat = x[:, self.D:self.D + self.out_channels, ...]   # (B*N, C, fH, fW)
        depth = depth_digit.softmax(dim=1)
        return self.view_transform(input, depth, tran_feat)

    def get_mlp_input(self, rot, tran, intrin, post_rot, post_tran, bda):
        return None
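A minimal construction sketch (illustrative only; the assumed values match the grid_config sketched earlier, not a config in this commit):

vt = LSSViewTransformer(
    grid_config={'x': [-40.0, 40.0, 0.4],
                 'y': [-40.0, 40.0, 0.4],
                 'z': [-1.0, 5.4, 6.4],
                 'depth': [1.0, 60.0, 0.5]},
    input_size=(256, 704),      # (H, W) of the augmented input images
    downsample=16,              # so fH, fW = 16, 44
    in_channels=512,
    out_channels=64)
# forward(input) splits the 1x1 depth_net output into a softmaxed
# D=118-way depth distribution and a 64-channel context per pixel,
# then splats them into the (200, 200) BEV grid with bev_pool_v2.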
@NECKS.register_module()
class LSSViewTransformerBEVDepth(LSSViewTransformer):

    def __init__(self, loss_depth_weight=3.0, depthnet_cfg=dict(), **kwargs):
        super(LSSViewTransformerBEVDepth, self).__init__(**kwargs)
        self.loss_depth_weight = loss_depth_weight
        self.depth_net = DepthNet(
            in_channels=self.in_channels,
            mid_channels=self.in_channels,
            context_channels=self.out_channels,
            depth_channels=self.D,
            **depthnet_cfg)

    def get_mlp_input(self, sensor2ego, ego2global, intrin, post_rot,
                      post_tran, bda):
        """
        Args:
            sensor2ego: (B, N_views=6, 4, 4)
            ego2global: (B, N_views=6, 4, 4)
            intrin: (B, N_views, 3, 3)
            post_rot: (B, N_views, 3, 3)
            post_tran: (B, N_views, 3)
            bda: (B, 3, 3)
        Returns:
            mlp_input: (B, N_views, 27)
        """
        B, N, _, _ = sensor2ego.shape
        bda = bda.view(B, 1, 3, 3).repeat(1, N, 1, 1)   # (B, 3, 3) --> (B, N, 3, 3)
        mlp_input = torch.stack([
            intrin[:, :, 0, 0],     # fx
            intrin[:, :, 1, 1],     # fy
            intrin[:, :, 0, 2],     # cx
            intrin[:, :, 1, 2],     # cy
            post_rot[:, :, 0, 0],
            post_rot[:, :, 0, 1],
            post_tran[:, :, 0],
            post_rot[:, :, 1, 0],
            post_rot[:, :, 1, 1],
            post_tran[:, :, 1],
            bda[:, :, 0, 0],
            bda[:, :, 0, 1],
            bda[:, :, 1, 0],
            bda[:, :, 1, 1],
            bda[:, :, 2, 2],
        ], dim=-1)      # (B, N_views, 15)
        sensor2ego = sensor2ego[:, :, :3, :].reshape(B, N, -1)
        mlp_input = torch.cat([mlp_input, sensor2ego], dim=-1)     # (B, N_views, 27)
        return mlp_input

    def forward(self, input, stereo_metas=None):
        """
        Args:
            input (list(torch.tensor)):
                imgs: (B, N_views, 3, H, W)     # N_views = 6 * (N_history + 1)
                sensor2egos: (B, N_views, 4, 4)
                ego2globals: (B, N_views, 4, 4)
                intrins: (B, N_views, 3, 3)
                post_rots: (B, N_views, 3, 3)
                post_trans: (B, N_views, 3)
                bda_rot: (B, 3, 3)
                mlp_input: (B, N_views, 27)
            stereo_metas: None or dict{
                k2s_sensor: (B, N_views, 4, 4)
                intrins: (B, N_views, 3, 3)
                post_rots: (B, N_views, 3, 3)
                post_trans: (B, N_views, 3)
                frustum: (D, fH_stereo, fW_stereo, 3)  3:(u, v, d)
                cv_downsample: 4,
                downsample: self.img_view_transformer.downsample=16,
                grid_config: self.img_view_transformer.grid_config,
                cv_feat_list: [feat_prev_iv, stereo_feat]
            }
        Returns:
            bev_feat: (B, C, Dy, Dx)
            depth: (B*N, D, fH, fW)
        """
        (x, rots, trans, intrins, post_rots, post_trans, bda,
         mlp_input) = input[:8]
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)      # (B*N_views, C, fH, fW)
        x = self.depth_net(x, mlp_input, stereo_metas)      # (B*N_views, D+C_context, fH, fW)
        depth_digit = x[:, :self.D, ...]    # (B*N_views, D, fH, fW)
        tran_feat = x[:, self.D:self.D + self.out_channels, ...]   # (B*N_views, C_context, fH, fW)
        depth = depth_digit.softmax(dim=1)      # (B*N_views, D, fH, fW)
        bev_feat, depth = self.view_transform(input, depth, tran_feat)
        return bev_feat, depth

    def get_downsampled_gt_depth(self, gt_depths):
        """
        Input:
            gt_depths: (B, N_views, img_h, img_w)
        Output:
            gt_depths: (B*N_views*fH*fW, D)
        """
        B, N, H, W = gt_depths.shape
        # (B*N_views, fH, downsample, fW, downsample, 1)
        gt_depths = gt_depths.view(B * N, H // self.downsample,
                                   self.downsample, W // self.downsample,
                                   self.downsample, 1)
        # (B*N_views, fH, fW, 1, downsample, downsample)
        gt_depths = gt_depths.permute(0, 1, 3, 5, 2, 4).contiguous()
        # (B*N_views*fH*fW, downsample, downsample)
        gt_depths = gt_depths.view(-1, self.downsample * self.downsample)
        gt_depths_tmp = torch.where(gt_depths == 0.0,
                                    1e5 * torch.ones_like(gt_depths),
                                    gt_depths)
        gt_depths = torch.min(gt_depths_tmp, dim=-1).values
        # (B*N_views, fH, fW)
        gt_depths = gt_depths.view(B * N, H // self.downsample,
                                   W // self.downsample)

        if not self.sid:
            # (D - (min_dist - interval_dist)) / interval_dist
            # = (D - min_dist) / interval_dist + 1
            gt_depths = (gt_depths - (self.grid_config['depth'][0] -
                                      self.grid_config['depth'][2])) / \
                self.grid_config['depth'][2]
        else:
            gt_depths = torch.log(gt_depths) - torch.log(
                torch.tensor(self.grid_config['depth'][0]).float())
            gt_depths = gt_depths * (self.D - 1) / torch.log(
                torch.tensor(self.grid_config['depth'][1] - 1.).float() /
                self.grid_config['depth'][0])
            gt_depths = gt_depths + 1.
        gt_depths = torch.where((gt_depths < self.D + 1) & (gt_depths >= 0.0),
                                gt_depths, torch.zeros_like(gt_depths))
        # (B*N_views, fH, fW)
        gt_depths = F.one_hot(
            gt_depths.long(),
            num_classes=self.D + 1).view(-1, self.D + 1)[:, 1:]
        # (B*N_views*fH*fW, D)
        return gt_depths.float()

    @force_fp32()
    def get_depth_loss(self, depth_labels, depth_preds):
        """
        Args:
            depth_labels: (B, N_views, img_h, img_w)
            depth_preds: (B*N_views, D, fH, fW)
        Returns:
        """
        depth_labels = self.get_downsampled_gt_depth(depth_labels)  # (B*N_views*fH*fW, D)
        # (B*N_views, D, fH, fW) --> (B*N_views, fH, fW, D) --> (B*N_views*fH*fW, D)
        depth_preds = depth_preds.permute(0, 2, 3,
                                          1).contiguous().view(-1, self.D)
        fg_mask = torch.max(depth_labels, dim=1).values > 0.0
        depth_labels = depth_labels[fg_mask]
        depth_preds = depth_preds[fg_mask]
        with autocast(enabled=False):
            depth_loss = F.binary_cross_entropy(
                depth_preds,
                depth_labels,
                reduction='none',
            ).sum() / max(1.0, fg_mask.sum())
        return self.loss_depth_weight * depth_loss


@NECKS.register_module()
class LSSViewTransformerBEVStereo(LSSViewTransformerBEVDepth):

    def __init__(self, **kwargs):
        super(LSSViewTransformerBEVStereo, self).__init__(**kwargs)
        # (D, fH_stereo, fW_stereo, 3)  3:(u, v, d)
        self.cv_frustum = self.create_frustum(kwargs['grid_config']['depth'],
                                              kwargs['input_size'],
                                              downsample=4)
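A worked example of the uniform (sid=False) binning in get_downsampled_gt_depth, under the assumed grid_config['depth'] = (1.0, 60.0, 0.5) sketched above: bin(d) = (d - (min - interval)) / interval.

import torch

d_min, d_max, d_step = 1.0, 60.0, 0.5       # assumed grid_config['depth']
D = int((d_max - d_min) / d_step)           # 118 bins
gt = torch.tensor([0.0, 1.0, 10.0, 80.0])   # metric depths; 0 = no LiDAR return
idx = (gt - (d_min - d_step)) / d_step      # (-1, 1, 19, 159)
idx = torch.where((idx < D + 1) & (idx >= 0.0), idx, torch.zeros_like(idx))
onehot = torch.nn.functional.one_hot(idx.long(), num_classes=D + 1)[:, 1:]
# Rows for 0.0 and 80.0 are all-zero after class 0 is dropped, so such pixels
# are excluded from the BCE depth loss by the fg_mask above.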
projects/mmdet3d_plugin/ops/__init__.py    0 → 100644

from .bev_pool import bev_pool
from .bev_pool_v2 import bev_pool_v2, TRTBEVPoolv2
from .nearest_assign import nearest_assign

__all__ = ['bev_pool', 'bev_pool_v2', 'TRTBEVPoolv2', 'nearest_assign']
\ No newline at end of file
projects/mmdet3d_plugin/ops/bev_pool/__init__.py    0 → 100644

from .bev_pool import bev_pool
projects/mmdet3d_plugin/ops/bev_pool/bev_pool.py    0 → 100644

import torch

from . import bev_pool_ext


class QuickBevPoolingCuda(torch.autograd.Function):

    @staticmethod
    def forward(ctx, feats, coords, ranks, B, D, H, W, pooling_method):
        """
        Args:
            ctx:
            feats: (N, C)
            coords: (N, 4)  4: (x_id, y_id, z_id, batch_id)
            ranks: (N, )  eg: (0, 0, 1, 1, 1, 2, 2)
            B:
            D:
            H:
            W:
        Returns:
            out: (B, D, H, W, C)
        """
        kept = torch.ones(
            feats.shape[0], device=feats.device, dtype=torch.bool)     # (N, )
        # 1 at boundary points where the pillar id changes, 0 elsewhere.  eg: (1, 0, 1, 0, 0, 1, 0)
        kept[1:] = ranks[1:] != ranks[:-1]
        # start position of each pillar (N_pillar, )  eg: (0, 2, 5)
        interval_starts = torch.where(kept)[0].int()
        # number of points contained in each pillar (N_pillar, )  eg: (0, 0, 0)
        interval_lengths = torch.zeros_like(interval_starts)
        interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]  # eg: (0, 2, 5)
        interval_lengths[-1] = feats.shape[0] - interval_starts[-1]         # eg: (0, 3, 2)
        coords = coords.int()

        if pooling_method == 'sum':
            out = bev_pool_ext.bev_sum_pool_forward(
                feats,              # (N, C)
                coords,             # (N, 4)  4: (x_id, y_id, z_id, batch_id)
                interval_lengths,   # (N_pillar, )
                interval_starts,    # (N_pillar, )
                B,
                D,
                H,
                W,
            )
        elif pooling_method == 'max':
            out = bev_pool_ext.bev_max_pool_forward(
                feats,              # (N, C)
                coords,             # (N, 4)  4: (x_id, y_id, z_id, batch_id)
                interval_lengths,   # (N_pillar, )
                interval_starts,    # (N_pillar, )
                B,
                D,
                H,
                W,
            )
        ctx.save_for_backward(interval_starts, interval_lengths, coords)
        ctx.saved_shapes = B, D, H, W
        ctx.pooling_method = pooling_method
        return out

    @staticmethod
    def backward(ctx, out_grad):
        """
        Args:
            ctx:
            out_grad: (B, D, H, W, C)
        Returns:
            x_grad: (N, C)
        """
        # (N_pillar, ), (N_pillar, ), (N, 4)  4: (x_id, y_id, z_id, batch_id)
        interval_starts, interval_lengths, geom_coords = ctx.saved_tensors
        B, D, H, W = ctx.saved_shapes
        pooling_method = ctx.pooling_method

        out_grad = out_grad.contiguous()
        if pooling_method == 'sum':
            x_grad = bev_pool_ext.bev_sum_pool_backward(
                out_grad,           # (B, D, H, W, C)
                geom_coords,        # (N, 4)  4: (x_id, y_id, z_id, batch_id)
                interval_lengths,   # (N_pillar, )
                interval_starts,    # (N_pillar, )
                B,
                D,
                H,
                W,
            )   # (N, C)
        elif pooling_method == 'max':
            x_grad = bev_pool_ext.bev_max_pool_backward(
                out_grad,           # (B, D, H, W, C)
                geom_coords,        # (N, 4)  4: (x_id, y_id, z_id, batch_id)
                interval_lengths,   # (N_pillar, )
                interval_starts,    # (N_pillar, )
                B,
                D,
                H,
                W,
            )   # (N, C)
        return x_grad, None, None, None, None, None, None, None


def bev_pool(feats, coords, B, D, H, W, pooling_method='sum'):
    """
    Args:
        feats: (N, C)
        coords: (N, 4)  4: (x_id, y_id, z_id, batch_id)
        B:
        D: Dz
        H: Dy
        W: Dx
    Returns:
        bev_features: (B, C, D, H, W)
    """
    assert feats.shape[0] == coords.shape[0]

    ranks = (
        coords[:, 0] * (H * D * B)
        + coords[:, 1] * (D * B)
        + coords[:, 2] * B
        + coords[:, 3]
    )   # (N, )
    indices = ranks.argsort()   # (N, )
    # (N, C), (N, 4), (N, )
    feats, coords, ranks = feats[indices], coords[indices], ranks[indices]

    x = QuickBevPoolingCuda.apply(feats, coords, ranks, B, D, H, W,
                                  pooling_method)   # (B, D, H, W, C)
    x = x.permute(0, 4, 1, 2, 3).contiguous()       # (B, C, D, H, W)
    return x
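A minimal usage sketch (toy sizes, not from the commit) for the sum-pooling path; it requires the compiled bev_pool_ext extension and a CUDA device. Points sharing a (x_id, y_id, z_id, batch_id) voxel are summed into one cell.

import torch

N, C = 5, 8                     # 5 camera-frustum points, 8 feature channels
feats = torch.rand(N, C, device='cuda')
coords = torch.tensor([[0, 0, 0, 0],    # (x_id, y_id, z_id, batch_id)
                       [0, 0, 0, 0],    # same voxel -> summed together
                       [1, 2, 0, 0],
                       [1, 2, 0, 0],
                       [3, 1, 0, 0]], device='cuda', dtype=torch.int)
bev = bev_pool(feats, coords, B=1, D=1, H=4, W=4, pooling_method='sum')
print(bev.shape)                # torch.Size([1, 8, 1, 4, 4])  (B, C, D, H, W)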
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.cpp    0 → 100644

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

#include "bev_max_pool.h"

/*
  Function: pillar pooling (forward, cuda)
  Args:
    geom_feats        : input features, FloatTensor[N, C]
    _geom_coords      : input coordinates, IntTensor[N, 4]  4: (x_id, y_id, z_id, batch_id)
    interval_lengths  : how many points in each pooled point, IntTensor[N_pillar, ]
    interval_starts   : starting position for pooled point, IntTensor[N_pillar, ]
  Return:
    out               : output features, FloatTensor[b, d, h, w, c]
*/
at::Tensor bev_max_pool_forward(
    const at::Tensor _geom_feats,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w) {
  int n = _geom_feats.size(0);
  int c = _geom_feats.size(1);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_geom_feats));
  const float* geom_feats = _geom_feats.data_ptr<float>();
  const int* geom_coords = _geom_coords.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();

  auto options = torch::TensorOptions()
                     .dtype(_geom_feats.dtype())
                     .device(_geom_feats.device());
  at::Tensor _out = torch::zeros({b, d, h, w, c}, options);  // (B, D=Dz, H=Dy, W=Dx, C)
  float* out = _out.data_ptr<float>();
  bev_max_pool(
      b, d, h, w, n, c, n_intervals, geom_feats,
      geom_coords, interval_starts, interval_lengths, out);
  return _out;
}

/*
  Function: pillar pooling (backward, cuda)
  Args:
    out_grad          : input features, FloatTensor[B, D, H, W, C]
    geom_coords       : input coordinates, IntTensor[N, 4]
    interval_lengths  : how many points in each pooled point, IntTensor[N_pillar, ]
    interval_starts   : starting position for pooled point, IntTensor[N_pillar, ]
  Return:
    x_grad            : output features, FloatTensor[N, C]
*/
at::Tensor bev_max_pool_backward(
    const at::Tensor _out_grad,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w) {
  int n = _geom_coords.size(0);
  int c = _out_grad.size(4);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
  const float* out_grad = _out_grad.data_ptr<float>();
  const int* geom_coords = _geom_coords.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();

  auto options = torch::TensorOptions()
                     .dtype(_out_grad.dtype())
                     .device(_out_grad.device());
  at::Tensor _x_grad = torch::zeros({n, c}, options);  // (N, C)
  float* x_grad = _x_grad.data_ptr<float>();
  bev_max_pool_grad(
      b, d, h, w, n, c, n_intervals, out_grad,
      geom_coords, interval_starts, interval_lengths, x_grad);
  return _x_grad;
}
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.h    0 → 100644

#ifndef _BEV_MAX_POOL_H
#define _BEV_MAX_POOL_H

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

at::Tensor bev_max_pool_forward(
    const at::Tensor _geom_feats,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w);

at::Tensor bev_max_pool_backward(
    const at::Tensor _out_grad,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w);

// CUDA function declarations
void bev_max_pool(int b, int d, int h, int w, int n, int c, int n_intervals,
                  const float* x, const int* geom_feats,
                  const int* interval_starts, const int* interval_lengths,
                  float* out);

void bev_max_pool_grad(int b, int d, int h, int w, int n, int c,
                       int n_intervals, const float* out_grad,
                       const int* geom_feats, const int* interval_starts,
                       const int* interval_lengths, float* x_grad);

#endif
\ No newline at end of file
projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.cu    0 → 100644

#include <stdio.h>
#include <stdlib.h>
#include "bev_max_pool.h"

/*
  Function: pillar pooling
  Args:
    b                 : batch size
    d                 : depth of the feature map
    h                 : height of pooled feature map
    w                 : width of pooled feature map
    n                 : number of input points
    c                 : number of channels
    n_intervals       : number of unique points
    geom_feats        : input features, FloatTensor[n, c]
    geom_coords       : input coordinates, IntTensor[n, 4]  4: (x_id, y_id, z_id, batch_id)
    interval_starts   : starting position for pooled point, IntTensor[n_intervals]
    interval_lengths  : how many points in each pooled point, IntTensor[n_intervals]
    out               : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_max_pool_kernel(int b, int d, int h, int w, int n, int c,
                                    int n_intervals,
                                    const float *__restrict__ geom_feats,
                                    const int *__restrict__ geom_coords,
                                    const int *__restrict__ interval_starts,
                                    const int *__restrict__ interval_lengths,
                                    float *__restrict__ out) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  int index = idx / c;
  int cur_c = idx % c;
  if (index >= n_intervals) return;
  int interval_start = interval_starts[index];
  int interval_length = interval_lengths[index];
  // Coordinates of the pillar this thread is responsible for,
  // 4: (x_id, y_id, z_id, batch_id).
  const int* cur_geom_coords = geom_coords + interval_start * 4;
  const float* cur_geom_feats = geom_feats + interval_start * c + cur_c;
  float* cur_out = out + cur_geom_coords[3] * d * h * w * c +
                   cur_geom_coords[2] * h * w * c +
                   cur_geom_coords[1] * w * c +
                   cur_geom_coords[0] * c + cur_c;
  float pmax = 0;
  for (int i = 0; i < interval_length; i++) {
    if (cur_geom_feats[i * c] > pmax) pmax = cur_geom_feats[i * c];
  }
  *cur_out = pmax;
}

/*
  Function: pillar pooling backward
  Args:
    b                 : batch size
    d                 : depth of the feature map
    h                 : height of pooled feature map
    w                 : width of pooled feature map
    n                 : number of input points
    c                 : number of channels
    n_intervals       : number of unique points
    out_grad          : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
    geom_coords       : input coordinates, IntTensor[N, 4]  4: (x_id, y_id, z_id, batch_id)
    interval_lengths  : how many points in each pooled point, IntTensor[n_intervals]
    interval_starts   : starting position for pooled point, IntTensor[n_intervals]
    x_grad            : gradient of the image fmap, FloatTensor
*/
__global__ void bev_max_pool_grad_kernel(int b, int d, int h, int w, int n,
                                         int c, int n_intervals,
                                         const float *__restrict__ out_grad,
                                         const int *__restrict__ geom_coords,
                                         const int *__restrict__ interval_starts,
                                         const int *__restrict__ interval_lengths,
                                         float *__restrict__ x_grad) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  int index = idx / c;
  int cur_c = idx % c;
  if (index >= n_intervals) return;
  int interval_start = interval_starts[index];
  int interval_length = interval_lengths[index];
  // Coordinates of the pillar this thread is responsible for,
  // 4: (x_id, y_id, z_id, batch_id).
  // The gradient of all points in this pillar is the same as the gradient
  // of the pillar feature.
  const int* cur_geom_coords = geom_coords + interval_start * 4;
  float* cur_x_grad = x_grad + interval_start * c + cur_c;
  const float* cur_out_grad = out_grad + cur_geom_coords[3] * d * h * w * c +
                              cur_geom_coords[2] * h * w * c +
                              cur_geom_coords[1] * w * c +
                              cur_geom_coords[0] * c + cur_c;
  int max_id = 0;
  float pmax = 0;
  for (int i = 0; i < interval_length; i++) {
    if (cur_x_grad[i * c] > pmax) {
      pmax = cur_x_grad[i * c];
      max_id = i;
    }
  }
  cur_x_grad[max_id * c] = *cur_out_grad;
}

void bev_max_pool(int b, int d, int h, int w, int n, int c, int n_intervals,
                  const float* geom_feats, const int* geom_coords,
                  const int* interval_starts, const int* interval_lengths,
                  float* out) {
  bev_max_pool_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
      b, d, h, w, n, c, n_intervals, geom_feats, geom_coords,
      interval_starts, interval_lengths, out);
}

void bev_max_pool_grad(int b, int d, int h, int w, int n, int c,
                       int n_intervals, const float* out_grad,
                       const int* geom_coords, const int* interval_starts,
                       const int* interval_lengths, float* x_grad) {
  bev_max_pool_grad_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
      b, d, h, w, n, c, n_intervals, out_grad, geom_coords,
      interval_starts, interval_lengths, x_grad);
}
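Each CUDA thread above owns one (pillar, channel) pair. A non-CUDA reference sketch (hypothetical helper, for reading the kernel, not part of the commit) of the forward semantics; note that the kernel initializes pmax to 0, which effectively assumes non-negative (e.g. post-ReLU) features, mirrored here with clamp:

import torch

def bev_max_pool_reference(feats, coords, starts, lengths, B, D, H, W):
    # feats: (N, C) sorted by pillar; coords: (N, 4) 4: (x, y, z, batch)
    C = feats.shape[1]
    out = torch.zeros(B, D, H, W, C)
    for s, l in zip(starts.tolist(), lengths.tolist()):
        x, y, z, b = coords[s].tolist()
        # max over the pillar's points, clamped like the kernel's pmax = 0 init
        out[b, z, y, x] = feats[s:s + l].max(dim=0).values.clamp(min=0)
    return out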
projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp    0 → 100644

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

#include "bev_sum_pool.h"
#include "bev_max_pool.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("bev_sum_pool_forward", &bev_sum_pool_forward, "bev_sum_pool_forward");
  m.def("bev_sum_pool_backward", &bev_sum_pool_backward, "bev_sum_pool_backward");
  m.def("bev_max_pool_forward", &bev_max_pool_forward, "bev_max_pool_forward");
  m.def("bev_max_pool_backward", &bev_max_pool_backward, "bev_max_pool_backward");
}
\ No newline at end of file
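The project presumably builds these sources through a setup script not shown in this diff; for a quick experiment they can also be JIT-compiled (a sketch using the standard PyTorch extension loader; the `name` must match the `bev_pool_ext` module imported in bev_pool.py):

from torch.utils.cpp_extension import load

bev_pool_ext = load(
    name='bev_pool_ext',
    sources=[
        'projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp',
        'projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp',
        'projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu',
        'projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.cpp',
        'projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.cu',
    ],
    verbose=True)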
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp    0 → 100644

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

#include "bev_sum_pool.h"

/*
  Function: pillar pooling (forward, cuda)
  Args:
    geom_feats        : input features, FloatTensor[N, C]
    _geom_coords      : input coordinates, IntTensor[N, 4]  4: (x_id, y_id, z_id, batch_id)
    interval_lengths  : how many points in each pooled point, IntTensor[N_pillar, ]
    interval_starts   : starting position for pooled point, IntTensor[N_pillar, ]
  Return:
    out               : output features, FloatTensor[b, d, h, w, c]
*/
at::Tensor bev_sum_pool_forward(
    const at::Tensor _geom_feats,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w) {
  int n = _geom_feats.size(0);
  int c = _geom_feats.size(1);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_geom_feats));
  const float* geom_feats = _geom_feats.data_ptr<float>();
  const int* geom_coords = _geom_coords.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();

  auto options = torch::TensorOptions()
                     .dtype(_geom_feats.dtype())
                     .device(_geom_feats.device());
  at::Tensor _out = torch::zeros({b, d, h, w, c}, options);  // (B, D=Dz, H=Dy, W=Dx, C)
  float* out = _out.data_ptr<float>();
  bev_sum_pool(
      b, d, h, w, n, c, n_intervals, geom_feats,
      geom_coords, interval_starts, interval_lengths, out);
  return _out;
}

/*
  Function: pillar pooling (backward, cuda)
  Args:
    out_grad          : input features, FloatTensor[B, D, H, W, C]
    geom_coords       : input coordinates, IntTensor[N, 4]
    interval_lengths  : how many points in each pooled point, IntTensor[N_pillar, ]
    interval_starts   : starting position for pooled point, IntTensor[N_pillar, ]
  Return:
    x_grad            : output features, FloatTensor[N, C]
*/
at::Tensor bev_sum_pool_backward(
    const at::Tensor _out_grad,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w) {
  int n = _geom_coords.size(0);
  int c = _out_grad.size(4);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
  const float* out_grad = _out_grad.data_ptr<float>();
  const int* geom_coords = _geom_coords.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();

  auto options = torch::TensorOptions()
                     .dtype(_out_grad.dtype())
                     .device(_out_grad.device());
  at::Tensor _x_grad = torch::zeros({n, c}, options);  // (N, C)
  float* x_grad = _x_grad.data_ptr<float>();
  bev_sum_pool_grad(
      b, d, h, w, n, c, n_intervals, out_grad,
      geom_coords, interval_starts, interval_lengths, x_grad);
  return _x_grad;
}
\ No newline at end of file
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h    0 → 100644

#ifndef _BEV_SUM_POOL_H
#define _BEV_SUM_POOL_H

#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

at::Tensor bev_sum_pool_forward(
    const at::Tensor _geom_feats,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w);

at::Tensor bev_sum_pool_backward(
    const at::Tensor _out_grad,
    const at::Tensor _geom_coords,
    const at::Tensor _interval_lengths,
    const at::Tensor _interval_starts,
    int b, int d, int h, int w);

// CUDA function declarations
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals,
                  const float* x, const int* geom_feats,
                  const int* interval_starts, const int* interval_lengths,
                  float* out);

void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c,
                       int n_intervals, const float* out_grad,
                       const int* geom_feats, const int* interval_starts,
                       const int* interval_lengths, float* x_grad);

#endif
\ No newline at end of file
projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
0 → 100644
View file @
3b8d508a
#include <stdio.h>
#include <stdlib.h>
#include "bev_sum_pool.h"

/*
  Function: pillar pooling
  Args:
    b                : batch size
    d                : depth of the feature map
    h                : height of pooled feature map
    w                : width of pooled feature map
    n                : number of input points
    c                : number of channels
    n_intervals      : number of unique pillars (intervals)
    geom_feats       : input features, FloatTensor[n, c]
    geom_coords      : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
    interval_starts  : starting position of each pooled pillar, IntTensor[n_intervals]
    interval_lengths : number of points in each pooled pillar, IntTensor[n_intervals]
    out              : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_sum_pool_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
                                    const float *__restrict__ geom_feats,
                                    const int *__restrict__ geom_coords,
                                    const int *__restrict__ interval_starts,
                                    const int *__restrict__ interval_lengths,
                                    float *__restrict__ out) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  int index = idx / c;
  int cur_c = idx % c;
  if (index >= n_intervals) return;
  int interval_start = interval_starts[index];
  int interval_length = interval_lengths[index];
  // Coordinates of the pillar handled by this thread, 4: (x_id, y_id, z_id, batch_id).
  const int* cur_geom_coords = geom_coords + interval_start * 4;
  const float* cur_geom_feats = geom_feats + interval_start * c + cur_c;
  float* cur_out = out + cur_geom_coords[3] * d * h * w * c +
    cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
    cur_geom_coords[0] * c + cur_c;
  float psum = 0;
  for (int i = 0; i < interval_length; i++) {
    psum += cur_geom_feats[i * c];
  }
  *cur_out = psum;
}

/*
  Function: pillar pooling backward
  Args:
    b                : batch size
    d                : depth of the feature map
    h                : height of pooled feature map
    w                : width of pooled feature map
    n                : number of input points
    c                : number of channels
    n_intervals      : number of unique pillars (intervals)
    out_grad         : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
    geom_coords      : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
    interval_starts  : starting position of each pooled pillar, IntTensor[n_intervals]
    interval_lengths : number of points in each pooled pillar, IntTensor[n_intervals]
    x_grad           : gradient of the image fmap, FloatTensor
*/
__global__ void bev_sum_pool_grad_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
                                         const float *__restrict__ out_grad,
                                         const int *__restrict__ geom_coords,
                                         const int *__restrict__ interval_starts,
                                         const int *__restrict__ interval_lengths,
                                         float *__restrict__ x_grad) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  int index = idx / c;
  int cur_c = idx % c;
  if (index >= n_intervals) return;
  int interval_start = interval_starts[index];
  int interval_length = interval_lengths[index];
  // Coordinates of the pillar handled by this thread, 4: (x_id, y_id, z_id, batch_id).
  // With sum pooling, every point in the pillar receives the same gradient
  // as the pooled pillar feature.
  const int* cur_geom_coords = geom_coords + interval_start * 4;
  float* cur_x_grad = x_grad + interval_start * c + cur_c;
  const float* cur_out_grad = out_grad + cur_geom_coords[3] * d * h * w * c +
    cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
    cur_geom_coords[0] * c + cur_c;
  for (int i = 0; i < interval_length; i++) {
    cur_x_grad[i * c] = *cur_out_grad;
  }
}

void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals,
                  const float* geom_feats, const int* geom_coords,
                  const int* interval_starts, const int* interval_lengths,
                  float* out) {
  bev_sum_pool_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
    b, d, h, w, n, c, n_intervals,
    geom_feats, geom_coords, interval_starts, interval_lengths, out);
}

void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals,
                       const float* out_grad, const int* geom_coords,
                       const int* interval_starts, const int* interval_lengths,
                       float* x_grad) {
  bev_sum_pool_grad_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
    b, d, h, w, n, c, n_intervals,
    out_grad, geom_coords, interval_starts, interval_lengths, x_grad);
}
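Editor's note: both bev_sum_pool and the bev_pool_v2 op later in this commit rely on the same interval bookkeeping: points are sorted so that all points falling into the same pillar are contiguous, and interval_starts / interval_lengths mark each run. A minimal sketch of that derivation, mirroring the logic that appears in bev_pool.py below (function name is illustrative):

import torch

def make_intervals(ranks_bev: torch.Tensor):
    """ranks_bev: (N_points, ) pillar index of each point, assumed sorted."""
    kept = torch.ones(ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]    # True where a new pillar run begins
    interval_starts = torch.where(kept)[0].int()  # first point of each run
    interval_lengths = torch.zeros_like(interval_starts)
    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
    interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
    return interval_starts, interval_lengths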
projects/mmdet3d_plugin/ops/bev_pool_v2/__init__.py
0 → 100644
# Copyright (c) Phigent Robotics. All rights reserved.
from .bev_pool import bev_pool_v2, TRTBEVPoolv2
projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
0 → 100644
# Copyright (c) Phigent Robotics. All rights reserved.
import numpy as np
import torch

from . import bev_pool_v2_ext

__all__ = ['bev_pool_v2', 'TRTBEVPoolv2']


class QuickCumsumCuda(torch.autograd.Function):
    r"""BEVPoolv2 implementation for Lift-Splat-Shoot view transformation.

    Please refer to the `paper <https://arxiv.org/abs/2211.17111>`_.
    """

    @staticmethod
    def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
                bev_feat_shape, interval_starts, interval_lengths):
        ranks_bev = ranks_bev.int()                              # (N_points, )
        depth = depth.contiguous().float()                       # (B, N, D, fH, fW)
        feat = feat.contiguous().float()                         # (B, N, fH, fW, C)
        ranks_depth = ranks_depth.contiguous().int()             # (N_points, )
        ranks_feat = ranks_feat.contiguous().int()               # (N_points, )
        interval_lengths = interval_lengths.contiguous().int()   # (N_pillar, )
        interval_starts = interval_starts.contiguous().int()     # (N_pillar, )

        out = feat.new_zeros(bev_feat_shape)                     # (B, D_Z, D_Y, D_X, C)

        bev_pool_v2_ext.bev_pool_v2_forward(
            depth,
            feat,
            out,
            ranks_depth,
            ranks_feat,
            ranks_bev,
            interval_lengths,
            interval_starts,
        )

        ctx.save_for_backward(ranks_bev, depth, feat, ranks_feat, ranks_depth)
        return out

    @staticmethod
    def backward(ctx, out_grad):
        ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors

        # Re-sort points by feature index so that points sharing one feature
        # location are contiguous; the backward kernel accumulates feat_grad
        # over these runs.
        order = ranks_feat.argsort()
        ranks_feat, ranks_depth, ranks_bev = \
            ranks_feat[order], ranks_depth[order], ranks_bev[order]

        kept = torch.ones(
            ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
        kept[1:] = ranks_feat[1:] != ranks_feat[:-1]
        interval_starts_bp = torch.where(kept)[0].int()
        interval_lengths_bp = torch.zeros_like(interval_starts_bp)
        interval_lengths_bp[:-1] = interval_starts_bp[1:] - interval_starts_bp[:-1]
        interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1]

        depth = depth.contiguous()
        feat = feat.contiguous()
        ranks_depth = ranks_depth.contiguous()
        ranks_feat = ranks_feat.contiguous()
        ranks_bev = ranks_bev.contiguous()
        interval_lengths_bp = interval_lengths_bp.contiguous()
        interval_starts_bp = interval_starts_bp.contiguous()

        depth_grad = depth.new_zeros(depth.shape)
        feat_grad = feat.new_zeros(feat.shape)
        out_grad = out_grad.contiguous()
        bev_pool_v2_ext.bev_pool_v2_backward(
            out_grad,
            depth_grad,
            feat_grad,
            depth,
            feat,
            ranks_depth,
            ranks_feat,
            ranks_bev,
            interval_lengths_bp,
            interval_starts_bp,
        )
        return depth_grad, feat_grad, None, None, None, None, None, \
            None, None, None


def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                bev_feat_shape, interval_starts, interval_lengths):
    """
    Args:
        depth: (B, N, D, fH, fW)
        feat: (B, N, fH, fW, C)
        ranks_depth: (N_points, )
        ranks_feat: (N_points, )
        ranks_bev: (N_points, )
        bev_feat_shape: (B, D_Z, D_Y, D_X, C)
        interval_starts: (N_pillar, )
        interval_lengths: (N_pillar, )

    Returns:
        x: bev feature in shape (B, C, D_Z, D_Y, D_X)
    """
    x = QuickCumsumCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                              bev_feat_shape, interval_starts,
                              interval_lengths)   # (B, D_Z, D_Y, D_X, C)
    x = x.permute(0, 4, 1, 2, 3).contiguous()     # (B, C, D_Z, D_Y, D_X)
    return x


class TRTBEVPoolv2(torch.autograd.Function):

    @staticmethod
    def symbolic(g, depth, feat, ranks_depth, ranks_feat, ranks_bev,
                 interval_starts, interval_lengths, output_height=128,
                 output_width=128, output_z=1):
        """Symbolic function for creating the onnx op."""
        return g.op(
            'mmdeploy::bev_pool_v2',
            depth,
            feat,
            ranks_depth,
            ranks_feat,
            ranks_bev,
            interval_starts,
            interval_lengths,
            output_height_i=output_height,
            output_width_i=output_width,
            output_z_i=output_z)

    @staticmethod
    def forward(g,
                depth,        # (N, D, H, W)
                feat,         # (N, H, W, C)
                ranks_depth,
                ranks_feat,
                ranks_bev,
                interval_starts,
                interval_lengths,
                output_height=128,
                output_width=128,
                output_z=1):
        """Run forward."""
        feat = feat.unsqueeze(0)
        depth = depth.unsqueeze(0)
        bev_feat_shape = (depth.shape[0], output_z, output_height,
                          output_width, feat.shape[-1])   # (B, Z, Y, X, C)
        bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat,
                               ranks_bev, bev_feat_shape, interval_starts,
                               interval_lengths)
        if output_z == 1:
            bev_feat = bev_feat.squeeze(2)
            bev_feat = bev_feat.permute(0, 2, 3, 1)
        return bev_feat


def test_bev_pool_v2():
    depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
    depth = torch.from_numpy(depth).float().cuda()
    depth = depth.view(1, 1, 2, 2, 2).requires_grad_()
    feat = torch.ones(
        size=[1, 1, 2, 2, 2], dtype=torch.float,
        device='cuda').requires_grad_()
    ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
    ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
    ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()

    kept = torch.ones(
        ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
    interval_starts = torch.where(kept)[0].int()
    if len(interval_starts) == 0:
        return None, None, None, None, None
    interval_lengths = torch.zeros_like(interval_starts)
    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
    interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]

    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                           (1, 1, 2, 2, 2), interval_starts, interval_lengths)
    loss = torch.sum(bev_feat)
    loss.backward()
    assert loss == 4.4

    grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
    grad_depth = torch.from_numpy(grad_depth).float()
    grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
    assert depth.grad.allclose(grad_depth)

    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
    grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)
    assert feat.grad.allclose(grad_feat)
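Editor's note: as a sanity check independent of the CUDA extension, the same pooling can be written with dense PyTorch indexing. A minimal reference sketch (not part of the commit; it assumes the same flattened-rank conventions as the kernels, where ranks_depth indexes the flattened depth tensor, ranks_feat the flattened (B, N, fH, fW) feature grid, and ranks_bev the flattened (B, D_Z, D_Y, D_X) BEV grid):

import torch

def bev_pool_v2_reference(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                          bev_feat_shape):
    """Dense reference: out[rank_bev] += depth[rank_depth] * feat[rank_feat]."""
    B, D_Z, D_Y, D_X, C = bev_feat_shape
    d = depth.reshape(-1)[ranks_depth.long()]                 # (N_points, )
    f = feat.reshape(-1, feat.shape[-1])[ranks_feat.long()]   # (N_points, C)
    out = feat.new_zeros(B * D_Z * D_Y * D_X, C)
    out.index_add_(0, ranks_bev.long(), d[:, None] * f)       # scatter-sum per pillar
    return out.view(B, D_Z, D_Y, D_X, C).permute(0, 4, 1, 2, 3).contiguous()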
projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
0 → 100644
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference: https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

// CUDA function declarations
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,
  const int* ranks_depth, const int* ranks_feat, const int* ranks_bev,
  const int* interval_starts, const int* interval_lengths, float* out);

void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
  const float* depth, const float* feat, const int* ranks_depth,
  const int* ranks_feat, const int* ranks_bev, const int* interval_starts,
  const int* interval_lengths, float* depth_grad, float* feat_grad);

/*
  Function: pillar pooling (forward, cuda)
  Args:
    depth            : input depth, FloatTensor[B, N, D, fH, fW]
    feat             : input features, FloatTensor[B, N, fH, fW, C]
    out              : output features, FloatTensor[B, D_Z, D_Y, D_X, C]
    ranks_depth      : depth index of points, IntTensor[N_points]
    ranks_feat       : feat index of points, IntTensor[N_points]
    ranks_bev        : output index of points, IntTensor[N_points]
    interval_lengths : number of points in each pooled pillar, IntTensor[N_pillar]
    interval_starts  : starting position of each pooled pillar, IntTensor[N_pillar]
*/
void bev_pool_v2_forward(
  const at::Tensor _depth,             // (B, N, D, fH, fW)
  const at::Tensor _feat,              // (B, N, fH, fW, C)
  at::Tensor _out,                     // (B, D_Z, D_Y, D_X, C)
  const at::Tensor _ranks_depth,       // (N_points, )
  const at::Tensor _ranks_feat,        // (N_points, )
  const at::Tensor _ranks_bev,         // (N_points, )
  const at::Tensor _interval_lengths,  // (N_pillar, )
  const at::Tensor _interval_starts    // (N_pillar, )
) {
  int c = _feat.size(4);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth));
  const float* depth = _depth.data_ptr<float>();
  const float* feat = _feat.data_ptr<float>();
  const int* ranks_depth = _ranks_depth.data_ptr<int>();
  const int* ranks_feat = _ranks_feat.data_ptr<int>();
  const int* ranks_bev = _ranks_bev.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();
  float* out = _out.data_ptr<float>();
  bev_pool_v2(
    c, n_intervals, depth, feat, ranks_depth, ranks_feat, ranks_bev,
    interval_starts, interval_lengths, out
  );
}

/*
  Function: pillar pooling (backward, cuda)
  Args:
    out_grad         : grad of output bev feature, FloatTensor[B, D_Z, D_Y, D_X, C]
    depth_grad       : grad of input depth, FloatTensor[B, N, D, fH, fW]
    feat_grad        : grad of input feature, FloatTensor[B, N, fH, fW, C]
    depth            : input depth, FloatTensor[B, N, D, fH, fW]
    feat             : input features, FloatTensor[B, N, fH, fW, C]
    ranks_depth      : depth index of points, IntTensor[N_points]
    ranks_feat       : feat index of points, IntTensor[N_points]
    ranks_bev        : output index of points, IntTensor[N_points]
    interval_lengths : number of points in each pooled pillar, IntTensor[N_pillar]
    interval_starts  : starting position of each pooled pillar, IntTensor[N_pillar]
*/
void bev_pool_v2_backward(
  const at::Tensor _out_grad,
  at::Tensor _depth_grad,
  at::Tensor _feat_grad,
  const at::Tensor _depth,
  const at::Tensor _feat,
  const at::Tensor _ranks_depth,
  const at::Tensor _ranks_feat,
  const at::Tensor _ranks_bev,
  const at::Tensor _interval_lengths,
  const at::Tensor _interval_starts
) {
  int c = _out_grad.size(4);
  int n_intervals = _interval_lengths.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
  const float* out_grad = _out_grad.data_ptr<float>();
  float* depth_grad = _depth_grad.data_ptr<float>();
  float* feat_grad = _feat_grad.data_ptr<float>();
  const float* depth = _depth.data_ptr<float>();
  const float* feat = _feat.data_ptr<float>();
  const int* ranks_depth = _ranks_depth.data_ptr<int>();
  const int* ranks_feat = _ranks_feat.data_ptr<int>();
  const int* ranks_bev = _ranks_bev.data_ptr<int>();
  const int* interval_lengths = _interval_lengths.data_ptr<int>();
  const int* interval_starts = _interval_starts.data_ptr<int>();
  bev_pool_v2_grad(
    c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
    ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
  );
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("bev_pool_v2_forward", &bev_pool_v2_forward, "bev_pool_v2_forward");
  m.def("bev_pool_v2_backward", &bev_pool_v2_backward, "bev_pool_v2_backward");
}
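Editor's note: this binding is compiled into the bev_pool_v2_ext module imported by bev_pool.py above. The build script itself is not on this page of the diff; a minimal sketch of how such an extension is typically declared with torch.utils.cpp_extension (the module name comes from the import, the source paths from this commit; everything else is assumption):

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='bev_pool_v2_ext',
    ext_modules=[
        CUDAExtension(
            name='bev_pool_v2_ext',
            sources=[
                'projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp',
                'projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu',
            ],
        )
    ],
    cmdclass={'build_ext': BuildExtension},
)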
projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
0 → 100644
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference: https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>

/*
  Function: pillar pooling
  Args:
    c                : number of channels
    n_intervals      : number of unique pillars (intervals)
    depth            : input depth, FloatTensor[b, n, d, h, w]
    feat             : input feat, FloatTensor[b, n, h, w, c]
    ranks_depth      : input index of depth, IntTensor[n]
    ranks_feat       : input index of feat, IntTensor[n]
    ranks_bev        : output index, IntTensor[n]
    interval_starts  : starting position of each pooled pillar, IntTensor[n_intervals]
    interval_lengths : number of points in each pooled pillar, IntTensor[n_intervals]
    out              : output features, FloatTensor[b, d_z, d_y, d_x, c]
*/
__global__ void bev_pool_v2_kernel(int c, int n_intervals,
                                   const float *__restrict__ depth,
                                   const float *__restrict__ feat,
                                   const int *__restrict__ ranks_depth,
                                   const int *__restrict__ ranks_feat,
                                   const int *__restrict__ ranks_bev,
                                   const int *__restrict__ interval_starts,
                                   const int *__restrict__ interval_lengths,
                                   float *__restrict__ out) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;  // flat index of the (pillar, channel) pair handled by this thread
  int index = idx / c;   // pillar id
  int cur_c = idx % c;   // channel id
  if (index >= n_intervals) return;
  int interval_start = interval_starts[index];    // first point of this pillar
  int interval_length = interval_lengths[index];  // number of points in this pillar
  float psum = 0;
  const float* cur_depth;
  const float* cur_feat;
  for (int i = 0; i < interval_length; i++) {
    // ranks_depth[interval_start + i]: depth index, in [0, B*N*D*fH*fW).
    cur_depth = depth + ranks_depth[interval_start + i];
    // ranks_feat[interval_start + i]: feature index, in [0, B*N*fH*fW).
    cur_feat = feat + ranks_feat[interval_start + i] * c + cur_c;
    psum += *cur_feat * *cur_depth;  // accumulate channel cur_c of this pillar
  }
  const int* cur_rank = ranks_bev + interval_start;  // index of this pillar in the BEV grid
  float* cur_out = out + *cur_rank * c + cur_c;      // output slot of channel cur_c
  *cur_out = psum;
}

/*
  Function: pillar pooling backward
  Args:
    c                : number of channels
    n_intervals      : number of unique pillars (intervals)
    out_grad         : gradient of the BEV fmap from top, FloatTensor[b, d_z, d_y, d_x, c]
    depth            : input depth, FloatTensor[b, n, d, h, w]
    feat             : input feat, FloatTensor[b, n, h, w, c]
    ranks_depth      : input index of depth, IntTensor[n]
    ranks_feat       : input index of feat, IntTensor[n]
    ranks_bev        : output index, IntTensor[n]
    interval_starts  : starting position of each pooled pillar, IntTensor[n_intervals]
    interval_lengths : number of points in each pooled pillar, IntTensor[n_intervals]
    depth_grad       : gradient of the depth fmap, FloatTensor
    feat_grad        : gradient of the feature fmap, FloatTensor
*/
__global__ void bev_pool_grad_kernel(int c, int n_intervals,
                                     const float *__restrict__ out_grad,
                                     const float *__restrict__ depth,
                                     const float *__restrict__ feat,
                                     const int *__restrict__ ranks_depth,
                                     const int *__restrict__ ranks_feat,
                                     const int *__restrict__ ranks_bev,
                                     const int *__restrict__ interval_starts,
                                     const int *__restrict__ interval_lengths,
                                     float *__restrict__ depth_grad,
                                     float *__restrict__ feat_grad) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;  // one thread per interval
  if (idx >= n_intervals) return;
  int interval_start = interval_starts[idx];    // first point of this interval
  int interval_length = interval_lengths[idx];  // number of points in this interval

  const int* cur_rank;
  const float* cur_out_grad;
  const float* cur_out_grad_start;
  const float* cur_feat;
  const float* cur_feat_start;
  float* cur_depth_grad;
  float grad_sum;
  // depth_grad: for each point, the sum over channels of out_grad * feat.
  for (int i = 0; i < interval_length; i++) {
    cur_rank = ranks_bev + interval_start + i;       // BEV grid index of this point
    cur_out_grad_start = out_grad + *cur_rank * c;   // grad of the pooled pillar feature
    cur_feat_start = feat + ranks_feat[interval_start + i] * c;
    grad_sum = 0;
    for (int cur_c = 0; cur_c < c; cur_c++) {
      cur_out_grad = cur_out_grad_start + cur_c;
      cur_feat = cur_feat_start + cur_c;
      grad_sum += *cur_out_grad * *cur_feat;
    }
    cur_depth_grad = depth_grad + ranks_depth[interval_start + i];
    *cur_depth_grad = grad_sum;
  }

  // feat_grad: in the backward pass the intervals are runs of points sharing
  // one feature location (points are re-sorted by ranks_feat in Python), so
  // a single write per channel at ranks_feat[interval_start] suffices.
  float* cur_feat_grad;
  const float* cur_depth;
  for (int cur_c = 0; cur_c < c; cur_c++) {
    grad_sum = 0;
    for (int i = 0; i < interval_length; i++) {
      cur_rank = ranks_bev + interval_start + i;
      cur_out_grad = out_grad + *cur_rank * c + cur_c;
      cur_depth = depth + ranks_depth[interval_start + i];
      grad_sum += *cur_out_grad * *cur_depth;
    }
    cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c;
    *cur_feat_grad = grad_sum;
  }
}

void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,
                 const int* ranks_depth, const int* ranks_feat,
                 const int* ranks_bev, const int* interval_starts,
                 const int* interval_lengths, float* out) {
  bev_pool_v2_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
    c, n_intervals, depth, feat, ranks_depth, ranks_feat, ranks_bev,
    interval_starts, interval_lengths, out);
}

void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
                      const float* depth, const float* feat,
                      const int* ranks_depth, const int* ranks_feat,
                      const int* ranks_bev, const int* interval_starts,
                      const int* interval_lengths, float* depth_grad,
                      float* feat_grad) {
  bev_pool_grad_kernel<<<(int)ceil(((double)n_intervals / 256)), 256>>>(
    c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat, ranks_bev,
    interval_starts, interval_lengths, depth_grad, feat_grad);
}
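Editor's note: the kernels treat ranks_bev as a flat index into the (B, D_Z, D_Y, D_X) grid, matching the out + *cur_rank * c addressing above and the offset arithmetic in bev_sum_pool_kernel. A minimal sketch of how such flat ranks follow from voxel coordinates (illustrative names, not part of the commit):

import torch

def flat_bev_rank(coords, D_Z, D_Y, D_X):
    """coords: (N_points, 4) int tensor of (x_id, y_id, z_id, batch_id)."""
    x, y, z, b = coords.unbind(-1)
    # Row-major over (B, D_Z, D_Y, D_X): consecutive x cells are adjacent.
    return ((b * D_Z + z) * D_Y + y) * D_X + x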
projects/mmdet3d_plugin/ops/nearest_assign/__init__.py
0 → 100644
# Copyright (c) Phigent Robotics. All rights reserved.
from .nearest_assign import nearest_assign
projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
0 → 100644
# Copyright (c) Phigent Robotics. All rights reserved.
import numpy as np
import torch

from . import nearest_assign_ext
# Needed by the copied test below; bev_pool_v2 is re-exported by the
# sibling package's __init__.py.
from ..bev_pool_v2 import bev_pool_v2

__all__ = ['nearest_assign']


class QuickNearestAssignCuda(torch.autograd.Function):

    @staticmethod
    def forward(ctx, occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
                inst_id_list):
        occ_pred = occ_pred.contiguous().int()
        l2s_key = l2s_key.contiguous().int()
        occind2detind = occind2detind.contiguous().int()
        inst_cls = inst_cls.contiguous().int()
        inst_xyz = inst_xyz.contiguous().int()
        inst_id_list = inst_id_list.contiguous().int()
        inst_pred = occ_pred.new_zeros(occ_pred.shape)
        nearest_assign_ext.nearest_assign_forward(occ_pred, l2s_key,
                                                  occind2detind, inst_cls,
                                                  inst_xyz, inst_id_list,
                                                  inst_pred)
        return inst_pred


def nearest_assign(occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
                   inst_id_list):
    inst_pred = QuickNearestAssignCuda.apply(occ_pred, l2s_key, occind2detind,
                                             inst_cls, inst_xyz, inst_id_list)
    # Same shape as occ_pred, e.g. (200, 200, 16).
    return inst_pred


# Note: duplicated verbatim from bev_pool_v2/bev_pool.py.
def test_bev_pool_v2():
    depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
    depth = torch.from_numpy(depth).float().cuda()
    depth = depth.view(1, 1, 2, 2, 2).requires_grad_()
    feat = torch.ones(
        size=[1, 1, 2, 2, 2], dtype=torch.float,
        device='cuda').requires_grad_()
    ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
    ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
    ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()

    kept = torch.ones(
        ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
    interval_starts = torch.where(kept)[0].int()
    if len(interval_starts) == 0:
        return None, None, None, None, None
    interval_lengths = torch.zeros_like(interval_starts)
    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
    interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]

    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                           (1, 1, 2, 2, 2), interval_starts, interval_lengths)
    loss = torch.sum(bev_feat)
    loss.backward()
    assert loss == 4.4

    grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
    grad_depth = torch.from_numpy(grad_depth).float()
    grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
    assert depth.grad.allclose(grad_depth)

    grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
    grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)
    assert feat.grad.allclose(grad_feat)
projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
0 → 100644
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference: https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>

// CUDA function declaration
void nearest_assign(const int* l2s_key, int l2s_size,
                    const int *__restrict__ occind2detind, int inst_size,
                    const int *__restrict__ occ_pred,
                    const int *__restrict__ inst_xyz,
                    const int *__restrict__ inst_cls,
                    const int *__restrict__ inst_id_list,
                    int *__restrict__ inst_pred);

void nearest_assign_forward(
  const at::Tensor _occ_pred,       // (200, 200, 16)
  const at::Tensor _l2s_key,        // (l2s_size, 1)
  const at::Tensor _occind2detind,  // (10, 1)
  const at::Tensor _inst_cls,       // (inst_size, 1)
  const at::Tensor _inst_xyz,       // (inst_size, 3)
  const at::Tensor _inst_id_list,   // (inst_size, 1)
  at::Tensor _inst_pred             // (200, 200, 16)
) {
  int l2s_size = _l2s_key.size(0);
  int inst_size = _inst_xyz.size(0);
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_occ_pred));
  const int* occ_pred = _occ_pred.data_ptr<int>();
  const int* inst_xyz = _inst_xyz.data_ptr<int>();
  const int* inst_cls = _inst_cls.data_ptr<int>();
  const int* l2s_key = _l2s_key.data_ptr<int>();
  const int* inst_id_list = _inst_id_list.data_ptr<int>();
  const int* occind2detind = _occind2detind.data_ptr<int>();
  // std::map<int, int> l2s;
  // for (int l2s_ind = 0; l2s_ind < l2s_size; l2s_ind++){
  //   l2s.insert(pair<int, int>(l2s_key[l2s_ind], l2s_val[l2s_ind]));
  // }
  int* inst_pred = _inst_pred.data_ptr<int>();
  nearest_assign(l2s_key, l2s_size, occind2detind, inst_size, occ_pred,
                 inst_xyz, inst_cls, inst_id_list, inst_pred);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nearest_assign_forward", &nearest_assign_forward,
        "nearest_assign_forward");
}
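Editor's note: a hedged usage sketch for the nearest_assign wrapper, with dummy tensors shaped per the comments in the binding above. The semantic content of l2s_key and occind2detind is not documented in this commit, so the values below are placeholders only:

import torch
from projects.mmdet3d_plugin.ops.nearest_assign import nearest_assign

occ_pred = torch.zeros(200, 200, 16, dtype=torch.int32, device='cuda')
l2s_key = torch.zeros(5, 1, dtype=torch.int32, device='cuda')        # (l2s_size, 1)
occind2detind = torch.zeros(10, 1, dtype=torch.int32, device='cuda')
inst_cls = torch.zeros(3, 1, dtype=torch.int32, device='cuda')       # (inst_size, 1)
inst_xyz = torch.zeros(3, 3, dtype=torch.int32, device='cuda')       # (inst_size, 3)
inst_id_list = torch.arange(3, dtype=torch.int32, device='cuda').view(3, 1)

inst_pred = nearest_assign(occ_pred, l2s_key, occind2detind,
                           inst_cls, inst_xyz, inst_id_list)  # (200, 200, 16)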