OpenDAS / mmdetection3d · Commits

Commit d1aac35d, authored Apr 14, 2020 by zhangwenwei

Initial commit
Changes: 214 files in this commit. Showing 20 changed files on this page, with 1963 additions and 0 deletions (+1963, -0).
mmdet3d/models/fusion_layers/__init__.py (+3, -0)
mmdet3d/models/fusion_layers/point_fusion.py (+287, -0)
mmdet3d/models/losses/__init__.py (+3, -0)
mmdet3d/models/middle_encoders/__init__.py (+4, -0)
mmdet3d/models/middle_encoders/pillar_scatter.py (+85, -0)
mmdet3d/models/middle_encoders/sparse_encoder.py (+215, -0)
mmdet3d/models/necks/__init__.py (+4, -0)
mmdet3d/models/necks/second_fpn.py (+147, -0)
mmdet3d/models/registry.py (+5, -0)
mmdet3d/models/roi_extractors/__init__.py (+3, -0)
mmdet3d/models/utils/__init__.py (+3, -0)
mmdet3d/models/utils/weight_init.py (+46, -0)
mmdet3d/models/voxel_encoders/__init__.py (+8, -0)
mmdet3d/models/voxel_encoders/pillar_encoder.py (+378, -0)
mmdet3d/models/voxel_encoders/utils.py (+148, -0)
mmdet3d/models/voxel_encoders/voxel_encoder.py (+478, -0)
mmdet3d/ops/__init__.py (+11, -0)
mmdet3d/ops/iou3d/__init__.py (+4, -0)
mmdet3d/ops/iou3d/iou3d_utils.py (+113, -0)
mmdet3d/ops/iou3d/setup.py (+18, -0)
mmdet3d/models/fusion_layers/__init__.py (new file, mode 100644)

from .point_fusion import PointFusion

__all__ = ['PointFusion']
mmdet3d/models/fusion_layers/point_fusion.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init

from mmdet3d.models.utils import ConvModule
from ..plugins import NonLocal2D
from ..registry import FUSION_LAYERS


def point_sample(
    img_features,
    points,
    lidar2img_rt,
    pcd_rotate_mat,
    img_scale_factor,
    img_crop_offset,
    pcd_trans_factor,
    pcd_scale_factor,
    pcd_flip,
    img_flip,
    img_pad_shape,
    img_shape,
    aligned=True,
    padding_mode='zeros',
    align_corners=True,
):
    """Sample image features using point coordinates.

    Arguments:
        img_features (Tensor): 1xCxHxW image features.
        points (Tensor): Nx3 point cloud coordinates.
        lidar2img_rt (Tensor): 4x4 transformation matrix.
        img_scale_factor (Tensor): scale factor of the images.
        img_pad_shape (int, int): int tuple indicating the h & w after
            padding; this is necessary to obtain features in the feature map.
        img_shape (int, int): int tuple indicating the h & w before padding
            but after scaling; this is necessary for flipping coordinates.

    Returns:
        Tensor: NxC image features sampled by point coordinates.
    """
    # aug order: flip -> trans -> scale -> rot
    # The transformation follows the augmentation order in the data pipeline
    if pcd_flip:
        # if the points are flipped, flip them back first
        points[:, 1] = -points[:, 1]
    points -= pcd_trans_factor
    # the points should be scaled to the original scale in velo coordinates
    points /= pcd_scale_factor
    # the points should be rotated back
    # pcd_rotate_mat @ pcd_rotate_mat.inverse() is not exactly an identity
    # matrix, so do not use the angle to create the inverse rotation matrix
    # either
    points = points @ pcd_rotate_mat.inverse()

    # project points from velo coordinates to camera coordinates
    num_points = points.shape[0]
    pts_4d = torch.cat([points, points.new_ones(size=(num_points, 1))],
                       dim=-1)
    pts_2d = pts_4d @ lidar2img_rt.t()

    # cam_points is a Tensor of Nx4 whose last column is 1
    # transform camera coordinates to image coordinates
    pts_2d[:, 2] = torch.clamp(pts_2d[:, 2], min=1e-5)
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]

    # img transformation: scale -> crop -> flip
    # the image is resized by img_scale_factor
    img_coors = pts_2d[:, 0:2] * img_scale_factor  # Nx2
    img_coors -= img_crop_offset

    # grid sample, the valid grid range should be in [-1, 1]
    coor_x, coor_y = torch.split(img_coors, 1, dim=1)  # each is Nx1

    if img_flip:
        # by default we take it as horizontal flip
        # use img_shape before padding for flip
        orig_h, orig_w = img_shape
        coor_x = orig_w - coor_x

    h, w = img_pad_shape
    coor_y = coor_y / h * 2 - 1
    coor_x = coor_x / w * 2 - 1
    grid = torch.cat([coor_x, coor_y],
                     dim=1).unsqueeze(0).unsqueeze(0)  # Nx2 -> 1x1xNx2

    # align_corners=True provides higher performance
    mode = 'bilinear' if aligned else 'nearest'
    point_features = F.grid_sample(
        img_features,
        grid,
        mode=mode,
        padding_mode=padding_mode,
        align_corners=align_corners)  # 1xCx1xN feats

    return point_features.squeeze().t()


@FUSION_LAYERS.register_module
class PointFusion(nn.Module):
    """Fuse image features from fused single-scale features."""

    def __init__(self,
                 img_channels,
                 pts_channels,
                 mid_channels,
                 out_channels,
                 img_levels=3,
                 conv_cfg=None,
                 norm_cfg=None,
                 activation=None,
                 activate_out=True,
                 fuse_out=False,
                 refine_type=None,
                 dropout_ratio=0,
                 aligned=True,
                 align_corners=True,
                 padding_mode='zeros',
                 lateral_conv=True):
        super(PointFusion, self).__init__()
        if isinstance(img_levels, int):
            img_levels = [img_levels]
        if isinstance(img_channels, int):
            img_channels = [img_channels] * len(img_levels)
        assert isinstance(img_levels, list)
        assert isinstance(img_channels, list)
        assert len(img_channels) == len(img_levels)

        self.img_levels = img_levels
        self.activation = activation
        self.activate_out = activate_out
        self.fuse_out = fuse_out
        self.refine_type = refine_type
        self.dropout_ratio = dropout_ratio
        self.img_channels = img_channels
        self.aligned = aligned
        self.align_corners = align_corners
        self.padding_mode = padding_mode

        self.lateral_convs = None
        if lateral_conv:
            self.lateral_convs = nn.ModuleList()
            for i in range(len(img_channels)):
                l_conv = ConvModule(
                    img_channels[i],
                    mid_channels,
                    3,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    activation=self.activation,
                    inplace=False)
                self.lateral_convs.append(l_conv)
            self.img_transform = nn.Sequential(
                nn.Linear(mid_channels * len(img_channels), out_channels),
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
            )
        else:
            self.img_transform = nn.Sequential(
                nn.Linear(sum(img_channels), out_channels),
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
            )
        self.pts_transform = nn.Sequential(
            nn.Linear(pts_channels, out_channels),
            nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
        )

        if self.fuse_out:
            self.fuse_conv = nn.Sequential(
                nn.Linear(mid_channels, out_channels),
                # For pts the BN is initialized differently by default
                # TODO: check whether this is necessary
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
                nn.ReLU(inplace=False))

        if self.refine_type == 'non_local':
            self.refine = NonLocal2D(
                out_channels,
                reduction=1,
                use_scale=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg)

        self.init_weights()

    # default init_weights for conv(msra) and norm in ConvModule
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                xavier_init(m, distribution='uniform')

    def forward(self, img_feats, pts, pts_feats, img_meta):
        """
        img_feats (list[Tensor]): image features.
        pts (list[Tensor]): a batch of points with shape Nx3.
        pts_feats (Tensor): a tensor consisting of the point features of the
            whole batch.
        """
        img_pts = self.obtain_mlvl_feats(img_feats, pts, img_meta)
        img_pre_fuse = self.img_transform(img_pts)
        if self.training and self.dropout_ratio > 0:
            img_pre_fuse = F.dropout(img_pre_fuse, self.dropout_ratio)
        pts_pre_fuse = self.pts_transform(pts_feats)

        fuse_out = img_pre_fuse + pts_pre_fuse
        if self.activate_out:
            fuse_out = F.relu(fuse_out)
        if self.fuse_out:
            fuse_out = self.fuse_conv(fuse_out)
        if self.refine_type is not None:
            fuse_out_T = fuse_out.t()[None, ..., None]  # NxC -> 1xCxNx1
            batch_idx = 0
            attentive = []
            for i in range(len(pts)):
                end_idx = batch_idx + len(pts[i])
                attentive.append(
                    self.refine(fuse_out_T[:, :, batch_idx:end_idx]))
                batch_idx = end_idx
            fuse_out = torch.cat(attentive, dim=-2).squeeze().t()

        return fuse_out

    def obtain_mlvl_feats(self, img_feats, pts, img_meta):
        if self.lateral_convs is not None:
            img_ins = [
                lateral_conv(img_feats[i])
                for i, lateral_conv in zip(self.img_levels, self.lateral_convs)
            ]
        else:
            img_ins = img_feats
        img_feats_per_point = []
        # Sample multi-level features
        for i in range(len(img_meta)):
            mlvl_img_feats = []
            for level in range(len(self.img_levels)):
                mlvl_img_feats.append(
                    self.sample_single(img_ins[level][i:i + 1],
                                       pts[i][:, :3], img_meta[i]))
            mlvl_img_feats = torch.cat(mlvl_img_feats, dim=-1)
            img_feats_per_point.append(mlvl_img_feats)

        img_pts = torch.cat(img_feats_per_point, dim=0)
        return img_pts

    def sample_single(self, img_feats, pts, img_meta):
        pcd_scale_factor = (
            img_meta['pcd_scale_factor']
            if 'pcd_scale_factor' in img_meta.keys() else 1)
        pcd_trans_factor = (
            pts.new_tensor(img_meta['pcd_trans'])
            if 'pcd_trans' in img_meta.keys() else 0)
        pcd_rotate_mat = (
            pts.new_tensor(img_meta['pcd_rotation'])
            if 'pcd_rotation' in img_meta.keys() else
            torch.eye(3).type_as(pts).to(pts.device))
        img_scale_factor = (
            img_meta['scale_factor']
            if 'scale_factor' in img_meta.keys() else 1)
        pcd_flip = img_meta['pcd_flip'] if 'pcd_flip' in img_meta.keys(
        ) else False
        img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False
        img_crop_offset = (
            pts.new_tensor(img_meta['img_crop_offset'])
            if 'img_crop_offset' in img_meta.keys() else 0)
        img_pts = point_sample(
            img_feats,
            pts,
            pts.new_tensor(img_meta['lidar2img']),
            pcd_rotate_mat,
            img_scale_factor,
            img_crop_offset,
            pcd_trans_factor,
            pcd_scale_factor,
            pcd_flip=pcd_flip,
            img_flip=img_flip,
            img_pad_shape=img_meta['pad_shape'][:2],
            img_shape=img_meta['img_shape'][:2],
            aligned=self.aligned,
            padding_mode=self.padding_mode,
            align_corners=self.align_corners,
        )
        return img_pts
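Below is a minimal sketch (not part of the commit) of the coordinate normalization that point_sample performs before F.grid_sample: a pixel coordinate in a padded image of size (h, w) is mapped into grid_sample's [-1, 1] range. The sketch uses align_corners=False so that adding 0.5 to an integer pixel index hits that pixel's center exactly; the module above defaults to align_corners=True, which uses a slightly different convention.

# A minimal, self-contained sketch of the normalization used above.
import torch
import torch.nn.functional as F

h, w = 4, 4
img_features = torch.arange(16.).view(1, 1, h, w)  # 1xCxHxW, C = 1
# sample the pixel at column u = 2, row v = 1 (value 1 * 4 + 2 = 6)
coor_x = torch.tensor([[2.5]]) / w * 2 - 1  # +0.5 targets the pixel center
coor_y = torch.tensor([[1.5]]) / h * 2 - 1
grid = torch.cat([coor_x, coor_y], dim=1).unsqueeze(0).unsqueeze(0)  # 1x1xNx2
feat = F.grid_sample(img_features, grid, mode='bilinear',
                     align_corners=False)
print(feat.view(-1))  # tensor([6.])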
mmdet3d/models/losses/__init__.py (new file, mode 100644)

from mmdet.models.losses import FocalLoss, SmoothL1Loss

__all__ = ['FocalLoss', 'SmoothL1Loss']
mmdet3d/models/middle_encoders/__init__.py (new file, mode 100644)

from .pillar_scatter import PointPillarsScatter
from .sparse_encoder import SparseEncoder

__all__ = ['PointPillarsScatter', 'SparseEncoder']
mmdet3d/models/middle_encoders/pillar_scatter.py (new file, mode 100644)

import torch
from torch import nn

from ..registry import MIDDLE_ENCODERS


@MIDDLE_ENCODERS.register_module
class PointPillarsScatter(nn.Module):

    def __init__(self, in_channels, output_shape):
        """PointPillars scatter.

        Converts learned features from a dense tensor to a sparse pseudo
        image.

        Args:
            output_shape (list[int]): Required output shape of features.
            in_channels (int): Number of input features.
        """
        super().__init__()
        self.name = 'PointPillarsScatter'
        self.output_shape = output_shape
        self.ny = output_shape[0]
        self.nx = output_shape[1]
        self.nchannels = in_channels

    def forward(self, voxel_features, coors, batch_size=None):
        # TODO: rewrite the function in a batch manner
        # so there is no need to deal with different batch cases
        if batch_size is not None:
            return self.forward_batch(voxel_features, coors, batch_size)
        else:
            return self.forward_single(voxel_features, coors)

    def forward_single(self, voxel_features, coors):
        # Create the canvas for this sample
        canvas = torch.zeros(
            self.nchannels,
            self.nx * self.ny,
            dtype=voxel_features.dtype,
            device=voxel_features.device)

        indices = coors[:, 1] * self.nx + coors[:, 2]
        indices = indices.long()
        voxels = voxel_features.t()
        # Now scatter the blob back to the canvas.
        canvas[:, indices] = voxels
        # Undo the column stacking to the final 4-dim tensor
        canvas = canvas.view(1, self.nchannels, self.ny, self.nx)
        return [canvas]

    def forward_batch(self, voxel_features, coors, batch_size):
        # batch_canvas will be the final output.
        batch_canvas = []
        for batch_itt in range(batch_size):
            # Create the canvas for this sample
            canvas = torch.zeros(
                self.nchannels,
                self.nx * self.ny,
                dtype=voxel_features.dtype,
                device=voxel_features.device)

            # Only include non-empty pillars
            batch_mask = coors[:, 0] == batch_itt
            this_coors = coors[batch_mask, :]
            indices = this_coors[:, 2] * self.nx + this_coors[:, 3]
            indices = indices.type(torch.long)
            voxels = voxel_features[batch_mask, :]
            voxels = voxels.t()

            # Now scatter the blob back to the canvas.
            canvas[:, indices] = voxels

            # Append to a list for later stacking.
            batch_canvas.append(canvas)

        # Stack to a 3-dim tensor (batch_size, nchannels, nrows * ncols)
        batch_canvas = torch.stack(batch_canvas, 0)

        # Undo the column stacking to the final 4-dim tensor
        batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny,
                                         self.nx)
        return batch_canvas
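A minimal usage sketch (not part of the commit), assuming the module is importable from mmdet3d.models.middle_encoders as above; the shapes are illustrative toy values for a KITTI-like 496x432 pseudo image.

import torch
from mmdet3d.models.middle_encoders import PointPillarsScatter

scatter = PointPillarsScatter(in_channels=64, output_shape=[496, 432])
voxel_features = torch.rand(100, 64)           # 100 non-empty pillars
coors = torch.zeros(100, 4, dtype=torch.long)  # [batch_idx, z, y, x]
coors[:, 2] = torch.randint(0, 496, (100,))    # y index
coors[:, 3] = torch.randint(0, 432, (100,))    # x index
canvas = scatter(voxel_features, coors, batch_size=1)
assert canvas.shape == (1, 64, 496, 432)       # sparse pseudo image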
mmdet3d/models/middle_encoders/sparse_encoder.py (new file, mode 100644)

import torch.nn as nn

import mmdet3d.ops.spconv as spconv
from ..registry import MIDDLE_ENCODERS
from ..utils import build_norm_layer


@MIDDLE_ENCODERS.register_module
class SparseEncoder(nn.Module):

    def __init__(self,
                 in_channels,
                 output_shape,
                 pre_act,
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)):
        super().__init__()
        self.sparse_shape = output_shape
        self.output_shape = output_shape
        self.in_channels = in_channels
        self.pre_act = pre_act
        # Spconv initializes all weights on its own
        # TODO: make the network configurable
        if pre_act:
            self.conv_input = spconv.SparseSequential(
                spconv.SubMConv3d(
                    in_channels,
                    16,
                    3,
                    padding=1,
                    bias=False,
                    indice_key='subm1'),
            )
            block = self.pre_act_block
        else:
            norm_name, norm_layer = build_norm_layer(norm_cfg, 16)
            self.conv_input = spconv.SparseSequential(
                spconv.SubMConv3d(
                    in_channels,
                    16,
                    3,
                    padding=1,
                    bias=False,
                    indice_key='subm1'),
                norm_layer,
                nn.ReLU(),
            )
            block = self.post_act_block

        self.conv1 = spconv.SparseSequential(
            block(16, 16, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm1'),
        )

        self.conv2 = spconv.SparseSequential(
            # [1600, 1408, 41] -> [800, 704, 21]
            block(
                16,
                32,
                3,
                norm_cfg=norm_cfg,
                stride=2,
                padding=1,
                indice_key='spconv2',
                conv_type='spconv'),
            block(32, 32, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm2'),
            block(32, 32, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm2'),
        )

        self.conv3 = spconv.SparseSequential(
            # [800, 704, 21] -> [400, 352, 11]
            block(
                32,
                64,
                3,
                norm_cfg=norm_cfg,
                stride=2,
                padding=1,
                indice_key='spconv3',
                conv_type='spconv'),
            block(64, 64, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm3'),
            block(64, 64, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm3'),
        )

        self.conv4 = spconv.SparseSequential(
            # [400, 352, 11] -> [200, 176, 5]
            block(
                64,
                64,
                3,
                norm_cfg=norm_cfg,
                stride=2,
                padding=(0, 1, 1),
                indice_key='spconv4',
                conv_type='spconv'),
            block(64, 64, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm4'),
            block(64, 64, 3, norm_cfg=norm_cfg, padding=1,
                  indice_key='subm4'),
        )

        norm_name, norm_layer = build_norm_layer(norm_cfg, 128)
        self.conv_out = spconv.SparseSequential(
            # [200, 176, 5] -> [200, 176, 2]
            spconv.SparseConv3d(
                128,
                128, (3, 1, 1),
                stride=(2, 1, 1),
                padding=0,
                bias=False,
                indice_key='spconv_down2'),
            norm_layer,
            nn.ReLU(),
        )

    def forward(self, voxel_features, coors, batch_size):
        """
        :param voxel_features: (N, C)
        :param coors: (N, 4) [batch_idx, z_idx, y_idx, x_idx]
        :param batch_size:
        :return:
        """
        coors = coors.int()
        input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
                                                  self.sparse_shape,
                                                  batch_size)
        x = self.conv_input(input_sp_tensor)

        x_conv1 = self.conv1(x)
        x_conv2 = self.conv2(x_conv1)
        x_conv3 = self.conv3(x_conv2)
        x_conv4 = self.conv4(x_conv3)

        # for the detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(x_conv4)
        spatial_features = out.dense()

        N, C, D, H, W = spatial_features.shape
        spatial_features = spatial_features.view(N, C * D, H, W)

        return spatial_features

    def pre_act_block(self,
                      in_channels,
                      out_channels,
                      kernel_size,
                      indice_key=None,
                      stride=1,
                      padding=0,
                      conv_type='subm',
                      norm_cfg=None):
        norm_name, norm_layer = build_norm_layer(norm_cfg, in_channels)
        if conv_type == 'subm':
            m = spconv.SparseSequential(
                norm_layer,
                nn.ReLU(inplace=True),
                spconv.SubMConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    padding=padding,
                    bias=False,
                    indice_key=indice_key),
            )
        elif conv_type == 'spconv':
            m = spconv.SparseSequential(
                norm_layer,
                nn.ReLU(inplace=True),
                spconv.SparseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False,
                    indice_key=indice_key),
            )
        else:
            raise NotImplementedError
        return m

    def post_act_block(self,
                       in_channels,
                       out_channels,
                       kernel_size,
                       indice_key,
                       stride=1,
                       padding=0,
                       conv_type='subm',
                       norm_cfg=None):
        norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
        if conv_type == 'subm':
            m = spconv.SparseSequential(
                spconv.SubMConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    bias=False,
                    indice_key=indice_key),
                norm_layer,
                nn.ReLU(inplace=True),
            )
        elif conv_type == 'spconv':
            m = spconv.SparseSequential(
                spconv.SparseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False,
                    indice_key=indice_key),
                norm_layer,
                nn.ReLU(inplace=True),
            )
        else:
            raise NotImplementedError
        return m
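A worked check (illustrative, not part of the commit) of the spatial-shape annotations in the comments above: strided sparse convolutions follow the standard convolution output-size formula, so the depth axis shrinks 41 -> 21 -> 11 -> 5 across conv2/conv3/conv4.

# floor((size + 2 * pad - kernel) / stride) + 1, for kernel 3, stride 2
def down(size, kernel=3, stride=2, pad=1):
    return (size + 2 * pad - kernel) // stride + 1

assert down(41) == 21 and down(21) == 11   # conv2, conv3 (padding 1)
assert down(11, pad=0) == 5                # conv4 uses padding (0, 1, 1),
                                           # i.e. 0 on the depth axis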
mmdet3d/models/necks/__init__.py (new file, mode 100644)

from mmdet.models.necks.fpn import FPN
from .second_fpn import SECONDFPN

__all__ = ['FPN', 'SECONDFPN']
mmdet3d/models/necks/second_fpn.py (new file, mode 100644)

import logging
from functools import partial

import torch
import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
from mmcv.runner import load_checkpoint
from torch.nn import Sequential
from torch.nn.modules.batchnorm import _BatchNorm

from .. import builder
from ..registry import NECKS
from ..utils import build_norm_layer


class Empty(nn.Module):

    def __init__(self, *args, **kwargs):
        super(Empty, self).__init__()

    def forward(self, *args, **kwargs):
        if len(args) == 1:
            return args[0]
        elif len(args) == 0:
            return None
        return args


@NECKS.register_module
class SECONDFPN(nn.Module):
    """Compared with RPN, RPNV2 supports an arbitrary number of stages."""

    def __init__(self,
                 use_norm=True,
                 in_channels=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01)):
        # for GroupNorm the cfg is
        # dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True)
        super(SECONDFPN, self).__init__()
        assert len(num_upsample_filters) == len(upsample_strides)
        self.in_channels = in_channels

        if norm_cfg is not None:
            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=False)
        else:
            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=True)

        deblocks = []
        for i, num_upsample_filter in enumerate(num_upsample_filters):
            norm_layer = (
                build_norm_layer(norm_cfg, num_upsample_filter)[1]
                if norm_cfg is not None else Empty())
            deblock = Sequential(
                ConvTranspose2d(
                    in_channels[i],
                    num_upsample_filter,
                    upsample_strides[i],
                    stride=upsample_strides[i]),
                norm_layer,
                nn.ReLU(inplace=True),
            )
            deblocks.append(deblock)
        self.deblocks = nn.ModuleList(deblocks)

    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            # keeping this initialization yields better results
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')
        return

    def forward(self, inputs):
        assert len(inputs) == len(self.in_channels)
        ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]

        if len(ups) > 1:
            x = torch.cat(ups, dim=1)
        else:
            x = ups[0]
        return [x]


@NECKS.register_module
class SECONDFusionFPN(SECONDFPN):
    """Compared with RPN, RPNV2 supports an arbitrary number of stages."""

    def __init__(self,
                 use_norm=True,
                 in_channels=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
                 down_sample_rate=[40, 8, 8],
                 fusion_layer=None,
                 cat_points=False):
        super(SECONDFusionFPN, self).__init__(
            use_norm,
            in_channels,
            upsample_strides,
            num_upsample_filters,
            norm_cfg,
        )
        self.fusion_layer = None
        if fusion_layer is not None:
            self.fusion_layer = builder.build_fusion_layer(fusion_layer)
        self.cat_points = cat_points
        self.down_sample_rate = down_sample_rate

    def forward(self,
                inputs,
                coors=None,
                points=None,
                img_feats=None,
                img_meta=None):
        assert len(inputs) == len(self.in_channels)
        ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]

        if len(ups) > 1:
            x = torch.cat(ups, dim=1)
        else:
            x = ups[0]
        if (self.fusion_layer is not None and img_feats is not None):
            downsample_pts_coors = torch.zeros_like(coors)
            downsample_pts_coors[:, 0] = coors[:, 0]
            downsample_pts_coors[:, 1] = (
                coors[:, 1] / self.down_sample_rate[0])
            downsample_pts_coors[:, 2] = (
                coors[:, 2] / self.down_sample_rate[1])
            downsample_pts_coors[:, 3] = (
                coors[:, 3] / self.down_sample_rate[2])
            # fusion for each point
            x = self.fusion_layer(img_feats, points, x, downsample_pts_coors,
                                  img_meta)
        return [x]
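A minimal shape sketch (not part of the commit), assuming SECONDFPN is importable from mmdet3d.models.necks: with upsample_strides [1, 2, 4], each deblock uses a transposed convolution whose kernel size equals its stride, so the three feature maps land at a common resolution and can be concatenated along channels.

import torch
from mmdet3d.models.necks import SECONDFPN

fpn = SECONDFPN(in_channels=[128, 128, 256], upsample_strides=[1, 2, 4],
                num_upsample_filters=[256, 256, 256])
inputs = [torch.rand(1, 128, 100, 100),  # stride-1 map stays at 100x100
          torch.rand(1, 128, 50, 50),    # upsampled 2x -> 100x100
          torch.rand(1, 256, 25, 25)]    # upsampled 4x -> 100x100
out = fpn(inputs)[0]
assert out.shape == (1, 768, 100, 100)   # 3 * 256 channels concatenated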
mmdet3d/models/registry.py (new file, mode 100644)

from mmdet.utils import Registry

VOXEL_ENCODERS = Registry('voxel_encoder')
MIDDLE_ENCODERS = Registry('middle_encoder')
FUSION_LAYERS = Registry('fusion_layer')
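A minimal sketch (not part of the commit) of how these registries are used throughout the codebase: a class decorated with register_module is recorded under its own name and can later be built from a config dict. DummyEncoder is a hypothetical class for illustration, and build_from_cfg is assumed to be exported by mmdet v1.x alongside Registry.

from mmdet.utils import build_from_cfg  # assumed available in mmdet v1.x

from mmdet3d.models.registry import MIDDLE_ENCODERS


@MIDDLE_ENCODERS.register_module
class DummyEncoder(object):  # hypothetical, for illustration only

    def __init__(self, in_channels):
        self.in_channels = in_channels


# the 'type' key is resolved against the registry, the rest become kwargs
encoder = build_from_cfg(
    dict(type='DummyEncoder', in_channels=64), MIDDLE_ENCODERS)
assert encoder.in_channels == 64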
mmdet3d/models/roi_extractors/__init__.py (new file, mode 100644)

from mmdet.models.roi_extractors.single_level import SingleRoIExtractor

__all__ = ['SingleRoIExtractor']
mmdet3d/models/utils/__init__.py (new file, mode 100644)

from mmdet.models.utils import ResLayer, bias_init_with_prob

__all__ = ['bias_init_with_prob', 'ResLayer']
mmdet3d/models/utils/weight_init.py (new file, mode 100644)

import numpy as np
import torch.nn as nn


def xavier_init(module, gain=1, bias=0, distribution='normal'):
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.xavier_uniform_(module.weight, gain=gain)
    else:
        nn.init.xavier_normal_(module.weight, gain=gain)
    if hasattr(module, 'bias'):
        nn.init.constant_(module.bias, bias)


def normal_init(module, mean=0, std=1, bias=0):
    nn.init.normal_(module.weight, mean, std)
    if hasattr(module, 'bias'):
        nn.init.constant_(module.bias, bias)


def uniform_init(module, a=0, b=1, bias=0):
    nn.init.uniform_(module.weight, a, b)
    if hasattr(module, 'bias'):
        nn.init.constant_(module.bias, bias)


def kaiming_init(module,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.kaiming_uniform_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    else:
        nn.init.kaiming_normal_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    if hasattr(module, 'bias'):
        nn.init.constant_(module.bias, bias)


def bias_init_with_prob(prior_prob):
    """Initialize conv/fc bias value according to a given probability."""
    bias_init = float(-np.log((1 - prior_prob) / prior_prob))
    return bias_init
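A worked example (illustrative, not part of the commit) for bias_init_with_prob: with the focal-loss convention prior_prob = 0.01, the bias is -log((1 - 0.01) / 0.01) = -log(99) ≈ -4.595, so a sigmoid over the freshly initialized logits fires with probability of roughly 0.01.

# sigmoid(-4.595) ≈ 0.01, matching the requested prior probability
assert abs(bias_init_with_prob(0.01) - (-4.595)) < 1e-3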
mmdet3d/models/voxel_encoders/__init__.py (new file, mode 100644)

from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)

__all__ = [
    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
    'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
]
mmdet3d/models/voxel_encoders/pillar_encoder.py (new file, mode 100644)

import torch
from torch import nn

from mmdet3d.ops import DynamicScatter, build_norm_layer
from ..registry import VOXEL_ENCODERS
from .utils import PFNLayer, get_paddings_indicator


@VOXEL_ENCODERS.register_module
class PillarFeatureNet(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=(64, ),
                 with_distance=False,
                 with_cluster_center=True,
                 with_voxel_center=True,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 mode='max'):
        """Pillar Feature Net.

        The network prepares the pillar features and performs a forward pass
        through PFNLayers.

        Args:
            num_input_features (int): Number of input features,
                either x, y, z or x, y, z, r.
            use_norm (bool): Whether to include BatchNorm.
            num_filters (list[int]): Number of features in each of the
                N PFNLayers.
            with_distance (bool): Whether to include the Euclidean distance
                to points.
            voxel_size (list[float]): Size of voxels; only the x and y
                sizes are used.
            point_cloud_range (list[float]): Point cloud range; only the
                x and y minima are used.
        """
        super(PillarFeatureNet, self).__init__()
        assert len(num_filters) > 0
        if with_cluster_center:
            num_input_features += 3
        if with_voxel_center:
            num_input_features += 2
        if with_distance:
            num_input_features += 1
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center

        # Create PillarFeatureNet layers
        self.num_input_features = num_input_features
        num_filters = [num_input_features] + list(num_filters)
        pfn_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            if i < len(num_filters) - 2:
                last_layer = False
            else:
                last_layer = True
            pfn_layers.append(
                PFNLayer(
                    in_filters,
                    out_filters,
                    use_norm,
                    last_layer=last_layer,
                    mode=mode))
        self.pfn_layers = nn.ModuleList(pfn_layers)

        # Need pillar (voxel) size and x/y offset in order to calculate
        # the offset
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.point_cloud_range = point_cloud_range

    def forward(self, features, num_points, coors):
        features_ls = [features]
        # Find the distance of x, y, and z from the cluster center
        if self._with_cluster_center:
            points_mean = features[:, :, :3].sum(
                dim=1, keepdim=True) / num_points.type_as(features).view(
                    -1, 1, 1)
            f_cluster = features[:, :, :3] - points_mean
            features_ls.append(f_cluster)

        # Find the distance of x, y, and z from the pillar center
        if self._with_voxel_center:
            f_center = features[:, :, :2]
            f_center[:, :, 0] = f_center[:, :, 0] - (
                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
                self.x_offset)
            f_center[:, :, 1] = f_center[:, :, 1] - (
                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
                self.y_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine the feature decorations
        features = torch.cat(features_ls, dim=-1)
        # The feature decorations were calculated without regard to whether
        # a pillar was empty. Need to ensure that
        # empty pillars remain set to zeros.
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        features *= mask

        for pfn in self.pfn_layers:
            features = pfn(features, num_points)

        return features.squeeze()


@VOXEL_ENCODERS.register_module
class DynamicPillarFeatureNet(PillarFeatureNet):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=(64, ),
                 with_distance=False,
                 with_cluster_center=True,
                 with_voxel_center=True,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 mode='max'):
        """Dynamic Pillar Feature Net for dynamic voxelization.

        The difference from PillarFeatureNet is in the forward part.
        """
        super(DynamicPillarFeatureNet, self).__init__(
            num_input_features,
            use_norm,
            num_filters,
            with_distance,
            with_cluster_center=with_cluster_center,
            with_voxel_center=with_voxel_center,
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            mode=mode)
        num_filters = [self.num_input_features] + list(num_filters)
        pfn_layers = []
        # TODO: currently only support one PFNLayer
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            if i > 0:
                in_filters *= 2
            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
            pfn_layers.append(
                nn.Sequential(
                    nn.Linear(in_filters, out_filters, bias=False),
                    norm_layer,
                    nn.ReLU(inplace=True)))
        self.num_pfn = len(pfn_layers)
        self.pfn_layers = nn.ModuleList(pfn_layers)
        self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range,
                                          (mode != 'max'))
        self.cluster_scatter = DynamicScatter(
            voxel_size, point_cloud_range, average_points=True)

    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
        # Step 1: scatter voxels into the canvas
        # Calculate necessary things for canvas creation
        canvas_y = int(
            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)
        canvas_x = int(
            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)
        canvas_channel = voxel_mean.size(1)
        batch_size = pts_coors[-1, 0] + 1
        canvas_len = canvas_y * canvas_x * batch_size
        # Create the canvas for this sample
        canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)
        # Only include non-empty pillars
        indices = (
            voxel_coors[:, 0] * canvas_y * canvas_x +
            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
        # Scatter the blob back to the canvas
        canvas[:, indices.long()] = voxel_mean.t()

        # Step 2: get the voxel mean for each point
        voxel_index = (
            pts_coors[:, 0] * canvas_y * canvas_x +
            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
        center_per_point = canvas[:, voxel_index.long()].t()
        return center_per_point

    def forward(self, features, coors):
        """
        features (torch.Tensor): NxC
        coors (torch.Tensor): Nx(1+NDim)
        """
        features_ls = [features]
        # Find the distance of x, y, and z from the cluster center
        if self._with_cluster_center:
            voxel_mean, mean_coors = self.cluster_scatter(features, coors)
            points_mean = self.map_voxel_center_to_point(
                coors, voxel_mean, mean_coors)
            # TODO: maybe also do cluster for reflectivity
            f_cluster = features[:, :3] - points_mean[:, :3]
            features_ls.append(f_cluster)

        # Find the distance of x, y, and z from the pillar center
        if self._with_voxel_center:
            f_center = features.new_zeros(size=(features.size(0), 2))
            f_center[:, 0] = features[:, 0] - (
                coors[:, 3].type_as(features) * self.vx + self.x_offset)
            f_center[:, 1] = features[:, 1] - (
                coors[:, 2].type_as(features) * self.vy + self.y_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)
            features_ls.append(points_dist)

        # Combine the feature decorations
        features = torch.cat(features_ls, dim=-1)
        for i, pfn in enumerate(self.pfn_layers):
            point_feats = pfn(features)
            voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors)
            if i != len(self.pfn_layers) - 1:
                # need to concat voxel feats if it is not the last pfn
                feat_per_point = self.map_voxel_center_to_point(
                    coors, voxel_feats, voxel_coors)
                features = torch.cat([point_feats, feat_per_point], dim=1)

        return voxel_feats, voxel_coors


@VOXEL_ENCODERS.register_module
class AlignedPillarFeatureNet(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=(64, ),
                 with_distance=False,
                 with_cluster_center=True,
                 with_voxel_center=True,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 mode='max'):
        """Pillar Feature Net.

        The network prepares the pillar features and performs a forward pass
        through PFNLayers.

        Args:
            num_input_features (int): Number of input features, either
                x, y, z or x, y, z, r.
            use_norm (bool): Whether to include BatchNorm.
            num_filters (list[int]): Number of features in each of the N
                PFNLayers.
            with_distance (bool): Whether to include the Euclidean distance
                to points.
            voxel_size (list[float]): Size of voxels; only the x and y
                sizes are used.
            point_cloud_range (list[float]): Point cloud range; only the
                x and y minima are used.
        """
        super(AlignedPillarFeatureNet, self).__init__()
        assert len(num_filters) > 0
        if with_cluster_center:
            print('Use cluster center')
            num_input_features += 3
        if with_voxel_center:
            print('Use voxel center')
            num_input_features += 2
        if with_distance:
            num_input_features += 1
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center

        # Create PillarFeatureNet layers
        num_filters = [num_input_features] + list(num_filters)
        pfn_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            if i < len(num_filters) - 2:
                last_layer = False
            else:
                last_layer = True
            pfn_layers.append(
                PFNLayer(
                    in_filters,
                    out_filters,
                    use_norm,
                    last_layer=last_layer,
                    mode=mode))
        self.pfn_layers = nn.ModuleList(pfn_layers)

        # Need pillar (voxel) size and x/y offset in order to
        # calculate the pillar offset
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]

    def forward(self, features, num_points, coors):
        features_ls = [features]
        # Find the distance of x, y, and z from the cluster center
        if self._with_cluster_center:
            points_mean = features[:, :, :3].sum(
                dim=1, keepdim=True) / num_points.type_as(features).view(
                    -1, 1, 1)
            f_cluster = features[:, :, :3] - points_mean
            features_ls.append(f_cluster)

        x_distance = features[:, :, 0] - (
            coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
            self.x_offset)
        y_distance = features[:, :, 1] - (
            coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
            self.y_offset)
        z_distance = features[:, :, 2] - (
            coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
            self.z_offset)

        normed_x_distance = 1 - torch.abs(x_distance / self.vx)
        normed_y_distance = 1 - torch.abs(y_distance / self.vy)
        normed_z_distance = 1 - torch.abs(z_distance / self.vz)

        x_mask = torch.gt(normed_x_distance, 0).type_as(features)
        y_mask = torch.gt(normed_y_distance, 0).type_as(features)
        z_mask = torch.gt(normed_z_distance, 0).type_as(features)
        nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
        aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
            normed_z_distance).mul(nonzero_points_mask)

        # Find the distance of x, y, and z from the pillar center
        if self._with_voxel_center:
            f_center = features[:, :, :2]
            f_center[:, :, 0] = f_center[:, :, 0] - (
                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
                self.x_offset)
            f_center[:, :, 1] = f_center[:, :, 1] - (
                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
                self.y_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine the feature decorations
        features = torch.cat(features_ls, dim=-1)
        # The feature decorations were calculated without regard to
        # whether a pillar was empty. Need to ensure that
        # empty pillars remain set to zeros.
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        features *= mask

        for pfn in self.pfn_layers:
            if pfn.last_vfe:
                # pass the aligned weights by keyword so they are not
                # mistaken for the num_voxels argument
                features = pfn(features, aligned_distance=aligned_distance)
            else:
                features = pfn(features)

        return features.squeeze()
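A quick check (illustrative, not part of the commit) of the channel arithmetic in PillarFeatureNet, assuming it is importable from mmdet3d.models.voxel_encoders: with the default flags, the decorated input to the first PFNLayer has 4 (x, y, z, r) + 3 (cluster-center offsets) + 2 (pillar-center offsets) = 9 channels.

from mmdet3d.models.voxel_encoders import PillarFeatureNet

net = PillarFeatureNet(num_input_features=4)  # defaults add 3 + 2 channels
assert net.num_input_features == 9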
mmdet3d/models/voxel_encoders/utils.py (new file, mode 100644)

import torch
from torch import nn
from torch.nn import functional as F

from ..utils import build_norm_layer


class Empty(nn.Module):

    def __init__(self, *args, **kwargs):
        super(Empty, self).__init__()

    def forward(self, *args, **kwargs):
        if len(args) == 1:
            return args[0]
        elif len(args) == 0:
            return None
        return args


def get_paddings_indicator(actual_num, max_num, axis=0):
    """Create a boolean mask from the actual number of points in a padded
    tensor.

    Args:
        actual_num (torch.Tensor): Actual number of points in each voxel.
        max_num (int): Maximum number of points per voxel after padding.

    Returns:
        torch.Tensor: Boolean mask that marks the real (non-padding) entries.
    """
    actual_num = torch.unsqueeze(actual_num, axis + 1)
    # tiled_actual_num: [N, M, 1]
    max_num_shape = [1] * len(actual_num.shape)
    max_num_shape[axis + 1] = -1
    max_num = torch.arange(
        max_num, dtype=torch.int,
        device=actual_num.device).view(max_num_shape)
    # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
    # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
    paddings_indicator = actual_num.int() > max_num
    # paddings_indicator shape: [batch_size, max_num]
    return paddings_indicator


class VFELayer(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 max_out=True,
                 cat_max=True):
        super(VFELayer, self).__init__()
        self.cat_max = cat_max
        self.max_out = max_out
        # self.units = int(out_channels / 2)
        if norm_cfg:
            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
            self.norm = norm_layer
            self.linear = nn.Linear(in_channels, out_channels, bias=False)
        else:
            self.norm = Empty(out_channels)
            self.linear = nn.Linear(in_channels, out_channels, bias=True)

    def forward(self, inputs):
        # [K, T, 7] tensordot [7, units] = [K, T, units]
        voxel_count = inputs.shape[1]
        x = self.linear(inputs)
        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
            0, 2, 1).contiguous()
        pointwise = F.relu(x)
        # [K, T, units]
        if self.max_out:
            aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]
        else:
            # this is for the fusion layer
            return pointwise

        if not self.cat_max:
            return aggregated.squeeze(1)
        else:
            # [K, 1, units]
            repeated = aggregated.repeat(1, voxel_count, 1)
            concatenated = torch.cat([pointwise, repeated], dim=2)
            # [K, T, 2 * units]
            return concatenated


class PFNLayer(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 use_norm=True,
                 last_layer=False,
                 mode='max'):
        """Pillar Feature Net Layer.

        The Pillar Feature Net is composed of a series of these layers, but
        the PointPillars paper results only used a single PFNLayer.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            use_norm (bool): Whether to include BatchNorm.
            last_layer (bool): If last_layer, there is no concatenation of
                features.
        """
        super().__init__()
        self.name = 'PFNLayer'
        self.last_vfe = last_layer
        if not self.last_vfe:
            out_channels = out_channels // 2
        self.units = out_channels

        if use_norm:
            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
            self.linear = nn.Linear(in_channels, self.units, bias=False)
        else:
            self.norm = Empty(self.units)
            self.linear = nn.Linear(in_channels, self.units, bias=True)

        self.mode = mode

    def forward(self, inputs, num_voxels=None, aligned_distance=None):
        x = self.linear(inputs)
        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
            0, 2, 1).contiguous()
        x = F.relu(x)

        if self.mode == 'max':
            if aligned_distance is not None:
                x = x.mul(aligned_distance.unsqueeze(-1))
            x_max = torch.max(x, dim=1, keepdim=True)[0]
        elif self.mode == 'avg':
            if aligned_distance is not None:
                x = x.mul(aligned_distance.unsqueeze(-1))
            x_max = x.sum(
                dim=1, keepdim=True) / num_voxels.type_as(inputs).view(
                    -1, 1, 1)

        if self.last_vfe:
            return x_max
        else:
            x_repeat = x_max.repeat(1, inputs.shape[1], 1)
            x_concatenated = torch.cat([x, x_repeat], dim=2)
            return x_concatenated
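A worked example (illustrative, not part of the commit) of get_paddings_indicator, matching the shapes sketched in the comments above: three voxels holding 3, 4 and 2 real points, padded to max_num = 5.

import torch

actual_num = torch.tensor([3, 4, 2])
mask = get_paddings_indicator(actual_num, max_num=5)
# mask marks real entries per voxel row:
# [[ True,  True,  True, False, False],
#  [ True,  True,  True,  True, False],
#  [ True,  True, False, False, False]]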
mmdet3d/models/voxel_encoders/voxel_encoder.py (new file, mode 100644)

import torch
from torch import nn
from torch.nn import functional as F

from mmdet3d.ops import DynamicScatter
from .. import builder
from ..registry import VOXEL_ENCODERS
from ..utils import build_norm_layer
from .utils import Empty, VFELayer, get_paddings_indicator


@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractor(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=[32, 128],
                 with_distance=False,
                 name='VoxelFeatureExtractor'):
        super(VoxelFeatureExtractor, self).__init__()
        self.name = name
        assert len(num_filters) == 2
        num_input_features += 3  # add mean features
        if with_distance:
            num_input_features += 1
        self._with_distance = with_distance

        self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
        self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
        if use_norm:
            self.linear = nn.Linear(
                num_filters[1], num_filters[1], bias=False)
            self.norm = nn.BatchNorm1d(
                num_filters[1], eps=1e-3, momentum=0.01)
        else:
            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
            self.norm = Empty(num_filters[1])

    def forward(self, features, num_voxels, **kwargs):
        # features: [concatenated_num_points, num_voxel_size, 3(4)]
        # num_voxels: [concatenated_num_points]
        points_mean = features[:, :, :3].sum(
            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
        features_relative = features[:, :, :3] - points_mean
        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features = torch.cat([features, features_relative, points_dist],
                                 dim=-1)
        else:
            features = torch.cat([features, features_relative], dim=-1)
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        x = self.vfe1(features)
        x *= mask
        x = self.vfe2(x)
        x *= mask
        x = self.linear(x)
        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
            0, 2, 1).contiguous()
        x = F.relu(x)
        x *= mask
        # x: [concatenated_num_points, num_voxel_size, 128]
        voxelwise = torch.max(x, dim=1)[0]
        return voxelwise


@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractorV2(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=[32, 128],
                 with_distance=False,
                 name='VoxelFeatureExtractor'):
        super(VoxelFeatureExtractorV2, self).__init__()
        self.name = name
        assert len(num_filters) > 0
        num_input_features += 3
        if with_distance:
            num_input_features += 1
        self._with_distance = with_distance

        num_filters = [num_input_features] + num_filters
        filters_pairs = [[num_filters[i], num_filters[i + 1]]
                         for i in range(len(num_filters) - 1)]
        self.vfe_layers = nn.ModuleList(
            [VFELayer(i, o, use_norm) for i, o in filters_pairs])
        if use_norm:
            self.linear = nn.Linear(
                num_filters[-1], num_filters[-1], bias=False)
            self.norm = nn.BatchNorm1d(
                num_filters[-1], eps=1e-3, momentum=0.01)
        else:
            self.linear = nn.Linear(
                num_filters[-1], num_filters[-1], bias=True)
            self.norm = Empty(num_filters[-1])

    def forward(self, features, num_voxels, **kwargs):
        # features: [concatenated_num_points, num_voxel_size, 3(4)]
        # num_voxels: [concatenated_num_points]
        points_mean = features[:, :, :3].sum(
            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
        features_relative = features[:, :, :3] - points_mean
        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features = torch.cat([features, features_relative, points_dist],
                                 dim=-1)
        else:
            features = torch.cat([features, features_relative], dim=-1)
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        for vfe in self.vfe_layers:
            features = vfe(features)
            features *= mask
        features = self.linear(features)
        features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
            0, 2, 1).contiguous()
        features = F.relu(features)
        features *= mask
        # x: [concatenated_num_points, num_voxel_size, 128]
        voxelwise = torch.max(features, dim=1)[0]
        return voxelwise


@VOXEL_ENCODERS.register_module
class VoxelFeatureExtractorV3(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 use_norm=True,
                 num_filters=[32, 128],
                 with_distance=False,
                 name='VoxelFeatureExtractor'):
        super(VoxelFeatureExtractorV3, self).__init__()
        self.name = name

    def forward(self, features, num_points, coors):
        # features: [concatenated_num_points, num_voxel_size, 3(4)]
        # num_points: [concatenated_num_points]
        points_mean = features[:, :, :4].sum(
            dim=1, keepdim=False) / num_points.type_as(features).view(-1, 1)
        return points_mean.contiguous()


@VOXEL_ENCODERS.register_module
class DynamicVFEV3(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
        super(DynamicVFEV3, self).__init__()
        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

    @torch.no_grad()
    def forward(self, features, coors):
        # This function is used from the start of the voxelnet
        # num_points: [concatenated_num_points]
        features, features_coors = self.scatter(features, coors)
        return features, features_coors


@VOXEL_ENCODERS.register_module
class DynamicVFE(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 num_filters=[],
                 with_distance=False,
                 with_cluster_center=False,
                 with_voxel_center=False,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 mode='max',
                 fusion_layer=None,
                 return_point_feats=False):
        super(DynamicVFE, self).__init__()
        assert len(num_filters) > 0
        if with_cluster_center:
            num_input_features += 3
        if with_voxel_center:
            num_input_features += 3
        if with_distance:
            num_input_features += 1  # points_dist is a single channel
        self.num_input_features = num_input_features
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
        self.return_point_feats = return_point_feats

        # Need pillar (voxel) size and x/y offset in order to calculate
        # the offset
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]
        self.point_cloud_range = point_cloud_range
        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

        num_filters = [self.num_input_features] + list(num_filters)
        vfe_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            if i > 0:
                in_filters *= 2
            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
            vfe_layers.append(
                nn.Sequential(
                    nn.Linear(in_filters, out_filters, bias=False),
                    norm_layer,
                    nn.ReLU(inplace=True)))
        self.vfe_layers = nn.ModuleList(vfe_layers)
        self.num_vfe = len(vfe_layers)
        self.vfe_scatter = DynamicScatter(voxel_size, point_cloud_range,
                                          (mode != 'max'))
        self.cluster_scatter = DynamicScatter(
            voxel_size, point_cloud_range, average_points=True)
        self.fusion_layer = None
        if fusion_layer is not None:
            self.fusion_layer = builder.build_fusion_layer(fusion_layer)

    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
        # Step 1: scatter voxels into the canvas
        # Calculate necessary things for canvas creation
        canvas_z = int(
            (self.point_cloud_range[5] - self.point_cloud_range[2]) / self.vz)
        canvas_y = int(
            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)
        canvas_x = int(
            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)
        # canvas_channel = voxel_mean.size(1)
        batch_size = pts_coors[-1, 0] + 1
        canvas_len = canvas_z * canvas_y * canvas_x * batch_size
        # Create the canvas for this sample
        canvas = voxel_mean.new_zeros(canvas_len, dtype=torch.long)
        # Only include non-empty pillars
        indices = (
            voxel_coors[:, 0] * canvas_z * canvas_y * canvas_x +
            voxel_coors[:, 1] * canvas_y * canvas_x +
            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
        # Scatter the blob back to the canvas
        canvas[indices.long()] = torch.arange(
            start=0, end=voxel_mean.size(0), device=voxel_mean.device)

        # Step 2: get the voxel mean for each point
        voxel_index = (
            pts_coors[:, 0] * canvas_z * canvas_y * canvas_x +
            pts_coors[:, 1] * canvas_y * canvas_x +
            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
        voxel_inds = canvas[voxel_index.long()]
        center_per_point = voxel_mean[voxel_inds, ...]
        return center_per_point

    def forward(self,
                features,
                coors,
                points=None,
                img_feats=None,
                img_meta=None):
        """
        features (torch.Tensor): NxC
        coors (torch.Tensor): Nx(1+NDim)
        """
        features_ls = [features]
        # Find the distance of x, y, and z from the cluster center
        if self._with_cluster_center:
            voxel_mean, mean_coors = self.cluster_scatter(features, coors)
            points_mean = self.map_voxel_center_to_point(
                coors, voxel_mean, mean_coors)
            # TODO: maybe also do cluster for reflectivity
            f_cluster = features[:, :3] - points_mean[:, :3]
            features_ls.append(f_cluster)

        # Find the distance of x, y, and z from the pillar center
        if self._with_voxel_center:
            f_center = features.new_zeros(size=(features.size(0), 3))
            f_center[:, 0] = features[:, 0] - (
                coors[:, 3].type_as(features) * self.vx + self.x_offset)
            f_center[:, 1] = features[:, 1] - (
                coors[:, 2].type_as(features) * self.vy + self.y_offset)
            f_center[:, 2] = features[:, 2] - (
                coors[:, 1].type_as(features) * self.vz + self.z_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)
            features_ls.append(points_dist)

        # Combine the feature decorations
        features = torch.cat(features_ls, dim=-1)

        for i, vfe in enumerate(self.vfe_layers):
            point_feats = vfe(features)
            if (i == len(self.vfe_layers) - 1
                    and self.fusion_layer is not None
                    and img_feats is not None):
                point_feats = self.fusion_layer(img_feats, points,
                                                point_feats, img_meta)
            voxel_feats, voxel_coors = self.vfe_scatter(point_feats, coors)
            if i != len(self.vfe_layers) - 1:
                # need to concat voxel feats if it is not the last vfe
                feat_per_point = self.map_voxel_center_to_point(
                    coors, voxel_feats, voxel_coors)
                features = torch.cat([point_feats, feat_per_point], dim=1)

        if self.return_point_feats:
            return point_feats
        return voxel_feats, voxel_coors


@VOXEL_ENCODERS.register_module
class HardVFE(nn.Module):

    def __init__(self,
                 num_input_features=4,
                 num_filters=[],
                 with_distance=False,
                 with_cluster_center=False,
                 with_voxel_center=False,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 mode='max',
                 fusion_layer=None,
                 return_point_feats=False):
        super(HardVFE, self).__init__()
        assert len(num_filters) > 0
        if with_cluster_center:
            num_input_features += 3
        if with_voxel_center:
            num_input_features += 3
        if with_distance:
            num_input_features += 1  # points_dist is a single channel
        self.num_input_features = num_input_features
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
        self.return_point_feats = return_point_feats

        # Need pillar (voxel) size and x/y offset to calculate the
        # pillar offset
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]
        self.point_cloud_range = point_cloud_range
        self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

        num_filters = [self.num_input_features] + list(num_filters)
        vfe_layers = []
        for i in range(len(num_filters) - 1):
            in_filters = num_filters[i]
            out_filters = num_filters[i + 1]
            if i > 0:
                in_filters *= 2
            # TODO: pass norm_cfg to VFE
            # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
            if i == (len(num_filters) - 2):
                cat_max = False
                max_out = True
                if fusion_layer:
                    max_out = False
            else:
                max_out = True
                cat_max = True
            vfe_layers.append(
                VFELayer(
                    in_filters,
                    out_filters,
                    norm_cfg=norm_cfg,
                    max_out=max_out,
                    cat_max=cat_max))
        self.vfe_layers = nn.ModuleList(vfe_layers)
        self.num_vfe = len(vfe_layers)

        self.fusion_layer = None
        if fusion_layer is not None:
            self.fusion_layer = builder.build_fusion_layer(fusion_layer)

    def forward(self,
                features,
                num_points,
                coors,
                img_feats=None,
                img_meta=None):
        """
        features (torch.Tensor): NxMxC
        coors (torch.Tensor): Nx(1+NDim)
        """
        features_ls = [features]
        # Find the distance of x, y, and z from the cluster center
        if self._with_cluster_center:
            points_mean = (
                features[:, :, :3].sum(dim=1, keepdim=True) /
                num_points.type_as(features).view(-1, 1, 1))
            # TODO: maybe also do cluster for reflectivity
            f_cluster = features[:, :, :3] - points_mean
            features_ls.append(f_cluster)

        # Find the distance of x, y, and z from the pillar center
        if self._with_voxel_center:
            f_center = features.new_zeros(
                size=(features.size(0), features.size(1), 3))
            f_center[:, :, 0] = features[:, :, 0] - (
                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
                self.x_offset)
            f_center[:, :, 1] = features[:, :, 1] - (
                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
                self.y_offset)
            f_center[:, :, 2] = features[:, :, 2] - (
                coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
                self.z_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine the feature decorations
        voxel_feats = torch.cat(features_ls, dim=-1)
        # The feature decorations were calculated without regard to whether
        # a pillar was empty.
        # Need to ensure that empty voxels remain set to zeros.
        voxel_count = voxel_feats.shape[1]
        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
        voxel_feats *= mask.unsqueeze(-1).type_as(voxel_feats)

        for vfe in self.vfe_layers:
            voxel_feats = vfe(voxel_feats)

        if (self.fusion_layer is not None and img_feats is not None):
            voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                coors, img_feats, img_meta)

        return voxel_feats

    def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                         img_meta):
        # the features consist of a batch of points
        batch_size = coors[-1, 0] + 1
        points = []
        for i in range(batch_size):
            single_mask = (coors[:, 0] == i)
            points.append(features[single_mask][mask[single_mask]])

        point_feats = voxel_feats[mask]
        point_feats = self.fusion_layer(img_feats, points, point_feats,
                                        img_meta)

        voxel_canvas = voxel_feats.new_zeros(
            size=(voxel_feats.size(0), voxel_feats.size(1),
                  point_feats.size(-1)))
        voxel_canvas[mask] = point_feats
        out = torch.max(voxel_canvas, dim=1)[0]
        return out
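A minimal sketch (not part of the commit) of the canvas indexing used by DynamicVFE.map_voxel_center_to_point above: coordinates [batch, z, y, x] are raveled row-major into a flat index, so a point and the voxel it falls in hit the same canvas slot. The sizes here are toy values.

import torch

canvas_z, canvas_y, canvas_x = 2, 3, 4
coor = torch.tensor([1, 0, 2, 3])  # [batch, z, y, x]
flat = (coor[0] * canvas_z * canvas_y * canvas_x +
        coor[1] * canvas_y * canvas_x + coor[2] * canvas_x + coor[3])
assert flat == 1 * 24 + 0 * 12 + 2 * 4 + 3 == 35  # row-major raveling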
mmdet3d/ops/__init__.py (new file, mode 100644)

from mmdet.ops import (RoIAlign, SigmoidFocalLoss, build_norm_layer,
                       get_compiler_version, get_compiling_cuda_version, nms,
                       roi_align, sigmoid_focal_loss)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization

__all__ = [
    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
    'get_compiling_cuda_version', 'build_conv_layer', 'build_norm_layer',
    'batched_nms', 'Voxelization', 'voxelization', 'dynamic_scatter',
    'DynamicScatter', 'sigmoid_focal_loss', 'SigmoidFocalLoss'
]
mmdet3d/ops/iou3d/__init__.py (new file, mode 100644)

from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu,
                          nms_normal_gpu)

__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu']
mmdet3d/ops/iou3d/iou3d_utils.py (new file, mode 100644)

import torch

from . import iou3d_cuda


def boxes_iou_bev(boxes_a, boxes_b):
    """
    :param boxes_a: (M, 5)
    :param boxes_b: (N, 5)
    :return:
        ans_iou: (M, N)
    """
    ans_iou = torch.cuda.FloatTensor(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_()

    iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(),
                                 ans_iou)

    return ans_iou


def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
    """
    :param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
    :param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
    :param mode: "iou" (intersection over union) or "iof" (intersection over
        foreground).
    :return:
        ans_iou: (M, N)
    """
    boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
    boxes_b_bev = boxes3d_to_bev_torch(boxes_b)

    # bev overlap
    overlaps_bev = torch.cuda.FloatTensor(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_()  # (N, M)
    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
                                     boxes_b_bev.contiguous(), overlaps_bev)

    # height overlap
    boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1)
    boxes_a_height_max = boxes_a[:, 1].view(-1, 1)
    boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1)
    boxes_b_height_max = boxes_b[:, 1].view(1, -1)

    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)

    # 3d iou
    overlaps_3d = overlaps_bev * overlaps_h

    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)

    if mode == 'iou':
        # the clamp function is used to avoid division by 0
        iou3d = overlaps_3d / torch.clamp(
            vol_a + vol_b - overlaps_3d, min=1e-8)
    else:
        iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8)

    return iou3d


def nms_gpu(boxes, scores, thresh):
    """
    :param boxes: (N, 5) [x1, y1, x2, y2, ry]
    :param scores: (N)
    :param thresh:
    :return:
    """
    # areas = (x2 - x1) * (y2 - y1)
    order = scores.sort(0, descending=True)[1]

    boxes = boxes[order].contiguous()

    keep = torch.LongTensor(boxes.size(0))
    num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh)
    return order[keep[:num_out].cuda()].contiguous()


def nms_normal_gpu(boxes, scores, thresh):
    """
    :param boxes: (N, 5) [x1, y1, x2, y2, ry]
    :param scores: (N)
    :param thresh:
    :return:
    """
    # areas = (x2 - x1) * (y2 - y1)
    order = scores.sort(0, descending=True)[1]

    boxes = boxes[order].contiguous()

    keep = torch.LongTensor(boxes.size(0))
    num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
    return order[keep[:num_out].cuda()].contiguous()


def boxes3d_to_bev_torch(boxes3d):
    """
    :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
    :return:
        boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
    """
    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))

    cu, cv = boxes3d[:, 0], boxes3d[:, 2]
    half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev
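A minimal CPU sketch (not part of the commit) of the height-overlap step in boxes_iou3d_gpu above. In KITTI camera coordinates y points downward and a box's y is its bottom face, so a box with y = 1.5 and h = 1.5 spans heights [0.0, 1.5].

import torch

boxes_a = torch.tensor([[0., 1.5, 10., 1.5, 1.6, 3.9, 0.]])  # [x,y,z,h,w,l,ry]
boxes_b = torch.tensor([[0., 1.0, 10., 1.5, 1.6, 3.9, 0.]])  # shifted up 0.5

a_min, a_max = boxes_a[:, 1] - boxes_a[:, 3], boxes_a[:, 1]  # [0.0], [1.5]
b_min, b_max = boxes_b[:, 1] - boxes_b[:, 3], boxes_b[:, 1]  # [-0.5], [1.0]
overlaps_h = torch.clamp(
    torch.min(a_max, b_max) - torch.max(a_min, b_min), min=0)
print(overlaps_h)  # tensor([1.]) -- one metre of vertical overlap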
mmdet3d/ops/iou3d/setup.py (new file, mode 100644)

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='iou3d',
    ext_modules=[
        CUDAExtension(
            'iou3d_cuda',
            [
                'src/iou3d.cpp',
                'src/iou3d_kernel.cu',
            ],
            extra_compile_args={
                'cxx': ['-g', '-I /usr/local/cuda/include'],
                'nvcc': ['-O2']
            })
    ],
    cmdclass={'build_ext': BuildExtension})
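A usage note, assuming the standard torch.utils.cpp_extension workflow (the commit itself ships no build instructions): a setup script like this is typically run as `python setup.py build_ext --inplace` from inside mmdet3d/ops/iou3d, which compiles the listed CUDA sources into an `iou3d_cuda` module importable next to iou3d_utils.py.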