OpenDAS / OpenPCDet · Commits

Commit 5666ea67, authored Jun 18, 2022 by Shaoshuai Shi
Merge branch 'focalsconv' of https://github.com/yukang2017/OpenPCDet into yukang2017-focalsconv
Parents: dadda9ed, f4071498
Showing 16 changed files with 1267 additions and 22 deletions (+1267 −22)
pcdet/datasets/augmentor/augmentor_utils.py  +12 −7
pcdet/datasets/augmentor/data_augmentor.py  +10 −7
pcdet/datasets/augmentor/database_sampler.py  +190 −5
pcdet/datasets/dataset.py  +17 −3
pcdet/datasets/kitti/kitti_dataset.py  +1 −0
pcdet/models/backbones_3d/__init__.py  +2 −0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py  +65 −0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py  +77 −0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py  +173 −0
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py  +225 −0
pcdet/models/backbones_3d/focal_sparse_conv/utils.py  +147 −0
pcdet/models/backbones_3d/spconv_backbone_focal.py  +258 −0
pcdet/models/detectors/pv_rcnn.py  +3 −0
pcdet/models/detectors/voxel_rcnn.py  +4 −0
pcdet/utils/box2d_utils.py  +46 −0
pcdet/utils/box_utils.py  +37 −0
pcdet/datasets/augmentor/augmentor_utils.py

@@ -5,7 +5,7 @@ from ...utils import common_utils
 from ...utils import box_utils


-def random_flip_along_x(gt_boxes, points):
+def random_flip_along_x(gt_boxes, points, return_flip=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -20,11 +20,12 @@ def random_flip_along_x(gt_boxes, points):
     if gt_boxes.shape[1] > 7:
         gt_boxes[:, 8] = -gt_boxes[:, 8]

+    if return_flip:
+        return gt_boxes, points, enable
     return gt_boxes, points


-def random_flip_along_y(gt_boxes, points):
+def random_flip_along_y(gt_boxes, points, return_flip=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -39,11 +40,12 @@ def random_flip_along_y(gt_boxes, points):
     if gt_boxes.shape[1] > 7:
         gt_boxes[:, 7] = -gt_boxes[:, 7]

+    if return_flip:
+        return gt_boxes, points, enable
     return gt_boxes, points


-def global_rotation(gt_boxes, points, rot_range):
+def global_rotation(gt_boxes, points, rot_range, return_rot=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -61,10 +63,12 @@ def global_rotation(gt_boxes, points, rot_range):
         np.array([noise_rotation]))[0][:, 0:2]

+    if return_rot:
+        return gt_boxes, points, noise_rotation
     return gt_boxes, points


-def global_scaling(gt_boxes, points, scale_range):
+def global_scaling(gt_boxes, points, scale_range, return_scale=False):
     """
     Args:
         gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
@@ -77,7 +81,8 @@ def global_scaling(gt_boxes, points, scale_range):
     noise_scale = np.random.uniform(scale_range[0], scale_range[1])
     points[:, :3] *= noise_scale
     gt_boxes[:, :6] *= noise_scale
+    if return_scale:
+        return gt_boxes, points, noise_scale
     return gt_boxes, points
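Aside (not part of the commit): a minimal usage sketch of the new return_* flags above; it assumes OpenPCDet is importable and the shapes are illustrative. The recorded flip/rotation/scale parameters are what the focal sparse conv later uses to map augmented voxel centers back to the original frame before projecting them into the image.

    # Hedged sketch: capture the augmentation parameters exposed by the new flags.
    import numpy as np
    from pcdet.datasets.augmentor import augmentor_utils

    gt_boxes = np.random.rand(4, 7)    # (N, 7): x, y, z, dx, dy, dz, heading
    points = np.random.rand(100, 4)    # (M, 4): x, y, z, intensity

    gt_boxes, points, flipped = augmentor_utils.random_flip_along_x(gt_boxes, points, return_flip=True)
    gt_boxes, points, noise_rot = augmentor_utils.global_rotation(gt_boxes, points, rot_range=[-0.785, 0.785], return_rot=True)
    gt_boxes, points, noise_scale = augmentor_utils.global_scaling(gt_boxes, points, [0.95, 1.05], return_scale=True)

    # e.g. undo the scaling to recover the original coordinates:
    points[:, :3] /= noise_scale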
pcdet/datasets/augmentor/data_augmentor.py

@@ -46,10 +46,11 @@ class DataAugmentor(object):
         gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
         for cur_axis in config['ALONG_AXIS_LIST']:
             assert cur_axis in ['x', 'y']
-            gt_boxes, points = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
-                gt_boxes, points,
-            )
+            gt_boxes, points, enable = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
+                gt_boxes, points, return_flip=True
+            )
+            data_dict['flip_%s' % cur_axis] = enable

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
         return data_dict
@@ -60,23 +61,25 @@ class DataAugmentor(object):
         rot_range = config['WORLD_ROT_ANGLE']
         if not isinstance(rot_range, list):
             rot_range = [-rot_range, rot_range]
-        gt_boxes, points = augmentor_utils.global_rotation(
-            data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
-        )
+        gt_boxes, points, noise_rot = augmentor_utils.global_rotation(
+            data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range, return_rot=True
+        )

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
+        data_dict['noise_rot'] = noise_rot
         return data_dict

     def random_world_scaling(self, data_dict=None, config=None):
         if data_dict is None:
             return partial(self.random_world_scaling, config=config)
-        gt_boxes, points = augmentor_utils.global_scaling(
-            data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE']
-        )
+        gt_boxes, points, noise_scale = augmentor_utils.global_scaling(
+            data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True
+        )

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
+        data_dict['noise_scale'] = noise_scale
         return data_dict

     def random_image_flip(self, data_dict=None, config=None):
pcdet/datasets/augmentor/database_sampler.py

@@ -3,18 +3,27 @@ import pickle
 import os
 import copy
 import numpy as np
+from skimage import io
+import torch
 import SharedArray
 import torch.distributed as dist

 from ...ops.iou3d_nms import iou3d_nms_utils
-from ...utils import box_utils, common_utils
+from ...utils import box_utils, common_utils, box2d_utils, calibration_kitti
+from pcdet.datasets.kitti.kitti_object_eval_python import kitti_common


 class DataBaseSampler(object):
     def __init__(self, root_path, sampler_cfg, class_names, logger=None):
         self.root_path = root_path
         self.class_names = class_names
         self.sampler_cfg = sampler_cfg
+
+        self.aug_with_img = sampler_cfg.get('AUG_WITH_IMAGE', False)
+        self.joint_sample = sampler_cfg.get('JOINT_SAMPLE', False)
+        self.keep_raw = sampler_cfg.get('KEEP_RAW', False)
+        self.box_iou_thres = sampler_cfg.get('BOX_IOU_THRES', 1.0)
+        self.aug_use_type = sampler_cfg.get('AUG_USE_TYPE', 'annotation')
+        self.point_refine = sampler_cfg.get('POINT_REFINE', False)
+
         self.logger = logger
         self.db_infos = {}
         for class_name in class_names:
@@ -153,12 +162,145 @@ class DataBaseSampler(object):
             gt_boxes[:, 2] -= mv_height  # lidar view
         return gt_boxes, mv_height

-    def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict):
+    def copy_paste_to_image_kitti(self, data_dict, crop_feat, gt_number, point_idxes=None):
+        image = data_dict['images']
+        boxes3d = data_dict['gt_boxes']
+        boxes2d = data_dict['gt_boxes2d']
+        corners_lidar = box_utils.boxes_to_corners_3d(boxes3d)
+        img_aug_type = self.sampler_cfg.IMG_AUG_TYPE
+
+        if 'depth' in img_aug_type:
+            paste_order = boxes3d[:, 0].argsort()
+            paste_order = paste_order[::-1]
+        else:
+            paste_order = np.arange(len(boxes3d), dtype=np.int)
+
+        if 'reverse' in img_aug_type:
+            paste_order = paste_order[::-1]
+
+        paste_mask = -255 * np.ones(image.shape[:2], dtype=np.int)
+        fg_mask = np.zeros(image.shape[:2], dtype=np.int)
+        overlap_mask = np.zeros(image.shape[:2], dtype=np.int)
+        depth_mask = np.zeros((*image.shape[:2], 2), dtype=np.float)
+        points_2d, depth_2d = data_dict['calib'].lidar_to_img(data_dict['points'][:, :3])
+        points_2d[:, 0] = np.clip(points_2d[:, 0], a_min=0, a_max=image.shape[1] - 1)
+        points_2d[:, 1] = np.clip(points_2d[:, 1], a_min=0, a_max=image.shape[0] - 1)
+        points_2d = points_2d.astype(np.int)
+        for _order in paste_order:
+            _box2d = boxes2d[_order]
+            image[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = crop_feat[_order]
+            overlap_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] += \
+                (paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] > 0).astype(np.int)
+            paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = _order
+
+            if 'cover' in self.aug_use_type:
+                # HxWx2 for min and max depth of each box region
+                depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 0] = corners_lidar[_order, :, 0].min()
+                depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 1] = corners_lidar[_order, :, 0].max()
+
+            # foreground area of original point cloud in image plane
+            if _order < gt_number:
+                fg_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = 1
+
+        data_dict['images'] = image
+        if not self.joint_sample:
+            return data_dict
+
+        new_mask = paste_mask[points_2d[:, 1], points_2d[:, 0]] == (point_idxes + gt_number)
+        if self.keep_raw:
+            raw_mask = point_idxes == -1
+        else:
+            raw_fg = (fg_mask == 1) & (paste_mask >= 0) & (paste_mask < gt_number)
+            raw_bg = (fg_mask == 0) & (paste_mask < 0)
+            raw_mask = raw_fg[points_2d[:, 1], points_2d[:, 0]] | raw_bg[points_2d[:, 1], points_2d[:, 0]]
+        keep_mask = new_mask | raw_mask
+        data_dict['points_2d'] = points_2d
+
+        if 'annotation' in self.aug_use_type:
+            data_dict['points'] = data_dict['points'][keep_mask]
+            data_dict['points_2d'] = data_dict['points_2d'][keep_mask]
+        elif 'projection' in self.aug_use_type:
+            overlap_mask[overlap_mask >= 1] = 1
+            data_dict['overlap_mask'] = overlap_mask
+            if 'cover' in self.aug_use_type:
+                data_dict['depth_mask'] = depth_mask
+
+        return data_dict
+
+    def collect_image_crops_kitti(self, info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx):
+        calib_file = kitti_common.get_calib_path(int(info['image_idx']), self.root_path, relative_path=False)
+        sampled_calib = calibration_kitti.Calibration(calib_file)
+        points_2d, depth_2d = sampled_calib.lidar_to_img(obj_points[:, :3])
+
+        if self.point_refine:
+            # align calibration metrics for points
+            points_ract = data_dict['calib'].img_to_rect(points_2d[:, 0], points_2d[:, 1], depth_2d)
+            points_lidar = data_dict['calib'].rect_to_lidar(points_ract)
+            obj_points[:, :3] = points_lidar
+            # align calibration metrics for boxes
+            box3d_raw = sampled_gt_boxes[idx].reshape(1, -1)
+            box3d_coords = box_utils.boxes_to_corners_3d(box3d_raw)[0]
+            box3d_box, box3d_depth = sampled_calib.lidar_to_img(box3d_coords)
+            box3d_coord_rect = data_dict['calib'].img_to_rect(box3d_box[:, 0], box3d_box[:, 1], box3d_depth)
+            box3d_rect = box_utils.corners_rect_to_camera(box3d_coord_rect).reshape(1, -1)
+            box3d_lidar = box_utils.boxes3d_kitti_camera_to_lidar(box3d_rect, data_dict['calib'])
+            box2d = box_utils.boxes3d_kitti_camera_to_imageboxes(box3d_rect, data_dict['calib'],
+                                                                 data_dict['images'].shape[:2])
+            sampled_gt_boxes[idx] = box3d_lidar[0]
+            sampled_gt_boxes2d[idx] = box2d[0]
+
+        obj_idx = idx * np.ones(len(obj_points), dtype=np.int)
+
+        # copy crops from images
+        img_path = self.root_path / self.sampler_cfg.IMG_ROOT_PATH / (info['image_idx'] + '.png')
+        raw_image = io.imread(img_path)
+        raw_image = raw_image.astype(np.float32)
+        raw_center = info['bbox'].reshape(2, 2).mean(0)
+        new_box = sampled_gt_boxes2d[idx].astype(np.int)
+        new_shape = np.array([new_box[2] - new_box[0], new_box[3] - new_box[1]])
+        raw_box = np.concatenate([raw_center - new_shape / 2, raw_center + new_shape / 2]).astype(np.int)
+        raw_box[0::2] = np.clip(raw_box[0::2], a_min=0, a_max=raw_image.shape[1])
+        raw_box[1::2] = np.clip(raw_box[1::2], a_min=0, a_max=raw_image.shape[0])
+        if (raw_box[2] - raw_box[0]) != new_shape[0] or (raw_box[3] - raw_box[1]) != new_shape[1]:
+            new_center = new_box.reshape(2, 2).mean(0)
+            new_shape = np.array([raw_box[2] - raw_box[0], raw_box[3] - raw_box[1]])
+            new_box = np.concatenate([new_center - new_shape / 2, new_center + new_shape / 2]).astype(np.int)
+
+        img_crop2d = raw_image[raw_box[1]:raw_box[3], raw_box[0]:raw_box[2]] / 255
+
+        return new_box, img_crop2d, obj_points, obj_idx
+
+    def sample_gt_boxes_2d_kitti(self, data_dict, sampled_boxes, iou1, iou2):
+        # filter out box2d iou > thres
+        if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+            sampled_boxes, mv_height = self.put_boxes_on_road_planes(
+                sampled_boxes, data_dict['road_plane'], data_dict['calib']
+            )
+
+        # sampled_boxes2d = np.stack([x['bbox'] for x in sampled_dict], axis=0).astype(np.float32)
+        boxes3d_camera = box_utils.boxes3d_lidar_to_kitti_camera(sampled_boxes, data_dict['calib'])
+        sampled_boxes2d = box_utils.boxes3d_kitti_camera_to_imageboxes(boxes3d_camera, data_dict['calib'],
+                                                                       data_dict['images'].shape[:2])
+        sampled_boxes2d = torch.Tensor(sampled_boxes2d)
+        existed_boxes2d = torch.Tensor(data_dict['gt_boxes2d'])
+        iou2d1 = box2d_utils.pairwise_iou(sampled_boxes2d, existed_boxes2d).cpu().numpy()
+        iou2d2 = box2d_utils.pairwise_iou(sampled_boxes2d, sampled_boxes2d).cpu().numpy()
+        iou2d2[range(sampled_boxes2d.shape[0]), range(sampled_boxes2d.shape[0])] = 0
+        iou2d1 = iou2d1 if iou2d1.shape[1] > 0 else iou2d2
+
+        valid_mask = ((iou2d1.max(axis=1) < self.box_iou_thres) &
+                      (iou2d2.max(axis=1) < self.box_iou_thres) &
+                      ((iou1.max(axis=1) + iou2.max(axis=1)) == 0)).nonzero()[0]
+
+        sampled_boxes2d = sampled_boxes2d[valid_mask].cpu().numpy()
+        return sampled_boxes2d, mv_height, valid_mask
+
+    def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict, mv_height=None, sampled_gt_boxes2d=None):
         gt_boxes_mask = data_dict['gt_boxes_mask']
         gt_boxes = data_dict['gt_boxes'][gt_boxes_mask]
         gt_names = data_dict['gt_names'][gt_boxes_mask]
         points = data_dict['points']
-        if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+        if self.sampler_cfg.get('USE_ROAD_PLANE', False) and not self.aug_with_img:
             sampled_gt_boxes, mv_height = self.put_boxes_on_road_planes(
                 sampled_gt_boxes, data_dict['road_plane'], data_dict['calib']
             )
@@ -166,6 +308,13 @@ class DataBaseSampler(object):
             data_dict.pop('road_plane')

         obj_points_list = []
+        # convert sampled 3D boxes to image plane
+        if self.aug_with_img:
+            obj_index_list, crop_boxes2d = [], []
+            gt_number = gt_boxes_mask.sum().astype(np.int)
+            gt_boxes2d = data_dict['gt_boxes2d'][gt_boxes_mask].astype(np.int)
+            gt_crops2d = [data_dict['images'][_x[1]:_x[3], _x[0]:_x[2]] for _x in gt_boxes2d]
+
         if self.use_shared_memory:
             gt_database_data = SharedArray.attach(f"shm://{self.gt_database_data_key}")
             gt_database_data.setflags(write=0)
@@ -187,6 +336,13 @@ class DataBaseSampler(object):
                 # mv height
                 obj_points[:, 2] -= mv_height[idx]

+            if self.aug_with_img:
+                new_box, img_crop2d, obj_points, obj_idx = self.collect_image_crops_kitti(
+                    info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx
+                )
+                crop_boxes2d.append(new_box)
+                gt_crops2d.append(img_crop2d)
+                obj_index_list.append(obj_idx)
+
             obj_points_list.append(obj_points)

         obj_points = np.concatenate(obj_points_list, axis=0)
@@ -202,6 +358,16 @@ class DataBaseSampler(object):
         data_dict['gt_boxes'] = gt_boxes
         data_dict['gt_names'] = gt_names
         data_dict['points'] = points
+
+        if self.aug_with_img:
+            obj_points_idx = np.concatenate(obj_index_list, axis=0)
+            point_idxes = -1 * np.ones(len(points), dtype=np.int)
+            point_idxes = np.concatenate([obj_points_idx, point_idxes], axis=0)
+            data_dict['gt_boxes2d'] = np.concatenate([gt_boxes2d, np.array(crop_boxes2d)], axis=0)
+            data_dict = self.copy_paste_to_image_kitti(data_dict, gt_crops2d, gt_number, point_idxes)
+            if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+                data_dict.pop('road_plane')
+
         return data_dict

     def __call__(self, data_dict):
@@ -217,6 +383,8 @@ class DataBaseSampler(object):
         gt_names = data_dict['gt_names'].astype(str)
         existed_boxes = gt_boxes
         total_valid_sampled_dict = []
+        sampled_mv_height = []
+        sampled_gt_boxes2d = []

         for class_name, sample_group in self.sample_groups.items():
             if self.limit_whole_scene:
                 num_gt = np.sum(class_name == gt_names)
@@ -234,6 +402,14 @@ class DataBaseSampler(object):
                 iou2[range(sampled_boxes.shape[0]), range(sampled_boxes.shape[0])] = 0
                 iou1 = iou1 if iou1.shape[1] > 0 else iou2
                 valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0).nonzero()[0]
+
+                if self.aug_with_img:
+                    sampled_boxes2d, mv_height, valid_mask = self.sample_gt_boxes_2d_kitti(data_dict, sampled_boxes, iou1, iou2)
+                    sampled_gt_boxes2d.append(sampled_boxes2d)
+                    if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+                        mv_height = mv_height[valid_mask]
+                        sampled_mv_height = np.concatenate((sampled_mv_height, mv_height), axis=0)
+
                 valid_sampled_dict = [sampled_dict[x] for x in valid_mask]
                 valid_sampled_boxes = sampled_boxes[valid_mask]
@@ -241,8 +417,17 @@ class DataBaseSampler(object):
             total_valid_sampled_dict.extend(valid_sampled_dict)

         sampled_gt_boxes = existed_boxes[gt_boxes.shape[0]:, :]
+
+        if self.aug_with_img:
+            if len(sampled_gt_boxes2d) > 0:
+                sampled_gt_boxes2d = np.concatenate(sampled_gt_boxes2d, axis=0)
+
         if total_valid_sampled_dict.__len__() > 0:
-            data_dict = self.add_sampled_boxes_to_scene(data_dict, sampled_gt_boxes, total_valid_sampled_dict)
+            data_dict = self.add_sampled_boxes_to_scene(
+                data_dict, sampled_gt_boxes, total_valid_sampled_dict, sampled_mv_height, sampled_gt_boxes2d
+            )

         data_dict.pop('gt_boxes_mask')
         return data_dict
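Aside (not part of the commit): every new sampler option above is read through sampler_cfg.get with a default, so the gt-sampling config controls the image-aware path. A hypothetical fragment, written as a Python dict; the key names come from the code above, but the values and one-line semantics are illustrative and should be checked against the released configs:

    sampler_cfg_extra = {
        'AUG_WITH_IMAGE': True,        # also paste object crops into the camera image
        'JOINT_SAMPLE': True,          # filter points so they stay consistent with the pasted crops
        'KEEP_RAW': False,             # if True, keep every original (point_idxes == -1) point
        'BOX_IOU_THRES': 0.5,          # reject sampled boxes whose 2D IoU with others exceeds this
        'AUG_USE_TYPE': 'annotation',  # 'annotation' / 'projection' / 'cover' select the mask outputs
        'POINT_REFINE': False,         # re-align sampled points/boxes with the target frame's calib
        'IMG_AUG_TYPE': 'by_depth',    # substrings 'depth' / 'reverse' control the paste order
        'IMG_ROOT_PATH': 'training/image_2',
    }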
pcdet/datasets/dataset.py

@@ -9,7 +9,6 @@ from .augmentor.data_augmentor import DataAugmentor
 from .processor.data_processor import DataProcessor
 from .processor.point_feature_encoder import PointFeatureEncoder
-

 class DatasetTemplate(torch_data.Dataset):
     def __init__(self, dataset_cfg=None, class_names=None, training=True, root_path=None, logger=None):
         super().__init__()
@@ -124,13 +123,14 @@ class DatasetTemplate(torch_data.Dataset):
             assert 'gt_boxes' in data_dict, 'gt_boxes should be provided for training'
             gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_)

+            calib = data_dict['calib']
             data_dict = self.data_augmentor.forward(
                 data_dict={
                     **data_dict,
                     'gt_boxes_mask': gt_boxes_mask
                 }
             )
+            data_dict['calib'] = calib

         if data_dict.get('gt_boxes', None) is not None:
             selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
             data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
@@ -205,7 +205,7 @@ class DatasetTemplate(torch_data.Dataset):
                         pad_w = common_utils.get_pad_params(desired_size=max_w, cur_size=image.shape[1])
                         pad_width = (pad_h, pad_w)
                         # Pad with nan, to be replaced later in the pipeline.
-                        pad_value = np.nan
+                        pad_value = 0  # np.nan

                         if key == "images":
                             pad_width = (pad_h, pad_w, (0, 0))
@@ -219,6 +219,20 @@ class DatasetTemplate(torch_data.Dataset):
                         images.append(image_pad)
                     ret[key] = np.stack(images, axis=0)
+                elif key in ['calib']:
+                    ret[key] = val
+                elif key in ["points_2d"]:
+                    max_len = max([len(_val) for _val in val])
+                    pad_value = 0
+                    points = []
+                    for _points in val:
+                        pad_width = ((0, max_len - len(_points)), (0, 0))
+                        points_pad = np.pad(_points,
+                                            pad_width=pad_width,
+                                            mode='constant',
+                                            constant_values=pad_value)
+                        points.append(points_pad)
+                    ret[key] = np.stack(points, axis=0)
                 else:
                     ret[key] = np.stack(val, axis=0)
             except:
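Aside (not part of the commit): the new "points_2d" collate branch pads variable-length per-sample arrays to a common length before stacking. A self-contained sketch of the same padding logic:

    import numpy as np

    batch = [np.ones((3, 2)), np.ones((5, 2))]   # two samples with different point counts
    max_len = max(len(v) for v in batch)
    padded = np.stack(
        [np.pad(v, ((0, max_len - len(v)), (0, 0)), mode='constant', constant_values=0) for v in batch],
        axis=0
    )
    print(padded.shape)  # (2, 5, 2)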
pcdet/datasets/kitti/kitti_dataset.py

@@ -421,6 +421,7 @@ class KittiDataset(DatasetTemplate):
         if "calib_matricies" in get_item_list:
             input_dict["trans_lidar_to_cam"], input_dict["trans_cam_to_img"] = kitti_utils.calib_to_matricies(calib)

+        input_dict['calib'] = calib
         data_dict = self.prepare_data(data_dict=input_dict)

         data_dict['image_shape'] = img_shape
pcdet/models/backbones_3d/__init__.py

 from .pointnet2_backbone import PointNet2Backbone, PointNet2MSG
 from .spconv_backbone import VoxelBackBone8x, VoxelResBackBone8x
+from .spconv_backbone_focal import VoxelBackBone8xFocal
 from .spconv_unet import UNetV2

 __all__ = {
@@ -8,4 +9,5 @@ __all__ = {
     'PointNet2Backbone': PointNet2Backbone,
     'PointNet2MSG': PointNet2MSG,
     'VoxelResBackBone8x': VoxelResBackBone8x,
+    'VoxelBackBone8xFocal': VoxelBackBone8xFocal,
 }
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py (new file, mode 100755)

import torch.nn as nn


class BasicBlock1D(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Initializes convolutional block
        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv1d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, **kwargs)
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies convolutional block
        Args:
            features: (B, C_in, L), Input features
        Returns:
            x: (B, C_out, L), Output features
        """
        x = self.conv(features)
        x = self.bn(x)
        x = self.relu(x)
        return x


class BasicBlock2D(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Initializes convolutional block
        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv2d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies convolutional block
        Args:
            features: (B, C_in, H, W), Input features
        Returns:
            x: (B, C_out, H, W), Output features
        """
        x = self.conv(features)
        x = self.bn(x)
        x = self.relu(x)
        return x
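Aside (not part of the commit): a usage sketch of BasicBlock2D. The 256 → 16 reduction with a 1x1 kernel mirrors the channel_reduce settings used later in spconv_backbone_focal.py; the input size is arbitrary.

    import torch

    block = BasicBlock2D(in_channels=256, out_channels=16, kernel_size=1, stride=1, bias=False)
    x = torch.randn(2, 256, 94, 311)   # (B, C_in, H, W)
    y = block(x)                       # Conv2d -> BatchNorm2d -> ReLU, shape (2, 16, 94, 311)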
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py (new file, mode 100755)

import torch
import torch.nn as nn
from .basic_blocks import BasicBlock2D
from .sem_deeplabv3 import SemDeepLabV3


class PyramidFeat2D(nn.Module):

    def __init__(self, optimize, model_cfg):
        """
        Initialize 2D feature network via pretrained model
        Args:
            model_cfg: EasyDict, Dense classification network config
        """
        super().__init__()
        self.model_cfg = model_cfg
        self.is_optimize = optimize

        # Create modules
        self.ifn = SemDeepLabV3(
            num_classes=model_cfg.num_class,
            backbone_name=model_cfg.backbone,
            **model_cfg.args
        )
        self.reduce_blocks = torch.nn.ModuleList()
        self.out_channels = {}
        for _idx, _channel in enumerate(model_cfg.channel_reduce["in_channels"]):
            _channel_out = model_cfg.channel_reduce["out_channels"][_idx]
            self.out_channels[model_cfg.args['feat_extract_layer'][_idx]] = _channel_out
            block_cfg = {"in_channels": _channel,
                         "out_channels": _channel_out,
                         "kernel_size": model_cfg.channel_reduce["kernel_size"][_idx],
                         "stride": model_cfg.channel_reduce["stride"][_idx],
                         "bias": model_cfg.channel_reduce["bias"][_idx]}
            self.reduce_blocks.append(BasicBlock2D(**block_cfg))

    def get_output_feature_dim(self):
        return self.out_channels

    def forward(self, images):
        """
        Extracts 2D image features from the segmentation backbone
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns:
            batch_dict:
                <layer>_feat2d: (N, C, H_out, W_out), Channel-reduced image features per extraction layer
        """
        batch_dict = {}
        ifn_result = self.ifn(images)

        for _idx, _layer in enumerate(self.model_cfg.args['feat_extract_layer']):
            image_features = ifn_result[_layer]
            # Channel reduce
            if self.reduce_blocks[_idx] is not None:
                image_features = self.reduce_blocks[_idx](image_features)

            batch_dict[_layer + "_feat2d"] = image_features

        if self.training:
            # detach feature from graph if not optimize
            if "logits" in ifn_result:
                ifn_result["logits"].detach_()
            if not self.is_optimize:
                image_features.detach_()

        return batch_dict

    def get_loss(self):
        """
        Gets loss
        Args:
        Returns:
            loss: (1), Network loss
            tb_dict: dict[float], All losses to log in tensorboard
        """
        return None, None
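Aside (not part of the commit): how the backbone consumes this module. Here cfg_dict stands for the ConfigDict built in spconv_backbone_focal.py below, and a CUDA device is assumed because SegTemplate moves the model and inputs to .cuda().

    import torch

    feat2d = PyramidFeat2D(optimize=True, model_cfg=cfg_dict)   # cfg_dict as in spconv_backbone_focal.py
    out = feat2d(torch.randn(1, 3, 375, 1242).cuda())           # a KITTI-sized image
    x_rgb = out['layer1_feat2d']   # (1, 16, H/4, W/4): ResNet50 layer1 features, channel-reduced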
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py (new file, mode 100755)

from collections import OrderedDict
from pathlib import Path
from torch import hub

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from kornia.enhance.normalize import normalize


class SegTemplate(nn.Module):
    def __init__(self, constructor, feat_extract_layer, num_classes, pretrained_path=None, aux_loss=None):
        """
        Initializes segmentation network.
        Args:
            constructor: function, Model constructor
            feat_extract_layer: string, Layer to extract features from
            num_classes: int, Number of classes
            pretrained_path: string, (Optional) Path of the model to load weights from
            aux_loss: bool, Flag to include auxiliary loss
        """
        super().__init__()
        self.num_classes = num_classes
        self.pretrained_path = pretrained_path
        self.pretrained = pretrained_path is not None
        self.aux_loss = aux_loss

        if self.pretrained:
            # Preprocess Module
            self.norm_mean = torch.Tensor([0.485, 0.456, 0.406])
            self.norm_std = torch.Tensor([0.229, 0.224, 0.225])

        # Model
        self.model = self.get_model(constructor=constructor)
        self.feat_extract_layer = feat_extract_layer
        return_layers = {_layer: _layer for _layer in feat_extract_layer}
        self.model.backbone.return_layers.update(return_layers)

    def get_model(self, constructor):
        """
        Get model
        Args:
            constructor: function, Model constructor
        Returns:
            model: nn.Module, Model
        """
        # Get model
        model = constructor(pretrained=False, pretrained_backbone=False,
                            num_classes=self.num_classes, aux_loss=self.aux_loss)

        # Update weights
        if self.pretrained_path is not None:
            model_dict = model.state_dict()

            # Download pretrained model if not available yet
            checkpoint_path = Path(self.pretrained_path)
            if not checkpoint_path.exists():
                checkpoint = checkpoint_path.name
                save_dir = checkpoint_path.parent
                save_dir.mkdir(parents=True, exist_ok=True)
                url = f'https://download.pytorch.org/models/{checkpoint}'
                hub.load_state_dict_from_url(url, save_dir)

            # Get pretrained state dict
            pretrained_dict = torch.load(self.pretrained_path)
            # pretrained_dict = self.filter_pretrained_dict(model_dict=model_dict, pretrained_dict=pretrained_dict)

            # Update current model state dict
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict, strict=False)

        return model.cuda()

    def filter_pretrained_dict(self, model_dict, pretrained_dict):
        """
        Removes layers from pretrained state dict that are not used or changed in model
        Args:
            model_dict: dict, Default model state dictionary
            pretrained_dict: dict, Pretrained model state dictionary
        Returns:
            pretrained_dict: dict, Pretrained model state dictionary with removed weights
        """
        # Removes aux classifier weights if not used
        if "aux_classifier.0.weight" in pretrained_dict and "aux_classifier.0.weight" not in model_dict:
            pretrained_dict = {key: value for key, value in pretrained_dict.items()
                               if "aux_classifier" not in key}

        # Removes final conv layer from weights if number of classes are different
        model_num_classes = model_dict["classifier.4.weight"].shape[0]
        pretrained_num_classes = pretrained_dict["classifier.4.weight"].shape[0]
        if model_num_classes != pretrained_num_classes:
            pretrained_dict.pop("classifier.4.weight")
            pretrained_dict.pop("classifier.4.bias")

        return pretrained_dict

    def forward(self, images):
        """
        Forward pass
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns
            result: dict[torch.Tensor], Feature extraction result
                features: (N, C, H_out, W_out), Image features
                logits: (N, num_classes, H_out, W_out), Classification logits
                aux: (N, num_classes, H_out, W_out), Auxiliary classification logits
        """
        # Preprocess images
        x = self.preprocess(images)

        # Extract features
        result = OrderedDict()
        features = self.model.backbone(x)
        for _layer in self.feat_extract_layer:
            result[_layer] = features[_layer]
        return result

        if 'features' in features.keys():
            feat_shape = features['features'].shape[-2:]
        else:
            feat_shape = features['layer1'].shape[-2:]

        # Prediction classification logits
        x = features["out"]
        # comment the classifier to reduce memory
        # x = self.model.classifier(x)
        # x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
        result["logits"] = x

        # Prediction auxiliary classification logits
        if self.model.aux_classifier is not None:
            x = features["aux"]
            x = self.model.aux_classifier(x)
            x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
            result["aux"] = x

        return result

    def preprocess(self, images):
        """
        Preprocess images
        Args:
            images: (N, 3, H, W), Input images
        Return
            x: (N, 3, H, W), Preprocessed images
        """
        x = images
        if self.pretrained:
            # Match ResNet pretrained preprocessing
            x = normalize(x, mean=self.norm_mean, std=self.norm_std)

        return x.cuda()


class SemDeepLabV3(SegTemplate):

    def __init__(self, backbone_name, **kwargs):
        """
        Initializes SemDeepLabV3 model
        Args:
            backbone_name: string, ResNet Backbone Name [ResNet50/ResNet101]
        """
        if backbone_name == "ResNet50":
            constructor = torchvision.models.segmentation.deeplabv3_resnet50
        elif backbone_name == "ResNet101":
            constructor = torchvision.models.segmentation.deeplabv3_resnet101
        else:
            raise NotImplementedError

        super().__init__(constructor=constructor, **kwargs)
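Aside (not part of the commit): constructing the segmentation trunk directly, mirroring the arguments PyramidFeat2D forwards. The checkpoint filename matches the default IMG_PRETRAIN path in spconv_backbone_focal.py; if the file is missing, get_model downloads it from download.pytorch.org under that name. A CUDA device is assumed.

    model = SemDeepLabV3(
        backbone_name='ResNet50',
        num_classes=21,                # COCO-pretrained head
        feat_extract_layer=['layer1'],
        pretrained_path='../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth',
    )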
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py (new file, mode 100644)

import torch
import torch.nn as nn
import spconv.pytorch as spconv
from pcdet.ops.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu
from pcdet.models.backbones_3d.focal_sparse_conv.utils import split_voxels, check_repeat, FocalLoss
from pcdet.utils import common_utils


class FocalSparseConv(spconv.SparseModule):
    expansion = 1

    def __init__(self, inplanes, planes, voxel_stride, norm_fn=None, indice_key=None, image_channel=3,
                 kernel_size=3, padding=1, mask_multi=False, use_img=False,
                 topk=False, threshold=0.5, skip_mask_kernel=False, enlarge_voxel_channels=-1,
                 point_cloud_range=[-3, -40, 0, 1, 40, 70.4],
                 voxel_size=[0.1, 0.05, 0.05]):
        super(FocalSparseConv, self).__init__()
        self.conv = spconv.SubMConv3d(inplanes, planes, kernel_size=kernel_size, stride=1, bias=False, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        offset_channels = kernel_size**3

        self.topk = topk
        self.threshold = threshold
        self.voxel_stride = voxel_stride
        self.focal_loss = FocalLoss()
        self.mask_multi = mask_multi
        self.skip_mask_kernel = skip_mask_kernel
        self.use_img = use_img

        voxel_channel = enlarge_voxel_channels if enlarge_voxel_channels > 0 else inplanes
        in_channels = image_channel + voxel_channel if use_img else voxel_channel

        self.conv_enlarge = spconv.SparseSequential(
            spconv.SubMConv3d(inplanes, enlarge_voxel_channels, kernel_size=3, stride=1,
                              padding=1, bias=False, indice_key=indice_key + '_enlarge'),
            norm_fn(enlarge_voxel_channels),
            nn.ReLU(True)) if enlarge_voxel_channels > 0 else None

        self.conv_imp = spconv.SubMConv3d(in_channels, offset_channels, kernel_size=3, stride=1,
                                          padding=1, bias=False, indice_key=indice_key + '_imp')

        _step = int(kernel_size // 2)
        kernel_offsets = [[i, j, k] for i in range(-_step, _step + 1)
                          for j in range(-_step, _step + 1)
                          for k in range(-_step, _step + 1)]
        kernel_offsets.remove([0, 0, 0])
        self.kernel_offsets = torch.Tensor(kernel_offsets).cuda()
        self.inv_idx = torch.Tensor([2, 1, 0]).long().cuda()
        self.point_cloud_range = torch.Tensor(point_cloud_range).cuda()
        self.voxel_size = torch.Tensor(voxel_size).cuda()

    def construct_multimodal_features(self, x, x_rgb, batch_dict, fuse_sum=False):
        """
        Construct the multimodal features with both lidar sparse features and image features.
        Args:
            x: [N, C] lidar sparse features
            x_rgb: [b, c, h, w] image features
            batch_dict: input and output information during forward
            fuse_sum: bool, manner for fusion, True - sum, False - concat
        Return:
            image_with_voxelfeatures: [N, C] fused multimodal features
        """
        batch_index = x.indices[:, 0]
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]
        calibs = batch_dict['calib']
        batch_size = batch_dict['batch_size']
        h, w = batch_dict['images'].shape[2:]

        if not x_rgb.shape == batch_dict['images'].shape:
            x_rgb = nn.functional.interpolate(x_rgb, (h, w), mode='bilinear')

        image_with_voxelfeatures = []
        voxels_2d_int_list = []
        filter_idx_list = []
        for b in range(batch_size):
            x_rgb_batch = x_rgb[b]

            calib = calibs[b]
            voxels_3d_batch = voxels_3d[batch_index == b]
            voxel_features_sparse = x.features[batch_index == b]

            # Reverse the point cloud transformations to the original coords.
            if 'noise_scale' in batch_dict:
                voxels_3d_batch[:, :3] /= batch_dict['noise_scale'][b]
            if 'noise_rot' in batch_dict:
                voxels_3d_batch = common_utils.rotate_points_along_z(voxels_3d_batch[:, self.inv_idx].unsqueeze(0), -batch_dict['noise_rot'][b].unsqueeze(0))[0, :, self.inv_idx]
            if 'flip_x' in batch_dict:
                voxels_3d_batch[:, 1] *= -1 if batch_dict['flip_x'][b] else 1
            if 'flip_y' in batch_dict:
                voxels_3d_batch[:, 2] *= -1 if batch_dict['flip_y'][b] else 1

            voxels_2d, _ = calib.lidar_to_img(voxels_3d_batch[:, self.inv_idx].cpu().numpy())
            voxels_2d_int = torch.Tensor(voxels_2d).to(x_rgb_batch.device).long()

            filter_idx = (0 <= voxels_2d_int[:, 1]) * (voxels_2d_int[:, 1] < h) * (0 <= voxels_2d_int[:, 0]) * (voxels_2d_int[:, 0] < w)

            filter_idx_list.append(filter_idx)
            voxels_2d_int = voxels_2d_int[filter_idx]
            voxels_2d_int_list.append(voxels_2d_int)

            image_features_batch = torch.zeros((voxel_features_sparse.shape[0], x_rgb_batch.shape[0]), device=x_rgb_batch.device)
            image_features_batch[filter_idx] = x_rgb_batch[:, voxels_2d_int[:, 1], voxels_2d_int[:, 0]].permute(1, 0)

            if fuse_sum:
                image_with_voxelfeature = image_features_batch + voxel_features_sparse
            else:
                image_with_voxelfeature = torch.cat([image_features_batch, voxel_features_sparse], dim=1)

            image_with_voxelfeatures.append(image_with_voxelfeature)

        image_with_voxelfeatures = torch.cat(image_with_voxelfeatures)
        return image_with_voxelfeatures

    def _gen_sparse_features(self, x, imps_3d, batch_dict, voxels_3d):
        """
        Generate the output sparse features from the focal sparse conv.
        Args:
            x: [N, C], lidar sparse features
            imps_3d: [N, kernelsize**3], the predicted importance values
            batch_dict: input and output information during forward
            voxels_3d: [N, 3], the 3d positions of voxel centers
        """
        batch_size = x.batch_size
        voxel_features_fore = []
        voxel_indices_fore = []
        voxel_features_back = []
        voxel_indices_back = []

        box_of_pts_cls_targets = []
        mask_voxels = []
        mask_kernel_list = []

        for b in range(batch_size):
            if self.training:
                index = x.indices[:, 0]
                batch_index = index == b
                mask_voxel = imps_3d[batch_index, -1].sigmoid()
                voxels_3d_batch = voxels_3d[batch_index].unsqueeze(0)
                mask_voxels.append(mask_voxel)
                gt_boxes = batch_dict['gt_boxes'][b, :, :-1].unsqueeze(0)
                box_of_pts_batch = points_in_boxes_gpu(voxels_3d_batch[:, :, self.inv_idx], gt_boxes).squeeze(0)
                box_of_pts_cls_targets.append(box_of_pts_batch >= 0)

            features_fore, indices_fore, features_back, indices_back, mask_kernel = split_voxels(
                x, b, imps_3d, voxels_3d, self.kernel_offsets,
                mask_multi=self.mask_multi, topk=self.topk, threshold=self.threshold)

            mask_kernel_list.append(mask_kernel)
            voxel_features_fore.append(features_fore)
            voxel_indices_fore.append(indices_fore)
            voxel_features_back.append(features_back)
            voxel_indices_back.append(indices_back)

        voxel_features_fore = torch.cat(voxel_features_fore, dim=0)
        voxel_indices_fore = torch.cat(voxel_indices_fore, dim=0)
        voxel_features_back = torch.cat(voxel_features_back, dim=0)
        voxel_indices_back = torch.cat(voxel_indices_back, dim=0)
        mask_kernel = torch.cat(mask_kernel_list, dim=0)

        x_fore = spconv.SparseConvTensor(voxel_features_fore, voxel_indices_fore, x.spatial_shape, x.batch_size)
        x_back = spconv.SparseConvTensor(voxel_features_back, voxel_indices_back, x.spatial_shape, x.batch_size)

        loss_box_of_pts = 0
        if self.training:
            mask_voxels = torch.cat(mask_voxels)
            box_of_pts_cls_targets = torch.cat(box_of_pts_cls_targets)
            mask_voxels_two_classes = torch.cat([1 - mask_voxels.unsqueeze(-1), mask_voxels.unsqueeze(-1)], dim=1)
            loss_box_of_pts = self.focal_loss(mask_voxels_two_classes, box_of_pts_cls_targets.long())

        return x_fore, x_back, loss_box_of_pts, mask_kernel

    def combine_out(self, x_fore, x_back, remove_repeat=False):
        """
        Combine the foreground and background sparse features together.
        Args:
            x_fore: [N1, C], foreground sparse features
            x_back: [N2, C], background sparse features
            remove_repeat: bool, whether to remove the spatial replicate features.
        """
        x_fore_features = torch.cat([x_fore.features, x_back.features], dim=0)
        x_fore_indices = torch.cat([x_fore.indices, x_back.indices], dim=0)

        if remove_repeat:
            index = x_fore_indices[:, 0]
            features_out_list = []
            indices_coords_out_list = []
            for b in range(x_fore.batch_size):
                batch_index = index == b
                features_out, indices_coords_out, _ = check_repeat(
                    x_fore_features[batch_index], x_fore_indices[batch_index], flip_first=False)
                features_out_list.append(features_out)
                indices_coords_out_list.append(indices_coords_out)
            x_fore_features = torch.cat(features_out_list, dim=0)
            x_fore_indices = torch.cat(indices_coords_out_list, dim=0)

        x_fore = x_fore.replace_feature(x_fore_features)
        x_fore.indices = x_fore_indices

        return x_fore

    def forward(self, x, batch_dict, x_rgb=None):
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]

        if self.use_img:
            features_multimodal = self.construct_multimodal_features(x, x_rgb, batch_dict)
            x_predict = spconv.SparseConvTensor(features_multimodal, x.indices, x.spatial_shape, x.batch_size)
        else:
            x_predict = self.conv_enlarge(x) if self.conv_enlarge else x

        imps_3d = self.conv_imp(x_predict).features

        x_fore, x_back, loss_box_of_pts, mask_kernel = self._gen_sparse_features(x, imps_3d, batch_dict, voxels_3d)

        if not self.skip_mask_kernel:
            x_fore = x_fore.replace_feature(x_fore.features * mask_kernel.unsqueeze(-1))
        out = self.combine_out(x_fore, x_back, remove_repeat=True)
        out = self.conv(out)

        if self.use_img:
            out = out.replace_feature(self.construct_multimodal_features(out, x_rgb, batch_dict, True))

        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))

        batch_dict['loss_box_of_pts'] += loss_box_of_pts
        return out, batch_dict
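Aside (not part of the commit): the core selection rule used by _gen_sparse_features via split_voxels, isolated on a dummy tensor. The last channel of imps_3d (after sigmoid) ranks voxels; with topk=True the top `threshold` fraction is treated as foreground and the rest as background.

    import torch

    mask_voxel = torch.rand(10)   # stand-in for imps_3d[batch_index, -1].sigmoid()
    threshold = 0.5
    _, order = mask_voxel.sort(descending=True)
    indices_fore = order[:int(mask_voxel.shape[0] * threshold)]   # kept, then dilated by kernel_offsets
    indices_back = order[int(mask_voxel.shape[0] * threshold):]   # kept, but not dilated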
pcdet/models/backbones_3d/focal_sparse_conv/utils.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps

    def one_hot(self, index, classes):
        size = index.size() + (classes,)
        view = index.size() + (1,)

        mask = torch.Tensor(*size).fill_(0).to(index.device)
        index = index.view(*view)
        ones = 1.

        if isinstance(index, Variable):
            ones = Variable(torch.Tensor(index.size()).fill_(1).to(index.device))
            mask = Variable(mask, volatile=index.volatile)

        return mask.scatter_(1, index, ones)

    def forward(self, input, target):
        y = self.one_hot(target, input.size(-1))
        logit = F.softmax(input, dim=-1)
        logit = logit.clamp(self.eps, 1. - self.eps)

        loss = -1 * y * torch.log(logit)          # cross entropy
        loss = loss * (1 - logit) ** self.gamma   # focal loss

        return loss.mean()


def sort_by_indices(features, indices, features_add=None):
    """
    To sort the sparse features with its indices in a convenient manner.
    Args:
        features: [N, C], sparse features
        indices: [N, 4], indices of sparse features
        features_add: [N, C], additional features to sort
    """
    idx = indices[:, 1:]
    idx_sum = idx.select(1, 0) * idx[:, 1].max() * idx[:, 2].max() + idx.select(1, 1) * idx[:, 2].max() + idx.select(1, 2)
    _, ind = idx_sum.sort()
    features = features[ind]
    indices = indices[ind]
    if not features_add is None:
        features_add = features_add[ind]
    return features, indices, features_add


def check_repeat(features, indices, features_add=None, sort_first=True, flip_first=True):
    """
    Check whether there are replicate indices in the sparse features,
    remove the replicate features if any.
    """
    if sort_first:
        features, indices, features_add = sort_by_indices(features, indices, features_add)

    if flip_first:
        features, indices = features.flip([0]), indices.flip([0])

    if not features_add is None:
        features_add = features_add.flip([0])

    idx = indices[:, 1:].int()
    idx_sum = torch.add(torch.add(idx.select(1, 0) * idx[:, 1].max() * idx[:, 2].max(), idx.select(1, 1) * idx[:, 2].max()), idx.select(1, 2))
    _unique, inverse, counts = torch.unique_consecutive(idx_sum, return_inverse=True, return_counts=True, dim=0)

    if _unique.shape[0] < indices.shape[0]:
        perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
        features_new = torch.zeros((_unique.shape[0], features.shape[-1]), device=features.device)
        features_new.index_add_(0, inverse.long(), features)
        features = features_new
        perm_ = inverse.new_empty(_unique.size(0)).scatter_(0, inverse, perm)
        indices = indices[perm_].int()

        if not features_add is None:
            features_add_new = torch.zeros((_unique.shape[0],), device=features_add.device)
            features_add_new.index_add_(0, inverse.long(), features_add)
            features_add = features_add_new / counts
    return features, indices, features_add


def split_voxels(x, b, imps_3d, voxels_3d, kernel_offsets, mask_multi=True, topk=True, threshold=0.5):
    """
    Generate and split the voxels into foreground and background sparse features, based on the predicted importance values.
    Args:
        x: [N, C], input sparse features
        b: int, batch size id
        imps_3d: [N, kernelsize**3], the predicted importance values
        voxels_3d: [N, 3], the 3d positions of voxel centers
        kernel_offsets: [kernelsize**3, 3], the offset coords in a kernel
        mask_multi: bool, whether to multiply the predicted mask to features
        topk: bool, whether to use topk or threshold for selection
        threshold: float, threshold value
    """
    index = x.indices[:, 0]
    batch_index = index == b
    indices_ori = x.indices[batch_index]
    features_ori = x.features[batch_index]
    mask_voxel = imps_3d[batch_index, -1].sigmoid()
    mask_kernel = imps_3d[batch_index, :-1].sigmoid()

    if mask_multi:
        features_ori *= mask_voxel.unsqueeze(-1)

    if topk:
        _, indices = mask_voxel.sort(descending=True)
        indices_fore = indices[:int(mask_voxel.shape[0] * threshold)]
        indices_back = indices[int(mask_voxel.shape[0] * threshold):]
    else:
        indices_fore = mask_voxel > threshold
        indices_back = mask_voxel <= threshold

    features_fore = features_ori[indices_fore]
    coords_fore = indices_ori[indices_fore]

    mask_kernel_fore = mask_kernel[indices_fore]
    mask_kernel_bool = mask_kernel_fore >= threshold
    voxel_kerels_imp = kernel_offsets.unsqueeze(0).repeat(mask_kernel_bool.shape[0], 1, 1)
    mask_kernel_fore = mask_kernel[indices_fore][mask_kernel_bool]
    indices_fore_kernels = coords_fore[:, 1:].unsqueeze(1).repeat(1, kernel_offsets.shape[0], 1)
    indices_with_imp = indices_fore_kernels + voxel_kerels_imp
    selected_indices = indices_with_imp[mask_kernel_bool]
    spatial_indices = (selected_indices[:, 0] > 0) * (selected_indices[:, 1] > 0) * (selected_indices[:, 2] > 0) * \
                      (selected_indices[:, 0] < x.spatial_shape[0]) * (selected_indices[:, 1] < x.spatial_shape[1]) * (selected_indices[:, 2] < x.spatial_shape[2])
    selected_indices = selected_indices[spatial_indices]
    mask_kernel_fore = mask_kernel_fore[spatial_indices]
    selected_indices = torch.cat([torch.ones((selected_indices.shape[0], 1), device=features_fore.device) * b, selected_indices], dim=1)

    selected_features = torch.zeros((selected_indices.shape[0], features_ori.shape[1]), device=features_fore.device)

    features_fore_cat = torch.cat([features_fore, selected_features], dim=0)
    coords_fore = torch.cat([coords_fore, selected_indices], dim=0)
    mask_kernel_fore = torch.cat([torch.ones(features_fore.shape[0], device=features_fore.device), mask_kernel_fore], dim=0)

    features_fore, coords_fore, mask_kernel_fore = check_repeat(features_fore_cat, coords_fore, features_add=mask_kernel_fore)

    features_back = features_ori[indices_back]
    coords_back = indices_ori[indices_back]

    return features_fore, coords_fore, features_back, coords_back, mask_kernel_fore
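Aside (not part of the commit): a minimal FocalLoss sketch. forward expects per-class scores [N, num_classes] (it applies softmax itself) and integer targets [N], which one_hot expands; this is the shape _gen_sparse_features uses with the two-class voxel masks.

    import torch

    criterion = FocalLoss(gamma=2.0)
    scores = torch.rand(8, 2)            # background / foreground-voxel scores
    targets = torch.randint(0, 2, (8,))
    loss = criterion(scores, targets)    # scalar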
pcdet/models/backbones_3d/spconv_backbone_focal.py (new file, mode 100755)

from functools import partial
import torch
import spconv.pytorch as spconv
import torch.nn as nn

from .focal_sparse_conv.focal_sparse_conv import FocalSparseConv
from .focal_sparse_conv.SemanticSeg.pyramid_ffn import PyramidFeat2D


class objDict:
    @staticmethod
    def to_object(obj: object, **data):
        obj.__dict__.update(data)


class ConfigDict:
    def __init__(self, name):
        self.name = name

    def __getitem__(self, item):
        return getattr(self, item)


class SparseSequentialBatchdict(spconv.SparseSequential):
    def __init__(self, *args, **kwargs):
        super(SparseSequentialBatchdict, self).__init__(*args, **kwargs)

    def forward(self, input, batch_dict=None):
        for k, module in self._modules.items():
            if module is None:
                continue
            if isinstance(module, (FocalSparseConv,)):
                input, batch_dict = module(input, batch_dict)
            else:
                input = module(input)
        return input, batch_dict


def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    if conv_type == 'subm':
        conv = spconv.SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key)
    elif conv_type == 'spconv':
        conv = spconv.SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding,
                                   bias=False, indice_key=indice_key)
    elif conv_type == 'inverseconv':
        conv = spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size, indice_key=indice_key, bias=False)
    else:
        raise NotImplementedError

    m = spconv.SparseSequential(
        conv,
        norm_fn(out_channels),
        nn.ReLU(True),
    )
    return m


class SparseBasicBlock(spconv.SparseModule):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
        super(SparseBasicBlock, self).__init__()

        assert norm_fn is not None
        bias = norm_fn is not None
        self.conv1 = spconv.SubMConv3d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        self.conv2 = spconv.SubMConv3d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))

        out = self.conv2(out)
        out = out.replace_feature(self.bn2(out.features))

        if self.downsample is not None:
            identity = self.downsample(x)

        out = out.replace_feature(out.features + identity.features)
        out = out.replace_feature(self.relu(out.features))

        return out


class VoxelBackBone8xFocal(nn.Module):
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)

        self.sparse_shape = grid_size[::-1] + [1, 0, 0]

        self.conv_input = spconv.SparseSequential(
            spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(True),
        )
        block = post_act_block

        use_img = model_cfg.get('USE_IMG', False)
        topk = model_cfg.get('TOPK', True)
        threshold = model_cfg.get('THRESHOLD', 0.5)
        kernel_size = model_cfg.get('KERNEL_SIZE', 3)
        mask_multi = model_cfg.get('MASK_MULTI', False)
        skip_mask_kernel = model_cfg.get('SKIP_MASK_KERNEL', False)
        skip_mask_kernel_image = model_cfg.get('SKIP_MASK_KERNEL_IMG', False)
        enlarge_voxel_channels = model_cfg.get('ENLARGE_VOXEL_CHANNELS', -1)
        img_pretrain = model_cfg.get('IMG_PRETRAIN', "../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth")

        if use_img:
            model_cfg_seg = dict(
                name='SemDeepLabV3',
                backbone='ResNet50',
                num_class=21,  # pretrained on COCO
                args={"feat_extract_layer": ["layer1"],
                      "pretrained_path": img_pretrain},
                channel_reduce={
                    "in_channels": [256],
                    "out_channels": [16],
                    "kernel_size": [1],
                    "stride": [1],
                    "bias": [False]
                }
            )
            cfg_dict = ConfigDict('SemDeepLabV3')
            objDict.to_object(cfg_dict, **model_cfg_seg)
            self.semseg = PyramidFeat2D(optimize=True, model_cfg=cfg_dict)

            self.conv_focal_multimodal = FocalSparseConv(16, 16,
                                                         image_channel=model_cfg_seg['channel_reduce']['out_channels'][0],
                                                         topk=topk, threshold=threshold, use_img=True,
                                                         skip_mask_kernel=skip_mask_kernel_image,
                                                         voxel_stride=1, norm_fn=norm_fn,
                                                         indice_key='spconv_focal_multimodal')

        special_spconv_fn = partial(FocalSparseConv, mask_multi=mask_multi,
                                    enlarge_voxel_channels=enlarge_voxel_channels,
                                    topk=topk, threshold=threshold, kernel_size=kernel_size,
                                    padding=kernel_size // 2, skip_mask_kernel=skip_mask_kernel)
        self.use_img = use_img

        self.conv1 = SparseSequentialBatchdict(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
            special_spconv_fn(16, 16, voxel_stride=1, norm_fn=norm_fn, indice_key='focal1'),
        )

        self.conv2 = SparseSequentialBatchdict(
            # [1600, 1408, 41] <- [800, 704, 21]
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            special_spconv_fn(32, 32, voxel_stride=2, norm_fn=norm_fn, indice_key='focal2'),
        )

        self.conv3 = SparseSequentialBatchdict(
            # [800, 704, 21] <- [400, 352, 11]
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            special_spconv_fn(64, 64, voxel_stride=4, norm_fn=norm_fn, indice_key='focal3'),
        )

        self.conv4 = SparseSequentialBatchdict(
            # [400, 352, 11] <- [200, 176, 5]
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )

        last_pad = 0
        last_pad = self.model_cfg.get('last_pad', last_pad)
        self.conv_out = spconv.SparseSequential(
            # [200, 150, 5] -> [200, 150, 2]
            spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
                                bias=False, indice_key='spconv_down2'),
            norm_fn(128),
            nn.ReLU(True),
        )
        self.num_point_features = 128
        self.backbone_channels = {
            'x_conv1': 16,
            'x_conv2': 32,
            'x_conv3': 64,
            'x_conv4': 64
        }

    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                vfe_features: (num_voxels, C)
                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
        Returns:
            batch_dict:
                encoded_spconv_tensor: sparse tensor
        """
        voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
        batch_size = batch_dict['batch_size']
        input_sp_tensor = spconv.SparseConvTensor(
            features=voxel_features,
            indices=voxel_coords.int(),
            spatial_shape=self.sparse_shape,
            batch_size=batch_size
        )
        batch_dict['loss_box_of_pts'] = 0
        x = self.conv_input(input_sp_tensor)

        x_conv1, batch_dict = self.conv1(x, batch_dict)

        if self.use_img:
            x_image = self.semseg(batch_dict['images'])['layer1_feat2d']
            x_conv1, batch_dict = self.conv_focal_multimodal(x_conv1, batch_dict, x_image)

        x_conv2, batch_dict = self.conv2(x_conv1, batch_dict)
        x_conv3, batch_dict = self.conv3(x_conv2, batch_dict)
        x_conv4, batch_dict = self.conv4(x_conv3, batch_dict)

        # for detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(x_conv4)

        batch_dict.update({
            'encoded_spconv_tensor': out,
            'encoded_spconv_tensor_stride': 8
        })
        batch_dict.update({
            'multi_scale_3d_features': {
                'x_conv1': x_conv1,
                'x_conv2': x_conv2,
                'x_conv3': x_conv3,
                'x_conv4': x_conv4,
            }
        })
        batch_dict.update({
            'multi_scale_3d_strides': {
                'x_conv1': 1,
                'x_conv2': 2,
                'x_conv3': 4,
                'x_conv4': 8,
            }
        })

        return batch_dict
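Aside (not part of the commit): the MODEL.BACKBONE_3D options this backbone reads via model_cfg.get, collected in one place. The key names and defaults are taken from __init__ above; shown as a Python dict, whereas the real configs express the same thing in YAML.

    backbone_3d_cfg = {
        'NAME': 'VoxelBackBone8xFocal',
        'USE_IMG': True,                # enable the SemDeepLabV3 branch + multimodal focal conv
        'TOPK': True,                   # default True
        'THRESHOLD': 0.5,               # default 0.5
        'KERNEL_SIZE': 3,               # default 3
        'MASK_MULTI': False,            # default False
        'SKIP_MASK_KERNEL': False,      # default False
        'SKIP_MASK_KERNEL_IMG': False,  # default False
        'ENLARGE_VOXEL_CHANNELS': -1,   # default -1 (disabled)
        'IMG_PRETRAIN': '../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth',
    }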
pcdet/models/detectors/pv_rcnn.py

@@ -12,6 +12,9 @@ class PVRCNN(Detector3DTemplate):
         if self.training:
             loss, tb_dict, disp_dict = self.get_training_loss()

+            if 'loss_box_of_pts' in batch_dict:
+                loss += batch_dict['loss_box_of_pts']
+                tb_dict['loss_box_of_pts'] = batch_dict['loss_box_of_pts']
             ret_dict = {
                 'loss': loss
pcdet/models/detectors/voxel_rcnn.py

@@ -13,6 +13,10 @@ class VoxelRCNN(Detector3DTemplate):
         if self.training:
             loss, tb_dict, disp_dict = self.get_training_loss()

+            if 'loss_box_of_pts' in batch_dict:
+                loss += batch_dict['loss_box_of_pts']
+                tb_dict['loss_box_of_pts'] = batch_dict['loss_box_of_pts']
+
             ret_dict = {
                 'loss': loss
             }
pcdet/utils/box2d_utils.py (new file, mode 100755)

import torch


def area(box) -> torch.Tensor:
    """
    Computes the area of all the boxes.
    Returns:
        torch.Tensor: a vector with areas of each box.
    """
    area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    return area


# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def pairwise_iou(boxes1, boxes2) -> torch.Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).
    Args:
        boxes1, boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
    Returns:
        Tensor: IoU, sized [N,M].
    """
    area1 = area(boxes1)
    area2 = area(boxes2)

    width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max(
        boxes1[:, None, :2], boxes2[:, :2]
    )  # [N,M,2]

    width_height.clamp_(min=0)  # [N,M,2]
    inter = width_height.prod(dim=2)  # [N,M]
    del width_height

    # handle empty boxes
    iou = torch.where(
        inter > 0,
        inter / (area1[:, None] + area2 - inter),
        torch.zeros(1, dtype=inter.dtype, device=inter.device),
    )
    return iou
\ No newline at end of file
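Aside (not part of the commit): a quick numeric check of pairwise_iou. Boxes are (xmin, ymin, xmax, ymax); the first pair overlaps in a 1x1 square, so IoU = 1 / (4 + 4 - 1).

    import torch

    boxes1 = torch.tensor([[0., 0., 2., 2.]])
    boxes2 = torch.tensor([[1., 1., 3., 3.], [4., 4., 5., 5.]])
    iou = pairwise_iou(boxes1, boxes2)   # tensor([[0.1429, 0.0000]])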
pcdet/utils/box_utils.py

@@ -52,6 +52,43 @@ def boxes_to_corners_3d(boxes3d):
     return corners3d.numpy() if is_numpy else corners3d

+def corners_rect_to_camera(corners):
+    """
+        7 -------- 4
+       /|         /|
+      6 -------- 5 .
+      | |        | |
+      . 3 -------- 0
+      |/         |/
+      2 -------- 1
+    Args:
+        corners: (8, 3) [x0, y0, z0, ...], (x, y, z) is the point coordinate in image rect
+    Returns:
+        boxes_rect: (7,) [x, y, z, l, h, w, r] in rect camera coords
+    """
+    height_group = [(0, 4), (1, 5), (2, 6), (3, 7)]
+    width_group = [(0, 1), (2, 3), (4, 5), (6, 7)]
+    length_group = [(0, 3), (1, 2), (4, 7), (5, 6)]
+    vector_group = [(0, 3), (1, 2), (4, 7), (5, 6)]
+    height, width, length = 0., 0., 0.
+    vector = np.zeros(2, dtype=np.float32)
+    for index_h, index_w, index_l, index_v in zip(height_group, width_group, length_group, vector_group):
+        height += np.linalg.norm(corners[index_h[0], :] - corners[index_h[1], :])
+        width += np.linalg.norm(corners[index_w[0], :] - corners[index_w[1], :])
+        length += np.linalg.norm(corners[index_l[0], :] - corners[index_l[1], :])
+        vector[0] += (corners[index_v[0], :] - corners[index_v[1], :])[0]
+        vector[1] += (corners[index_v[0], :] - corners[index_v[1], :])[2]
+
+    height, width, length = height * 1.0 / 4, width * 1.0 / 4, length * 1.0 / 4
+    rotation_y = -np.arctan2(vector[1], vector[0])
+
+    center_point = corners.mean(axis=0)
+    center_point[1] += height / 2
+    camera_rect = np.concatenate([center_point, np.array([length, height, width, rotation_y])])
+
+    return camera_rect
+
 def mask_boxes_outside_range_numpy(boxes, limit_range, min_num_corners=1):
     """