OpenDAS / OpenPCDet / Commits

Commit 5666ea67, authored Jun 18, 2022 by Shaoshuai Shi

Merge branch 'focalsconv' of https://github.com/yukang2017/OpenPCDet into yukang2017-focalsconv

Parents: dadda9ed, f4071498
Showing 16 changed files with 1267 additions and 22 deletions (+1267, -22)
pcdet/datasets/augmentor/augmentor_utils.py                               (+12, -7)
pcdet/datasets/augmentor/data_augmentor.py                                (+10, -7)
pcdet/datasets/augmentor/database_sampler.py                              (+190, -5)
pcdet/datasets/dataset.py                                                 (+17, -3)
pcdet/datasets/kitti/kitti_dataset.py                                     (+1, -0)
pcdet/models/backbones_3d/__init__.py                                     (+2, -0)
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py   (+65, -0)
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py    (+77, -0)
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py  (+173, -0)
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py          (+225, -0)
pcdet/models/backbones_3d/focal_sparse_conv/utils.py                      (+147, -0)
pcdet/models/backbones_3d/spconv_backbone_focal.py                        (+258, -0)
pcdet/models/detectors/pv_rcnn.py                                         (+3, -0)
pcdet/models/detectors/voxel_rcnn.py                                      (+4, -0)
pcdet/utils/box2d_utils.py                                                (+46, -0)
pcdet/utils/box_utils.py                                                  (+37, -0)
pcdet/datasets/augmentor/augmentor_utils.py

@@ -5,7 +5,7 @@ from ...utils import common_utils
 from ...utils import box_utils


-def random_flip_along_x(gt_boxes, points):
+def random_flip_along_x(gt_boxes, points, return_flip=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -20,11 +20,12 @@ def random_flip_along_x(gt_boxes, points):
     if gt_boxes.shape[1] > 7:
         gt_boxes[:, 8] = -gt_boxes[:, 8]

+    if return_flip:
+        return gt_boxes, points, enable
     return gt_boxes, points


-def random_flip_along_y(gt_boxes, points):
+def random_flip_along_y(gt_boxes, points, return_flip=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -39,11 +40,12 @@ def random_flip_along_y(gt_boxes, points):
     if gt_boxes.shape[1] > 7:
         gt_boxes[:, 7] = -gt_boxes[:, 7]

+    if return_flip:
+        return gt_boxes, points, enable
     return gt_boxes, points


-def global_rotation(gt_boxes, points, rot_range):
+def global_rotation(gt_boxes, points, rot_range, return_rot=False):
     """
     Args:
         gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
@@ -61,10 +63,12 @@ def global_rotation(gt_boxes, points, rot_range):
         np.array([noise_rotation])
     )[0][:, 0:2]

+    if return_rot:
+        return gt_boxes, points, noise_rotation
     return gt_boxes, points


-def global_scaling(gt_boxes, points, scale_range):
+def global_scaling(gt_boxes, points, scale_range, return_scale=False):
     """
     Args:
         gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
@@ -77,7 +81,8 @@ def global_scaling(gt_boxes, points, scale_range):
     noise_scale = np.random.uniform(scale_range[0], scale_range[1])
     points[:, :3] *= noise_scale
     gt_boxes[:, :6] *= noise_scale

+    if return_scale:
+        return gt_boxes, points, noise_scale
     return gt_boxes, points
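The new return_* flags let each augmentor return the transform it actually sampled, so downstream consumers can map coordinates back to the original frame (the focal sparse conv below uses this). A minimal standalone sketch of the record-and-invert pattern, in plain numpy rather than pcdet code:

    import numpy as np

    def rotate_z(points, angle):
        # Rotate (N, 3) points around the z-axis by `angle` radians.
        c, s = np.cos(angle), np.sin(angle)
        rot = np.array([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]])
        return points @ rot.T

    points = np.random.randn(5, 3)
    noise_rotation = np.random.uniform(-np.pi / 4, np.pi / 4)  # as in global_rotation
    augmented = rotate_z(points, noise_rotation)

    # A consumer that stored `noise_rotation` can recover the original coords:
    restored = rotate_z(augmented, -noise_rotation)
    assert np.allclose(restored, points)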
pcdet/datasets/augmentor/data_augmentor.py

@@ -46,9 +46,10 @@ class DataAugmentor(object):
         gt_boxes, points = data_dict['gt_boxes'], data_dict['points']
         for cur_axis in config['ALONG_AXIS_LIST']:
             assert cur_axis in ['x', 'y']
-            gt_boxes, points = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
-                gt_boxes, points,
+            gt_boxes, points, enable = getattr(augmentor_utils, 'random_flip_along_%s' % cur_axis)(
+                gt_boxes, points, return_flip=True
             )
+            data_dict['flip_%s' % cur_axis] = enable

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
@@ -60,23 +61,25 @@ class DataAugmentor(object):
         rot_range = config['WORLD_ROT_ANGLE']
         if not isinstance(rot_range, list):
             rot_range = [-rot_range, rot_range]
-        gt_boxes, points = augmentor_utils.global_rotation(
-            data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range
+        gt_boxes, points, noise_rot = augmentor_utils.global_rotation(
+            data_dict['gt_boxes'], data_dict['points'], rot_range=rot_range, return_rot=True
         )

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
+        data_dict['noise_rot'] = noise_rot
         return data_dict

     def random_world_scaling(self, data_dict=None, config=None):
         if data_dict is None:
             return partial(self.random_world_scaling, config=config)
-        gt_boxes, points = augmentor_utils.global_scaling(
-            data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE']
+        gt_boxes, points, noise_scale = augmentor_utils.global_scaling(
+            data_dict['gt_boxes'], data_dict['points'], config['WORLD_SCALE_RANGE'], return_scale=True
        )

         data_dict['gt_boxes'] = gt_boxes
         data_dict['points'] = points
+        data_dict['noise_scale'] = noise_scale
         return data_dict

     def random_image_flip(self, data_dict=None, config=None):
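Note the pattern visible in random_world_scaling: when called without data, each method returns a partial of itself bound to its config, which DataAugmentor queues and later applies in order. A standalone sketch of that pattern (TinyAugmentor is a made-up illustration, not pcdet's class):

    from functools import partial

    class TinyAugmentor:
        def __init__(self, configs):
            # Each entry becomes a ready-to-call transform bound to its config.
            self.queue = [self.scale(config=cfg) for cfg in configs]

        def scale(self, data_dict=None, config=None):
            if data_dict is None:
                return partial(self.scale, config=config)
            data_dict['points'] = [p * config['factor'] for p in data_dict['points']]
            return data_dict

        def forward(self, data_dict):
            for aug in self.queue:
                data_dict = aug(data_dict=data_dict)
            return data_dict

    aug = TinyAugmentor([{'factor': 2.0}])
    print(aug.forward({'points': [1.0, 2.0]}))  # {'points': [2.0, 4.0]}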
pcdet/datasets/augmentor/database_sampler.py

@@ -3,18 +3,27 @@ import pickle
 import os
 import copy
 import numpy as np
+from skimage import io
+import torch
 import SharedArray
 import torch.distributed as dist

 from ...ops.iou3d_nms import iou3d_nms_utils
-from ...utils import box_utils, common_utils
+from ...utils import box_utils, common_utils, box2d_utils, calibration_kitti
+from pcdet.datasets.kitti.kitti_object_eval_python import kitti_common


 class DataBaseSampler(object):
     def __init__(self, root_path, sampler_cfg, class_names, logger=None):
         self.root_path = root_path
         self.class_names = class_names
         self.sampler_cfg = sampler_cfg
+
+        self.aug_with_img = sampler_cfg.get('AUG_WITH_IMAGE', False)
+        self.joint_sample = sampler_cfg.get('JOINT_SAMPLE', False)
+        self.keep_raw = sampler_cfg.get('KEEP_RAW', False)
+        self.box_iou_thres = sampler_cfg.get('BOX_IOU_THRES', 1.0)
+        self.aug_use_type = sampler_cfg.get('AUG_USE_TYPE', 'annotation')
+        self.point_refine = sampler_cfg.get('POINT_REFINE', False)
+
         self.logger = logger
         self.db_infos = {}
         for class_name in class_names:
@@ -153,12 +162,145 @@ class DataBaseSampler(object):
             gt_boxes[:, 2] -= mv_height  # lidar view
         return gt_boxes, mv_height

-    def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict):
+    def copy_paste_to_image_kitti(self, data_dict, crop_feat, gt_number, point_idxes=None):
+        image = data_dict['images']
+        boxes3d = data_dict['gt_boxes']
+        boxes2d = data_dict['gt_boxes2d']
+        corners_lidar = box_utils.boxes_to_corners_3d(boxes3d)
+        img_aug_type = self.sampler_cfg.IMG_AUG_TYPE
+
+        if 'depth' in img_aug_type:
+            paste_order = boxes3d[:, 0].argsort()
+            paste_order = paste_order[::-1]
+        else:
+            paste_order = np.arange(len(boxes3d), dtype=np.int)
+
+        if 'reverse' in img_aug_type:
+            paste_order = paste_order[::-1]
+
+        paste_mask = -255 * np.ones(image.shape[:2], dtype=np.int)
+        fg_mask = np.zeros(image.shape[:2], dtype=np.int)
+        overlap_mask = np.zeros(image.shape[:2], dtype=np.int)
+        depth_mask = np.zeros((*image.shape[:2], 2), dtype=np.float)
+        points_2d, depth_2d = data_dict['calib'].lidar_to_img(data_dict['points'][:, :3])
+        points_2d[:, 0] = np.clip(points_2d[:, 0], a_min=0, a_max=image.shape[1] - 1)
+        points_2d[:, 1] = np.clip(points_2d[:, 1], a_min=0, a_max=image.shape[0] - 1)
+        points_2d = points_2d.astype(np.int)
+        for _order in paste_order:
+            _box2d = boxes2d[_order]
+            image[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = crop_feat[_order]
+            overlap_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] += \
+                (paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] > 0).astype(np.int)
+            paste_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = _order
+
+            if 'cover' in self.aug_use_type:
+                # HxWx2 for min and max depth of each box region
+                depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 0] = corners_lidar[_order, :, 0].min()
+                depth_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2], 1] = corners_lidar[_order, :, 0].max()

+            # foreground area of original point cloud in image plane
+            if _order < gt_number:
+                fg_mask[_box2d[1]:_box2d[3], _box2d[0]:_box2d[2]] = 1
+
+        data_dict['images'] = image
+        if not self.joint_sample:
+            return data_dict
+
+        new_mask = paste_mask[points_2d[:, 1], points_2d[:, 0]] == (point_idxes + gt_number)
+        if self.keep_raw:
+            raw_mask = point_idxes == -1
+        else:
+            raw_fg = (fg_mask == 1) & (paste_mask >= 0) & (paste_mask < gt_number)
+            raw_bg = (fg_mask == 0) & (paste_mask < 0)
+            raw_mask = raw_fg[points_2d[:, 1], points_2d[:, 0]] | raw_bg[points_2d[:, 1], points_2d[:, 0]]
+        keep_mask = new_mask | raw_mask
+        data_dict['points_2d'] = points_2d
+
+        if 'annotation' in self.aug_use_type:
+            data_dict['points'] = data_dict['points'][keep_mask]
+            data_dict['points_2d'] = data_dict['points_2d'][keep_mask]
+        elif 'projection' in self.aug_use_type:
+            overlap_mask[overlap_mask >= 1] = 1
+            data_dict['overlap_mask'] = overlap_mask
+            if 'cover' in self.aug_use_type:
+                data_dict['depth_mask'] = depth_mask
+
+        return data_dict
+
+    def collect_image_crops_kitti(self, info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx):
+        calib_file = kitti_common.get_calib_path(int(info['image_idx']), self.root_path, relative_path=False)
+        sampled_calib = calibration_kitti.Calibration(calib_file)
+        points_2d, depth_2d = sampled_calib.lidar_to_img(obj_points[:, :3])
+
+        if self.point_refine:
+            # align calibration metrics for points
+            points_ract = data_dict['calib'].img_to_rect(points_2d[:, 0], points_2d[:, 1], depth_2d)
+            points_lidar = data_dict['calib'].rect_to_lidar(points_ract)
+            obj_points[:, :3] = points_lidar
+            # align calibration metrics for boxes
+            box3d_raw = sampled_gt_boxes[idx].reshape(1, -1)
+            box3d_coords = box_utils.boxes_to_corners_3d(box3d_raw)[0]
+            box3d_box, box3d_depth = sampled_calib.lidar_to_img(box3d_coords)
+            box3d_coord_rect = data_dict['calib'].img_to_rect(box3d_box[:, 0], box3d_box[:, 1], box3d_depth)
+            box3d_rect = box_utils.corners_rect_to_camera(box3d_coord_rect).reshape(1, -1)
+            box3d_lidar = box_utils.boxes3d_kitti_camera_to_lidar(box3d_rect, data_dict['calib'])
+            box2d = box_utils.boxes3d_kitti_camera_to_imageboxes(box3d_rect, data_dict['calib'],
+                                                                 data_dict['images'].shape[:2])
+            sampled_gt_boxes[idx] = box3d_lidar[0]
+            sampled_gt_boxes2d[idx] = box2d[0]
+
+        obj_idx = idx * np.ones(len(obj_points), dtype=np.int)
+
+        # copy crops from images
+        img_path = self.root_path / self.sampler_cfg.IMG_ROOT_PATH / (info['image_idx'] + '.png')
+        raw_image = io.imread(img_path)
+        raw_image = raw_image.astype(np.float32)
+        raw_center = info['bbox'].reshape(2, 2).mean(0)
+        new_box = sampled_gt_boxes2d[idx].astype(np.int)
+        new_shape = np.array([new_box[2] - new_box[0], new_box[3] - new_box[1]])
+        raw_box = np.concatenate([raw_center - new_shape / 2, raw_center + new_shape / 2]).astype(np.int)
+        raw_box[0::2] = np.clip(raw_box[0::2], a_min=0, a_max=raw_image.shape[1])
+        raw_box[1::2] = np.clip(raw_box[1::2], a_min=0, a_max=raw_image.shape[0])
+        if (raw_box[2] - raw_box[0]) != new_shape[0] or (raw_box[3] - raw_box[1]) != new_shape[1]:
+            new_center = new_box.reshape(2, 2).mean(0)
+            new_shape = np.array([raw_box[2] - raw_box[0], raw_box[3] - raw_box[1]])
+            new_box = np.concatenate([new_center - new_shape / 2, new_center + new_shape / 2]).astype(np.int)
+
+        img_crop2d = raw_image[raw_box[1]:raw_box[3], raw_box[0]:raw_box[2]] / 255
+
+        return new_box, img_crop2d, obj_points, obj_idx
+
+    def sample_gt_boxes_2d_kitti(self, data_dict, sampled_boxes, iou1, iou2):
+        # filter out box2d iou > thres
+        if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+            sampled_boxes, mv_height = self.put_boxes_on_road_planes(
+                sampled_boxes, data_dict['road_plane'], data_dict['calib']
+            )
+
+        # sampled_boxes2d = np.stack([x['bbox'] for x in sampled_dict], axis=0).astype(np.float32)
+        boxes3d_camera = box_utils.boxes3d_lidar_to_kitti_camera(sampled_boxes, data_dict['calib'])
+        sampled_boxes2d = box_utils.boxes3d_kitti_camera_to_imageboxes(boxes3d_camera, data_dict['calib'],
+                                                                       data_dict['images'].shape[:2])
+        sampled_boxes2d = torch.Tensor(sampled_boxes2d)
+        existed_boxes2d = torch.Tensor(data_dict['gt_boxes2d'])
+        iou2d1 = box2d_utils.pairwise_iou(sampled_boxes2d, existed_boxes2d).cpu().numpy()
+        iou2d2 = box2d_utils.pairwise_iou(sampled_boxes2d, sampled_boxes2d).cpu().numpy()
+        iou2d2[range(sampled_boxes2d.shape[0]), range(sampled_boxes2d.shape[0])] = 0
+        iou2d1 = iou2d1 if iou2d1.shape[1] > 0 else iou2d2
+
+        valid_mask = ((iou2d1.max(axis=1) < self.box_iou_thres) &
+                      (iou2d2.max(axis=1) < self.box_iou_thres) &
+                      ((iou1.max(axis=1) + iou2.max(axis=1)) == 0)).nonzero()[0]
+
+        sampled_boxes2d = sampled_boxes2d[valid_mask].cpu().numpy()
+        return sampled_boxes2d, mv_height, valid_mask
+
+    def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sampled_dict,
+                                   mv_height=None, sampled_gt_boxes2d=None):
         gt_boxes_mask = data_dict['gt_boxes_mask']
         gt_boxes = data_dict['gt_boxes'][gt_boxes_mask]
         gt_names = data_dict['gt_names'][gt_boxes_mask]
         points = data_dict['points']
-        if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+        if self.sampler_cfg.get('USE_ROAD_PLANE', False) and not self.aug_with_img:
             sampled_gt_boxes, mv_height = self.put_boxes_on_road_planes(
                 sampled_gt_boxes, data_dict['road_plane'], data_dict['calib']
             )
@@ -166,6 +308,13 @@ class DataBaseSampler(object):
             data_dict.pop('road_plane')

         obj_points_list = []
+        # convert sampled 3D boxes to image plane
+        if self.aug_with_img:
+            obj_index_list, crop_boxes2d = [], []
+            gt_number = gt_boxes_mask.sum().astype(np.int)
+            gt_boxes2d = data_dict['gt_boxes2d'][gt_boxes_mask].astype(np.int)
+            gt_crops2d = [data_dict['images'][_x[1]:_x[3], _x[0]:_x[2]] for _x in gt_boxes2d]
+
         if self.use_shared_memory:
             gt_database_data = SharedArray.attach(f"shm://{self.gt_database_data_key}")
             gt_database_data.setflags(write=0)
@@ -187,6 +336,13 @@ class DataBaseSampler(object):
                 # mv height
                 obj_points[:, 2] -= mv_height[idx]

+            if self.aug_with_img:
+                new_box, img_crop2d, obj_points, obj_idx = self.collect_image_crops_kitti(
+                    info, data_dict, obj_points, sampled_gt_boxes, sampled_gt_boxes2d, idx
+                )
+                crop_boxes2d.append(new_box)
+                gt_crops2d.append(img_crop2d)
+                obj_index_list.append(obj_idx)
+
             obj_points_list.append(obj_points)

         obj_points = np.concatenate(obj_points_list, axis=0)
@@ -202,6 +358,16 @@ class DataBaseSampler(object):
         data_dict['gt_boxes'] = gt_boxes
         data_dict['gt_names'] = gt_names
         data_dict['points'] = points
+
+        if self.aug_with_img:
+            obj_points_idx = np.concatenate(obj_index_list, axis=0)
+            point_idxes = -1 * np.ones(len(points), dtype=np.int)
+            point_idxes = np.concatenate([obj_points_idx, point_idxes], axis=0)
+            data_dict['gt_boxes2d'] = np.concatenate([gt_boxes2d, np.array(crop_boxes2d)], axis=0)
+            data_dict = self.copy_paste_to_image_kitti(data_dict, gt_crops2d, gt_number, point_idxes)
+            if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+                data_dict.pop('road_plane')
+
         return data_dict

     def __call__(self, data_dict):
@@ -217,6 +383,8 @@ class DataBaseSampler(object):
         gt_names = data_dict['gt_names'].astype(str)
         existed_boxes = gt_boxes
         total_valid_sampled_dict = []
+        sampled_mv_height = []
+        sampled_gt_boxes2d = []

         for class_name, sample_group in self.sample_groups.items():
             if self.limit_whole_scene:
                 num_gt = np.sum(class_name == gt_names)
@@ -234,6 +402,14 @@ class DataBaseSampler(object):
                 iou2[range(sampled_boxes.shape[0]), range(sampled_boxes.shape[0])] = 0
                 iou1 = iou1 if iou1.shape[1] > 0 else iou2
                 valid_mask = ((iou1.max(axis=1) + iou2.max(axis=1)) == 0).nonzero()[0]
+
+                if self.aug_with_img:
+                    sampled_boxes2d, mv_height, valid_mask = self.sample_gt_boxes_2d_kitti(
+                        data_dict, sampled_boxes, iou1, iou2
+                    )
+                    sampled_gt_boxes2d.append(sampled_boxes2d)
+                    if self.sampler_cfg.get('USE_ROAD_PLANE', False):
+                        mv_height = mv_height[valid_mask]
+                        sampled_mv_height = np.concatenate((sampled_mv_height, mv_height), axis=0)

                 valid_sampled_dict = [sampled_dict[x] for x in valid_mask]
                 valid_sampled_boxes = sampled_boxes[valid_mask]
@@ -241,8 +417,17 @@ class DataBaseSampler(object):
                 total_valid_sampled_dict.extend(valid_sampled_dict)

         sampled_gt_boxes = existed_boxes[gt_boxes.shape[0]:, :]
+
+        if self.aug_with_img:
+            if len(sampled_gt_boxes2d) > 0:
+                sampled_gt_boxes2d = np.concatenate(sampled_gt_boxes2d, axis=0)
+
         if total_valid_sampled_dict.__len__() > 0:
-            data_dict = self.add_sampled_boxes_to_scene(data_dict, sampled_gt_boxes, total_valid_sampled_dict)
+            data_dict = self.add_sampled_boxes_to_scene(
+                data_dict, sampled_gt_boxes, total_valid_sampled_dict, sampled_mv_height, sampled_gt_boxes2d
+            )

         data_dict.pop('gt_boxes_mask')
         return data_dict
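box2d_utils.py is added by this commit but its diff is not shown on this page. A hedged standalone sketch of what a pairwise IoU over (M, 4) and (N, 4) [x1, y1, x2, y2] boxes could look like, matching how sample_gt_boxes_2d_kitti consumes it (an (M, N) matrix); pairwise_iou_sketch is an illustrative stand-in, not the committed function:

    import torch

    def pairwise_iou_sketch(boxes1, boxes2):
        # Areas of both sets.
        area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
        area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
        # Intersection corners via broadcasting to (M, N, 2).
        lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
        rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
        wh = (rb - lt).clamp(min=0)
        inter = wh[..., 0] * wh[..., 1]
        return inter / (area1[:, None] + area2[None, :] - inter)

    a = torch.tensor([[0., 0., 2., 2.]])
    b = torch.tensor([[1., 1., 3., 3.], [10., 10., 11., 11.]])
    print(pairwise_iou_sketch(a, b))  # tensor([[0.1429, 0.0000]])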
pcdet/datasets/dataset.py

@@ -9,7 +9,6 @@ from .augmentor.data_augmentor import DataAugmentor
 from .processor.data_processor import DataProcessor
 from .processor.point_feature_encoder import PointFeatureEncoder


 class DatasetTemplate(torch_data.Dataset):
     def __init__(self, dataset_cfg=None, class_names=None, training=True, root_path=None, logger=None):
         super().__init__()
@@ -124,13 +123,14 @@ class DatasetTemplate(torch_data.Dataset):
             assert 'gt_boxes' in data_dict, 'gt_boxes should be provided for training'
             gt_boxes_mask = np.array([n in self.class_names for n in data_dict['gt_names']], dtype=np.bool_)

+            calib = data_dict['calib']
             data_dict = self.data_augmentor.forward(
                 data_dict={
                     **data_dict,
                     'gt_boxes_mask': gt_boxes_mask
                 }
             )
+            data_dict['calib'] = calib

         if data_dict.get('gt_boxes', None) is not None:
             selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
             data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
@@ -205,7 +205,7 @@ class DatasetTemplate(torch_data.Dataset):
                         pad_w = common_utils.get_pad_params(desired_size=max_w, cur_size=image.shape[1])
                         pad_width = (pad_h, pad_w)
                         # Pad with nan, to be replaced later in the pipeline.
-                        pad_value = np.nan
+                        pad_value = 0  # np.nan

                         if key == "images":
                             pad_width = (pad_h, pad_w, (0, 0))
@@ -219,6 +219,20 @@ class DatasetTemplate(torch_data.Dataset):
                         images.append(image_pad)
                     ret[key] = np.stack(images, axis=0)
+                elif key in ['calib']:
+                    ret[key] = val
+                elif key in ["points_2d"]:
+                    max_len = max([len(_val) for _val in val])
+                    pad_value = 0
+                    points = []
+                    for _points in val:
+                        pad_width = ((0, max_len - len(_points)), (0, 0))
+                        points_pad = np.pad(_points,
+                                            pad_width=pad_width,
+                                            mode='constant',
+                                            constant_values=pad_value)
+                        points.append(points_pad)
+                    ret[key] = np.stack(points, axis=0)
                 else:
                     ret[key] = np.stack(val, axis=0)
             except:
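The new "points_2d" collate branch pads ragged per-sample arrays to the batch maximum so they can be stacked. A standalone sketch of the same operation on dummy data:

    import numpy as np

    val = [np.ones((3, 2)), np.ones((5, 2))]  # two samples with different point counts
    max_len = max(len(_val) for _val in val)
    batch = np.stack(
        [np.pad(_points, ((0, max_len - len(_points)), (0, 0)),
                mode='constant', constant_values=0)
         for _points in val],
        axis=0)
    print(batch.shape)  # (2, 5, 2)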
pcdet/datasets/kitti/kitti_dataset.py

@@ -421,6 +421,7 @@ class KittiDataset(DatasetTemplate):
         if "calib_matricies" in get_item_list:
             input_dict["trans_lidar_to_cam"], input_dict["trans_cam_to_img"] = kitti_utils.calib_to_matricies(calib)

+        input_dict['calib'] = calib
         data_dict = self.prepare_data(data_dict=input_dict)

         data_dict['image_shape'] = img_shape
pcdet/models/backbones_3d/__init__.py

 from .pointnet2_backbone import PointNet2Backbone, PointNet2MSG
 from .spconv_backbone import VoxelBackBone8x, VoxelResBackBone8x
+from .spconv_backbone_focal import VoxelBackBone8xFocal
 from .spconv_unet import UNetV2

 __all__ = {
@@ -8,4 +9,5 @@ __all__ = {
     'PointNet2Backbone': PointNet2Backbone,
     'PointNet2MSG': PointNet2MSG,
     'VoxelResBackBone8x': VoxelResBackBone8x,
+    'VoxelBackBone8xFocal': VoxelBackBone8xFocal,
 }
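Registering the class in __all__ is what makes it selectable from a model config: pcdet typically looks the backbone up by its NAME string. A standalone sketch of that registry pattern (DummyBackbone and the config values are illustrative only):

    class DummyBackbone:
        def __init__(self, model_cfg, input_channels, grid_size):
            self.model_cfg = model_cfg

    registry = {'VoxelBackBone8xFocal': DummyBackbone}  # stand-in for __all__
    name = 'VoxelBackBone8xFocal'                       # e.g. cfg.MODEL.BACKBONE_3D.NAME
    backbone = registry[name](model_cfg={}, input_channels=4, grid_size=[1408, 1600, 40])
    print(type(backbone).__name__)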
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py (new file, mode 100755)

import torch.nn as nn


class BasicBlock1D(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Initializes convolutional block
        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv2d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv1d(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs)
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies convolutional block
        Args:
            features: (B, C_in, H, W), Input features
        Returns:
            x: (B, C_out, H, W), Output features
        """
        x = self.conv(features)
        x = self.bn(x)
        x = self.relu(x)
        return x


class BasicBlock2D(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Initializes convolutional block
        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv2d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies convolutional block
        Args:
            features: (B, C_in, H, W), Input features
        Returns:
            x: (B, C_out, H, W), Output features
        """
        x = self.conv(features)
        x = self.bn(x)
        x = self.relu(x)
        return x
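A small usage sketch, assuming the module is importable from the path this commit adds: the extra kwargs flow straight into nn.Conv2d, so kernel_size / padding / stride pass through unchanged.

    import torch
    from pcdet.models.backbones_3d.focal_sparse_conv.SemanticSeg.basic_blocks import BasicBlock2D

    block = BasicBlock2D(in_channels=3, out_channels=16, kernel_size=3, padding=1)
    out = block(torch.randn(2, 3, 64, 64))
    print(out.shape)  # torch.Size([2, 16, 64, 64])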
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py (new file, mode 100755)

import torch
import torch.nn as nn
from .basic_blocks import BasicBlock2D
from .sem_deeplabv3 import SemDeepLabV3


class PyramidFeat2D(nn.Module):

    def __init__(self, optimize, model_cfg):
        """
        Initialize 2D feature network via pretrained model
        Args:
            model_cfg: EasyDict, Dense classification network config
        """
        super().__init__()
        self.model_cfg = model_cfg
        self.is_optimize = optimize

        # Create modules
        self.ifn = SemDeepLabV3(
            num_classes=model_cfg.num_class,
            backbone_name=model_cfg.backbone,
            **model_cfg.args
        )
        self.reduce_blocks = torch.nn.ModuleList()
        self.out_channels = {}
        for _idx, _channel in enumerate(model_cfg.channel_reduce["in_channels"]):
            _channel_out = model_cfg.channel_reduce["out_channels"][_idx]
            self.out_channels[model_cfg.args['feat_extract_layer'][_idx]] = _channel_out
            block_cfg = {"in_channels": _channel,
                         "out_channels": _channel_out,
                         "kernel_size": model_cfg.channel_reduce["kernel_size"][_idx],
                         "stride": model_cfg.channel_reduce["stride"][_idx],
                         "bias": model_cfg.channel_reduce["bias"][_idx]}
            self.reduce_blocks.append(BasicBlock2D(**block_cfg))

    def get_output_feature_dim(self):
        return self.out_channels

    def forward(self, images):
        """
        Predicts depths and creates image depth feature volume using depth distributions
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns:
            batch_dict:
                frustum_features: (N, C, D, H_out, W_out), Image depth features
        """
        # Pixel-wise depth classification
        batch_dict = {}
        ifn_result = self.ifn(images)

        for _idx, _layer in enumerate(self.model_cfg.args['feat_extract_layer']):
            image_features = ifn_result[_layer]
            # Channel reduce
            if self.reduce_blocks[_idx] is not None:
                image_features = self.reduce_blocks[_idx](image_features)

            batch_dict[_layer + "_feat2d"] = image_features

        if self.training:
            # detach feature from graph if not optimize
            if "logits" in ifn_result:
                ifn_result["logits"].detach_()
            if not self.is_optimize:
                image_features.detach_()

        return batch_dict

    def get_loss(self):
        """
        Gets loss
        Args:
        Returns:
            loss: (1), Network loss
            tb_dict: dict[float], All losses to log in tensorboard
        """
        return None, None
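A hedged construction sketch: PyramidFeat2D reads model_cfg by attribute (num_class, backbone, args, channel_reduce), so any attribute-style dict works; EasyDict is used here purely for illustration, and the values mirror the model_cfg_seg dict built in spconv_backbone_focal.py below. Actually instantiating it requires a CUDA device and the DeepLabV3 checkpoint to exist at the given path.

    from easydict import EasyDict

    model_cfg = EasyDict(dict(
        name='SemDeepLabV3',
        backbone='ResNet50',
        num_class=21,
        args={"feat_extract_layer": ["layer1"],
              "pretrained_path": "../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth"},
        channel_reduce={"in_channels": [256], "out_channels": [16],
                        "kernel_size": [1], "stride": [1], "bias": [False]},
    ))
    # feat2d = PyramidFeat2D(optimize=True, model_cfg=model_cfg)
    # batch = feat2d(images)  # dict with 'layer1_feat2d': (N, 16, H/4, W/4)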
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py (new file, mode 100755)

from collections import OrderedDict
from pathlib import Path
from torch import hub

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from kornia.enhance.normalize import normalize


class SegTemplate(nn.Module):
    def __init__(self, constructor, feat_extract_layer, num_classes, pretrained_path=None, aux_loss=None):
        """
        Initializes depth distribution network.
        Args:
            constructor: function, Model constructor
            feat_extract_layer: string, Layer to extract features from
            num_classes: int, Number of classes
            pretrained_path: string, (Optional) Path of the model to load weights from
            aux_loss: bool, Flag to include auxillary loss
        """
        super().__init__()
        self.num_classes = num_classes
        self.pretrained_path = pretrained_path
        self.pretrained = pretrained_path is not None
        self.aux_loss = aux_loss

        if self.pretrained:
            # Preprocess Module
            self.norm_mean = torch.Tensor([0.485, 0.456, 0.406])
            self.norm_std = torch.Tensor([0.229, 0.224, 0.225])

        # Model
        self.model = self.get_model(constructor=constructor)
        self.feat_extract_layer = feat_extract_layer
        return_layers = {_layer: _layer for _layer in feat_extract_layer}
        self.model.backbone.return_layers.update(return_layers)

    def get_model(self, constructor):
        """
        Get model
        Args:
            constructor: function, Model constructor
        Returns:
            model: nn.Module, Model
        """
        # Get model
        model = constructor(pretrained=False,
                            pretrained_backbone=False,
                            num_classes=self.num_classes,
                            aux_loss=self.aux_loss)

        # Update weights
        if self.pretrained_path is not None:
            model_dict = model.state_dict()

            # Download pretrained model if not available yet
            checkpoint_path = Path(self.pretrained_path)
            if not checkpoint_path.exists():
                checkpoint = checkpoint_path.name
                save_dir = checkpoint_path.parent
                save_dir.mkdir(parents=True, exist_ok=True)
                url = f'https://download.pytorch.org/models/{checkpoint}'
                hub.load_state_dict_from_url(url, save_dir)

            # Get pretrained state dict
            pretrained_dict = torch.load(self.pretrained_path)
            #pretrained_dict = self.filter_pretrained_dict(model_dict=model_dict, pretrained_dict=pretrained_dict)

            # Update current model state dict
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict, strict=False)

        return model.cuda()

    def filter_pretrained_dict(self, model_dict, pretrained_dict):
        """
        Removes layers from pretrained state dict that are not used or changed in model
        Args:
            model_dict: dict, Default model state dictionary
            pretrained_dict: dict, Pretrained model state dictionary
        Returns:
            pretrained_dict: dict, Pretrained model state dictionary with removed weights
        """
        # Removes aux classifier weights if not used
        if "aux_classifier.0.weight" in pretrained_dict and "aux_classifier.0.weight" not in model_dict:
            pretrained_dict = {key: value for key, value in pretrained_dict.items()
                               if "aux_classifier" not in key}

        # Removes final conv layer from weights if number of classes are different
        model_num_classes = model_dict["classifier.4.weight"].shape[0]
        pretrained_num_classes = pretrained_dict["classifier.4.weight"].shape[0]
        if model_num_classes != pretrained_num_classes:
            pretrained_dict.pop("classifier.4.weight")
            pretrained_dict.pop("classifier.4.bias")

        return pretrained_dict

    def forward(self, images):
        """
        Forward pass
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns
            result: dict[torch.Tensor], Depth distribution result
                features: (N, C, H_out, W_out), Image features
                logits: (N, num_classes, H_out, W_out), Classification logits
                aux: (N, num_classes, H_out, W_out), Auxillary classification logits
        """
        # Preprocess images
        x = self.preprocess(images)

        # Extract features
        result = OrderedDict()
        features = self.model.backbone(x)
        for _layer in self.feat_extract_layer:
            result[_layer] = features[_layer]
        return result
        # The early return above makes the remainder of this method unreachable;
        # it is committed as-is.

        if 'features' in features.keys():
            feat_shape = features['features'].shape[-2:]
        else:
            feat_shape = features['layer1'].shape[-2:]

        # Prediction classification logits
        x = features["out"]
        # comment the classifier to reduce memory
        # x = self.model.classifier(x)
        # x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
        result["logits"] = x

        # Prediction auxillary classification logits
        if self.model.aux_classifier is not None:
            x = features["aux"]
            x = self.model.aux_classifier(x)
            x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
            result["aux"] = x

        return result

    def preprocess(self, images):
        """
        Preprocess images
        Args:
            images: (N, 3, H, W), Input images
        Return
            x: (N, 3, H, W), Preprocessed images
        """
        x = images
        if self.pretrained:
            # Match ResNet pretrained preprocessing
            x = normalize(x, mean=self.norm_mean, std=self.norm_std)

        return x.cuda()


class SemDeepLabV3(SegTemplate):

    def __init__(self, backbone_name, **kwargs):
        """
        Initializes SemDeepLabV3 model
        Args:
            backbone_name: string, ResNet Backbone Name [ResNet50/ResNet101]
        """
        if backbone_name == "ResNet50":
            constructor = torchvision.models.segmentation.deeplabv3_resnet50
        elif backbone_name == "ResNet101":
            constructor = torchvision.models.segmentation.deeplabv3_resnet101
        else:
            raise NotImplementedError

        super().__init__(constructor=constructor, **kwargs)
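A standalone sketch of the return_layers trick SegTemplate relies on: torchvision's DeepLabV3 backbone is an IntermediateLayerGetter, so adding 'layer1' to its return_layers exposes that intermediate feature map on the next forward. This uses the legacy pretrained=... keywords that match the committed code; newer torchvision releases prefer weights=None.

    import torch
    import torchvision

    model = torchvision.models.segmentation.deeplabv3_resnet50(
        pretrained=False, pretrained_backbone=False, num_classes=21, aux_loss=None)
    model.backbone.return_layers.update({'layer1': 'layer1'})
    model.eval()
    with torch.no_grad():
        feats = model.backbone(torch.randn(1, 3, 224, 224))
    print(feats['layer1'].shape)  # torch.Size([1, 256, 56, 56]) -- stride-4 features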
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py (new file, mode 100644)

import torch
import torch.nn as nn
import spconv.pytorch as spconv
from pcdet.ops.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu
from pcdet.models.backbones_3d.focal_sparse_conv.utils import split_voxels, check_repeat, FocalLoss
from pcdet.utils import common_utils


class FocalSparseConv(spconv.SparseModule):
    expansion = 1

    def __init__(self, inplanes, planes, voxel_stride, norm_fn=None, indice_key=None, image_channel=3,
                 kernel_size=3, padding=1, mask_multi=False, use_img=False, topk=False, threshold=0.5,
                 skip_mask_kernel=False, enlarge_voxel_channels=-1,
                 point_cloud_range=[-3, -40, 0, 1, 40, 70.4],
                 voxel_size=[0.1, 0.05, 0.05]):
        super(FocalSparseConv, self).__init__()

        self.conv = spconv.SubMConv3d(inplanes, planes, kernel_size=kernel_size, stride=1,
                                      bias=False, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)

        offset_channels = kernel_size**3

        self.topk = topk
        self.threshold = threshold
        self.voxel_stride = voxel_stride
        self.focal_loss = FocalLoss()
        self.mask_multi = mask_multi
        self.skip_mask_kernel = skip_mask_kernel
        self.use_img = use_img

        voxel_channel = enlarge_voxel_channels if enlarge_voxel_channels > 0 else inplanes
        in_channels = image_channel + voxel_channel if use_img else voxel_channel

        self.conv_enlarge = spconv.SparseSequential(
            spconv.SubMConv3d(inplanes, enlarge_voxel_channels, kernel_size=3, stride=1, padding=1,
                              bias=False, indice_key=indice_key + '_enlarge'),
            norm_fn(enlarge_voxel_channels),
            nn.ReLU(True)) if enlarge_voxel_channels > 0 else None

        self.conv_imp = spconv.SubMConv3d(in_channels, offset_channels, kernel_size=3, stride=1,
                                          padding=1, bias=False, indice_key=indice_key + '_imp')

        _step = int(kernel_size // 2)
        kernel_offsets = [[i, j, k] for i in range(-_step, _step + 1)
                          for j in range(-_step, _step + 1)
                          for k in range(-_step, _step + 1)]
        kernel_offsets.remove([0, 0, 0])
        self.kernel_offsets = torch.Tensor(kernel_offsets).cuda()
        self.inv_idx = torch.Tensor([2, 1, 0]).long().cuda()
        self.point_cloud_range = torch.Tensor(point_cloud_range).cuda()
        self.voxel_size = torch.Tensor(voxel_size).cuda()

    def construct_multimodal_features(self, x, x_rgb, batch_dict, fuse_sum=False):
        """
        Construct the multimodal features with both lidar sparse features and image features.
        Args:
            x: [N, C] lidar sparse features
            x_rgb: [b, c, h, w] image features
            batch_dict: input and output information during forward
            fuse_sum: bool, manner for fusion, True - sum, False - concat
        Return:
            image_with_voxelfeatures: [N, C] fused multimodal features
        """
        batch_index = x.indices[:, 0]
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]
        calibs = batch_dict['calib']
        batch_size = batch_dict['batch_size']
        h, w = batch_dict['images'].shape[2:]

        if not x_rgb.shape == batch_dict['images'].shape:
            x_rgb = nn.functional.interpolate(x_rgb, (h, w), mode='bilinear')

        image_with_voxelfeatures = []
        voxels_2d_int_list = []
        filter_idx_list = []
        for b in range(batch_size):
            x_rgb_batch = x_rgb[b]

            calib = calibs[b]
            voxels_3d_batch = voxels_3d[batch_index == b]
            voxel_features_sparse = x.features[batch_index == b]

            # Reverse the point cloud transformations to the original coords.
            if 'noise_scale' in batch_dict:
                voxels_3d_batch[:, :3] /= batch_dict['noise_scale'][b]
            if 'noise_rot' in batch_dict:
                voxels_3d_batch = common_utils.rotate_points_along_z(
                    voxels_3d_batch[:, self.inv_idx].unsqueeze(0),
                    -batch_dict['noise_rot'][b].unsqueeze(0))[0, :, self.inv_idx]
            if 'flip_x' in batch_dict:
                voxels_3d_batch[:, 1] *= -1 if batch_dict['flip_x'][b] else 1
            if 'flip_y' in batch_dict:
                voxels_3d_batch[:, 2] *= -1 if batch_dict['flip_y'][b] else 1

            voxels_2d, _ = calib.lidar_to_img(voxels_3d_batch[:, self.inv_idx].cpu().numpy())
            voxels_2d_int = torch.Tensor(voxels_2d).to(x_rgb_batch.device).long()

            filter_idx = (0 <= voxels_2d_int[:, 1]) * (voxels_2d_int[:, 1] < h) * \
                         (0 <= voxels_2d_int[:, 0]) * (voxels_2d_int[:, 0] < w)

            filter_idx_list.append(filter_idx)
            voxels_2d_int = voxels_2d_int[filter_idx]
            voxels_2d_int_list.append(voxels_2d_int)

            image_features_batch = torch.zeros((voxel_features_sparse.shape[0], x_rgb_batch.shape[0]),
                                               device=x_rgb_batch.device)
            image_features_batch[filter_idx] = x_rgb_batch[:, voxels_2d_int[:, 1], voxels_2d_int[:, 0]].permute(1, 0)

            if fuse_sum:
                image_with_voxelfeature = image_features_batch + voxel_features_sparse
            else:
                image_with_voxelfeature = torch.cat([image_features_batch, voxel_features_sparse], dim=1)

            image_with_voxelfeatures.append(image_with_voxelfeature)

        image_with_voxelfeatures = torch.cat(image_with_voxelfeatures)
        return image_with_voxelfeatures

    def _gen_sparse_features(self, x, imps_3d, batch_dict, voxels_3d):
        """
        Generate the output sparse features from the focal sparse conv.
        Args:
            x: [N, C], lidar sparse features
            imps_3d: [N, kernelsize**3], the predicted importance values
            batch_dict: input and output information during forward
            voxels_3d: [N, 3], the 3d positions of voxel centers
        """
        batch_size = x.batch_size
        voxel_features_fore = []
        voxel_indices_fore = []
        voxel_features_back = []
        voxel_indices_back = []

        box_of_pts_cls_targets = []
        mask_voxels = []
        mask_kernel_list = []

        for b in range(batch_size):
            if self.training:
                index = x.indices[:, 0]
                batch_index = index == b
                mask_voxel = imps_3d[batch_index, -1].sigmoid()
                voxels_3d_batch = voxels_3d[batch_index].unsqueeze(0)
                mask_voxels.append(mask_voxel)
                gt_boxes = batch_dict['gt_boxes'][b, :, :-1].unsqueeze(0)
                box_of_pts_batch = points_in_boxes_gpu(voxels_3d_batch[:, :, self.inv_idx], gt_boxes).squeeze(0)
                box_of_pts_cls_targets.append(box_of_pts_batch >= 0)

            features_fore, indices_fore, features_back, indices_back, mask_kernel = split_voxels(
                x, b, imps_3d, voxels_3d, self.kernel_offsets,
                mask_multi=self.mask_multi, topk=self.topk, threshold=self.threshold)

            mask_kernel_list.append(mask_kernel)
            voxel_features_fore.append(features_fore)
            voxel_indices_fore.append(indices_fore)
            voxel_features_back.append(features_back)
            voxel_indices_back.append(indices_back)

        voxel_features_fore = torch.cat(voxel_features_fore, dim=0)
        voxel_indices_fore = torch.cat(voxel_indices_fore, dim=0)
        voxel_features_back = torch.cat(voxel_features_back, dim=0)
        voxel_indices_back = torch.cat(voxel_indices_back, dim=0)
        mask_kernel = torch.cat(mask_kernel_list, dim=0)

        x_fore = spconv.SparseConvTensor(voxel_features_fore, voxel_indices_fore, x.spatial_shape, x.batch_size)
        x_back = spconv.SparseConvTensor(voxel_features_back, voxel_indices_back, x.spatial_shape, x.batch_size)

        loss_box_of_pts = 0
        if self.training:
            mask_voxels = torch.cat(mask_voxels)
            box_of_pts_cls_targets = torch.cat(box_of_pts_cls_targets)
            mask_voxels_two_classes = torch.cat([1 - mask_voxels.unsqueeze(-1), mask_voxels.unsqueeze(-1)], dim=1)
            loss_box_of_pts = self.focal_loss(mask_voxels_two_classes, box_of_pts_cls_targets.long())

        return x_fore, x_back, loss_box_of_pts, mask_kernel

    def combine_out(self, x_fore, x_back, remove_repeat=False):
        """
        Combine the foreground and background sparse features together.
        Args:
            x_fore: [N1, C], foreground sparse features
            x_back: [N2, C], background sparse features
            remove_repeat: bool, whether to remove the spatial replicate features.
        """
        x_fore_features = torch.cat([x_fore.features, x_back.features], dim=0)
        x_fore_indices = torch.cat([x_fore.indices, x_back.indices], dim=0)

        if remove_repeat:
            index = x_fore_indices[:, 0]
            features_out_list = []
            indices_coords_out_list = []
            for b in range(x_fore.batch_size):
                batch_index = index == b
                features_out, indices_coords_out, _ = check_repeat(
                    x_fore_features[batch_index], x_fore_indices[batch_index], flip_first=False)
                features_out_list.append(features_out)
                indices_coords_out_list.append(indices_coords_out)

            x_fore_features = torch.cat(features_out_list, dim=0)
            x_fore_indices = torch.cat(indices_coords_out_list, dim=0)

        x_fore = x_fore.replace_feature(x_fore_features)
        x_fore.indices = x_fore_indices

        return x_fore

    def forward(self, x, batch_dict, x_rgb=None):
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]

        if self.use_img:
            features_multimodal = self.construct_multimodal_features(x, x_rgb, batch_dict)
            x_predict = spconv.SparseConvTensor(features_multimodal, x.indices, x.spatial_shape, x.batch_size)
        else:
            x_predict = self.conv_enlarge(x) if self.conv_enlarge else x

        imps_3d = self.conv_imp(x_predict).features

        x_fore, x_back, loss_box_of_pts, mask_kernel = self._gen_sparse_features(x, imps_3d, batch_dict, voxels_3d)

        if not self.skip_mask_kernel:
            x_fore = x_fore.replace_feature(x_fore.features * mask_kernel.unsqueeze(-1))
        out = self.combine_out(x_fore, x_back, remove_repeat=True)
        out = self.conv(out)

        if self.use_img:
            out = out.replace_feature(self.construct_multimodal_features(out, x_rgb, batch_dict, True))

        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))

        batch_dict['loss_box_of_pts'] += loss_box_of_pts
        return out, batch_dict
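The core of the focal sparse conv is ranking voxels by a predicted importance score and keeping the top fraction as foreground. A standalone sketch of the topk branch of that split, detached from spconv (all shapes here are illustrative):

    import torch

    N, C, K = 100, 16, 27                     # voxels, channels, kernel_size**3
    features = torch.randn(N, C)
    imps_3d = torch.randn(N, K)               # conv_imp output for one batch id
    threshold = 0.5

    mask_voxel = imps_3d[:, -1].sigmoid()     # last channel scores the voxel itself
    _, order = mask_voxel.sort(descending=True)
    fore = order[:int(N * threshold)]         # most important half kept as foreground
    back = order[int(N * threshold):]
    features_fore, features_back = features[fore], features[back]
    print(features_fore.shape, features_back.shape)  # (50, 16) (50, 16)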
pcdet/models/backbones_3d/focal_sparse_conv/utils.py (new file, mode 100644)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class FocalLoss(nn.Module):

    def __init__(self, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps

    def one_hot(self, index, classes):
        size = index.size() + (classes,)
        view = index.size() + (1,)

        mask = torch.Tensor(*size).fill_(0).to(index.device)
        index = index.view(*view)
        ones = 1.

        if isinstance(index, Variable):
            ones = Variable(torch.Tensor(index.size()).fill_(1).to(index.device))
            mask = Variable(mask, volatile=index.volatile)

        return mask.scatter_(1, index, ones)

    def forward(self, input, target):
        y = self.one_hot(target, input.size(-1))
        logit = F.softmax(input, dim=-1)
        logit = logit.clamp(self.eps, 1. - self.eps)

        loss = -1 * y * torch.log(logit)  # cross entropy
        loss = loss * (1 - logit) ** self.gamma  # focal loss

        return loss.mean()


def sort_by_indices(features, indices, features_add=None):
    """
    To sort the sparse features with its indices in a convenient manner.
    Args:
        features: [N, C], sparse features
        indices: [N, 4], indices of sparse features
        features_add: [N, C], additional features to sort
    """
    idx = indices[:, 1:]
    idx_sum = idx.select(1, 0) * idx[:, 1].max() * idx[:, 2].max() + \
              idx.select(1, 1) * idx[:, 2].max() + \
              idx.select(1, 2)
    _, ind = idx_sum.sort()
    features = features[ind]
    indices = indices[ind]
    if not features_add is None:
        features_add = features_add[ind]
    return features, indices, features_add


def check_repeat(features, indices, features_add=None, sort_first=True, flip_first=True):
    """
    Check that whether there are replicate indices in the sparse features,
    remove the replicate features if any.
    """
    if sort_first:
        features, indices, features_add = sort_by_indices(features, indices, features_add)

    if flip_first:
        features, indices = features.flip([0]), indices.flip([0])

    if not features_add is None:
        features_add = features_add.flip([0])

    idx = indices[:, 1:].int()
    idx_sum = torch.add(torch.add(idx.select(1, 0) * idx[:, 1].max() * idx[:, 2].max(),
                                  idx.select(1, 1) * idx[:, 2].max()),
                        idx.select(1, 2))
    _unique, inverse, counts = torch.unique_consecutive(idx_sum, return_inverse=True, return_counts=True, dim=0)

    if _unique.shape[0] < indices.shape[0]:
        perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
        features_new = torch.zeros((_unique.shape[0], features.shape[-1]), device=features.device)
        features_new.index_add_(0, inverse.long(), features)
        features = features_new
        perm_ = inverse.new_empty(_unique.size(0)).scatter_(0, inverse, perm)
        indices = indices[perm_].int()

        if not features_add is None:
            features_add_new = torch.zeros((_unique.shape[0],), device=features_add.device)
            features_add_new.index_add_(0, inverse.long(), features_add)
            features_add = features_add_new / counts

    return features, indices, features_add


def split_voxels(x, b, imps_3d, voxels_3d, kernel_offsets, mask_multi=True, topk=True, threshold=0.5):
    """
    Generate and split the voxels into foreground and background sparse features,
    based on the predicted importance values.
    Args:
        x: [N, C], input sparse features
        b: int, batch size id
        imps_3d: [N, kernelsize**3], the prediced importance values
        voxels_3d: [N, 3], the 3d positions of voxel centers
        kernel_offsets: [kernelsize**3, 3], the offset coords in an kernel
        mask_multi: bool, whether to multiply the predicted mask to features
        topk: bool, whether to use topk or threshold for selection
        threshold: float, threshold value
    """
    index = x.indices[:, 0]
    batch_index = index == b
    indices_ori = x.indices[batch_index]
    features_ori = x.features[batch_index]
    mask_voxel = imps_3d[batch_index, -1].sigmoid()
    mask_kernel = imps_3d[batch_index, :-1].sigmoid()

    if mask_multi:
        features_ori *= mask_voxel.unsqueeze(-1)

    if topk:
        _, indices = mask_voxel.sort(descending=True)
        indices_fore = indices[:int(mask_voxel.shape[0] * threshold)]
        indices_back = indices[int(mask_voxel.shape[0] * threshold):]
    else:
        indices_fore = mask_voxel > threshold
        indices_back = mask_voxel <= threshold

    features_fore = features_ori[indices_fore]
    coords_fore = indices_ori[indices_fore]

    mask_kernel_fore = mask_kernel[indices_fore]
    mask_kernel_bool = mask_kernel_fore >= threshold
    voxel_kerels_imp = kernel_offsets.unsqueeze(0).repeat(mask_kernel_bool.shape[0], 1, 1)
    mask_kernel_fore = mask_kernel[indices_fore][mask_kernel_bool]
    indices_fore_kernels = coords_fore[:, 1:].unsqueeze(1).repeat(1, kernel_offsets.shape[0], 1)
    indices_with_imp = indices_fore_kernels + voxel_kerels_imp
    selected_indices = indices_with_imp[mask_kernel_bool]
    spatial_indices = (selected_indices[:, 0] > 0) * (selected_indices[:, 1] > 0) * (selected_indices[:, 2] > 0) * \
                      (selected_indices[:, 0] < x.spatial_shape[0]) * \
                      (selected_indices[:, 1] < x.spatial_shape[1]) * \
                      (selected_indices[:, 2] < x.spatial_shape[2])
    selected_indices = selected_indices[spatial_indices]
    mask_kernel_fore = mask_kernel_fore[spatial_indices]
    selected_indices = torch.cat(
        [torch.ones((selected_indices.shape[0], 1), device=features_fore.device) * b, selected_indices], dim=1)

    selected_features = torch.zeros((selected_indices.shape[0], features_ori.shape[1]), device=features_fore.device)

    features_fore_cat = torch.cat([features_fore, selected_features], dim=0)
    coords_fore = torch.cat([coords_fore, selected_indices], dim=0)
    mask_kernel_fore = torch.cat(
        [torch.ones(features_fore.shape[0], device=features_fore.device), mask_kernel_fore], dim=0)

    features_fore, coords_fore, mask_kernel_fore = check_repeat(
        features_fore_cat, coords_fore, features_add=mask_kernel_fore)

    features_back = features_ori[indices_back]
    coords_back = indices_ori[indices_back]

    return features_fore, coords_fore, features_back, coords_back, mask_kernel_fore
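A standalone sketch of the deduplication idea in check_repeat: rows with identical (sorted) index keys are collapsed by summing their features with index_add_, using the inverse mapping from torch.unique_consecutive.

    import torch

    idx_sum = torch.tensor([3, 3, 7, 9, 9, 9])          # already-sorted index keys
    features = torch.arange(6, dtype=torch.float).view(6, 1)
    uniq, inverse, counts = torch.unique_consecutive(
        idx_sum, return_inverse=True, return_counts=True, dim=0)
    merged = torch.zeros((uniq.shape[0], features.shape[-1]))
    merged.index_add_(0, inverse, features)             # sum features per unique key
    print(merged.squeeze(1))                            # tensor([ 1.,  2., 12.])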
pcdet/models/backbones_3d/spconv_backbone_focal.py
0 → 100755
View file @
5666ea67
from
functools
import
partial
import
torch
import
spconv.pytorch
as
spconv
import
torch.nn
as
nn
from
.focal_sparse_conv.focal_sparse_conv
import
FocalSparseConv
from
.focal_sparse_conv.SemanticSeg.pyramid_ffn
import
PyramidFeat2D
class
objDict
:
@
staticmethod
def
to_object
(
obj
:
object
,
**
data
):
obj
.
__dict__
.
update
(
data
)
class
ConfigDict
:
def
__init__
(
self
,
name
):
self
.
name
=
name
def
__getitem__
(
self
,
item
):
return
getattr
(
self
,
item
)
class
SparseSequentialBatchdict
(
spconv
.
SparseSequential
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
super
(
SparseSequentialBatchdict
,
self
).
__init__
(
*
args
,
**
kwargs
)
def
forward
(
self
,
input
,
batch_dict
=
None
):
for
k
,
module
in
self
.
_modules
.
items
():
if
module
is
None
:
continue
if
isinstance
(
module
,
(
FocalSparseConv
,)):
input
,
batch_dict
=
module
(
input
,
batch_dict
)
else
:
input
=
module
(
input
)
return
input
,
batch_dict
def
post_act_block
(
in_channels
,
out_channels
,
kernel_size
,
indice_key
=
None
,
stride
=
1
,
padding
=
0
,
conv_type
=
'subm'
,
norm_fn
=
None
):
if
conv_type
==
'subm'
:
conv
=
spconv
.
SubMConv3d
(
in_channels
,
out_channels
,
kernel_size
,
bias
=
False
,
indice_key
=
indice_key
)
elif
conv_type
==
'spconv'
:
conv
=
spconv
.
SparseConv3d
(
in_channels
,
out_channels
,
kernel_size
,
stride
=
stride
,
padding
=
padding
,
bias
=
False
,
indice_key
=
indice_key
)
elif
conv_type
==
'inverseconv'
:
conv
=
spconv
.
SparseInverseConv3d
(
in_channels
,
out_channels
,
kernel_size
,
indice_key
=
indice_key
,
bias
=
False
)
else
:
raise
NotImplementedError
m
=
spconv
.
SparseSequential
(
conv
,
norm_fn
(
out_channels
),
nn
.
ReLU
(
True
),
)
return
m
class SparseBasicBlock(spconv.SparseModule):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
        super(SparseBasicBlock, self).__init__()

        assert norm_fn is not None
        bias = norm_fn is not None
        self.conv1 = spconv.SubMConv3d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        self.conv2 = spconv.SubMConv3d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
        )
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))

        out = self.conv2(out)
        out = out.replace_feature(self.bn2(out.features))

        if self.downsample is not None:
            identity = self.downsample(x)

        out = out.replace_feature(out.features + identity.features)
        out = out.replace_feature(self.relu(out.features))

        return out
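Note that the block never mutates `features` in place: spconv's `SparseConvTensor` is updated functionally. A minimal, self-contained sketch of the idiom (assuming spconv.pytorch 2.x, where `replace_feature` returns a new sparse tensor with the same indices):

```python
import torch
import spconv.pytorch as spconv

# Toy sparse tensor: 2 active voxels with 4 channels in a batch of size 1.
feats = torch.randn(2, 4)
coords = torch.tensor([[0, 0, 0, 0], [0, 1, 2, 3]], dtype=torch.int32)  # (batch, z, y, x)
sp = spconv.SparseConvTensor(feats, coords, spatial_shape=[8, 8, 8], batch_size=1)

# Functional update: same indices, new feature tensor.
sp = sp.replace_feature(torch.relu(sp.features))
```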
class VoxelBackBone8xFocal(nn.Module):
    def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
        super().__init__()
        self.model_cfg = model_cfg
        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)

        self.sparse_shape = grid_size[::-1] + [1, 0, 0]

        self.conv_input = spconv.SparseSequential(
            spconv.SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'),
            norm_fn(16),
            nn.ReLU(True),
        )
        block = post_act_block

        use_img = model_cfg.get('USE_IMG', False)
        topk = model_cfg.get('TOPK', True)
        threshold = model_cfg.get('THRESHOLD', 0.5)
        kernel_size = model_cfg.get('KERNEL_SIZE', 3)
        mask_multi = model_cfg.get('MASK_MULTI', False)
        skip_mask_kernel = model_cfg.get('SKIP_MASK_KERNEL', False)
        skip_mask_kernel_image = model_cfg.get('SKIP_MASK_KERNEL_IMG', False)
        enlarge_voxel_channels = model_cfg.get('ENLARGE_VOXEL_CHANNELS', -1)
        img_pretrain = model_cfg.get('IMG_PRETRAIN', "../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth")

        if use_img:
            model_cfg_seg = dict(
                name='SemDeepLabV3',
                backbone='ResNet50',
                num_class=21,  # pretrained on COCO
                args={"feat_extract_layer": ["layer1"],
                      "pretrained_path": img_pretrain},
                channel_reduce={
                    "in_channels": [256],
                    "out_channels": [16],
                    "kernel_size": [1],
                    "stride": [1],
                    "bias": [False]
                }
            )
            cfg_dict = ConfigDict('SemDeepLabV3')
            objDict.to_object(cfg_dict, **model_cfg_seg)
            self.semseg = PyramidFeat2D(optimize=True, model_cfg=cfg_dict)

            self.conv_focal_multimodal = FocalSparseConv(
                16, 16, image_channel=model_cfg_seg['channel_reduce']['out_channels'][0],
                topk=topk, threshold=threshold, use_img=True, skip_mask_kernel=skip_mask_kernel_image,
                voxel_stride=1, norm_fn=norm_fn, indice_key='spconv_focal_multimodal'
            )

        special_spconv_fn = partial(
            FocalSparseConv, mask_multi=mask_multi, enlarge_voxel_channels=enlarge_voxel_channels,
            topk=topk, threshold=threshold, kernel_size=kernel_size, padding=kernel_size // 2,
            skip_mask_kernel=skip_mask_kernel
        )
        self.use_img = use_img

        self.conv1 = SparseSequentialBatchdict(
            block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'),
            special_spconv_fn(16, 16, voxel_stride=1, norm_fn=norm_fn, indice_key='focal1'),
        )

        self.conv2 = SparseSequentialBatchdict(
            # [1600, 1408, 41] <- [800, 704, 21]
            block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'),
            special_spconv_fn(32, 32, voxel_stride=2, norm_fn=norm_fn, indice_key='focal2'),
        )

        self.conv3 = SparseSequentialBatchdict(
            # [800, 704, 21] <- [400, 352, 11]
            block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'),
            special_spconv_fn(64, 64, voxel_stride=4, norm_fn=norm_fn, indice_key='focal3'),
        )

        self.conv4 = SparseSequentialBatchdict(
            # [400, 352, 11] <- [200, 176, 5]
            block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
            block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'),
        )

        last_pad = 0
        last_pad = self.model_cfg.get('last_pad', last_pad)
        self.conv_out = spconv.SparseSequential(
            # [200, 150, 5] -> [200, 150, 2]
            spconv.SparseConv3d(64, 128, (3, 1, 1), stride=(2, 1, 1), padding=last_pad,
                                bias=False, indice_key='spconv_down2'),
            norm_fn(128),
            nn.ReLU(True),
        )
        self.num_point_features = 128
        self.backbone_channels = {
            'x_conv1': 16,
            'x_conv2': 32,
            'x_conv3': 64,
            'x_conv4': 64
        }
    def forward(self, batch_dict):
        """
        Args:
            batch_dict:
                batch_size: int
                vfe_features: (num_voxels, C)
                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
        Returns:
            batch_dict:
                encoded_spconv_tensor: sparse tensor
        """
        voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
        batch_size = batch_dict['batch_size']
        input_sp_tensor = spconv.SparseConvTensor(
            features=voxel_features,
            indices=voxel_coords.int(),
            spatial_shape=self.sparse_shape,
            batch_size=batch_size
        )
        batch_dict['loss_box_of_pts'] = 0
        x = self.conv_input(input_sp_tensor)

        x_conv1, batch_dict = self.conv1(x, batch_dict)

        if self.use_img:
            x_image = self.semseg(batch_dict['images'])['layer1_feat2d']
            x_conv1, batch_dict = self.conv_focal_multimodal(x_conv1, batch_dict, x_image)

        x_conv2, batch_dict = self.conv2(x_conv1, batch_dict)
        x_conv3, batch_dict = self.conv3(x_conv2, batch_dict)
        x_conv4, batch_dict = self.conv4(x_conv3, batch_dict)

        # for detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(x_conv4)

        batch_dict.update({
            'encoded_spconv_tensor': out,
            'encoded_spconv_tensor_stride': 8
        })
        batch_dict.update({
            'multi_scale_3d_features': {
                'x_conv1': x_conv1,
                'x_conv2': x_conv2,
                'x_conv3': x_conv3,
                'x_conv4': x_conv4,
            }
        })
        batch_dict.update({
            'multi_scale_3d_strides': {
                'x_conv1': 1,
                'x_conv2': 2,
                'x_conv3': 4,
                'x_conv4': 8,
            }
        })

        return batch_dict
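The backbone reads all of its Focals Conv options from `model_cfg` with defaults, as `__init__` above shows. A hedged sketch of an equivalent Python-side config (hypothetical values; OpenPCDet normally supplies these through the YAML `BACKBONE_3D` section, and the shipped Focals Conv configs may differ):

```python
from easydict import EasyDict

# Hypothetical BACKBONE_3D options consumed by VoxelBackBone8xFocal.__init__.
backbone_cfg = EasyDict({
    'NAME': 'VoxelBackBone8xFocal',
    'USE_IMG': True,              # attach the image branch + multimodal focal conv
    'TOPK': True,                 # select foreground voxels by top-k ranking ...
    'THRESHOLD': 0.5,             # ... keeping this fraction (or probability cut)
    'KERNEL_SIZE': 3,
    'MASK_MULTI': False,
    'SKIP_MASK_KERNEL': False,
    'SKIP_MASK_KERNEL_IMG': False,
    'ENLARGE_VOXEL_CHANNELS': -1,
    'IMG_PRETRAIN': '../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth',
})
```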
pcdet/models/detectors/pv_rcnn.py
View file @ 5666ea67
...
@@ -12,6 +12,9 @@ class PVRCNN(Detector3DTemplate):
        if self.training:
            loss, tb_dict, disp_dict = self.get_training_loss()
            if 'loss_box_of_pts' in batch_dict:
                loss += batch_dict['loss_box_of_pts']
                tb_dict['loss_box_of_pts'] = batch_dict['loss_box_of_pts']
            ret_dict = {
                'loss': loss
...
pcdet/models/detectors/voxel_rcnn.py
View file @ 5666ea67
...
@@ -13,6 +13,10 @@ class VoxelRCNN(Detector3DTemplate):
        if self.training:
            loss, tb_dict, disp_dict = self.get_training_loss()

            if 'loss_box_of_pts' in batch_dict:
                loss += batch_dict['loss_box_of_pts']
                tb_dict['loss_box_of_pts'] = batch_dict['loss_box_of_pts']
            ret_dict = {
                'loss': loss
            }
...
pcdet/utils/box2d_utils.py  0 → 100755
View file @ 5666ea67
import torch


def area(box) -> torch.Tensor:
    """
    Computes the area of all the boxes.
    Returns:
        torch.Tensor: a vector with areas of each box.
    """
    area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    return area


# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def pairwise_iou(boxes1, boxes2) -> torch.Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).
    Args:
        boxes1, boxes2 (Tensor): contain N & M boxes, respectively.
    Returns:
        Tensor: IoU, sized [N, M].
    """
    area1 = area(boxes1)
    area2 = area(boxes2)

    width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max(
        boxes1[:, None, :2], boxes2[:, :2]
    )  # [N,M,2]

    width_height.clamp_(min=0)  # [N,M,2]
    inter = width_height.prod(dim=2)  # [N,M]
    del width_height

    # handle empty boxes
    iou = torch.where(
        inter > 0,
        inter / (area1[:, None] + area2 - inter),
        torch.zeros(1, dtype=inter.dtype, device=inter.device),
    )
    return iou
\ No newline at end of file
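A quick usage sketch of `pairwise_iou` with hand-checkable values: identical boxes give IoU 1.0, and a box shifted by one unit in each direction overlaps a 2×2 box on an area of 1 out of a union of 7.

```python
import torch

boxes_a = torch.tensor([[0., 0., 2., 2.],
                        [1., 1., 3., 3.]])   # (xmin, ymin, xmax, ymax)
boxes_b = torch.tensor([[0., 0., 2., 2.]])

iou = pairwise_iou(boxes_a, boxes_b)
# shape [2, 1]: iou[0, 0] == 1.0, iou[1, 0] == 1/7 ≈ 0.1429
```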
pcdet/utils/box_utils.py
View file @ 5666ea67
...
@@ -52,6 +52,43 @@ def boxes_to_corners_3d(boxes3d):
    return corners3d.numpy() if is_numpy else corners3d
def corners_rect_to_camera(corners):
    """
        7 -------- 4
       /|         /|
      6 -------- 5 .
      | |        | |
      . 3 -------- 0
      |/         |/
      2 -------- 1
    Args:
        corners: (8, 3) [x0, y0, z0, ...], (x, y, z) is the point coordinate in image rect
    Returns:
        boxes_rect: (7,) [x, y, z, l, h, w, r] in rect camera coords
    """
    height_group = [(0, 4), (1, 5), (2, 6), (3, 7)]
    width_group = [(0, 1), (2, 3), (4, 5), (6, 7)]
    length_group = [(0, 3), (1, 2), (4, 7), (5, 6)]
    vector_group = [(0, 3), (1, 2), (4, 7), (5, 6)]
    height, width, length = 0., 0., 0.
    vector = np.zeros(2, dtype=np.float32)
    for index_h, index_w, index_l, index_v in zip(height_group, width_group, length_group, vector_group):
        height += np.linalg.norm(corners[index_h[0], :] - corners[index_h[1], :])
        width += np.linalg.norm(corners[index_w[0], :] - corners[index_w[1], :])
        length += np.linalg.norm(corners[index_l[0], :] - corners[index_l[1], :])
        vector[0] += (corners[index_v[0], :] - corners[index_v[1], :])[0]
        vector[1] += (corners[index_v[0], :] - corners[index_v[1], :])[2]

    height, width, length = height * 1.0 / 4, width * 1.0 / 4, length * 1.0 / 4
    rotation_y = -np.arctan2(vector[1], vector[0])

    center_point = corners.mean(axis=0)
    center_point[1] += height / 2
    camera_rect = np.concatenate([center_point, np.array([length, height, width, rotation_y])])

    return camera_rect
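A quick sanity check with a hypothetical box: corner ordering follows the docstring layout, with y pointing down and the box bottom at y = 0, as in KITTI rect-camera coordinates. The function should recover the box center, dimensions, and a zero heading.

```python
import numpy as np

l, h, w = 4.0, 1.5, 1.6  # hypothetical car-sized box at the origin, ry = 0
x = np.array([ l/2,  l/2, -l/2, -l/2,  l/2,  l/2, -l/2, -l/2])
y = np.array([ 0.0,  0.0,  0.0,  0.0,   -h,   -h,   -h,   -h])  # y is down
z = np.array([ w/2, -w/2, -w/2,  w/2,  w/2, -w/2, -w/2,  w/2])
corners = np.stack([x, y, z], axis=1)        # (8, 3), matching the layout above

box = corners_rect_to_camera(corners)
# -> approximately [0, 0, 0, 4.0, 1.5, 1.6, 0.0] = [x, y, z, l, h, w, ry]
```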
def mask_boxes_outside_range_numpy(boxes, limit_range, min_num_corners=1):
    """
...