Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenPCDet
Commits
4c8009fc
Unverified
Commit
4c8009fc
authored
Jul 04, 2022
by
Shaoshuai Shi
Committed by
GitHub
Jul 04, 2022
Browse files
Merge pull request #988 from yukang2017/focalsconv
Merge to support Focals Conv (CVPR 2022 paper).
parents
dadda9ed
fa330622
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
1568 additions
and
67 deletions
+1568
-67
pcdet/datasets/augmentor/augmentor_utils.py
pcdet/datasets/augmentor/augmentor_utils.py
+12
-7
pcdet/datasets/augmentor/data_augmentor.py
pcdet/datasets/augmentor/data_augmentor.py
+43
-40
pcdet/datasets/augmentor/database_sampler.py
pcdet/datasets/augmentor/database_sampler.py
+244
-13
pcdet/datasets/dataset.py
pcdet/datasets/dataset.py
+21
-6
pcdet/datasets/kitti/kitti_dataset.py
pcdet/datasets/kitti/kitti_dataset.py
+1
-0
pcdet/models/backbones_3d/__init__.py
pcdet/models/backbones_3d/__init__.py
+2
-0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py
...ackbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py
+65
-0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py
...backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py
+77
-0
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py
...ckbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py
+160
-0
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py
...odels/backbones_3d/focal_sparse_conv/focal_sparse_conv.py
+224
-0
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_utils.py
...dels/backbones_3d/focal_sparse_conv/focal_sparse_utils.py
+147
-0
pcdet/models/backbones_3d/spconv_backbone_focal.py
pcdet/models/backbones_3d/spconv_backbone_focal.py
+269
-0
pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn/ddn_template.py
...ackbones_3d/vfe/image_vfe_modules/ffn/ddn/ddn_template.py
+1
-1
pcdet/models/detectors/pv_rcnn.py
pcdet/models/detectors/pv_rcnn.py
+5
-0
pcdet/models/detectors/voxel_rcnn.py
pcdet/models/detectors/voxel_rcnn.py
+5
-0
pcdet/utils/box_utils.py
pcdet/utils/box_utils.py
+83
-0
tools/cfgs/kitti_models/voxel_rcnn_car_focal_multimodal.yaml
tools/cfgs/kitti_models/voxel_rcnn_car_focal_multimodal.yaml
+209
-0
No files found.
pcdet/datasets/augmentor/augmentor_utils.py
View file @
4c8009fc
...
...
@@ -5,7 +5,7 @@ from ...utils import common_utils
from
...utils
import
box_utils
def
random_flip_along_x
(
gt_boxes
,
points
):
def
random_flip_along_x
(
gt_boxes
,
points
,
return_flip
=
False
):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
...
...
@@ -20,11 +20,12 @@ def random_flip_along_x(gt_boxes, points):
if
gt_boxes
.
shape
[
1
]
>
7
:
gt_boxes
[:,
8
]
=
-
gt_boxes
[:,
8
]
if
return_flip
:
return
gt_boxes
,
points
,
enable
return
gt_boxes
,
points
def
random_flip_along_y
(
gt_boxes
,
points
):
def
random_flip_along_y
(
gt_boxes
,
points
,
return_flip
=
False
):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
...
...
@@ -39,11 +40,12 @@ def random_flip_along_y(gt_boxes, points):
if
gt_boxes
.
shape
[
1
]
>
7
:
gt_boxes
[:,
7
]
=
-
gt_boxes
[:,
7
]
if
return_flip
:
return
gt_boxes
,
points
,
enable
return
gt_boxes
,
points
def
global_rotation
(
gt_boxes
,
points
,
rot_range
):
def
global_rotation
(
gt_boxes
,
points
,
rot_range
,
return_rot
=
False
):
"""
Args:
gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
...
...
@@ -61,10 +63,12 @@ def global_rotation(gt_boxes, points, rot_range):
np
.
array
([
noise_rotation
])
)[
0
][:,
0
:
2
]
if
return_rot
:
return
gt_boxes
,
points
,
noise_rotation
return
gt_boxes
,
points
def
global_scaling
(
gt_boxes
,
points
,
scale_range
):
def
global_scaling
(
gt_boxes
,
points
,
scale_range
,
return_scale
=
False
):
"""
Args:
gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
...
...
@@ -77,7 +81,8 @@ def global_scaling(gt_boxes, points, scale_range):
noise_scale
=
np
.
random
.
uniform
(
scale_range
[
0
],
scale_range
[
1
])
points
[:,
:
3
]
*=
noise_scale
gt_boxes
[:,
:
6
]
*=
noise_scale
if
return_scale
:
return
gt_boxes
,
points
,
noise_scale
return
gt_boxes
,
points
...
...
pcdet/datasets/augmentor/data_augmentor.py
View file @
4c8009fc
...
...
@@ -11,18 +11,18 @@ class DataAugmentor(object):
self
.
root_path
=
root_path
self
.
class_names
=
class_names
self
.
logger
=
logger
self
.
data_augmentor_queue
=
[]
aug_config_list
=
augmentor_configs
if
isinstance
(
augmentor_configs
,
list
)
\
else
augmentor_configs
.
AUG_CONFIG_LIST
for
cur_cfg
in
aug_config_list
:
if
not
isinstance
(
augmentor_configs
,
list
):
if
cur_cfg
.
NAME
in
augmentor_configs
.
DISABLE_AUG_LIST
:
continue
cur_augmentor
=
getattr
(
self
,
cur_cfg
.
NAME
)(
config
=
cur_cfg
)
self
.
data_augmentor_queue
.
append
(
cur_augmentor
)
def
gt_sampling
(
self
,
config
=
None
):
db_sampler
=
database_sampler
.
DataBaseSampler
(
root_path
=
self
.
root_path
,
...
...
@@ -31,54 +31,57 @@ class DataAugmentor(object):
logger
=
self
.
logger
)
return
db_sampler
def
__getstate__
(
self
):
d
=
dict
(
self
.
__dict__
)
del
d
[
'logger'
]
return
d
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
def
random_world_flip
(
self
,
data_dict
=
None
,
config
=
None
):
if
data_dict
is
None
:
return
partial
(
self
.
random_world_flip
,
config
=
config
)
gt_boxes
,
points
=
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
]
for
cur_axis
in
config
[
'ALONG_AXIS_LIST'
]:
assert
cur_axis
in
[
'x'
,
'y'
]
gt_boxes
,
points
=
getattr
(
augmentor_utils
,
'random_flip_along_%s'
%
cur_axis
)(
gt_boxes
,
points
,
gt_boxes
,
points
,
enable
=
getattr
(
augmentor_utils
,
'random_flip_along_%s'
%
cur_axis
)(
gt_boxes
,
points
,
return_flip
=
True
)
data_dict
[
'flip_%s'
%
cur_axis
]
=
enable
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_world_rotation
(
self
,
data_dict
=
None
,
config
=
None
):
if
data_dict
is
None
:
return
partial
(
self
.
random_world_rotation
,
config
=
config
)
rot_range
=
config
[
'WORLD_ROT_ANGLE'
]
if
not
isinstance
(
rot_range
,
list
):
rot_range
=
[
-
rot_range
,
rot_range
]
gt_boxes
,
points
=
augmentor_utils
.
global_rotation
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
rot_range
=
rot_range
gt_boxes
,
points
,
noise_rot
=
augmentor_utils
.
global_rotation
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
rot_range
=
rot_range
,
return_rot
=
True
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
data_dict
[
'noise_rot'
]
=
noise_rot
return
data_dict
def
random_world_scaling
(
self
,
data_dict
=
None
,
config
=
None
):
if
data_dict
is
None
:
return
partial
(
self
.
random_world_scaling
,
config
=
config
)
gt_boxes
,
points
=
augmentor_utils
.
global_scaling
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
config
[
'WORLD_SCALE_RANGE'
]
gt_boxes
,
points
,
noise_scale
=
augmentor_utils
.
global_scaling
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
config
[
'WORLD_SCALE_RANGE'
]
,
return_scale
=
True
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
data_dict
[
'noise_scale'
]
=
noise_scale
return
data_dict
def
random_image_flip
(
self
,
data_dict
=
None
,
config
=
None
):
if
data_dict
is
None
:
return
partial
(
self
.
random_image_flip
,
config
=
config
)
...
...
@@ -92,12 +95,12 @@ class DataAugmentor(object):
images
,
depth_maps
,
gt_boxes
=
getattr
(
augmentor_utils
,
'random_image_flip_%s'
%
cur_axis
)(
images
,
depth_maps
,
gt_boxes
,
calib
,
)
data_dict
[
'images'
]
=
images
data_dict
[
'depth_maps'
]
=
depth_maps
data_dict
[
'gt_boxes'
]
=
gt_boxes
return
data_dict
def
random_world_translation
(
self
,
data_dict
=
None
,
config
=
None
):
if
data_dict
is
None
:
return
partial
(
self
.
random_world_translation
,
config
=
config
)
...
...
@@ -128,11 +131,11 @@ class DataAugmentor(object):
gt_boxes
,
points
=
getattr
(
augmentor_utils
,
'random_local_translation_along_%s'
%
cur_axis
)(
gt_boxes
,
points
,
offset_range
,
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_local_rotation
(
self
,
data_dict
=
None
,
config
=
None
):
"""
Please check the correctness of it before using.
...
...
@@ -145,11 +148,11 @@ class DataAugmentor(object):
gt_boxes
,
points
=
augmentor_utils
.
local_rotation
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
rot_range
=
rot_range
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_local_scaling
(
self
,
data_dict
=
None
,
config
=
None
):
"""
Please check the correctness of it before using.
...
...
@@ -159,18 +162,18 @@ class DataAugmentor(object):
gt_boxes
,
points
=
augmentor_utils
.
local_scaling
(
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
],
config
[
'LOCAL_SCALE_RANGE'
]
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_world_frustum_dropout
(
self
,
data_dict
=
None
,
config
=
None
):
"""
Please check the correctness of it before using.
"""
if
data_dict
is
None
:
return
partial
(
self
.
random_world_frustum_dropout
,
config
=
config
)
intensity_range
=
config
[
'INTENSITY_RANGE'
]
gt_boxes
,
points
=
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
]
for
direction
in
config
[
'DIRECTION'
]:
...
...
@@ -178,18 +181,18 @@ class DataAugmentor(object):
gt_boxes
,
points
=
getattr
(
augmentor_utils
,
'global_frustum_dropout_%s'
%
direction
)(
gt_boxes
,
points
,
intensity_range
,
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_local_frustum_dropout
(
self
,
data_dict
=
None
,
config
=
None
):
"""
Please check the correctness of it before using.
"""
if
data_dict
is
None
:
return
partial
(
self
.
random_local_frustum_dropout
,
config
=
config
)
intensity_range
=
config
[
'INTENSITY_RANGE'
]
gt_boxes
,
points
=
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
]
for
direction
in
config
[
'DIRECTION'
]:
...
...
@@ -197,21 +200,21 @@ class DataAugmentor(object):
gt_boxes
,
points
=
getattr
(
augmentor_utils
,
'local_frustum_dropout_%s'
%
direction
)(
gt_boxes
,
points
,
intensity_range
,
)
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
random_local_pyramid_aug
(
self
,
data_dict
=
None
,
config
=
None
):
"""
Refer to the paper:
Refer to the paper:
SE-SSD: Self-Ensembling Single-Stage Object Detector From Point Cloud
"""
if
data_dict
is
None
:
return
partial
(
self
.
random_local_pyramid_aug
,
config
=
config
)
gt_boxes
,
points
=
data_dict
[
'gt_boxes'
],
data_dict
[
'points'
]
gt_boxes
,
points
,
pyramids
=
augmentor_utils
.
local_pyramid_dropout
(
gt_boxes
,
points
,
config
[
'DROP_PROB'
])
gt_boxes
,
points
,
pyramids
=
augmentor_utils
.
local_pyramid_sparsify
(
gt_boxes
,
points
,
config
[
'SPARSIFY_PROB'
],
...
...
@@ -224,7 +227,7 @@ class DataAugmentor(object):
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'points'
]
=
points
return
data_dict
def
forward
(
self
,
data_dict
):
"""
Args:
...
...
@@ -238,12 +241,12 @@ class DataAugmentor(object):
"""
for
cur_augmentor
in
self
.
data_augmentor_queue
:
data_dict
=
cur_augmentor
(
data_dict
=
data_dict
)
data_dict
[
'gt_boxes'
][:,
6
]
=
common_utils
.
limit_period
(
data_dict
[
'gt_boxes'
][:,
6
],
offset
=
0.5
,
period
=
2
*
np
.
pi
)
if
'calib'
in
data_dict
:
data_dict
.
pop
(
'calib'
)
#
if 'calib' in data_dict:
#
data_dict.pop('calib')
if
'road_plane'
in
data_dict
:
data_dict
.
pop
(
'road_plane'
)
if
'gt_boxes_mask'
in
data_dict
:
...
...
@@ -252,6 +255,6 @@ class DataAugmentor(object):
data_dict
[
'gt_names'
]
=
data_dict
[
'gt_names'
][
gt_boxes_mask
]
if
'gt_boxes2d'
in
data_dict
:
data_dict
[
'gt_boxes2d'
]
=
data_dict
[
'gt_boxes2d'
][
gt_boxes_mask
]
data_dict
.
pop
(
'gt_boxes_mask'
)
return
data_dict
pcdet/datasets/augmentor/database_sampler.py
View file @
4c8009fc
...
...
@@ -3,25 +3,31 @@ import pickle
import
os
import
copy
import
numpy
as
np
from
skimage
import
io
import
torch
import
SharedArray
import
torch.distributed
as
dist
from
...ops.iou3d_nms
import
iou3d_nms_utils
from
...utils
import
box_utils
,
common_utils
from
...utils
import
box_utils
,
common_utils
,
calibration_kitti
from
pcdet.datasets.kitti.kitti_object_eval_python
import
kitti_common
class
DataBaseSampler
(
object
):
def
__init__
(
self
,
root_path
,
sampler_cfg
,
class_names
,
logger
=
None
):
self
.
root_path
=
root_path
self
.
class_names
=
class_names
self
.
sampler_cfg
=
sampler_cfg
self
.
img_aug_type
=
sampler_cfg
.
get
(
'IMG_AUG_TYPE'
,
None
)
self
.
img_aug_iou_thresh
=
sampler_cfg
.
get
(
'IMG_AUG_IOU_THRESH'
,
0.5
)
self
.
logger
=
logger
self
.
db_infos
=
{}
for
class_name
in
class_names
:
self
.
db_infos
[
class_name
]
=
[]
self
.
use_shared_memory
=
sampler_cfg
.
get
(
'USE_SHARED_MEMORY'
,
False
)
for
db_info_path
in
sampler_cfg
.
DB_INFO_PATH
:
db_info_path
=
self
.
root_path
.
resolve
()
/
db_info_path
with
open
(
str
(
db_info_path
),
'rb'
)
as
f
:
...
...
@@ -30,7 +36,7 @@ class DataBaseSampler(object):
for
func_name
,
val
in
sampler_cfg
.
PREPARE
.
items
():
self
.
db_infos
=
getattr
(
self
,
func_name
)(
self
.
db_infos
,
val
)
self
.
gt_database_data_key
=
self
.
load_db_to_shared_memory
()
if
self
.
use_shared_memory
else
None
self
.
sample_groups
=
{}
...
...
@@ -79,7 +85,7 @@ class DataBaseSampler(object):
if
cur_rank
%
num_gpus
==
0
and
not
os
.
path
.
exists
(
f
"/dev/shm/
{
sa_key
}
"
):
gt_database_data
=
np
.
load
(
db_data_path
)
common_utils
.
sa_create
(
f
"shm://
{
sa_key
}
"
,
gt_database_data
)
if
num_gpus
>
1
:
dist
.
barrier
()
self
.
logger
.
info
(
'GT database has been saved to shared memory'
)
...
...
@@ -153,12 +159,208 @@ class DataBaseSampler(object):
gt_boxes
[:,
2
]
-=
mv_height
# lidar view
return
gt_boxes
,
mv_height
def
add_sampled_boxes_to_scene
(
self
,
data_dict
,
sampled_gt_boxes
,
total_valid_sampled_dict
):
def
copy_paste_to_image_kitti
(
self
,
data_dict
,
crop_feat
,
gt_number
,
point_idxes
=
None
):
kitti_img_aug_type
=
'by_depth'
kitti_img_aug_use_type
=
'annotation'
image
=
data_dict
[
'images'
]
boxes3d
=
data_dict
[
'gt_boxes'
]
boxes2d
=
data_dict
[
'gt_boxes2d'
]
corners_lidar
=
box_utils
.
boxes_to_corners_3d
(
boxes3d
)
if
'depth'
in
kitti_img_aug_type
:
paste_order
=
boxes3d
[:,
0
].
argsort
()
paste_order
=
paste_order
[::
-
1
]
else
:
paste_order
=
np
.
arange
(
len
(
boxes3d
),
dtype
=
np
.
int
)
if
'reverse'
in
kitti_img_aug_type
:
paste_order
=
paste_order
[::
-
1
]
paste_mask
=
-
255
*
np
.
ones
(
image
.
shape
[:
2
],
dtype
=
np
.
int
)
fg_mask
=
np
.
zeros
(
image
.
shape
[:
2
],
dtype
=
np
.
int
)
overlap_mask
=
np
.
zeros
(
image
.
shape
[:
2
],
dtype
=
np
.
int
)
depth_mask
=
np
.
zeros
((
*
image
.
shape
[:
2
],
2
),
dtype
=
np
.
float
)
points_2d
,
depth_2d
=
data_dict
[
'calib'
].
lidar_to_img
(
data_dict
[
'points'
][:,:
3
])
points_2d
[:,
0
]
=
np
.
clip
(
points_2d
[:,
0
],
a_min
=
0
,
a_max
=
image
.
shape
[
1
]
-
1
)
points_2d
[:,
1
]
=
np
.
clip
(
points_2d
[:,
1
],
a_min
=
0
,
a_max
=
image
.
shape
[
0
]
-
1
)
points_2d
=
points_2d
.
astype
(
np
.
int
)
for
_order
in
paste_order
:
_box2d
=
boxes2d
[
_order
]
image
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
]]
=
crop_feat
[
_order
]
overlap_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
]]
+=
\
(
paste_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
]]
>
0
).
astype
(
np
.
int
)
paste_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
]]
=
_order
if
'cover'
in
kitti_img_aug_use_type
:
# HxWx2 for min and max depth of each box region
depth_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
],
0
]
=
corners_lidar
[
_order
,:,
0
].
min
()
depth_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
],
1
]
=
corners_lidar
[
_order
,:,
0
].
max
()
# foreground area of original point cloud in image plane
if
_order
<
gt_number
:
fg_mask
[
_box2d
[
1
]:
_box2d
[
3
],
_box2d
[
0
]:
_box2d
[
2
]]
=
1
data_dict
[
'images'
]
=
image
# if not self.joint_sample:
# return data_dict
new_mask
=
paste_mask
[
points_2d
[:,
1
],
points_2d
[:,
0
]]
==
(
point_idxes
+
gt_number
)
if
False
:
# self.keep_raw:
raw_mask
=
(
point_idxes
==
-
1
)
else
:
raw_fg
=
(
fg_mask
==
1
)
&
(
paste_mask
>=
0
)
&
(
paste_mask
<
gt_number
)
raw_bg
=
(
fg_mask
==
0
)
&
(
paste_mask
<
0
)
raw_mask
=
raw_fg
[
points_2d
[:,
1
],
points_2d
[:,
0
]]
|
raw_bg
[
points_2d
[:,
1
],
points_2d
[:,
0
]]
keep_mask
=
new_mask
|
raw_mask
data_dict
[
'points_2d'
]
=
points_2d
if
'annotation'
in
kitti_img_aug_use_type
:
data_dict
[
'points'
]
=
data_dict
[
'points'
][
keep_mask
]
data_dict
[
'points_2d'
]
=
data_dict
[
'points_2d'
][
keep_mask
]
elif
'projection'
in
kitti_img_aug_use_type
:
overlap_mask
[
overlap_mask
>=
1
]
=
1
data_dict
[
'overlap_mask'
]
=
overlap_mask
if
'cover'
in
kitti_img_aug_use_type
:
data_dict
[
'depth_mask'
]
=
depth_mask
return
data_dict
def
collect_image_crops_kitti
(
self
,
info
,
data_dict
,
obj_points
,
sampled_gt_boxes
,
sampled_gt_boxes2d
,
idx
):
calib_file
=
kitti_common
.
get_calib_path
(
int
(
info
[
'image_idx'
]),
self
.
root_path
,
relative_path
=
False
)
sampled_calib
=
calibration_kitti
.
Calibration
(
calib_file
)
points_2d
,
depth_2d
=
sampled_calib
.
lidar_to_img
(
obj_points
[:,:
3
])
if
True
:
# self.point_refine:
# align calibration metrics for points
points_ract
=
data_dict
[
'calib'
].
img_to_rect
(
points_2d
[:,
0
],
points_2d
[:,
1
],
depth_2d
)
points_lidar
=
data_dict
[
'calib'
].
rect_to_lidar
(
points_ract
)
obj_points
[:,
:
3
]
=
points_lidar
# align calibration metrics for boxes
box3d_raw
=
sampled_gt_boxes
[
idx
].
reshape
(
1
,
-
1
)
box3d_coords
=
box_utils
.
boxes_to_corners_3d
(
box3d_raw
)[
0
]
box3d_box
,
box3d_depth
=
sampled_calib
.
lidar_to_img
(
box3d_coords
)
box3d_coord_rect
=
data_dict
[
'calib'
].
img_to_rect
(
box3d_box
[:,
0
],
box3d_box
[:,
1
],
box3d_depth
)
box3d_rect
=
box_utils
.
corners_rect_to_camera
(
box3d_coord_rect
).
reshape
(
1
,
-
1
)
box3d_lidar
=
box_utils
.
boxes3d_kitti_camera_to_lidar
(
box3d_rect
,
data_dict
[
'calib'
])
box2d
=
box_utils
.
boxes3d_kitti_camera_to_imageboxes
(
box3d_rect
,
data_dict
[
'calib'
],
data_dict
[
'images'
].
shape
[:
2
])
sampled_gt_boxes
[
idx
]
=
box3d_lidar
[
0
]
sampled_gt_boxes2d
[
idx
]
=
box2d
[
0
]
obj_idx
=
idx
*
np
.
ones
(
len
(
obj_points
),
dtype
=
np
.
int
)
# copy crops from images
img_path
=
self
.
root_path
/
f
'training/image_2/
{
info
[
"image_idx"
]
}
.png'
raw_image
=
io
.
imread
(
img_path
)
raw_image
=
raw_image
.
astype
(
np
.
float32
)
raw_center
=
info
[
'bbox'
].
reshape
(
2
,
2
).
mean
(
0
)
new_box
=
sampled_gt_boxes2d
[
idx
].
astype
(
np
.
int
)
new_shape
=
np
.
array
([
new_box
[
2
]
-
new_box
[
0
],
new_box
[
3
]
-
new_box
[
1
]])
raw_box
=
np
.
concatenate
([
raw_center
-
new_shape
/
2
,
raw_center
+
new_shape
/
2
]).
astype
(
np
.
int
)
raw_box
[
0
::
2
]
=
np
.
clip
(
raw_box
[
0
::
2
],
a_min
=
0
,
a_max
=
raw_image
.
shape
[
1
])
raw_box
[
1
::
2
]
=
np
.
clip
(
raw_box
[
1
::
2
],
a_min
=
0
,
a_max
=
raw_image
.
shape
[
0
])
if
(
raw_box
[
2
]
-
raw_box
[
0
])
!=
new_shape
[
0
]
or
(
raw_box
[
3
]
-
raw_box
[
1
])
!=
new_shape
[
1
]:
new_center
=
new_box
.
reshape
(
2
,
2
).
mean
(
0
)
new_shape
=
np
.
array
([
raw_box
[
2
]
-
raw_box
[
0
],
raw_box
[
3
]
-
raw_box
[
1
]])
new_box
=
np
.
concatenate
([
new_center
-
new_shape
/
2
,
new_center
+
new_shape
/
2
]).
astype
(
np
.
int
)
img_crop2d
=
raw_image
[
raw_box
[
1
]:
raw_box
[
3
],
raw_box
[
0
]:
raw_box
[
2
]]
/
255
return
new_box
,
img_crop2d
,
obj_points
,
obj_idx
def
sample_gt_boxes_2d_kitti
(
self
,
data_dict
,
sampled_boxes
,
valid_mask
):
mv_height
=
None
# filter out box2d iou > thres
if
self
.
sampler_cfg
.
get
(
'USE_ROAD_PLANE'
,
False
):
sampled_boxes
,
mv_height
=
self
.
put_boxes_on_road_planes
(
sampled_boxes
,
data_dict
[
'road_plane'
],
data_dict
[
'calib'
]
)
# sampled_boxes2d = np.stack([x['bbox'] for x in sampled_dict], axis=0).astype(np.float32)
boxes3d_camera
=
box_utils
.
boxes3d_lidar_to_kitti_camera
(
sampled_boxes
,
data_dict
[
'calib'
])
sampled_boxes2d
=
box_utils
.
boxes3d_kitti_camera_to_imageboxes
(
boxes3d_camera
,
data_dict
[
'calib'
],
data_dict
[
'images'
].
shape
[:
2
])
sampled_boxes2d
=
torch
.
Tensor
(
sampled_boxes2d
)
existed_boxes2d
=
torch
.
Tensor
(
data_dict
[
'gt_boxes2d'
])
iou2d1
=
box_utils
.
pairwise_iou
(
sampled_boxes2d
,
existed_boxes2d
).
cpu
().
numpy
()
iou2d2
=
box_utils
.
pairwise_iou
(
sampled_boxes2d
,
sampled_boxes2d
).
cpu
().
numpy
()
iou2d2
[
range
(
sampled_boxes2d
.
shape
[
0
]),
range
(
sampled_boxes2d
.
shape
[
0
])]
=
0
iou2d1
=
iou2d1
if
iou2d1
.
shape
[
1
]
>
0
else
iou2d2
ret_valid_mask
=
((
iou2d1
.
max
(
axis
=
1
)
<
self
.
img_aug_iou_thresh
)
&
(
iou2d2
.
max
(
axis
=
1
)
<
self
.
img_aug_iou_thresh
)
&
(
valid_mask
))
sampled_boxes2d
=
sampled_boxes2d
[
ret_valid_mask
].
cpu
().
numpy
()
if
mv_height
is
not
None
:
mv_height
=
mv_height
[
ret_valid_mask
]
return
sampled_boxes2d
,
mv_height
,
ret_valid_mask
def
sample_gt_boxes_2d
(
self
,
data_dict
,
sampled_boxes
,
valid_mask
):
mv_height
=
None
if
self
.
img_aug_type
==
'kitti'
:
sampled_boxes2d
,
mv_height
,
ret_valid_mask
=
self
.
sample_gt_boxes_2d_kitti
(
data_dict
,
sampled_boxes
,
valid_mask
)
else
:
raise
NotImplementedError
return
sampled_boxes2d
,
mv_height
,
ret_valid_mask
def
initilize_image_aug_dict
(
self
,
data_dict
,
gt_boxes_mask
):
img_aug_gt_dict
=
None
if
self
.
img_aug_type
is
None
:
pass
elif
self
.
img_aug_type
==
'kitti'
:
obj_index_list
,
crop_boxes2d
=
[],
[]
gt_number
=
gt_boxes_mask
.
sum
().
astype
(
np
.
int
)
gt_boxes2d
=
data_dict
[
'gt_boxes2d'
][
gt_boxes_mask
].
astype
(
np
.
int
)
gt_crops2d
=
[
data_dict
[
'images'
][
_x
[
1
]:
_x
[
3
],
_x
[
0
]:
_x
[
2
]]
for
_x
in
gt_boxes2d
]
img_aug_gt_dict
=
{
'obj_index_list'
:
obj_index_list
,
'gt_crops2d'
:
gt_crops2d
,
'gt_boxes2d'
:
gt_boxes2d
,
'gt_number'
:
gt_number
,
'crop_boxes2d'
:
crop_boxes2d
}
else
:
raise
NotImplementedError
return
img_aug_gt_dict
def
collect_image_crops
(
self
,
img_aug_gt_dict
,
info
,
data_dict
,
obj_points
,
sampled_gt_boxes
,
sampled_gt_boxes2d
,
idx
):
if
self
.
img_aug_type
==
'kitti'
:
new_box
,
img_crop2d
,
obj_points
,
obj_idx
=
self
.
collect_image_crops_kitti
(
info
,
data_dict
,
obj_points
,
sampled_gt_boxes
,
sampled_gt_boxes2d
,
idx
)
img_aug_gt_dict
[
'crop_boxes2d'
].
append
(
new_box
)
img_aug_gt_dict
[
'gt_crops2d'
].
append
(
img_crop2d
)
img_aug_gt_dict
[
'obj_index_list'
].
append
(
obj_idx
)
else
:
raise
NotImplementedError
return
img_aug_gt_dict
,
obj_points
def
copy_paste_to_image
(
self
,
img_aug_gt_dict
,
data_dict
,
points
):
if
self
.
img_aug_type
==
'kitti'
:
obj_points_idx
=
np
.
concatenate
(
img_aug_gt_dict
[
'obj_index_list'
],
axis
=
0
)
point_idxes
=
-
1
*
np
.
ones
(
len
(
points
),
dtype
=
np
.
int
)
point_idxes
[:
obj_points_idx
.
shape
[
0
]]
=
obj_points_idx
data_dict
[
'gt_boxes2d'
]
=
np
.
concatenate
([
img_aug_gt_dict
[
'gt_boxes2d'
],
np
.
array
(
img_aug_gt_dict
[
'crop_boxes2d'
])],
axis
=
0
)
data_dict
=
self
.
copy_paste_to_image_kitti
(
data_dict
,
img_aug_gt_dict
[
'gt_crops2d'
],
img_aug_gt_dict
[
'gt_number'
],
point_idxes
)
if
'road_plane'
in
data_dict
:
data_dict
.
pop
(
'road_plane'
)
else
:
raise
NotImplementedError
return
data_dict
def
add_sampled_boxes_to_scene
(
self
,
data_dict
,
sampled_gt_boxes
,
total_valid_sampled_dict
,
mv_height
=
None
,
sampled_gt_boxes2d
=
None
):
gt_boxes_mask
=
data_dict
[
'gt_boxes_mask'
]
gt_boxes
=
data_dict
[
'gt_boxes'
][
gt_boxes_mask
]
gt_names
=
data_dict
[
'gt_names'
][
gt_boxes_mask
]
points
=
data_dict
[
'points'
]
if
self
.
sampler_cfg
.
get
(
'USE_ROAD_PLANE'
,
False
):
if
self
.
sampler_cfg
.
get
(
'USE_ROAD_PLANE'
,
False
)
and
mv_height
is
None
:
sampled_gt_boxes
,
mv_height
=
self
.
put_boxes_on_road_planes
(
sampled_gt_boxes
,
data_dict
[
'road_plane'
],
data_dict
[
'calib'
]
)
...
...
@@ -166,11 +368,15 @@ class DataBaseSampler(object):
data_dict
.
pop
(
'road_plane'
)
obj_points_list
=
[]
# convert sampled 3D boxes to image plane
img_aug_gt_dict
=
self
.
initilize_image_aug_dict
(
data_dict
,
gt_boxes_mask
)
if
self
.
use_shared_memory
:
gt_database_data
=
SharedArray
.
attach
(
f
"shm://
{
self
.
gt_database_data_key
}
"
)
gt_database_data
.
setflags
(
write
=
0
)
else
:
gt_database_data
=
None
gt_database_data
=
None
for
idx
,
info
in
enumerate
(
total_valid_sampled_dict
):
if
self
.
use_shared_memory
:
...
...
@@ -187,6 +393,11 @@ class DataBaseSampler(object):
# mv height
obj_points
[:,
2
]
-=
mv_height
[
idx
]
if
self
.
img_aug_type
is
not
None
:
img_aug_gt_dict
,
obj_points
=
self
.
collect_image_crops
(
img_aug_gt_dict
,
info
,
data_dict
,
obj_points
,
sampled_gt_boxes
,
sampled_gt_boxes2d
,
idx
)
obj_points_list
.
append
(
obj_points
)
obj_points
=
np
.
concatenate
(
obj_points_list
,
axis
=
0
)
...
...
@@ -202,6 +413,10 @@ class DataBaseSampler(object):
data_dict
[
'gt_boxes'
]
=
gt_boxes
data_dict
[
'gt_names'
]
=
gt_names
data_dict
[
'points'
]
=
points
if
self
.
img_aug_type
is
not
None
:
data_dict
=
self
.
copy_paste_to_image
(
img_aug_gt_dict
,
data_dict
,
points
)
return
data_dict
def
__call__
(
self
,
data_dict
):
...
...
@@ -217,6 +432,9 @@ class DataBaseSampler(object):
gt_names
=
data_dict
[
'gt_names'
].
astype
(
str
)
existed_boxes
=
gt_boxes
total_valid_sampled_dict
=
[]
sampled_mv_height
=
[]
sampled_gt_boxes2d
=
[]
for
class_name
,
sample_group
in
self
.
sample_groups
.
items
():
if
self
.
limit_whole_scene
:
num_gt
=
np
.
sum
(
class_name
==
gt_names
)
...
...
@@ -226,14 +444,21 @@ class DataBaseSampler(object):
sampled_boxes
=
np
.
stack
([
x
[
'box3d_lidar'
]
for
x
in
sampled_dict
],
axis
=
0
).
astype
(
np
.
float32
)
if
self
.
sampler_cfg
.
get
(
'DATABASE_WITH_FAKELIDAR'
,
False
):
sampled_boxes
=
box_utils
.
boxes3d_kitti_fakelidar_to_lidar
(
sampled_boxes
)
assert
not
self
.
sampler_cfg
.
get
(
'DATABASE_WITH_FAKELIDAR'
,
False
),
'Please use latest codes to generate GT_DATABASE'
iou1
=
iou3d_nms_utils
.
boxes_bev_iou_cpu
(
sampled_boxes
[:,
0
:
7
],
existed_boxes
[:,
0
:
7
])
iou2
=
iou3d_nms_utils
.
boxes_bev_iou_cpu
(
sampled_boxes
[:,
0
:
7
],
sampled_boxes
[:,
0
:
7
])
iou2
[
range
(
sampled_boxes
.
shape
[
0
]),
range
(
sampled_boxes
.
shape
[
0
])]
=
0
iou1
=
iou1
if
iou1
.
shape
[
1
]
>
0
else
iou2
valid_mask
=
((
iou1
.
max
(
axis
=
1
)
+
iou2
.
max
(
axis
=
1
))
==
0
).
nonzero
()[
0
]
valid_mask
=
((
iou1
.
max
(
axis
=
1
)
+
iou2
.
max
(
axis
=
1
))
==
0
)
if
self
.
img_aug_type
is
not
None
:
sampled_boxes2d
,
mv_height
,
valid_mask
=
self
.
sample_gt_boxes_2d
(
data_dict
,
sampled_boxes
,
valid_mask
)
sampled_gt_boxes2d
.
append
(
sampled_boxes2d
)
if
mv_height
is
not
None
:
sampled_mv_height
.
append
(
mv_height
)
valid_mask
=
valid_mask
.
nonzero
()[
0
]
valid_sampled_dict
=
[
sampled_dict
[
x
]
for
x
in
valid_mask
]
valid_sampled_boxes
=
sampled_boxes
[
valid_mask
]
...
...
@@ -241,8 +466,14 @@ class DataBaseSampler(object):
total_valid_sampled_dict
.
extend
(
valid_sampled_dict
)
sampled_gt_boxes
=
existed_boxes
[
gt_boxes
.
shape
[
0
]:,
:]
if
total_valid_sampled_dict
.
__len__
()
>
0
:
data_dict
=
self
.
add_sampled_boxes_to_scene
(
data_dict
,
sampled_gt_boxes
,
total_valid_sampled_dict
)
sampled_gt_boxes2d
=
np
.
concatenate
(
sampled_gt_boxes2d
,
axis
=
0
)
if
len
(
sampled_gt_boxes2d
)
>
0
else
None
sampled_mv_height
=
np
.
concatenate
(
sampled_mv_height
,
axis
=
0
)
if
len
(
sampled_mv_height
)
>
0
else
None
data_dict
=
self
.
add_sampled_boxes_to_scene
(
data_dict
,
sampled_gt_boxes
,
total_valid_sampled_dict
,
sampled_mv_height
,
sampled_gt_boxes2d
)
data_dict
.
pop
(
'gt_boxes_mask'
)
return
data_dict
pcdet/datasets/dataset.py
View file @
4c8009fc
...
...
@@ -9,7 +9,6 @@ from .augmentor.data_augmentor import DataAugmentor
from
.processor.data_processor
import
DataProcessor
from
.processor.point_feature_encoder
import
PointFeatureEncoder
class
DatasetTemplate
(
torch_data
.
Dataset
):
def
__init__
(
self
,
dataset_cfg
=
None
,
class_names
=
None
,
training
=
True
,
root_path
=
None
,
logger
=
None
):
super
().
__init__
()
...
...
@@ -44,7 +43,7 @@ class DatasetTemplate(torch_data.Dataset):
self
.
depth_downsample_factor
=
self
.
data_processor
.
depth_downsample_factor
else
:
self
.
depth_downsample_factor
=
None
@
property
def
mode
(
self
):
return
'train'
if
self
.
training
else
'test'
...
...
@@ -123,14 +122,17 @@ class DatasetTemplate(torch_data.Dataset):
if
self
.
training
:
assert
'gt_boxes'
in
data_dict
,
'gt_boxes should be provided for training'
gt_boxes_mask
=
np
.
array
([
n
in
self
.
class_names
for
n
in
data_dict
[
'gt_names'
]],
dtype
=
np
.
bool_
)
if
'calib'
in
data_dict
:
calib
=
data_dict
[
'calib'
]
data_dict
=
self
.
data_augmentor
.
forward
(
data_dict
=
{
**
data_dict
,
'gt_boxes_mask'
:
gt_boxes_mask
}
)
if
'calib'
in
data_dict
:
data_dict
[
'calib'
]
=
calib
if
data_dict
.
get
(
'gt_boxes'
,
None
)
is
not
None
:
selected
=
common_utils
.
keep_arrays_by_name
(
data_dict
[
'gt_names'
],
self
.
class_names
)
data_dict
[
'gt_boxes'
]
=
data_dict
[
'gt_boxes'
][
selected
]
...
...
@@ -204,8 +206,7 @@ class DatasetTemplate(torch_data.Dataset):
pad_h
=
common_utils
.
get_pad_params
(
desired_size
=
max_h
,
cur_size
=
image
.
shape
[
0
])
pad_w
=
common_utils
.
get_pad_params
(
desired_size
=
max_w
,
cur_size
=
image
.
shape
[
1
])
pad_width
=
(
pad_h
,
pad_w
)
# Pad with nan, to be replaced later in the pipeline.
pad_value
=
np
.
nan
pad_value
=
0
if
key
==
"images"
:
pad_width
=
(
pad_h
,
pad_w
,
(
0
,
0
))
...
...
@@ -219,6 +220,20 @@ class DatasetTemplate(torch_data.Dataset):
images
.
append
(
image_pad
)
ret
[
key
]
=
np
.
stack
(
images
,
axis
=
0
)
elif
key
in
[
'calib'
]:
ret
[
key
]
=
val
elif
key
in
[
"points_2d"
]:
max_len
=
max
([
len
(
_val
)
for
_val
in
val
])
pad_value
=
0
points
=
[]
for
_points
in
val
:
pad_width
=
((
0
,
max_len
-
len
(
_points
)),
(
0
,
0
))
points_pad
=
np
.
pad
(
_points
,
pad_width
=
pad_width
,
mode
=
'constant'
,
constant_values
=
pad_value
)
points
.
append
(
points_pad
)
ret
[
key
]
=
np
.
stack
(
points
,
axis
=
0
)
else
:
ret
[
key
]
=
np
.
stack
(
val
,
axis
=
0
)
except
:
...
...
pcdet/datasets/kitti/kitti_dataset.py
View file @
4c8009fc
...
...
@@ -421,6 +421,7 @@ class KittiDataset(DatasetTemplate):
if
"calib_matricies"
in
get_item_list
:
input_dict
[
"trans_lidar_to_cam"
],
input_dict
[
"trans_cam_to_img"
]
=
kitti_utils
.
calib_to_matricies
(
calib
)
input_dict
[
'calib'
]
=
calib
data_dict
=
self
.
prepare_data
(
data_dict
=
input_dict
)
data_dict
[
'image_shape'
]
=
img_shape
...
...
pcdet/models/backbones_3d/__init__.py
View file @
4c8009fc
from
.pointnet2_backbone
import
PointNet2Backbone
,
PointNet2MSG
from
.spconv_backbone
import
VoxelBackBone8x
,
VoxelResBackBone8x
from
.spconv_backbone_focal
import
VoxelBackBone8xFocal
from
.spconv_unet
import
UNetV2
__all__
=
{
...
...
@@ -8,4 +9,5 @@ __all__ = {
'PointNet2Backbone'
:
PointNet2Backbone
,
'PointNet2MSG'
:
PointNet2MSG
,
'VoxelResBackBone8x'
:
VoxelResBackBone8x
,
'VoxelBackBone8xFocal'
:
VoxelBackBone8xFocal
,
}
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py
0 → 100755
View file @
4c8009fc
import
torch.nn
as
nn
class BasicBlock1D(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Initializes a 1D convolutional block: Conv1d -> BatchNorm1d -> ReLU.

        (Docstrings fixed: the original text claimed nn.Conv2d and 2D feature
        shapes, but this block is built from 1D layers.)

        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments for nn.Conv1d (e.g. kernel_size, stride)
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv1d(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs)
        self.bn = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies the convolutional block.

        Args:
            features: (B, C_in, L), Input features
        Returns:
            x: (B, C_out, L'), Output features (L' depends on conv kwargs)
        """
        x = self.conv(features)
        x = self.bn(x)
        x = self.relu(x)
        return x
class BasicBlock2D(nn.Module):
    """2D convolutional block: Conv2d -> BatchNorm2d -> ReLU."""

    def __init__(self, in_channels, out_channels, **kwargs):
        """
        Builds the block.

        Args:
            in_channels: int, Number of input channels
            out_channels: int, Number of output channels
            **kwargs: Dict, Extra arguments forwarded to nn.Conv2d
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, features):
        """
        Applies conv -> bn -> relu.

        Args:
            features: (B, C_in, H, W), Input features
        Returns:
            (B, C_out, H', W'), Output features
        """
        return self.relu(self.bn(self.conv(features)))
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py
0 → 100755
View file @
4c8009fc
import
torch
import
torch.nn
as
nn
from
.basic_blocks
import
BasicBlock2D
from
.sem_deeplabv3
import
SemDeepLabV3
class PyramidFeat2D(nn.Module):
    """2D image feature extractor built on a (pretrained) semantic segmentation
    backbone, with per-layer 1x1-style channel-reduction blocks."""

    def __init__(self, optimize, model_cfg):
        """
        Initialize 2D feature network via pretrained model
        Args:
            optimize: bool, whether extracted features stay in the autograd graph
                during training (False -> features are detached)
            model_cfg: EasyDict, Dense classification network config
        """
        super().__init__()
        self.model_cfg = model_cfg
        self.is_optimize = optimize

        # Create modules
        self.ifn = SemDeepLabV3(
            num_classes=model_cfg.num_class,
            backbone_name=model_cfg.backbone,
            **model_cfg.args
        )
        self.reduce_blocks = torch.nn.ModuleList()
        self.out_channels = {}  # maps layer name -> reduced channel count
        for _idx, _channel in enumerate(model_cfg.channel_reduce["in_channels"]):
            _channel_out = model_cfg.channel_reduce["out_channels"][_idx]
            self.out_channels[model_cfg.args['feat_extract_layer'][_idx]] = _channel_out
            # One channel-reduction conv block per extracted backbone layer.
            block_cfg = {"in_channels": _channel,
                         "out_channels": _channel_out,
                         "kernel_size": model_cfg.channel_reduce["kernel_size"][_idx],
                         "stride": model_cfg.channel_reduce["stride"][_idx],
                         "bias": model_cfg.channel_reduce["bias"][_idx]}
            self.reduce_blocks.append(BasicBlock2D(**block_cfg))

    def get_output_feature_dim(self):
        # Dict: backbone layer name -> output channels after reduction.
        return self.out_channels

    def forward(self, images):
        """
        Extracts and channel-reduces 2D image features.
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns:
            batch_dict: dict with one entry per extracted layer,
                "<layer>_feat2d": (N, C_out, H_out, W_out) reduced features
        """
        # Run the segmentation backbone once; it returns the requested layers.
        batch_dict = {}
        ifn_result = self.ifn(images)

        for _idx, _layer in enumerate(self.model_cfg.args['feat_extract_layer']):
            image_features = ifn_result[_layer]
            # Channel reduce
            if self.reduce_blocks[_idx] is not None:
                image_features = self.reduce_blocks[_idx](image_features)

            batch_dict[_layer + "_feat2d"] = image_features

        if self.training:
            # detach feature from graph if not optimize
            if "logits" in ifn_result:
                ifn_result["logits"].detach_()
            if not self.is_optimize:
                # NOTE(review): in-place detach of the loop variable only detaches
                # the features of the LAST extracted layer; with a single
                # feat_extract_layer (the configured case) this detaches everything.
                image_features.detach_()

        return batch_dict

    def get_loss(self):
        """
        Gets loss
        Args:
        Returns:
            loss: (1), Network loss (always None; this module adds no loss)
            tb_dict: dict[float], All losses to log in tensorboard (always None)
        """
        return None, None
pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py
0 → 100755
View file @
4c8009fc
from
collections
import
OrderedDict
from
pathlib
import
Path
from
torch
import
hub
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torchvision
class SegTemplate(nn.Module):
    """Wraps a torchvision segmentation model so that intermediate backbone
    layers can be extracted as 2D feature maps."""

    def __init__(self, constructor, feat_extract_layer, num_classes, pretrained_path=None, aux_loss=None):
        """
        Initializes depth distribution network.
        Args:
            constructor: function, Model constructor (torchvision segmentation model)
            feat_extract_layer: list[str], Backbone layer names to extract features from
            num_classes: int, Number of classes
            pretrained_path: string, (Optional) Path of the model to load weights from
            aux_loss: bool, Flag to include auxillary loss
        """
        super().__init__()
        self.num_classes = num_classes
        self.pretrained_path = pretrained_path
        self.pretrained = pretrained_path is not None
        self.aux_loss = aux_loss

        if self.pretrained:
            # Preprocess Module: ImageNet normalization statistics.
            self.norm_mean = torch.Tensor([0.485, 0.456, 0.406])
            self.norm_std = torch.Tensor([0.229, 0.224, 0.225])

        # Model
        self.model = self.get_model(constructor=constructor)
        self.feat_extract_layer = feat_extract_layer
        # Ask the torchvision backbone (IntermediateLayerGetter) to also return
        # the requested intermediate layers in its output dict.
        return_layers = {_layer: _layer for _layer in feat_extract_layer}
        self.model.backbone.return_layers.update(return_layers)

    def get_model(self, constructor):
        """
        Get model
        Args:
            constructor: function, Model constructor
        Returns:
            model: nn.Module, Model (moved to CUDA)
        """
        # Get model
        model = constructor(pretrained=False, pretrained_backbone=False,
                            num_classes=self.num_classes, aux_loss=self.aux_loss)

        # Update weights
        if self.pretrained_path is not None:
            model_dict = model.state_dict()

            # Download pretrained model if not available yet.
            # NOTE(review): assumes the checkpoint filename matches a file in the
            # torchvision model zoo (download.pytorch.org) — verify for custom paths.
            checkpoint_path = Path(self.pretrained_path)
            if not checkpoint_path.exists():
                checkpoint = checkpoint_path.name
                save_dir = checkpoint_path.parent
                save_dir.mkdir(parents=True, exist_ok=True)
                url = f'https://download.pytorch.org/models/{checkpoint}'
                hub.load_state_dict_from_url(url, save_dir)

            # Get pretrained state dict
            pretrained_dict = torch.load(self.pretrained_path)
            #pretrained_dict = self.filter_pretrained_dict(model_dict=model_dict, pretrained_dict=pretrained_dict)

            # Update current model state dict
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict, strict=False)

        return model.cuda()

    def filter_pretrained_dict(self, model_dict, pretrained_dict):
        """
        Removes layers from pretrained state dict that are not used or changed in model
        Args:
            model_dict: dict, Default model state dictionary
            pretrained_dict: dict, Pretrained model state dictionary
        Returns:
            pretrained_dict: dict, Pretrained model state dictionary with removed weights
        """
        # Removes aux classifier weights if not used
        if "aux_classifier.0.weight" in pretrained_dict and "aux_classifier.0.weight" not in model_dict:
            pretrained_dict = {key: value for key, value in pretrained_dict.items()
                               if "aux_classifier" not in key}

        # Removes final conv layer from weights if number of classes are different
        model_num_classes = model_dict["classifier.4.weight"].shape[0]
        pretrained_num_classes = pretrained_dict["classifier.4.weight"].shape[0]
        if model_num_classes != pretrained_num_classes:
            pretrained_dict.pop("classifier.4.weight")
            pretrained_dict.pop("classifier.4.bias")

        return pretrained_dict

    def forward(self, images):
        """
        Forward pass
        Args:
            images: (N, 3, H_in, W_in), Input images
        Returns
            result: dict[torch.Tensor], extracted backbone feature maps,
                one entry per name in self.feat_extract_layer
        """
        # Preprocess images
        if self.pretrained:
            images = (images - self.norm_mean[None, :, None, None].type_as(images)) / \
                self.norm_std[None, :, None, None].type_as(images)

        x = images.cuda()

        # Extract features
        result = OrderedDict()
        features = self.model.backbone(x)
        for _layer in self.feat_extract_layer:
            result[_layer] = features[_layer]
        # Early return: only the backbone features are used by the caller.
        return result

        # NOTE(review): everything below is unreachable dead code, kept from the
        # original implementation (classifier/aux logits computation).
        if 'features' in features.keys():
            feat_shape = features['features'].shape[-2:]
        else:
            feat_shape = features['layer1'].shape[-2:]

        # Prediction classification logits
        x = features["out"]
        # comment the classifier to reduce memory
        # x = self.model.classifier(x)
        # x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
        result["logits"] = x

        # Prediction auxillary classification logits
        if self.model.aux_classifier is not None:
            x = features["aux"]
            x = self.model.aux_classifier(x)
            x = F.interpolate(x, size=feat_shape, mode='bilinear', align_corners=False)
            result["aux"] = x

        return result
class SemDeepLabV3(SegTemplate):

    def __init__(self, backbone_name, **kwargs):
        """
        Initializes SemDeepLabV3 model.

        Args:
            backbone_name: string, ResNet Backbone Name [ResNet50/ResNet101]
            **kwargs: forwarded to SegTemplate (feat_extract_layer, num_classes, ...)
        Raises:
            NotImplementedError: if backbone_name is not a supported backbone
        """
        # Map the supported backbone names to their torchvision constructors.
        supported = {
            "ResNet50": torchvision.models.segmentation.deeplabv3_resnet50,
            "ResNet101": torchvision.models.segmentation.deeplabv3_resnet101,
        }
        constructor = supported.get(backbone_name)
        if constructor is None:
            raise NotImplementedError
        super().__init__(constructor=constructor, **kwargs)
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py
0 → 100644
View file @
4c8009fc
import
torch
import
torch.nn
as
nn
import
spconv.pytorch
as
spconv
from
pcdet.ops.roiaware_pool3d.roiaware_pool3d_utils
import
points_in_boxes_gpu
from
pcdet.models.backbones_3d.focal_sparse_conv.focal_sparse_utils
import
split_voxels
,
check_repeat
,
FocalLoss
from
pcdet.utils
import
common_utils
class FocalSparseConv(spconv.SparseModule):
    """Focal Sparse Convolution (Focals Conv, CVPR 2022).

    Predicts a per-voxel / per-kernel-offset "importance" map, splits the input
    voxels into foreground and background, dilates foreground voxels into their
    kernel neighbourhood, recombines everything, and applies a submanifold
    convolution. Optionally fuses 2D image features into the voxel features.
    """
    expansion = 1

    def __init__(self, inplanes, planes, voxel_stride, norm_fn=None, indice_key=None, image_channel=3,
                 kernel_size=3, padding=1, mask_multi=False, use_img=False,
                 topk=False, threshold=0.5, skip_mask_kernel=False, enlarge_voxel_channels=-1,
                 point_cloud_range=[-3, -40, 0, 1, 40, 70.4],
                 voxel_size=[0.1, 0.05, 0.05]):
        """
        Args:
            inplanes: int, input feature channels
            planes: int, output feature channels
            voxel_stride: int, downsample stride of this stage (indices * stride = voxel grid coords)
            norm_fn: callable, normalization layer constructor
            indice_key: str, spconv indice key prefix for this stage
            image_channel: int, channels of fused image features (only when use_img=True)
            kernel_size: int, kernel size of the main submanifold conv
            padding: int, accepted for signature compatibility but NOT used by self.conv below
            mask_multi: bool, multiply predicted voxel mask onto foreground features
            use_img: bool, fuse image features before predicting importance
            topk: bool, select foreground by top-k ratio instead of a hard threshold
            threshold: float, importance threshold (or top-k ratio when topk=True)
            skip_mask_kernel: bool, skip re-weighting foreground features by the kernel mask
            enlarge_voxel_channels: int, channels of the optional context conv feeding the
                importance predictor; <= 0 disables it
            point_cloud_range: list[6], point cloud range ordered to match the index order
            voxel_size: list[3], voxel edge lengths ordered to match the index order
        """
        super(FocalSparseConv, self).__init__()
        # Main submanifold convolution applied to the recombined voxel set.
        self.conv = spconv.SubMConv3d(inplanes, planes, kernel_size=kernel_size,
                                      stride=1, bias=False, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        # One importance logit per kernel position; the LAST channel is the voxel itself.
        offset_channels = kernel_size**3

        self.topk = topk
        self.threshold = threshold
        self.voxel_stride = voxel_stride
        self.focal_loss = FocalLoss()
        self.mask_multi = mask_multi
        self.skip_mask_kernel = skip_mask_kernel
        self.use_img = use_img

        voxel_channel = enlarge_voxel_channels if enlarge_voxel_channels > 0 else inplanes
        in_channels = image_channel + voxel_channel if use_img else voxel_channel

        # Optional context-enlarging conv feeding the importance predictor.
        self.conv_enlarge = spconv.SparseSequential(
            spconv.SubMConv3d(inplanes, enlarge_voxel_channels, kernel_size=3, stride=1,
                              padding=1, bias=False, indice_key=indice_key + '_enlarge'),
            norm_fn(enlarge_voxel_channels),
            nn.ReLU(True)) if enlarge_voxel_channels > 0 else None

        # Importance predictor: kernel_size**3 logits per voxel.
        self.conv_imp = spconv.SubMConv3d(in_channels, offset_channels, kernel_size=3, stride=1,
                                          padding=1, bias=False, indice_key=indice_key + '_imp')

        # All kernel offsets around the center, excluding (0, 0, 0) itself.
        _step = int(kernel_size // 2)
        kernel_offsets = [[i, j, k] for i in range(-_step, _step + 1)
                          for j in range(-_step, _step + 1)
                          for k in range(-_step, _step + 1)]
        kernel_offsets.remove([0, 0, 0])
        self.kernel_offsets = torch.Tensor(kernel_offsets).cuda()
        # inv_idx reverses the spatial index order (e.g. (z, y, x) -> (x, y, z)).
        self.inv_idx = torch.Tensor([2, 1, 0]).long().cuda()
        self.point_cloud_range = torch.Tensor(point_cloud_range).cuda()
        self.voxel_size = torch.Tensor(voxel_size).cuda()

    def construct_multimodal_features(self, x, x_rgb, batch_dict, fuse_sum=False):
        """
        Construct the multimodal features with both lidar sparse features and image features.
        Args:
            x: [N, C] lidar sparse features (spconv SparseConvTensor)
            x_rgb: [b, c, h, w] image features
            batch_dict: input and output information during forward
            fuse_sum: bool, manner for fusion, True - sum, False - concat
        Return:
            image_with_voxelfeatures: [N, C] fused multimodal features
        """
        batch_index = x.indices[:, 0]
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        # Voxel centers in metric coordinates (index order, not x/y/z).
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]
        calibs = batch_dict['calib']
        batch_size = batch_dict['batch_size']
        h, w = batch_dict['images'].shape[2:]

        # Resize image features to the raw image resolution so that projected
        # voxel pixel coordinates index them directly.
        if not x_rgb.shape == batch_dict['images'].shape:
            x_rgb = nn.functional.interpolate(x_rgb, (h, w), mode='bilinear')

        image_with_voxelfeatures = []
        voxels_2d_int_list = []
        filter_idx_list = []
        for b in range(batch_size):
            x_rgb_batch = x_rgb[b]

            calib = calibs[b]
            voxels_3d_batch = voxels_3d[batch_index == b]
            voxel_features_sparse = x.features[batch_index == b]

            # Reverse the point cloud transformations to the original coords.
            if 'noise_scale' in batch_dict:
                voxels_3d_batch[:, :3] /= batch_dict['noise_scale'][b]
            if 'noise_rot' in batch_dict:
                voxels_3d_batch = common_utils.rotate_points_along_z(
                    voxels_3d_batch[:, self.inv_idx].unsqueeze(0),
                    -batch_dict['noise_rot'][b].unsqueeze(0))[0, :, self.inv_idx]
            if 'flip_x' in batch_dict:
                voxels_3d_batch[:, 1] *= -1 if batch_dict['flip_x'][b] else 1
            if 'flip_y' in batch_dict:
                voxels_3d_batch[:, 2] *= -1 if batch_dict['flip_y'][b] else 1

            # Project voxel centers into the image plane.
            voxels_2d, _ = calib.lidar_to_img(voxels_3d_batch[:, self.inv_idx].cpu().numpy())

            voxels_2d_int = torch.Tensor(voxels_2d).to(x_rgb_batch.device).long()

            # Keep only projections that land inside the image.
            filter_idx = (0 <= voxels_2d_int[:, 1]) * (voxels_2d_int[:, 1] < h) * \
                         (0 <= voxels_2d_int[:, 0]) * (voxels_2d_int[:, 0] < w)

            filter_idx_list.append(filter_idx)
            voxels_2d_int = voxels_2d_int[filter_idx]
            voxels_2d_int_list.append(voxels_2d_int)

            # Gather image features at the projected pixels; voxels that project
            # outside the image keep zero image features.
            image_features_batch = torch.zeros(
                (voxel_features_sparse.shape[0], x_rgb_batch.shape[0]),
                device=x_rgb_batch.device)
            image_features_batch[filter_idx] = x_rgb_batch[:, voxels_2d_int[:, 1], voxels_2d_int[:, 0]].permute(1, 0)

            if fuse_sum:
                image_with_voxelfeature = image_features_batch + voxel_features_sparse
            else:
                image_with_voxelfeature = torch.cat([image_features_batch, voxel_features_sparse], dim=1)

            image_with_voxelfeatures.append(image_with_voxelfeature)

        image_with_voxelfeatures = torch.cat(image_with_voxelfeatures)
        return image_with_voxelfeatures

    def _gen_sparse_features(self, x, imps_3d, batch_dict, voxels_3d):
        """
        Generate the output sparse features from the focal sparse conv.
        Args:
            x: [N, C], lidar sparse features (spconv SparseConvTensor)
            imps_3d: [N, kernelsize**3], the predicted importance values
            batch_dict: input and output information during forward
            voxels_3d: [N, 3], the 3d positions of voxel centers
        Returns:
            x_fore, x_back: foreground / background SparseConvTensors
            loss_box_of_pts: focal loss supervising voxel importance (0 at eval)
            mask_kernel: per-foreground-voxel kernel mask values
        """
        batch_size = x.batch_size
        voxel_features_fore = []
        voxel_indices_fore = []
        voxel_features_back = []
        voxel_indices_back = []

        box_of_pts_cls_targets = []
        mask_voxels = []
        mask_kernel_list = []

        for b in range(batch_size):
            if self.training:
                # Build supervision targets: a voxel is "foreground" iff its
                # center falls inside any ground-truth box.
                index = x.indices[:, 0]
                batch_index = index == b
                mask_voxel = imps_3d[batch_index, -1].sigmoid()
                voxels_3d_batch = voxels_3d[batch_index].unsqueeze(0)
                mask_voxels.append(mask_voxel)
                gt_boxes = batch_dict['gt_boxes'][b, :, :-1].unsqueeze(0)
                box_of_pts_batch = points_in_boxes_gpu(
                    voxels_3d_batch[:, :, self.inv_idx], gt_boxes).squeeze(0)
                box_of_pts_cls_targets.append(box_of_pts_batch >= 0)

            features_fore, indices_fore, features_back, indices_back, mask_kernel = split_voxels(
                x, b, imps_3d, voxels_3d, self.kernel_offsets,
                mask_multi=self.mask_multi, topk=self.topk, threshold=self.threshold)

            mask_kernel_list.append(mask_kernel)
            voxel_features_fore.append(features_fore)
            voxel_indices_fore.append(indices_fore)
            voxel_features_back.append(features_back)
            voxel_indices_back.append(indices_back)

        voxel_features_fore = torch.cat(voxel_features_fore, dim=0)
        voxel_indices_fore = torch.cat(voxel_indices_fore, dim=0)
        voxel_features_back = torch.cat(voxel_features_back, dim=0)
        voxel_indices_back = torch.cat(voxel_indices_back, dim=0)
        mask_kernel = torch.cat(mask_kernel_list, dim=0)

        x_fore = spconv.SparseConvTensor(voxel_features_fore, voxel_indices_fore, x.spatial_shape, x.batch_size)
        x_back = spconv.SparseConvTensor(voxel_features_back, voxel_indices_back, x.spatial_shape, x.batch_size)

        loss_box_of_pts = 0
        if self.training:
            # Two-class focal loss between the predicted voxel importance and
            # the inside-gt-box targets.
            mask_voxels = torch.cat(mask_voxels)
            box_of_pts_cls_targets = torch.cat(box_of_pts_cls_targets)
            mask_voxels_two_classes = torch.cat(
                [1 - mask_voxels.unsqueeze(-1), mask_voxels.unsqueeze(-1)], dim=1)
            loss_box_of_pts = self.focal_loss(mask_voxels_two_classes, box_of_pts_cls_targets.long())

        return x_fore, x_back, loss_box_of_pts, mask_kernel

    def combine_out(self, x_fore, x_back, remove_repeat=False):
        """
        Combine the foreground and background sparse features together.
        Args:
            x_fore: [N1, C], foreground sparse features
            x_back: [N2, C], background sparse features
            remove_repeat: bool, whether to remove the spatial replicate features.
        Returns:
            x_fore: SparseConvTensor holding the merged (optionally deduplicated) voxels
        """
        x_fore_features = torch.cat([x_fore.features, x_back.features], dim=0)
        x_fore_indices = torch.cat([x_fore.indices, x_back.indices], dim=0)

        if remove_repeat:
            # Deduplicate voxels that appear in both halves, per batch sample.
            index = x_fore_indices[:, 0]
            features_out_list = []
            indices_coords_out_list = []
            for b in range(x_fore.batch_size):
                batch_index = index == b
                features_out, indices_coords_out, _ = check_repeat(
                    x_fore_features[batch_index], x_fore_indices[batch_index], flip_first=False)
                features_out_list.append(features_out)
                indices_coords_out_list.append(indices_coords_out)
            x_fore_features = torch.cat(features_out_list, dim=0)
            x_fore_indices = torch.cat(indices_coords_out_list, dim=0)

        # Reuse x_fore as the container for the merged tensor.
        x_fore = x_fore.replace_feature(x_fore_features)
        x_fore.indices = x_fore_indices

        return x_fore

    def forward(self, x, batch_dict, x_rgb=None):
        # Voxel centers in metric coordinates (index order).
        spatial_indices = x.indices[:, 1:] * self.voxel_stride
        voxels_3d = spatial_indices * self.voxel_size + self.point_cloud_range[:3]

        if self.use_img:
            # Concatenate image features onto voxel features for importance prediction.
            features_multimodal = self.construct_multimodal_features(x, x_rgb, batch_dict)
            x_predict = spconv.SparseConvTensor(features_multimodal, x.indices, x.spatial_shape, x.batch_size)
        else:
            x_predict = self.conv_enlarge(x) if self.conv_enlarge else x

        imps_3d = self.conv_imp(x_predict).features

        x_fore, x_back, loss_box_of_pts, mask_kernel = self._gen_sparse_features(
            x, imps_3d, batch_dict, voxels_3d)

        if not self.skip_mask_kernel:
            # Soft-weight foreground features by their kernel importance.
            x_fore = x_fore.replace_feature(x_fore.features * mask_kernel.unsqueeze(-1))
        out = self.combine_out(x_fore, x_back, remove_repeat=True)
        out = self.conv(out)

        if self.use_img:
            # Second fusion pass after the conv, using summation.
            out = out.replace_feature(self.construct_multimodal_features(out, x_rgb, batch_dict, True))

        out = out.replace_feature(self.bn1(out.features))
        out = out.replace_feature(self.relu(out.features))

        return out, batch_dict, loss_box_of_pts
pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_utils.py
0 → 100644
View file @
4c8009fc
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
torch.autograd
import
Variable
class FocalLoss(nn.Module):
    """Multi-class focal loss (Lin et al., "Focal Loss for Dense Object Detection").

    Fix: the original `one_hot` used `torch.autograd.Variable(..., volatile=...)`
    and read `index.volatile`; the `volatile` flag was removed in PyTorch 0.4+,
    so that code path fails on modern PyTorch. It also allocated an
    uninitialized `torch.Tensor(*size)` before zero-filling. The one-hot
    encoding is now built directly with `torch.zeros` + `scatter_`.
    """

    def __init__(self, gamma=2.0, eps=1e-7):
        """
        Args:
            gamma: float, focusing parameter (down-weights easy examples)
            eps: float, clamp value to keep log() finite
        """
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps

    def one_hot(self, index, classes):
        """
        One-hot encode class indices.
        Args:
            index: (N,) long tensor of class ids
            classes: int, number of classes
        Returns:
            (N, classes) float tensor, 1.0 at the target class, 0.0 elsewhere
        """
        view = index.size() + (1,)
        size = index.size() + (classes,)
        mask = torch.zeros(size, device=index.device)
        # scatter_ with a scalar source writes 1.0 at each target position.
        return mask.scatter_(1, index.view(*view), 1.0)

    def forward(self, input, target):
        """
        Args:
            input: (N, C) unnormalized class logits
            target: (N,) long tensor of class ids
        Returns:
            scalar tensor, mean focal loss over all N*C entries
        """
        y = self.one_hot(target, input.size(-1))
        logit = F.softmax(input, dim=-1)
        # Clamp to avoid log(0) / gradients blowing up at the boundaries.
        logit = logit.clamp(self.eps, 1. - self.eps)

        loss = -1 * y * torch.log(logit)                 # cross entropy
        loss = loss * (1 - logit) ** self.gamma          # focal modulation

        return loss.mean()
def sort_by_indices(features, indices, features_add=None):
    """
    Sort sparse features (and optional extra per-voxel values) by a scalar key
    flattened from their spatial indices, so that voxels with identical
    coordinates become consecutive.
    Args:
        features: [N, C], sparse features
        indices: [N, 4], (batch, i, j, k) indices of sparse features
        features_add: [N, C], additional features to reorder alongside
    Returns:
        (features, indices, features_add) all permuted by the sorted key
        (features_add stays None when not provided)
    """
    spatial = indices[:, 1:]
    j_max = spatial[:, 1].max()
    k_max = spatial[:, 2].max()
    # Flatten (i, j, k) into one scalar ordering key.
    flat_key = spatial[:, 0] * j_max * k_max + spatial[:, 1] * k_max + spatial[:, 2]
    order = flat_key.sort()[1]
    features = features[order]
    indices = indices[order]
    if features_add is not None:
        features_add = features_add[order]
    return features, indices, features_add
def check_repeat(features, indices, features_add=None, sort_first=True, flip_first=True):
    """
    Check that whether there are replicate indices in the sparse features,
    remove the replicate features if any.

    Duplicated voxel features are summed (index_add_); duplicated
    `features_add` values are averaged over the duplicate count.

    Args:
        features: [N, C], sparse features
        indices: [N, 4], (batch, i, j, k) indices
        features_add: [N], optional per-voxel values merged by averaging
        sort_first: bool, sort by flattened spatial key first so duplicates
            are consecutive (required for unique_consecutive to find them)
        flip_first: bool, reverse order before deduplication
    Returns:
        (features, indices, features_add) with duplicates merged
    """
    if sort_first:
        features, indices, features_add = sort_by_indices(features, indices, features_add)

    if flip_first:
        features, indices = features.flip([0]), indices.flip([0])

        if not features_add is None:
            features_add = features_add.flip([0])

    idx = indices[:, 1:].int()
    # Flatten (i, j, k) into one scalar key; duplicates share the same key.
    # NOTE(review): scaling by max() (not max()+1) can in principle collide
    # distinct coordinates — kept as in the original implementation.
    idx_sum = torch.add(
        torch.add(idx.select(1, 0) * idx[:, 1].max() * idx[:, 2].max(),
                  idx.select(1, 1) * idx[:, 2].max()),
        idx.select(1, 2))
    _unique, inverse, counts = torch.unique_consecutive(
        idx_sum, return_inverse=True, return_counts=True, dim=0)

    if _unique.shape[0] < indices.shape[0]:
        # Duplicates exist: sum features of duplicates into one row per unique key.
        perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
        features_new = torch.zeros((_unique.shape[0], features.shape[-1]), device=features.device)
        features_new.index_add_(0, inverse.long(), features)
        features = features_new
        # scatter_ keeps (one of) the original row index per unique key, used
        # to pick a representative indices row.
        perm_ = inverse.new_empty(_unique.size(0)).scatter_(0, inverse, perm)
        indices = indices[perm_].int()

        if not features_add is None:
            features_add_new = torch.zeros((_unique.shape[0],), device=features_add.device)
            features_add_new.index_add_(0, inverse.long(), features_add)
            # Average (rather than sum) the auxiliary values over duplicates.
            features_add = features_add_new / counts
    return features, indices, features_add
def split_voxels(x, b, imps_3d, voxels_3d, kernel_offsets, mask_multi=True, topk=True, threshold=0.5):
    """
    Generate and split the voxels into foreground and background sparse features,
    based on the predicted importance values.
    Args:
        x: [N, C], input sparse features (spconv SparseConvTensor)
        b: int, batch sample id
        imps_3d: [N, kernelsize**3], the predicted importance values
            (last channel = voxel itself, others = kernel-offset positions)
        voxels_3d: [N, 3], the 3d positions of voxel centers
        kernel_offsets: [kernelsize**3 - 1, 3], the offset coords in a kernel
        mask_multi: bool, whether to multiply the predicted mask to features
        topk: bool, whether to use topk or threshold for selection
        threshold: float, threshold value (or top-k ratio when topk=True)
    Returns:
        features_fore, coords_fore: foreground voxels, including newly dilated
            neighbour positions (deduplicated)
        features_back, coords_back: background voxels, unchanged
        mask_kernel_fore: per-foreground-voxel importance weights (1.0 for
            original voxels, predicted values for dilated ones)
    """
    index = x.indices[:, 0]
    batch_index = index == b
    indices_ori = x.indices[batch_index]
    features_ori = x.features[batch_index]
    # Per-voxel importance (last channel) and per-kernel-offset importance.
    mask_voxel = imps_3d[batch_index, -1].sigmoid()
    mask_kernel = imps_3d[batch_index, :-1].sigmoid()

    if mask_multi:
        features_ori *= mask_voxel.unsqueeze(-1)

    # Foreground / background selection.
    if topk:
        _, indices = mask_voxel.sort(descending=True)
        indices_fore = indices[:int(mask_voxel.shape[0] * threshold)]
        indices_back = indices[int(mask_voxel.shape[0] * threshold):]
    else:
        indices_fore = mask_voxel > threshold
        indices_back = mask_voxel <= threshold

    features_fore = features_ori[indices_fore]
    coords_fore = indices_ori[indices_fore]

    # Dilate foreground voxels: spawn new voxels at kernel offsets whose
    # predicted importance exceeds the threshold.
    mask_kernel_fore = mask_kernel[indices_fore]
    mask_kernel_bool = mask_kernel_fore >= threshold
    voxel_kerels_imp = kernel_offsets.unsqueeze(0).repeat(mask_kernel_bool.shape[0], 1, 1)
    # Importance values of the selected (above-threshold) offsets only.
    mask_kernel_fore = mask_kernel[indices_fore][mask_kernel_bool]
    indices_fore_kernels = coords_fore[:, 1:].unsqueeze(1).repeat(1, kernel_offsets.shape[0], 1)
    indices_with_imp = indices_fore_kernels + voxel_kerels_imp
    selected_indices = indices_with_imp[mask_kernel_bool]
    # Keep only dilated positions that fall inside the spatial grid.
    spatial_indices = (selected_indices[:, 0] > 0) * (selected_indices[:, 1] > 0) * (selected_indices[:, 2] > 0) * \
        (selected_indices[:, 0] < x.spatial_shape[0]) * (selected_indices[:, 1] < x.spatial_shape[1]) * (selected_indices[:, 2] < x.spatial_shape[2])
    selected_indices = selected_indices[spatial_indices]
    mask_kernel_fore = mask_kernel_fore[spatial_indices]
    # Prepend the batch id column to the dilated indices.
    selected_indices = torch.cat(
        [torch.ones((selected_indices.shape[0], 1), device=features_fore.device) * b, selected_indices], dim=1)

    # Dilated voxels start with zero features; their weight comes from mask_kernel_fore.
    selected_features = torch.zeros(
        (selected_indices.shape[0], features_ori.shape[1]), device=features_fore.device)

    features_fore_cat = torch.cat([features_fore, selected_features], dim=0)
    coords_fore = torch.cat([coords_fore, selected_indices], dim=0)
    # Original foreground voxels get weight 1.0; dilated ones keep their predicted weight.
    mask_kernel_fore = torch.cat(
        [torch.ones(features_fore.shape[0], device=features_fore.device), mask_kernel_fore], dim=0)

    # Merge dilated positions that collide with each other / with original voxels.
    features_fore, coords_fore, mask_kernel_fore = check_repeat(
        features_fore_cat, coords_fore, features_add=mask_kernel_fore)

    features_back = features_ori[indices_back]
    coords_back = indices_ori[indices_back]

    return features_fore, coords_fore, features_back, coords_back, mask_kernel_fore
pcdet/models/backbones_3d/spconv_backbone_focal.py
0 → 100755
View file @
4c8009fc
from
functools
import
partial
import
torch
import
spconv.pytorch
as
spconv
import
torch.nn
as
nn
from
.focal_sparse_conv.focal_sparse_conv
import
FocalSparseConv
from
.focal_sparse_conv.SemanticSeg.pyramid_ffn
import
PyramidFeat2D
class objDict:
    """Helper that copies keyword data onto an object as attributes."""

    @staticmethod
    def to_object(obj: object, **data):
        """Attach every keyword argument to ``obj``'s instance dict."""
        vars(obj).update(data)
class ConfigDict:
    """Minimal config holder supporting both attribute and item access."""

    def __init__(self, name):
        self.name = name

    def __getitem__(self, item):
        # cfg["key"] delegates to attribute lookup: cfg.key
        return getattr(self, item)
class SparseSequentialBatchdict(spconv.SparseSequential):
    """SparseSequential variant that threads ``batch_dict`` through
    FocalSparseConv layers and accumulates their auxiliary losses."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input, batch_dict=None):
        total_loss = 0
        for module in self._modules.values():
            if module is None:
                continue
            if isinstance(module, FocalSparseConv):
                # Focal layers consume/produce batch_dict and emit a loss term.
                input, batch_dict, layer_loss = module(input, batch_dict)
                total_loss += layer_loss
            else:
                input = module(input)
        return input, batch_dict, total_loss
def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
                   conv_type='subm', norm_fn=None):
    """Build a sparse conv layer followed by normalization and ReLU.

    Args:
        in_channels/out_channels: int, channel counts
        kernel_size: int or tuple, kernel size
        indice_key: str, spconv indice key
        stride, padding: used by the 'spconv' variant only
        conv_type: one of 'subm', 'spconv', 'inverseconv'
        norm_fn: callable, normalization layer constructor
    Raises:
        NotImplementedError: for an unknown conv_type
    """
    if conv_type == 'subm':
        conv = spconv.SubMConv3d(in_channels, out_channels, kernel_size,
                                 bias=False, indice_key=indice_key)
    elif conv_type == 'spconv':
        conv = spconv.SparseConv3d(in_channels, out_channels, kernel_size,
                                   stride=stride, padding=padding,
                                   bias=False, indice_key=indice_key)
    elif conv_type == 'inverseconv':
        conv = spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size,
                                          indice_key=indice_key, bias=False)
    else:
        raise NotImplementedError

    return spconv.SparseSequential(conv, norm_fn(out_channels), nn.ReLU(True))
class SparseBasicBlock(spconv.SparseModule):
    """Residual block built from two submanifold sparse convolutions."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
        super(SparseBasicBlock, self).__init__()

        assert norm_fn is not None
        # Always True given the assert above; kept for parity with the original.
        bias = norm_fn is not None
        self.conv1 = spconv.SubMConv3d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1,
            bias=bias, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.relu = nn.ReLU(True)
        self.conv2 = spconv.SubMConv3d(
            planes, planes, kernel_size=3, stride=stride, padding=1,
            bias=bias, indice_key=indice_key)
        self.bn2 = norm_fn(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        # conv1 -> bn1 -> relu
        out = self.conv1(x)
        out = out.replace_feature(self.relu(self.bn1(out.features)))

        # conv2 -> bn2
        out = self.conv2(out)
        out = out.replace_feature(self.bn2(out.features))

        if self.downsample is not None:
            identity = self.downsample(x)

        # Residual add on the feature tensors, then final activation.
        out = out.replace_feature(self.relu(out.features + identity.features))

        return out
class
VoxelBackBone8xFocal
(
nn
.
Module
):
def
__init__
(
self
,
model_cfg
,
input_channels
,
grid_size
,
**
kwargs
):
super
().
__init__
()
self
.
model_cfg
=
model_cfg
norm_fn
=
partial
(
nn
.
BatchNorm1d
,
eps
=
1e-3
,
momentum
=
0.01
)
self
.
sparse_shape
=
grid_size
[::
-
1
]
+
[
1
,
0
,
0
]
self
.
conv_input
=
spconv
.
SparseSequential
(
spconv
.
SubMConv3d
(
input_channels
,
16
,
3
,
padding
=
1
,
bias
=
False
,
indice_key
=
'subm1'
),
norm_fn
(
16
),
nn
.
ReLU
(
True
),
)
block
=
post_act_block
use_img
=
model_cfg
.
get
(
'USE_IMG'
,
False
)
topk
=
model_cfg
.
get
(
'TOPK'
,
True
)
threshold
=
model_cfg
.
get
(
'THRESHOLD'
,
0.5
)
kernel_size
=
model_cfg
.
get
(
'KERNEL_SIZE'
,
3
)
mask_multi
=
model_cfg
.
get
(
'MASK_MULTI'
,
False
)
skip_mask_kernel
=
model_cfg
.
get
(
'SKIP_MASK_KERNEL'
,
False
)
skip_mask_kernel_image
=
model_cfg
.
get
(
'SKIP_MASK_KERNEL_IMG'
,
False
)
enlarge_voxel_channels
=
model_cfg
.
get
(
'ENLARGE_VOXEL_CHANNELS'
,
-
1
)
img_pretrain
=
model_cfg
.
get
(
'IMG_PRETRAIN'
,
"../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth"
)
if
use_img
:
model_cfg_seg
=
dict
(
name
=
'SemDeepLabV3'
,
backbone
=
'ResNet50'
,
num_class
=
21
,
# pretrained on COCO
args
=
{
"feat_extract_layer"
:
[
"layer1"
],
"pretrained_path"
:
img_pretrain
},
channel_reduce
=
{
"in_channels"
:
[
256
],
"out_channels"
:
[
16
],
"kernel_size"
:
[
1
],
"stride"
:
[
1
],
"bias"
:
[
False
]
}
)
cfg_dict
=
ConfigDict
(
'SemDeepLabV3'
)
objDict
.
to_object
(
cfg_dict
,
**
model_cfg_seg
)
self
.
semseg
=
PyramidFeat2D
(
optimize
=
True
,
model_cfg
=
cfg_dict
)
self
.
conv_focal_multimodal
=
FocalSparseConv
(
16
,
16
,
image_channel
=
model_cfg_seg
[
'channel_reduce'
][
'out_channels'
][
0
],
topk
=
topk
,
threshold
=
threshold
,
use_img
=
True
,
skip_mask_kernel
=
skip_mask_kernel_image
,
voxel_stride
=
1
,
norm_fn
=
norm_fn
,
indice_key
=
'spconv_focal_multimodal'
)
special_spconv_fn
=
partial
(
FocalSparseConv
,
mask_multi
=
mask_multi
,
enlarge_voxel_channels
=
enlarge_voxel_channels
,
topk
=
topk
,
threshold
=
threshold
,
kernel_size
=
kernel_size
,
padding
=
kernel_size
//
2
,
skip_mask_kernel
=
skip_mask_kernel
)
self
.
use_img
=
use_img
self
.
conv1
=
SparseSequentialBatchdict
(
block
(
16
,
16
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm1'
),
special_spconv_fn
(
16
,
16
,
voxel_stride
=
1
,
norm_fn
=
norm_fn
,
indice_key
=
'focal1'
),
)
self
.
conv2
=
SparseSequentialBatchdict
(
# [1600, 1408, 41] <- [800, 704, 21]
block
(
16
,
32
,
3
,
norm_fn
=
norm_fn
,
stride
=
2
,
padding
=
1
,
indice_key
=
'spconv2'
,
conv_type
=
'spconv'
),
block
(
32
,
32
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm2'
),
block
(
32
,
32
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm2'
),
special_spconv_fn
(
32
,
32
,
voxel_stride
=
2
,
norm_fn
=
norm_fn
,
indice_key
=
'focal2'
),
)
self
.
conv3
=
SparseSequentialBatchdict
(
# [800, 704, 21] <- [400, 352, 11]
block
(
32
,
64
,
3
,
norm_fn
=
norm_fn
,
stride
=
2
,
padding
=
1
,
indice_key
=
'spconv3'
,
conv_type
=
'spconv'
),
block
(
64
,
64
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm3'
),
block
(
64
,
64
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm3'
),
special_spconv_fn
(
64
,
64
,
voxel_stride
=
4
,
norm_fn
=
norm_fn
,
indice_key
=
'focal3'
),
)
self
.
conv4
=
SparseSequentialBatchdict
(
# [400, 352, 11] <- [200, 176, 5]
block
(
64
,
64
,
3
,
norm_fn
=
norm_fn
,
stride
=
2
,
padding
=
(
0
,
1
,
1
),
indice_key
=
'spconv4'
,
conv_type
=
'spconv'
),
block
(
64
,
64
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm4'
),
block
(
64
,
64
,
3
,
norm_fn
=
norm_fn
,
padding
=
1
,
indice_key
=
'subm4'
),
)
last_pad
=
0
last_pad
=
self
.
model_cfg
.
get
(
'last_pad'
,
last_pad
)
self
.
conv_out
=
spconv
.
SparseSequential
(
# [200, 150, 5] -> [200, 150, 2]
spconv
.
SparseConv3d
(
64
,
128
,
(
3
,
1
,
1
),
stride
=
(
2
,
1
,
1
),
padding
=
last_pad
,
bias
=
False
,
indice_key
=
'spconv_down2'
),
norm_fn
(
128
),
nn
.
ReLU
(
True
),
)
self
.
num_point_features
=
128
self
.
backbone_channels
=
{
'x_conv1'
:
16
,
'x_conv2'
:
32
,
'x_conv3'
:
64
,
'x_conv4'
:
64
}
self
.
forward_ret_dict
=
{}
def get_loss(self, tb_dict=None):
    """Return the accumulated focal-sparse-conv loss and log it.

    Args:
        tb_dict: optional dict of tensorboard scalars; created if None.
    Returns:
        (loss, tb_dict): the 'loss_box_of_pts' tensor stored by forward(),
        and tb_dict with its scalar value added under the same key.
    """
    tb_dict = {} if tb_dict is None else tb_dict
    loss = self.forward_ret_dict['loss_box_of_pts']
    tb_dict['loss_box_of_pts'] = loss.item()
    return loss, tb_dict
def forward(self, batch_dict):
    """
    Args:
        batch_dict:
            batch_size: int
            vfe_features: (num_voxels, C)
            voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
    Returns:
        batch_dict:
            encoded_spconv_tensor: sparse tensor
    """
    sp_tensor = spconv.SparseConvTensor(
        features=batch_dict['voxel_features'],
        indices=batch_dict['voxel_coords'].int(),
        spatial_shape=self.sparse_shape,
        batch_size=batch_dict['batch_size']
    )

    loss_img = 0
    x = self.conv_input(sp_tensor)

    # Each focal-conv stage also returns an auxiliary loss term.
    x_conv1, batch_dict, loss1 = self.conv1(x, batch_dict)

    # Optional multimodal branch: fuse 2D semantic features into stage 1.
    if self.use_img:
        img_feat = self.semseg(batch_dict['images'])['layer1_feat2d']
        x_conv1, batch_dict, loss_img = self.conv_focal_multimodal(x_conv1, batch_dict, img_feat)

    x_conv2, batch_dict, loss2 = self.conv2(x_conv1, batch_dict)
    x_conv3, batch_dict, loss3 = self.conv3(x_conv2, batch_dict)
    x_conv4, batch_dict, loss4 = self.conv4(x_conv3, batch_dict)

    self.forward_ret_dict['loss_box_of_pts'] = loss1 + loss2 + loss3 + loss4 + loss_img

    # for detection head
    # [200, 176, 5] -> [200, 176, 2]
    out = self.conv_out(x_conv4)

    batch_dict.update({
        'encoded_spconv_tensor': out,
        'encoded_spconv_tensor_stride': 8,
    })
    batch_dict.update({
        'multi_scale_3d_features': {
            'x_conv1': x_conv1,
            'x_conv2': x_conv2,
            'x_conv3': x_conv3,
            'x_conv4': x_conv4,
        },
    })
    batch_dict.update({
        'multi_scale_3d_strides': {
            'x_conv1': 1,
            'x_conv2': 2,
            'x_conv3': 4,
            'x_conv4': 8,
        },
    })
    return batch_dict
pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn/ddn_template.py
View file @
4c8009fc
...
...
@@ -151,7 +151,7 @@ class DDNTemplate(nn.Module):
x
=
images
if
self
.
pretrained
:
# Create a mask for padded pixels
mask
=
torch
.
isnan
(
x
)
mask
=
(
x
==
0
)
# Match ResNet pretrained preprocessing
x
=
normalize
(
x
,
mean
=
self
.
norm_mean
,
std
=
self
.
norm_std
)
...
...
pcdet/models/detectors/pv_rcnn.py
View file @
4c8009fc
...
...
@@ -28,4 +28,9 @@ class PVRCNN(Detector3DTemplate):
loss_rcnn
,
tb_dict
=
self
.
roi_head
.
get_loss
(
tb_dict
)
loss
=
loss_rpn
+
loss_point
+
loss_rcnn
if
hasattr
(
self
.
backbone_3d
,
'get_loss'
):
loss_backbone3d
,
tb_dict
=
self
.
backbone_3d
.
get_loss
(
tb_dict
)
loss
+=
loss_backbone3d
return
loss
,
tb_dict
,
disp_dict
pcdet/models/detectors/voxel_rcnn.py
View file @
4c8009fc
...
...
@@ -29,4 +29,9 @@ class VoxelRCNN(Detector3DTemplate):
loss_rcnn
,
tb_dict
=
self
.
roi_head
.
get_loss
(
tb_dict
)
loss
=
loss
+
loss_rpn
+
loss_rcnn
if
hasattr
(
self
.
backbone_3d
,
'get_loss'
):
loss_backbone3d
,
tb_dict
=
self
.
backbone_3d
.
get_loss
(
tb_dict
)
loss
+=
loss_backbone3d
return
loss
,
tb_dict
,
disp_dict
pcdet/utils/box_utils.py
View file @
4c8009fc
...
...
@@ -52,6 +52,43 @@ def boxes_to_corners_3d(boxes3d):
return
corners3d
.
numpy
()
if
is_numpy
else
corners3d
def corners_rect_to_camera(corners):
    """Recover a 7-DoF camera-frame box from its 8 corner points.

        7 -------- 4
       /|         /|
      6 -------- 5 .
      | |        | |
      . 3 -------- 0
      |/         |/
      2 -------- 1
    Args:
        corners: (8, 3) [x0, y0, z0, ...], (x, y, z) is the point coordinate in image rect
    Returns:
        boxes_rect: (7,) [x, y, z, l, h, w, r] in rect camera coords
    """
    # Corner-index pairs for the 4 parallel edges along each box dimension.
    height_group = [(0, 4), (1, 5), (2, 6), (3, 7)]
    width_group = [(0, 1), (2, 3), (4, 5), (6, 7)]
    length_group = [(0, 3), (1, 2), (4, 7), (5, 6)]

    def _mean_edge_len(pairs):
        # Average length of the 4 parallel edges (vectorized, one norm call).
        a, b = zip(*pairs)
        return float(np.linalg.norm(corners[list(a)] - corners[list(b)], axis=1).mean())

    height = _mean_edge_len(height_group)
    width = _mean_edge_len(width_group)
    length = _mean_edge_len(length_group)

    # Yaw from the summed direction vectors of the length edges, projected
    # onto the camera x-z ground plane. (The original code used a separate
    # `vector_group` identical to `length_group`; they are the same edges.)
    a, b = zip(*length_group)
    diffs = corners[list(a)] - corners[list(b)]
    rotation_y = -float(np.arctan2(diffs[:, 2].sum(), diffs[:, 0].sum()))

    center_point = corners.mean(axis=0)
    # Shift y from geometric center to box bottom (KITTI camera convention:
    # y axis points down, box origin sits on the bottom face).
    center_point[1] += height / 2
    camera_rect = np.concatenate([center_point, np.array([length, height, width, rotation_y])])

    return camera_rect
def
mask_boxes_outside_range_numpy
(
boxes
,
limit_range
,
min_num_corners
=
1
):
"""
...
...
@@ -296,3 +333,49 @@ def boxes3d_nearest_bev_iou(boxes_a, boxes_b):
boxes_bev_b
=
boxes3d_lidar_to_aligned_bev_boxes
(
boxes_b
)
return
boxes_iou_normal
(
boxes_bev_a
,
boxes_bev_b
)
def area(box) -> torch.Tensor:
    """
    Computes the area of all the boxes.
    Args:
        box: (N, 4) tensor in (xmin, ymin, xmax, ymax) order.
    Returns:
        torch.Tensor: a vector with areas of each box.
    """
    widths = box[:, 2] - box[:, 0]
    heights = box[:, 3] - box[:, 1]
    return widths * heights
# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def pairwise_iou(boxes1, boxes2) -> torch.Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).
    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
    Returns:
        Tensor: IoU, sized [N,M].
    """
    # Per-box areas (inlined from the sibling `area` helper; same formula).
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Pairwise intersection extents via broadcasting: [N,1,2] against [M,2].
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    width_height = (bottom_right - top_left).clamp_(min=0)  # [N,M,2]
    inter = width_height.prod(dim=2)  # [N,M]
    del width_height

    # Non-overlapping pairs get exactly 0 (avoids 0/0 for empty boxes).
    union = area1[:, None] + area2 - inter
    zero = torch.zeros(1, dtype=inter.dtype, device=inter.device)
    return torch.where(inter > 0, inter / union, zero)
tools/cfgs/kitti_models/voxel_rcnn_car_focal_multimodal.yaml
0 → 100644
View file @
4c8009fc
CLASS_NAMES
:
[
'
Car'
]
DATA_CONFIG
:
_BASE_CONFIG_
:
cfgs/dataset_configs/kitti_dataset.yaml
GET_ITEM_LIST
:
[
"
images"
,
"
points"
,
"
calib_matricies"
,
"
gt_boxes2d"
]
DATA_AUGMENTOR
:
DISABLE_AUG_LIST
:
[
'
placeholder'
]
AUG_CONFIG_LIST
:
-
NAME
:
gt_sampling
# AUG_WITH_IMAGE: True # use PC-Image Aug
IMG_AUG_TYPE
:
kitti
USE_ROAD_PLANE
:
True
DB_INFO_PATH
:
-
kitti_dbinfos_train.pkl
PREPARE
:
{
filter_by_min_points
:
[
'
Car:5'
],
filter_by_difficulty
:
[
-1
],
}
SAMPLE_GROUPS
:
[
'
Car:15'
]
NUM_POINT_FEATURES
:
4
DATABASE_WITH_FAKELIDAR
:
False
REMOVE_EXTRA_WIDTH
:
[
0.0
,
0.0
,
0.0
]
LIMIT_WHOLE_SCENE
:
False
-
NAME
:
random_world_flip
ALONG_AXIS_LIST
:
[
'
x'
]
-
NAME
:
random_world_rotation
WORLD_ROT_ANGLE
:
[
-0.78539816
,
0.78539816
]
-
NAME
:
random_world_scaling
WORLD_SCALE_RANGE
:
[
0.95
,
1.05
]
MODEL
:
NAME
:
VoxelRCNN
VFE
:
NAME
:
MeanVFE
BACKBONE_3D
:
NAME
:
VoxelBackBone8xFocal
USE_IMG
:
True
IMG_PRETRAIN
:
"
../checkpoints/deeplabv3_resnet50_coco-cd0a2569.pth"
MAP_TO_BEV
:
NAME
:
HeightCompression
NUM_BEV_FEATURES
:
256
BACKBONE_2D
:
NAME
:
BaseBEVBackbone
LAYER_NUMS
:
[
5
,
5
]
LAYER_STRIDES
:
[
1
,
2
]
NUM_FILTERS
:
[
64
,
128
]
UPSAMPLE_STRIDES
:
[
1
,
2
]
NUM_UPSAMPLE_FILTERS
:
[
128
,
128
]
DENSE_HEAD
:
NAME
:
AnchorHeadSingle
CLASS_AGNOSTIC
:
False
USE_DIRECTION_CLASSIFIER
:
True
DIR_OFFSET
:
0.78539
DIR_LIMIT_OFFSET
:
0.0
NUM_DIR_BINS
:
2
ANCHOR_GENERATOR_CONFIG
:
[
{
'
class_name'
:
'
Car'
,
'
anchor_sizes'
:
[[
3.9
,
1.6
,
1.56
]],
'
anchor_rotations'
:
[
0
,
1.57
],
'
anchor_bottom_heights'
:
[
-1.78
],
'
align_center'
:
False
,
'
feature_map_stride'
:
8
,
'
matched_threshold'
:
0.6
,
'
unmatched_threshold'
:
0.45
},
]
TARGET_ASSIGNER_CONFIG
:
NAME
:
AxisAlignedTargetAssigner
POS_FRACTION
:
-1.0
SAMPLE_SIZE
:
512
NORM_BY_NUM_EXAMPLES
:
False
MATCH_HEIGHT
:
False
BOX_CODER
:
ResidualCoder
LOSS_CONFIG
:
LOSS_WEIGHTS
:
{
'
cls_weight'
:
1.0
,
'
loc_weight'
:
2.0
,
'
dir_weight'
:
0.2
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
ROI_HEAD
:
NAME
:
VoxelRCNNHead
CLASS_AGNOSTIC
:
True
SHARED_FC
:
[
256
,
256
]
CLS_FC
:
[
256
,
256
]
REG_FC
:
[
256
,
256
]
DP_RATIO
:
0.3
NMS_CONFIG
:
TRAIN
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
NMS_PRE_MAXSIZE
:
9000
NMS_POST_MAXSIZE
:
512
NMS_THRESH
:
0.8
TEST
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
USE_FAST_NMS
:
False
SCORE_THRESH
:
0.0
NMS_PRE_MAXSIZE
:
2048
NMS_POST_MAXSIZE
:
100
NMS_THRESH
:
0.7
ROI_GRID_POOL
:
FEATURES_SOURCE
:
[
'
x_conv2'
,
'
x_conv3'
,
'
x_conv4'
]
PRE_MLP
:
True
GRID_SIZE
:
6
POOL_LAYERS
:
x_conv2
:
MLPS
:
[[
32
,
32
]]
QUERY_RANGES
:
[[
4
,
4
,
4
]]
POOL_RADIUS
:
[
0.4
]
NSAMPLE
:
[
16
]
POOL_METHOD
:
max_pool
x_conv3
:
MLPS
:
[[
32
,
32
]]
QUERY_RANGES
:
[[
4
,
4
,
4
]]
POOL_RADIUS
:
[
0.8
]
NSAMPLE
:
[
16
]
POOL_METHOD
:
max_pool
x_conv4
:
MLPS
:
[[
32
,
32
]]
QUERY_RANGES
:
[[
4
,
4
,
4
]]
POOL_RADIUS
:
[
1.6
]
NSAMPLE
:
[
16
]
POOL_METHOD
:
max_pool
TARGET_CONFIG
:
BOX_CODER
:
ResidualCoder
ROI_PER_IMAGE
:
128
FG_RATIO
:
0.5
SAMPLE_ROI_BY_EACH_CLASS
:
True
CLS_SCORE_TYPE
:
roi_iou
CLS_FG_THRESH
:
0.75
CLS_BG_THRESH
:
0.25
CLS_BG_THRESH_LO
:
0.1
HARD_BG_RATIO
:
0.8
REG_FG_THRESH
:
0.55
LOSS_CONFIG
:
CLS_LOSS
:
BinaryCrossEntropy
REG_LOSS
:
smooth-l1
CORNER_LOSS_REGULARIZATION
:
True
GRID_3D_IOU_LOSS
:
False
LOSS_WEIGHTS
:
{
'
rcnn_cls_weight'
:
1.0
,
'
rcnn_reg_weight'
:
1.0
,
'
rcnn_corner_weight'
:
1.0
,
'
rcnn_iou3d_weight'
:
1.0
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
POST_PROCESSING
:
RECALL_THRESH_LIST
:
[
0.3
,
0.5
,
0.7
]
SCORE_THRESH
:
0.3
OUTPUT_RAW_SCORE
:
False
EVAL_METRIC
:
kitti
NMS_CONFIG
:
MULTI_CLASSES_NMS
:
False
NMS_TYPE
:
nms_gpu
NMS_THRESH
:
0.1
NMS_PRE_MAXSIZE
:
4096
NMS_POST_MAXSIZE
:
500
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
2
NUM_EPOCHS
:
80
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.01
MOMENTUM
:
0.9
MOMS
:
[
0.95
,
0.85
]
PCT_START
:
0.4
DIV_FACTOR
:
10
DECAY_STEP_LIST
:
[
35
,
45
]
LR_DECAY
:
0.1
LR_CLIP
:
0.0000001
LR_WARMUP
:
False
WARMUP_EPOCH
:
1
GRAD_NORM_CLIP
:
10
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment