OpenDAS / mmdetection3d / Commits / d7067e44

Commit d7067e44 (unverified), authored Dec 03, 2022 by Wenwei Zhang, committed by GitHub on Dec 03, 2022.
Parents: 28fe73d2, fb0e57e5

Bump version to v1.1.0rc2

Changes: 360 in the full commit; showing 20 changed files with 959 additions and 209 deletions (+959, -209).
mmdet3d/evaluation/metrics/__init__.py                    +1   -1
mmdet3d/evaluation/metrics/indoor_metric.py               +6   -5
mmdet3d/evaluation/metrics/instance_seg_metric.py         +1   -1
mmdet3d/evaluation/metrics/kitti_metric.py                +74  -63
mmdet3d/evaluation/metrics/lyft_metric.py                 +1   -1
mmdet3d/evaluation/metrics/nuscenes_metric.py             +1   -1
mmdet3d/evaluation/metrics/waymo_metric.py                +78  -45
mmdet3d/models/backbones/__init__.py                      +1   -0
mmdet3d/models/backbones/mink_resnet.py                   +13  -12
mmdet3d/models/backbones/nostem_regnet.py                 +2   -1
mmdet3d/models/data_preprocessors/data_preprocessor.py    +51  -48
mmdet3d/models/data_preprocessors/utils.py                +6   -6
mmdet3d/models/decode_heads/decode_head.py                +20  -19
mmdet3d/models/dense_heads/__init__.py                    +2   -1
mmdet3d/models/dense_heads/anchor3d_head.py               +1   -1
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py     +1   -1
mmdet3d/models/dense_heads/base_3d_dense_head.py          +1   -1
mmdet3d/models/dense_heads/centerpoint_head.py            +1   -1
mmdet3d/models/dense_heads/fcaf3d_head.py                 +697 -0
mmdet3d/models/dense_heads/fcos_mono3d_head.py            +1   -1
mmdet3d/evaluation/metrics/__init__.py

@@ -7,7 +7,7 @@ from .nuscenes_metric import NuScenesMetric  # noqa: F401,F403
 from .seg_metric import SegMetric  # noqa: F401,F403
 from .waymo_metric import WaymoMetric  # noqa: F401,F403
 
-__all_ = [
+__all__ = [
     'KittiMetric', 'NuScenesMetric', 'IndoorMetric', 'LyftMetric', 'SegMetric',
     'InstanceSegMetric', 'WaymoMetric'
 ]
mmdet3d/evaluation/metrics/indoor_metric.py

@@ -3,13 +3,13 @@ from collections import OrderedDict
 from typing import Dict, List, Optional, Sequence
 
 import numpy as np
+from mmdet.evaluation import eval_map
 from mmengine.evaluator import BaseMetric
 from mmengine.logging import MMLogger
 
 from mmdet3d.evaluation import indoor_eval
 from mmdet3d.registry import METRICS
 from mmdet3d.structures import get_box_type
-from mmdet.evaluation import eval_map
 
 
 @METRICS.register_module()

@@ -78,14 +78,15 @@ class IndoorMetric(BaseMetric):
             ann_infos.append(eval_ann)
             pred_results.append(sinlge_pred_results)
 
+        # some checkpoints may not record the key "box_type_3d"
         box_type_3d, box_mode_3d = get_box_type(
-            self.dataset_meta['box_type_3d'])
+            self.dataset_meta.get('box_type_3d', 'depth'))
 
         ret_dict = indoor_eval(
             ann_infos,
             pred_results,
             self.iou_thr,
-            self.dataset_meta['CLASSES'],
+            self.dataset_meta['classes'],
             logger=logger,
             box_mode_3d=box_mode_3d)

@@ -141,7 +142,7 @@ class Indoor2DMetric(BaseMetric):
             pred_labels = pred['labels'].cpu().numpy()
             dets = []
-            for label in range(len(self.dataset_meta['CLASSES'])):
+            for label in range(len(self.dataset_meta['classes'])):
                 index = np.where(pred_labels == label)[0]
                 pred_bbox_scores = np.hstack(
                     [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])

@@ -170,7 +171,7 @@ class Indoor2DMetric(BaseMetric):
                 annotations,
                 scale_ranges=None,
                 iou_thr=iou_thr_2d_single,
-                dataset=self.dataset_meta['CLASSES'],
+                dataset=self.dataset_meta['classes'],
                 logger=logger)
             eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
         return eval_results
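The hunks above (and the matching ones in the other metric files below) all apply the same metainfo change: metrics now read the lower-case 'classes' key from dataset_meta instead of 'CLASSES', and tolerate a missing 'box_type_3d'. A minimal runnable sketch of the new lookups (the dict values here are hypothetical):

dataset_meta = {'classes': ('cabinet', 'bed', 'chair')}
classes = dataset_meta['classes']  # new lower-case key
# .get() with a default mirrors the tolerant box-type lookup above
box_type_3d = dataset_meta.get('box_type_3d', 'depth')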
mmdet3d/evaluation/metrics/instance_seg_metric.py

@@ -64,7 +64,7 @@ class InstanceSegMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()
 
-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']
         self.valid_class_ids = self.dataset_meta['seg_valid_class_ids']
 
         gt_semantic_masks = []
mmdet3d/evaluation/metrics/kitti_metric.py

@@ -36,6 +36,10 @@ class KittiMetric(BaseMetric):
             If not specified, a temp file will be created. Default: None.
         default_cam_key (str, optional): The default camera for lidar to
             camera conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT.
+        format_only (bool): Format the output results without performing
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
             Default: None.
@@ -52,6 +56,7 @@ class KittiMetric(BaseMetric):
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
                  default_cam_key: str = 'CAM2',
+                 format_only: bool = False,
                  submission_prefix: str = None,
                  collect_device: str = 'cpu',
                  file_client_args: dict = dict(backend='disk')):
@@ -61,6 +66,13 @@ class KittiMetric(BaseMetric):
         self.pcd_limit_range = pcd_limit_range
         self.ann_file = ann_file
         self.pklfile_prefix = pklfile_prefix
+        self.format_only = format_only
+        if self.format_only:
+            assert submission_prefix is not None, 'submission_prefix must be'
+            'not None when format_only is True, otherwise the result files'
+            'will be saved to a temp directory which will be cleaned up at'
+            'the end.'
         self.submission_prefix = submission_prefix
         self.pred_box_type_3d = pred_box_type_3d
         self.default_cam_key = default_cam_key
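A hedged, config-style sketch of the new flag (assuming mmdet3d v1.1 registry names; the paths are hypothetical): with format_only=True the metric only writes submission files and skips evaluation, so submission_prefix must be set, per the assert above.

val_evaluator = dict(
    type='KittiMetric',
    ann_file='data/kitti/kitti_infos_val.pkl',  # hypothetical path
    format_only=True,
    # required when format_only=True
    submission_prefix='results/kitti/submission')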
@@ -74,68 +86,62 @@ class KittiMetric(BaseMetric):
             raise KeyError("metric should be one of 'bbox', 'img_bbox', "
                            'but got {metric}.')
 
-    def convert_annos_to_kitti_annos(self,
-                                     data_annos: list,
-                                     classes: list = [
-                                         'Pedestrian', 'Cyclist', 'Car',
-                                         'Van', 'Truck', 'Person_sitting',
-                                         'Tram', 'Misc'
-                                     ]) -> list:
+    def convert_annos_to_kitti_annos(self, data_infos: dict) -> list:
         """Convert loading annotations to Kitti annotations.
 
         Args:
-            data_annos (list[dict]): Annotations loaded from ann_file.
-            classes (list[str]): Classes used in the dataset. Default used
-                ['Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
-                'Person_sitting', 'Tram', 'Misc'].
+            data_infos (dict): Data infos including metainfo and annotations
+                loaded from ann_file.
 
         Returns:
             List[dict]: List of Kitti annotations.
         """
+        data_annos = data_infos['data_list']
+        if not self.format_only:
+            cat2label = data_infos['metainfo']['categories']
+            label2cat = dict((v, k) for (k, v) in cat2label.items())
         assert 'instances' in data_annos[0]
         for i, annos in enumerate(data_annos):
             if len(annos['instances']) == 0:
                 kitti_annos = {
                     'name': np.array([]),
                     'truncated': np.array([]),
                     'occluded': np.array([]),
                     'alpha': np.array([]),
                     'bbox': np.zeros([0, 4]),
                     'dimensions': np.zeros([0, 3]),
                     'location': np.zeros([0, 3]),
                     'rotation_y': np.array([]),
                     'score': np.array([]),
                 }
             else:
                 kitti_annos = {
                     'name': [],
                     'truncated': [],
                     'occluded': [],
                     'alpha': [],
                     'bbox': [],
                     'location': [],
                     'dimensions': [],
                     'rotation_y': [],
                     'score': []
                 }
                 for instance in annos['instances']:
-                    labels = instance['bbox_label']
-                    if labels == -1:
-                        kitti_annos['name'].append('DontCare')
-                    else:
-                        kitti_annos['name'].append(classes[labels])
+                    label = instance['bbox_label']
+                    kitti_annos['name'].append(label2cat[label])
                     kitti_annos['truncated'].append(instance['truncated'])
                     kitti_annos['occluded'].append(instance['occluded'])
                     kitti_annos['alpha'].append(instance['alpha'])
                     kitti_annos['bbox'].append(instance['bbox'])
                     kitti_annos['location'].append(instance['bbox_3d'][:3])
                     kitti_annos['dimensions'].append(instance['bbox_3d'][3:6])
                     kitti_annos['rotation_y'].append(instance['bbox_3d'][6])
                     kitti_annos['score'].append(instance['score'])
                 for name in kitti_annos:
                     kitti_annos[name] = np.array(kitti_annos[name])
             data_annos[i]['kitti_annos'] = kitti_annos
         return data_annos
 
     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
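For illustration, a minimal hypothetical value of the dict the new signature expects; the field values are made up, only the structure (metainfo.categories plus data_list[*].instances) matters:

data_infos = dict(
    metainfo=dict(categories={'Pedestrian': 0, 'Cyclist': 1, 'Car': 2}),
    data_list=[
        dict(instances=[
            dict(
                bbox_label=2,
                truncated=0.0,
                occluded=0,
                alpha=-0.2,
                bbox=[710.4, 144.0, 820.3, 307.9],
                bbox_3d=[1.84, 1.47, 8.41, 1.2, 1.89, 0.48, 0.01],
                score=0.0)
        ])
    ])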
@@ -176,24 +182,29 @@ class KittiMetric(BaseMetric):
             the metrics, and the values are corresponding results.
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']
 
         # load annotations
-        pkl_annos = load(
-            self.ann_file, file_client_args=self.file_client_args)['data_list']
-        self.data_infos = self.convert_annos_to_kitti_annos(pkl_annos)
+        pkl_infos = load(self.ann_file, file_client_args=self.file_client_args)
+        self.data_infos = self.convert_annos_to_kitti_annos(pkl_infos)
         result_dict, tmp_dir = self.format_results(
             results,
             pklfile_prefix=self.pklfile_prefix,
             submission_prefix=self.submission_prefix,
             classes=self.classes)
 
+        metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.dirname(self.submission_prefix)}')
+            return metric_dict
+
         gt_annos = [
             self.data_infos[result['sample_idx']]['kitti_annos']
             for result in results
         ]
-        metric_dict = {}
 
         for metric in self.metrics:
             ap_dict = self.kitti_evaluate(
                 result_dict,
@@ -331,7 +342,7 @@ class KittiMetric(BaseMetric):
             mmengine.mkdir_or_exist(submission_prefix)
 
         det_annos = []
-        print('\nConverting prediction to KITTI format')
+        print('\nConverting 3D prediction to KITTI format')
         for idx, pred_dicts in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
             annos = []
@@ -457,7 +468,7 @@ class KittiMetric(BaseMetric):
         assert len(net_outputs) == len(self.data_infos), \
             'invalid list length of network outputs'
         det_annos = []
-        print('\nConverting prediction to KITTI format')
+        print('\nConverting 2D prediction to KITTI format')
         for i, bboxes_per_sample in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
             annos = []
@@ -526,7 +537,7 @@ class KittiMetric(BaseMetric):
             mmengine.mkdir_or_exist(submission_prefix)
         print(f'Saving KITTI submission to {submission_prefix}')
         for i, anno in enumerate(det_annos):
-            sample_idx = self.data_infos[i]['image']['image_idx']
+            sample_idx = sample_id_list[i]
             cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
             with open(cur_det_file, 'w') as f:
                 bbox = anno['bbox']
mmdet3d/evaluation/metrics/lyft_metric.py

@@ -110,7 +110,7 @@ class LyftMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()
 
-        classes = self.dataset_meta['CLASSES']
+        classes = self.dataset_meta['classes']
         self.version = self.dataset_meta['version']
 
         # load annotations
mmdet3d/evaluation/metrics/nuscenes_metric.py

@@ -151,7 +151,7 @@ class NuScenesMetric(BaseMetric):
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        classes = self.dataset_meta['CLASSES']
+        classes = self.dataset_meta['classes']
         self.version = self.dataset_meta['version']
 
         # load annotations
         self.data_infos = load(
mmdet3d/evaluation/metrics/waymo_metric.py

@@ -36,14 +36,24 @@ class WaymoMetric(KittiMetric):
             names to disambiguate homonymous metrics of different evaluators.
             If prefix is not provided in the argument, self.default_prefix
             will be used instead. Defaults to None.
+        convert_kitti_format (bool, optional): Whether to convert the results
+            to kitti format. Currently, in order to be compatible with
+            camera-based methods, defaults to True.
         pklfile_prefix (str, optional): The prefix of pkl files, including
             the file path and the prefix of filename, e.g., "a/b/prefix".
             If not specified, a temp file will be created. Default: None.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
             Default: None.
-        task (str, optional): Task for 3D detection; if 'cam', filters out
-            the points that fall outside the image.
+        load_type (str, optional): Type of loading mode during training.
+
+            - 'frame_based': Load all of the instances in the frame.
+            - 'mv_image_based': Load all of the instances in the frame and
+              convert them to the FOV-based data type to support image-based
+              detectors.
+            - 'fov_image_based': Only load the instances inside the default
+              cam and convert them to the FOV-based data type to support
+              image-based detectors.
         default_cam_key (str, optional): The default camera for lidar to
             camera conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT.
         use_pred_sample_idx (bool, optional): In formatting results, use the
@@ -54,6 +64,11 @@ class WaymoMetric(KittiMetric):
             from different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
         file_client_args (dict): file client for reading gt in waymo format.
+            Defaults to ``dict(backend='disk')``.
+        idx2metainfo (Optional[str], optional): The file path of the metainfo
+            in waymo. It stores the mapping from sample_idx to metainfo.
+            The metainfo must contain the keys: 'idx2contextname' and
+            'idx2timestamp'. Defaults to None.
     """
     num_cams = 5
@@ -64,19 +79,28 @@ class WaymoMetric(KittiMetric):
                  split: str = 'training',
                  metric: Union[str, List[str]] = 'mAP',
                  pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
+                 convert_kitti_format: bool = True,
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
                  submission_prefix: str = None,
-                 task='lidar',
+                 load_type: str = 'frame_based',
                  default_cam_key: str = 'CAM_FRONT',
                  use_pred_sample_idx: bool = False,
                  collect_device: str = 'cpu',
-                 file_client_args: dict = dict(backend='disk')):
+                 file_client_args: dict = dict(backend='disk'),
+                 idx2metainfo: Optional[str] = None):
         self.waymo_bin_file = waymo_bin_file
         self.data_root = data_root
         self.split = split
-        self.task = task
+        self.load_type = load_type
         self.use_pred_sample_idx = use_pred_sample_idx
+        self.convert_kitti_format = convert_kitti_format
+
+        if idx2metainfo is not None:
+            self.idx2metainfo = mmengine.load(idx2metainfo)
+        else:
+            self.idx2metainfo = None
+
         super().__init__(
             ann_file=ann_file,
             metric=metric,
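A hedged config sketch of the reworked constructor (assuming mmdet3d v1.1; all paths are hypothetical): the old task='lidar' switch becomes load_type, and idx2metainfo optionally points at a sample_idx-to-metainfo mapping file.

val_evaluator = dict(
    type='WaymoMetric',
    ann_file='data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='data/waymo/waymo_format/gt.bin',
    data_root='data/waymo/waymo_format',
    load_type='frame_based',  # or 'mv_image_based' / 'fov_image_based'
    convert_kitti_format=True,
    idx2metainfo=None)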
@@ -100,13 +124,15 @@ class WaymoMetric(KittiMetric):
             the metrics, and the values are corresponding results.
         """
         logger: MMLogger = MMLogger.get_current_instance()
-        self.classes = self.dataset_meta['CLASSES']
+        self.classes = self.dataset_meta['classes']
 
         # load annotations
         self.data_infos = load(self.ann_file)['data_list']
         assert len(results) == len(self.data_infos), \
             'invalid list length of network outputs'
         # different from kitti, waymo do not need to convert the ann file
-        # handle the mono3d task
-        if self.task == 'mono3d':
+        # handle the mv_image_based load_mode
+        if self.load_type == 'mv_image_based':
             new_data_infos = []
             for info in self.data_infos:
                 height = info['images'][self.default_cam_key]['height']
@@ -131,7 +157,7 @@ class WaymoMetric(KittiMetric):
                     # TODO check if need to modify the sample id
                     # TODO check when will use it except for evaluation.
-                    camera_info['sample_id'] = info['sample_id']
+                    camera_info['sample_idx'] = info['sample_idx']
                     new_data_infos.append(camera_info)
             self.data_infos = new_data_infos
@@ -142,8 +168,6 @@ class WaymoMetric(KittiMetric):
             eval_tmp_dir = None
             pklfile_prefix = self.pklfile_prefix
 
-        # load annotations
-
         result_dict, tmp_dir = self.format_results(
             results,
             pklfile_prefix=pklfile_prefix,
@@ -186,11 +210,7 @@ class WaymoMetric(KittiMetric):
             f'compute_detection_metrics_main {pklfile_prefix}.bin ' + \
             f'{self.waymo_bin_file}'
         print(eval_str)
-        ret_bytes = subprocess.check_output(
-            'mmdet3d/evaluation/functional/waymo_utils/' +
-            f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
-            f'{self.waymo_bin_file}',
-            shell=True)
+        ret_bytes = subprocess.check_output(eval_str, shell=True)
         ret_texts = ret_bytes.decode('utf-8')
         print_log(ret_texts, logger=logger)
@@ -292,7 +312,7 @@ class WaymoMetric(KittiMetric):
                        pklfile_prefix: str = None,
                        submission_prefix: str = None,
                        classes: List[str] = None):
-        """Format the results to pkl file.
+        """Format the results to bin file.
 
         Args:
             results (list[dict]): Testing results of the
@@ -313,9 +333,22 @@ class WaymoMetric(KittiMetric):
             the formatted result, tmp_dir is the temporal directory created
             for saving json files when jsonfile_prefix is not specified.
         """
-        result_files, tmp_dir = super().format_results(
-            results, pklfile_prefix, submission_prefix, classes)
+        waymo_save_tmp_dir = tempfile.TemporaryDirectory()
+        waymo_results_save_dir = waymo_save_tmp_dir.name
+        waymo_results_final_path = f'{pklfile_prefix}.bin'
+
+        if self.convert_kitti_format:
+            results_kitti_format, tmp_dir = super().format_results(
+                results, pklfile_prefix, submission_prefix, classes)
+            final_results = results_kitti_format['pred_instances_3d']
+        else:
+            final_results = results
+            for i, res in enumerate(final_results):
+                # Actually, `sample_idx` here is the filename without suffix.
+                # It's for identifying the sample in formatting.
+                res['sample_idx'] = self.data_infos[i]['sample_idx']
+                res['pred_instances_3d']['bboxes_3d'].limit_yaw(
+                    offset=0.5, period=np.pi * 2)
 
         waymo_root = self.data_root
         if self.split == 'training':
@@ -326,21 +359,23 @@ class WaymoMetric(KittiMetric):
             prefix = '2'
         else:
             raise ValueError('Not supported split value.')
 
-        waymo_save_tmp_dir = tempfile.TemporaryDirectory()
-        waymo_results_save_dir = waymo_save_tmp_dir.name
-        waymo_results_final_path = f'{pklfile_prefix}.bin'
-
-        from ..functional.waymo_utils.prediction_kitti_to_waymo import \
-            KITTI2Waymo
-        converter = KITTI2Waymo(
-            result_files['pred_instances_3d'],
+        from ..functional.waymo_utils.prediction_to_waymo import \
+            Prediction2Waymo
+        converter = Prediction2Waymo(
+            final_results,
             waymo_tfrecords_dir,
             waymo_results_save_dir,
             waymo_results_final_path,
             prefix,
-            file_client_args=self.file_client_args)
+            classes,
+            file_client_args=self.file_client_args,
+            from_kitti_format=self.convert_kitti_format,
+            idx2metainfo=self.idx2metainfo)
         converter.convert()
         waymo_save_tmp_dir.cleanup()
 
-        return result_files, waymo_save_tmp_dir
+        return final_results, waymo_save_tmp_dir
 
     def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
                                cam0_info: dict):
@@ -379,7 +414,7 @@ class WaymoMetric(KittiMetric):
                 torch.from_numpy(box_dict['box3d_lidar']).cuda())
         scores = torch.from_numpy(box_dict['scores']).cuda()
         labels = torch.from_numpy(box_dict['label_preds']).long().cuda()
-        nms_scores = scores.new_zeros(scores.shape[0], len(self.CLASSES) + 1)
+        nms_scores = scores.new_zeros(scores.shape[0], len(self.classes) + 1)
         indices = labels.new_tensor(list(range(scores.shape[0])))
         nms_scores[indices, labels] = scores
         lidar_boxes3d_for_nms = xywhr2xyxyr(lidar_boxes3d.bev)
@@ -397,7 +432,7 @@ class WaymoMetric(KittiMetric):
         lidar2cam = cam0_info['images'][self.default_cam_key]['lidar2img']
         lidar2cam = np.array(lidar2cam).astype(np.float32)
         box_preds_camera = box_preds_lidar.convert_to(
-            Box3DMode.CAM, np.linalg.inv(lidar2cam), correct_yaw=True)
+            Box3DMode.CAM, lidar2cam, correct_yaw=True)
         # Note: bbox is meaningless in final evaluation, set to 0
         merged_box_dict = dict(
             bbox=np.zeros([box_preds_lidar.tensor.shape[0], 4]),
@@ -405,7 +440,7 @@ class WaymoMetric(KittiMetric):
             box3d_lidar=box_preds_lidar.tensor.numpy(),
             scores=scores.numpy(),
             label_preds=labels.numpy(),
-            sample_idx=box_dict['sample_id'],
+            sample_idx=box_dict['sample_idx'],
         )
         return merged_box_dict
@@ -431,8 +466,6 @@ class WaymoMetric(KittiMetric):
         Returns:
             list[dict]: A list of dictionaries with the kitti format.
         """
-        assert len(net_outputs) == len(self.data_infos), \
-            'invalid list length of network outputs'
         if submission_prefix is not None:
             mmengine.mkdir_or_exist(submission_prefix)
@@ -444,7 +477,7 @@ class WaymoMetric(KittiMetric):
             sample_idx = sample_id_list[idx]
             info = self.data_infos[sample_idx]
 
-            if self.task == 'mono_det':
+            if self.load_type == 'mv_image_based':
                 if idx % self.num_cams == 0:
                     box_dict_per_frame = []
                     cam0_key = list(info['images'].keys())[0]
@@ -461,7 +494,7 @@ class WaymoMetric(KittiMetric):
             # If you want to use another camera, please modify it.
             image_shape = (info['images'][self.default_cam_key]['height'],
                            info['images'][self.default_cam_key]['width'])
-            if self.task == 'mono3d':
+            if self.load_type == 'mv_image_based':
                 box_dict_per_frame.append(box_dict)
                 if (idx + 1) % self.num_cams != 0:
                     continue
@@ -544,7 +577,7 @@ class WaymoMetric(KittiMetric):
             # In waymo validation sample_idx in prediction is 000xxx
             # but in info file it is 1000xxx
             save_sample_idx = box_dict['sample_idx']
-            annos[-1]['sample_id'] = np.array(
+            annos[-1]['sample_idx'] = np.array(
                 [save_sample_idx] * len(annos[-1]['score']), dtype=np.int64)
 
             det_annos += annos
@@ -561,12 +594,12 @@ class WaymoMetric(KittiMetric):
     def convert_valid_bboxes(self, box_dict: dict, info: dict):
         """Convert the predicted boxes into valid ones. Should handle the
-        different task mode (mono3d, mv3d, lidar), separately.
+        different load modes (frame_based, mv_image_based, fov_image_based)
+        separately.
 
         Args:
             box_dict (dict): Box dictionaries to be converted.
 
-            - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
+            - bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
             - scores_3d (torch.Tensor): Scores of boxes.
             - labels_3d (torch.Tensor): Class labels of boxes.
             info (dict): Data info.
@@ -587,7 +620,7 @@ class WaymoMetric(KittiMetric):
         box_preds = box_dict['bboxes_3d']
         scores = box_dict['scores_3d']
         labels = box_dict['labels_3d']
-        sample_idx = info['sample_id']
+        sample_idx = info['sample_idx']
         box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
         if len(box_preds) == 0:
@@ -598,11 +631,11 @@ class WaymoMetric(KittiMetric):
                 scores=np.zeros([0]),
                 label_preds=np.zeros([0, 4]),
                 sample_idx=sample_idx)
-        # Here default used 'CAM2' to compute metric. If you want to
+        # Here default used 'CAM_FRONT' to compute metric. If you want to
         # use another camera, please modify it.
-        if self.task in ['mv3d', 'lidar']:
+        if self.load_type in ['frame_based', 'fov_image_based']:
             cam_key = self.default_cam_key
-        elif self.task == 'mono3d':
+        elif self.load_type == 'mv_image_based':
             cam_key = list(info['images'].keys())[0]
         else:
             raise NotImplementedError
@@ -635,12 +668,12 @@ class WaymoMetric(KittiMetric):
                           (box_2d_preds[:, 1] < image_shape[0]) &
                           (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
         # check box_preds_lidar
-        if self.task in ['lidar', 'mono3d']:
+        if self.load_type in ['frame_based']:
             limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
             valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
                               (box_preds_lidar.center < limit_range[3:]))
             valid_inds = valid_pcd_inds.all(-1)
-        elif self.task == 'mono3d':
+        elif self.load_type in ['mv_image_based', 'fov_image_based']:
             valid_inds = valid_cam_inds
         if valid_inds.sum() > 0:
mmdet3d/models/backbones/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
 from .dgcnn import DGCNNBackbone
 from .dla import DLANet
 from .mink_resnet import MinkResNet
mmdet3d/models/backbones/mink_resnet.py

@@ -5,28 +5,25 @@ try:
     import MinkowskiEngine as ME
     from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
 except ImportError:
-    import warnings
-    warnings.warn(
-        'Please follow `getting_started.md` to install MinkowskiEngine.`')
-    # blocks are used in the static part of MinkResNet
-    BasicBlock, Bottleneck = None, None
+    ME = BasicBlock = Bottleneck = None
 
 import torch.nn as nn
 
-from mmdet3d.models.builder import BACKBONES
+from mmdet3d.registry import MODELS
 
 
-@BACKBONES.register_module()
+@MODELS.register_module()
 class MinkResNet(nn.Module):
     r"""Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets
     <https://arxiv.org/abs/1904.08755>`_ for more details.
 
     Args:
         depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
-        in_channels (ont): Number of input channels, 3 for RGB.
-        num_stages (int, optional): Resnet stages. Default: 4.
-        pool (bool, optional): Add max pooling after first conv if True.
-            Default: True.
+        in_channels (int): Number of input channels, 3 for RGB.
+        num_stages (int): Resnet stages. Defaults to 4.
+        pool (bool): Whether to add max pooling after first conv.
+            Defaults to True.
     """
 
     arch_settings = {
         18: (BasicBlock, (2, 2, 2, 2)),

@@ -38,6 +35,10 @@ class MinkResNet(nn.Module):
     def __init__(self, depth, in_channels, num_stages=4, pool=True):
         super(MinkResNet, self).__init__()
+        if ME is None:
+            raise ImportError(
+                'Please follow `getting_started.md` to install MinkowskiEngine.`'  # noqa: E501
+            )
         if depth not in self.arch_settings:
             raise KeyError(f'invalid depth {depth} for resnet')
         assert 4 >= num_stages >= 1

@@ -58,7 +59,7 @@ class MinkResNet(nn.Module):
         for i, num_blocks in enumerate(stage_blocks):
             setattr(
-                self, f'layer{i}',
+                self, f'layer{i + 1}',
                 self._make_layer(block, 64 * 2**i, stage_blocks[i], stride=2))
 
     def init_weights(self):

@@ -111,6 +112,6 @@ class MinkResNet(nn.Module):
             x = self.maxpool(x)
         outs = []
         for i in range(self.num_stages):
-            x = getattr(self, f'layer{i}')(x)
+            x = getattr(self, f'layer{i + 1}')(x)
             outs.append(x)
         return outs
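The last two hunks shift the stage attributes from layer0..layer3 to layer1..layer4. A standalone toy analogue of the renamed setattr/getattr pattern (plain torch, not MinkResNet itself):

import torch.nn as nn

class Toy(nn.Module):
    def __init__(self, num_stages=4):
        super().__init__()
        self.num_stages = num_stages
        for i in range(num_stages):
            # 1-based stage names, matching the renamed MinkResNet layers
            setattr(self, f'layer{i + 1}', nn.Identity())

    def forward(self, x):
        for i in range(self.num_stages):
            x = getattr(self, f'layer{i + 1}')(x)
        return x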
mmdet3d/models/backbones/nostem_regnet.py

 # Copyright (c) OpenMMLab. All rights reserved.
-from mmdet3d.registry import MODELS
 from mmdet.models.backbones import RegNet
 
+from mmdet3d.registry import MODELS
+
 
 @MODELS.register_module()
 class NoStemRegNet(RegNet):
mmdet3d/models/data_preprocessors/data_preprocessor.py

 # Copyright (c) OpenMMLab. All rights reserved.
 import math
 from numbers import Number
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Union
 
 import numpy as np
 import torch
 from mmcv.ops import Voxelization
+from mmdet.models import DetDataPreprocessor
 from mmengine.model import stack_batch
 from mmengine.utils import is_list_of
 from torch.nn import functional as F
 
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import OptConfigType
-from mmdet.models import DetDataPreprocessor
 from .utils import multiview_img_stack_batch
@@ -28,24 +28,25 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     - 1) For image data:
 
       - Pad images in inputs to the maximum size of current batch with defined
         ``pad_value``. The padding size can be divisible by a defined
-        ``pad_size_divisor``
+        ``pad_size_divisor``.
       - Stack images in inputs to batch_imgs.
       - Convert images in inputs from bgr to rgb if the shape of input is
         (3, H, W).
       - Normalize images in inputs with defined std and mean.
       - Do batch augmentations during training.
 
     - 2) For point cloud data:
 
-      - if no voxelization, directly return list of point cloud data.
-      - if voxelization is applied, voxelize point cloud according to
+      - If no voxelization, directly return list of point cloud data.
+      - If voxelization is applied, voxelize point cloud according to
         ``voxel_type`` and obtain ``voxels``.
 
     Args:
-        voxel (bool): Whether to apply voxelziation to point cloud.
+        voxel (bool): Whether to apply voxelization to point cloud.
             Defaults to False.
         voxel_type (str): Voxelization type. Two voxelization types are
             provided: 'hard' and 'dynamic', respectively for hard
             voxelization and dynamic voxelization. Defaults to 'hard'.
-        voxel_layer (:obj:`ConfigDict`, optional): Voxelization layer
+        voxel_layer (dict or :obj:`ConfigDict`, optional): Voxelization layer
             config. Defaults to None.
         mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
             Defaults to None.
@@ -54,11 +55,21 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         pad_size_divisor (int): The size of padded image should be
             divisible by ``pad_size_divisor``. Defaults to 1.
         pad_value (Number): The padded pixel value. Defaults to 0.
-        bgr_to_rgb (bool): whether to convert image from BGR to RGB.
+        pad_mask (bool): Whether to pad instance masks. Defaults to False.
+        mask_pad_value (int): The padded pixel value for instance masks.
+            Defaults to 0.
+        pad_seg (bool): Whether to pad semantic segmentation maps.
+            Defaults to False.
+        seg_pad_value (int): The padded pixel value for semantic
+            segmentation maps. Defaults to 255.
+        bgr_to_rgb (bool): Whether to convert image from BGR to RGB.
             Defaults to False.
-        rgb_to_bgr (bool): whether to convert image from RGB to RGB.
+        rgb_to_bgr (bool): Whether to convert image from RGB to BGR.
             Defaults to False.
-        batch_augments (list[dict], optional): Batch-level augmentations
+        boxtype2tensor (bool): Whether to keep the ``BaseBoxes`` type of
+            bboxes data or not. Defaults to True.
+        batch_augments (List[dict], optional): Batch-level augmentations.
             Defaults to None.
     """
 
     def __init__(self,
@@ -76,8 +87,8 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                  bgr_to_rgb: bool = False,
                  rgb_to_bgr: bool = False,
-                 batch_augments: Optional[List[dict]] = None):
-        super().__init__(
+                 boxtype2tensor: bool = True,
+                 batch_augments: Optional[List[dict]] = None) -> None:
+        super(Det3DDataPreprocessor, self).__init__(
             mean=mean,
             std=std,
             pad_size_divisor=pad_size_divisor,
@@ -94,24 +105,21 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if voxel:
             self.voxel_layer = Voxelization(**voxel_layer)
 
-    def forward(self,
-                data: Union[dict, List[dict]],
-                training: bool = False
-                ) -> Tuple[Union[dict, List[dict]], Optional[list]]:
-        """Perform normalization、padding and bgr2rgb conversion based on
+    def forward(self,
+                data: Union[dict, List[dict]],
+                training: bool = False) -> Union[dict, List[dict]]:
+        """Perform normalization, padding and bgr2rgb conversion based on
         ``BaseDataPreprocessor``.
 
         Args:
-            data (dict | List[dict]): data from dataloader.
+            data (dict or List[dict]): Data from dataloader.
                 The dict contains the whole batch data, when it is
                 a list[dict], the list indicate test time augmentation.
             training (bool): Whether to enable training time augmentation.
                 Defaults to False.
 
         Returns:
-            Dict | List[Dict]: Data in the same format as the model input.
+            dict or List[dict]: Data in the same format as the model input.
         """
         if isinstance(data, list):
             num_augs = len(data)
@@ -126,7 +134,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             return self.simple_process(data, training)
 
     def simple_process(self, data: dict, training: bool = False) -> dict:
-        """Perform normalization、padding and bgr2rgb conversion for img data
+        """Perform normalization, padding and bgr2rgb conversion for img data
         based on ``BaseDataPreprocessor``, and voxelize point cloud if `voxel`
         is set to be True.
@@ -188,7 +196,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
 
         return {'inputs': batch_inputs, 'data_samples': data_samples}
 
-    def preprocess_img(self, _batch_img):
+    def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
         # channel transform
         if self._channel_conversion:
             _batch_img = _batch_img[[2, 1, 0], ...]
@@ -206,7 +214,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         return _batch_img
 
     def collate_data(self, data: dict) -> dict:
-        """Copying data to the target device and Performs normalization、
+        """Copying data to the target device and Performs normalization,
         padding and bgr2rgb conversion and stack based on
         ``BaseDataPreprocessor``.
@@ -273,7 +281,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                 raise TypeError(
                     'Output of `cast_data` should be a list of dict '
                     'or a tuple with inputs and data_samples, but got'
-                     f'{type(data)}: {data}')
+                    f'{type(data)}: {data}')
 
             data['inputs']['imgs'] = batch_imgs
@@ -284,14 +292,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     def _get_pad_shape(self, data: dict) -> List[tuple]:
         """Get the pad_shape of each image based on data and
         pad_size_divisor."""
-        # rewrite `_get_pad_shape` for obaining image inputs.
+        # rewrite `_get_pad_shape` for obtaining image inputs.
         _batch_inputs = data['inputs']['img']
         # Process data with `pseudo_collate`.
         if is_list_of(_batch_inputs, torch.Tensor):
             batch_pad_shape = []
             for ori_input in _batch_inputs:
                 if ori_input.dim() == 4:
-                    # mean multiivew input, select ont of the
+                    # mean multiview input, select one of the
                     # image to calculate the pad shape
                     ori_input = ori_input[0]
                 pad_h = int(
@@ -316,24 +324,24 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             batch_pad_shape = [(pad_h, pad_w)] * _batch_inputs.shape[0]
         else:
             raise TypeError('Output of `cast_data` should be a list of dict '
-                            'or a tuple with inputs and data_samples, but got'
+                            'or a tuple with inputs and data_samples, but got '
                             f'{type(data)}: {data}')
         return batch_pad_shape
 
     @torch.no_grad()
-    def voxelize(self, points: List[torch.Tensor]) -> Dict:
+    def voxelize(self, points: List[torch.Tensor]) -> Dict[str, torch.Tensor]:
         """Apply voxelization to point cloud.
 
         Args:
             points (List[Tensor]): Point cloud in one data batch.
 
         Returns:
-            dict[str, Tensor]: Voxelization information.
+            Dict[str, Tensor]: Voxelization information.
 
-            - voxels (Tensor): Features of voxels, shape is MXNxC for hard
-              voxelization, NXC for dynamic voxelization.
+            - voxels (Tensor): Features of voxels, shape is MxNxC for hard
+              voxelization, NxC for dynamic voxelization.
             - coors (Tensor): Coordinates of voxels, shape is Nx(1+NDim),
               where 1 represents the batch index.
             - num_points (Tensor, optional): Number of points in each voxel.
             - voxel_centers (Tensor, optional): Centers of voxels.
         """
@@ -342,43 +350,38 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if self.voxel_type == 'hard':
             voxels, coors, num_points, voxel_centers = [], [], [], []
-            for res in points:
+            for i, res in enumerate(points):
                 res_voxels, res_coors, res_num_points = self.voxel_layer(res)
                 res_voxel_centers = (
                     res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
                         self.voxel_layer.voxel_size) + res_voxels.new_tensor(
                             self.voxel_layer.point_cloud_range[0:3])
+                res_coors = F.pad(res_coors, (1, 0), mode='constant', value=i)
                 voxels.append(res_voxels)
                 coors.append(res_coors)
                 num_points.append(res_num_points)
                 voxel_centers.append(res_voxel_centers)
 
             voxels = torch.cat(voxels, dim=0)
             coors = torch.cat(coors, dim=0)
             num_points = torch.cat(num_points, dim=0)
             voxel_centers = torch.cat(voxel_centers, dim=0)
 
-            coors_batch = []
-            for i, coor in enumerate(coors):
-                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-                coors_batch.append(coor_pad)
-            coors_batch = torch.cat(coors_batch, dim=0)
-
             voxel_dict['num_points'] = num_points
             voxel_dict['voxel_centers'] = voxel_centers
         elif self.voxel_type == 'dynamic':
             coors = []
             # dynamic voxelization only provide a coors mapping
-            for res in points:
+            for i, res in enumerate(points):
                 res_coors = self.voxel_layer(res)
+                res_coors = F.pad(res_coors, (1, 0), mode='constant', value=i)
                 coors.append(res_coors)
             voxels = torch.cat(points, dim=0)
-            coors_batch = []
-            for i, coor in enumerate(coors):
-                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-                coors_batch.append(coor_pad)
-            coors_batch = torch.cat(coors_batch, dim=0)
+            coors = torch.cat(coors, dim=0)
         else:
             raise ValueError(f'Invalid voxelization type {self.voxel_type}')
 
         voxel_dict['voxels'] = voxels
-        voxel_dict['coors'] = coors_batch
+        voxel_dict['coors'] = coors
 
         return voxel_dict
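The refactor above moves the batch-index padding into the main loop: each per-sample coordinate tensor gets its batch index prepended immediately, so a single torch.cat replaces the second coors_batch pass. A runnable sketch of just that padding step (the coordinates are hypothetical):

import torch
import torch.nn.functional as F

coors_list = [torch.randint(0, 100, (5, 3)), torch.randint(0, 100, (7, 3))]
padded = [
    F.pad(res_coors, (1, 0), mode='constant', value=i)  # prepend batch index i
    for i, res_coors in enumerate(coors_list)
]
coors = torch.cat(padded, dim=0)  # (12, 4); column 0 is the batch index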
mmdet3d/models/data_preprocessors/utils.py

@@ -12,7 +12,7 @@ def multiview_img_stack_batch(
     """
     Compared to the stack_batch in mmengine.model.utils,
     multiview_img_stack_batch further handle the multiview images.
-    see diff of padded_sizes[:, :-2] = 0 vs padded_sizees[:, 0] = 0 in line 47
+    see diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47
 
     Stack multiple tensors to form a batch and pad the tensor to the max
     shape use the right bottom padding mode in these images. If
     ``pad_size_divisor > 0``, add padding to ensure the shape of each dim is

@@ -23,20 +23,20 @@ def multiview_img_stack_batch(
         pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding
             to ensure the shape of each dim is divisible by
             ``pad_size_divisor``. This depends on the model, and many
-            models need to be divisible by 32. Defaults to 1
-        pad_value (int, float): The padding value. Defaults to 0.
+            models need to be divisible by 32. Defaults to 1.
+        pad_value (int or float): The padding value. Defaults to 0.
 
     Returns:
         Tensor: The n dim tensor.
     """
-    assert isinstance(tensor_list, list), (
-        f'Expected input type to be list, but got {type(tensor_list)}')
+    assert isinstance(
+        tensor_list,
+        list), f'Expected input type to be list, but got {type(tensor_list)}'
     assert tensor_list, '`tensor_list` could not be an empty list'
-    assert len({tensor.ndim for tensor in tensor_list}) == 1, (
-        f'Expected the dimensions of all tensors must be the same, '
+    assert len({tensor.ndim for tensor in tensor_list}) == 1, (
+        'Expected the dimensions of all tensors must be the same, '
         f'but got {[tensor.ndim for tensor in tensor_list]}')
 
     dim = tensor_list[0].dim()

@@ -46,7 +46,7 @@ def multiview_img_stack_batch(
     max_sizes = torch.ceil(
         torch.max(all_sizes, dim=0)[0] / pad_size_divisor) * pad_size_divisor
     padded_sizes = max_sizes - all_sizes
-    # The first dim normally means channel,
-    # which should not be padded.
+    # The first dim normally means channel, which should not be padded.
     padded_sizes[:, :-2] = 0
     if padded_sizes.sum() == 0:
         return torch.stack(tensor_list)
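A runnable sketch of the padding rule the docstring refers to (shapes are hypothetical): only the trailing H and W dims receive padding, which is exactly the padded_sizes[:, :-2] = 0 line in the last hunk.

import torch

tensors = [torch.rand(3, 3, 20, 24), torch.rand(3, 3, 18, 32)]
all_sizes = torch.tensor([t.shape for t in tensors])
max_sizes = torch.ceil(torch.max(all_sizes, dim=0)[0] / 32) * 32
padded_sizes = max_sizes - all_sizes
padded_sizes[:, :-2] = 0  # never pad the view/channel dims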
mmdet3d/models/decode_heads/decode_head.py

@@ -41,19 +41,20 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
     Args:
         channels (int): Channels after modules, before conv_seg.
         num_classes (int): Number of classes.
-        dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5.
-        conv_cfg (dict, optional): Config of conv layers.
-            Default: dict(type='Conv1d').
-        norm_cfg (dict, optional): Config of norm layers.
-            Default: dict(type='BN1d').
-        act_cfg (dict, optional): Config of activation layers.
-            Default: dict(type='ReLU').
-        loss_decode (dict, optional): Config of decode loss.
-            Default: dict(type='CrossEntropyLoss').
-        ignore_index (int, optional): The label index to be ignored.
+        dropout_ratio (float): Ratio of dropout layer. Defaults to 0.5.
+        conv_cfg (dict): Config of conv layers.
+            Defaults to dict(type='Conv1d').
+        norm_cfg (dict): Config of norm layers.
+            Defaults to dict(type='BN1d').
+        act_cfg (dict): Config of activation layers.
+            Defaults to dict(type='ReLU').
+        loss_decode (dict): Config of decode loss.
+            Defaults to dict(type='CrossEntropyLoss').
+        ignore_index (int): The label index to be ignored.
             When using masked BCE loss, ignore_index should be set to None.
-            Default: 255.
+            Defaults to 255.
         init_cfg (dict or list[dict], optional): Initialization config dict.
             Defaults to None.
     """
 
     def __init__(self,

@@ -86,8 +87,6 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         else:
             self.dropout = None
-        self.fp16_enabled = False
 
     def init_weights(self):
         """Initialize weights of classification layer."""
         super().init_weights()

@@ -105,15 +104,15 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         output = self.conv_seg(feat)
         return output
 
-    def loss(self, inputs: List[Tensor],
-             batch_data_samples: SampleList) -> dict:
+    def loss(self, inputs: List[Tensor], batch_data_samples: SampleList,
+             train_cfg: ConfigType) -> dict:
         """Forward function for training.
 
         Args:
             inputs (list[torch.Tensor]): List of multi-level point features.
-            img_metas (list[dict]): Meta information of each sample.
-            pts_semantic_mask (torch.Tensor): Semantic segmentation masks
-                used if the architecture supports semantic segmentation task.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The seg
+                data samples. It usually includes information such
+                as `metainfo` and `gt_pts_seg`.
             train_cfg (dict): The training config.
 
         Returns:

@@ -129,7 +128,9 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         Args:
             inputs (list[Tensor]): List of multi-level point features.
-            batch_img_metas (list[dict]): Meta information of each sample.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The seg
+                data samples. It usually includes information such
+                as `metainfo` and `gt_pts_seg`.
             test_cfg (dict): The testing config.
 
         Returns:
mmdet3d/models/dense_heads/__init__.py

@@ -5,6 +5,7 @@ from .base_3d_dense_head import Base3DDenseHead
 from .base_conv_bbox_head import BaseConvBboxHead
 from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
+from .fcaf3d_head import FCAF3DHead
 from .fcos_mono3d_head import FCOSMono3DHead
 from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead

@@ -22,5 +23,5 @@ __all__ = [
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
     'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-    'MonoFlexHead', 'Base3DDenseHead'
+    'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
 ]
mmdet3d/models/dense_heads/anchor3d_head.py

@@ -4,6 +4,7 @@ from typing import List, Tuple
 
 import numpy as np
 import torch
+from mmdet.models.utils import multi_apply
 from torch import Tensor
 from torch import nn as nn

@@ -12,7 +13,6 @@ from mmdet3d.models.test_time_augs import merge_aug_bboxes_3d
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet3d.utils.typing import (ConfigType, InstanceList, OptConfigType,
                                   OptInstanceList)
-from mmdet.models.utils import multi_apply
 from .base_3d_dense_head import Base3DDenseHead
 from .train_mixins import AnchorTrainMixin
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py

@@ -4,13 +4,13 @@ from typing import Any, List, Sequence, Tuple, Union
 
 import torch
 from mmcv.cnn import ConvModule
+from mmdet.models.utils import multi_apply
 from mmengine.model import bias_init_with_prob, normal_init
 from torch import Tensor
 from torch import nn as nn
 
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import ConfigType, InstanceList, OptConfigType
-from mmdet.models.utils import multi_apply
 from .base_mono3d_dense_head import BaseMono3DDenseHead
mmdet3d/models/dense_heads/base_3d_dense_head.py

@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple
 
 import numpy as np
 import torch
+from mmdet.models.utils import select_single_mlvl
 from mmengine.config import ConfigDict
 from mmengine.model import BaseModule, constant_init
 from mmengine.structures import InstanceData

@@ -13,7 +14,6 @@ from mmdet3d.models.layers import box3d_multiclass_nms
 from mmdet3d.structures import limit_period, xywhr2xyxyr
 from mmdet3d.structures.det3d_data_sample import SampleList
 from mmdet3d.utils.typing import InstanceList, OptMultiConfig
-from mmdet.models.utils import select_single_mlvl
 
 
 class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
mmdet3d/models/dense_heads/centerpoint_head.py

@@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union
 
 import torch
 from mmcv.cnn import ConvModule, build_conv_layer
+from mmdet.models.utils import multi_apply
 from mmengine.model import BaseModule
 from mmengine.structures import InstanceData
 from torch import Tensor, nn

@@ -12,7 +13,6 @@ from mmdet3d.models.utils import (clip_sigmoid, draw_heatmap_gaussian,
                                   gaussian_radius)
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr
-from mmdet.models.utils import multi_apply
 from .. import builder
 from ..layers import circle_nms, nms_bev
mmdet3d/models/dense_heads/fcaf3d_head.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/dense_heads/fcaf3d_neck_with_head.py # noqa
from typing import List, Optional, Tuple

try:
    import MinkowskiEngine as ME
    from MinkowskiEngine import SparseTensor
except ImportError:
    # Please follow getting_started.md to install MinkowskiEngine.
    ME = SparseTensor = None
    pass

import torch
from mmcv.cnn import Scale
from mmcv.ops import nms3d, nms3d_normal
from mmdet.utils import reduce_mean
from mmengine.model import bias_init_with_prob
from mmengine.structures import InstanceData
from torch import Tensor, nn

from mmdet3d.registry import MODELS
from mmdet3d.structures import BaseInstance3DBoxes, rotation_3d_in_axis
from mmdet3d.utils import InstanceList, OptInstanceList
from .base_3d_dense_head import Base3DDenseHead


@MODELS.register_module()
class FCAF3DHead(Base3DDenseHead):
    r"""Bbox head of `FCAF3D <https://arxiv.org/abs/2112.00322>`_.

    Actually here we store both the sparse 3D FPN and a head. The neck and
    the head can not be simply separated as pruning score on the i-th level
    of FPN requires classification scores from i+1-th level of the head.

    Args:
        num_classes (int): Number of classes.
        in_channels (int): Number of channels in input tensors.
        out_channels (int): Number of channels in the neck output tensors.
        num_reg_outs (int): Number of regression layer channels.
        voxel_size (float): Voxel size in meters.
        pts_prune_threshold (int): Pruning threshold on each feature level.
        pts_assign_threshold (int): Box to location assigner parameter.
            Assigner selects the maximum feature level with more locations
            inside the box than pts_assign_threshold.
        pts_center_threshold (int): Box to location assigner parameter.
            After feature level for the box is determined, assigner selects
            pts_center_threshold locations closest to the box center.
        center_loss (dict): Config of centerness loss. Defaults to
            dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True).
        bbox_loss (dict): Config of bbox loss. Defaults to
            dict(type='AxisAlignedIoULoss').
        cls_loss (dict): Config of classification loss. Defaults to
            dict(type='mmdet.FocalLoss').
        train_cfg (dict, optional): Config for train stage. Defaults to None.
        test_cfg (dict, optional): Config for test stage. Defaults to None.
        init_cfg (dict, optional): Config for weight initialization.
            Defaults to None.
    """

    def __init__(self,
                 num_classes: int,
                 in_channels: int,
                 out_channels: int,
                 num_reg_outs: int,
                 voxel_size: float,
                 pts_prune_threshold: int,
                 pts_assign_threshold: int,
                 pts_center_threshold: int,
                 center_loss: dict = dict(
                     type='mmdet.CrossEntropyLoss', use_sigmoid=True),
                 bbox_loss: dict = dict(type='AxisAlignedIoULoss'),
                 cls_loss: dict = dict(type='mmdet.FocalLoss'),
                 train_cfg: Optional[dict] = None,
                 test_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super(FCAF3DHead, self).__init__(init_cfg)
        if ME is None:
            raise ImportError(
                'Please follow `getting_started.md` to install MinkowskiEngine.`'  # noqa: E501
            )
        self.voxel_size = voxel_size
        self.pts_prune_threshold = pts_prune_threshold
        self.pts_assign_threshold = pts_assign_threshold
        self.pts_center_threshold = pts_center_threshold
        self.center_loss = MODELS.build(center_loss)
        self.bbox_loss = MODELS.build(bbox_loss)
        self.cls_loss = MODELS.build(cls_loss)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self._init_layers(in_channels, out_channels, num_reg_outs, num_classes)

    @staticmethod
    def _make_block(in_channels: int, out_channels: int) -> nn.Module:
        """Construct Conv-Norm-Act block.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.

        Returns:
            torch.nn.Module: With corresponding layers.
        """
        return nn.Sequential(
            ME.MinkowskiConvolution(
                in_channels, out_channels, kernel_size=3, dimension=3),
            ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())

    @staticmethod
    def _make_up_block(in_channels: int, out_channels: int) -> nn.Module:
        """Construct DeConv-Norm-Act-Conv-Norm-Act block.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.

        Returns:
            torch.nn.Module: With corresponding layers.
        """
        return nn.Sequential(
            ME.MinkowskiGenerativeConvolutionTranspose(
                in_channels,
                out_channels,
                kernel_size=2,
                stride=2,
                dimension=3), ME.MinkowskiBatchNorm(out_channels),
            ME.MinkowskiELU(),
            ME.MinkowskiConvolution(
                out_channels, out_channels, kernel_size=3, dimension=3),
            ME.MinkowskiBatchNorm(out_channels), ME.MinkowskiELU())

    def _init_layers(self, in_channels: Tuple[int], out_channels: int,
                     num_reg_outs: int, num_classes: int):
        """Initialize layers.

        Args:
            in_channels (tuple[int]): Number of channels in input tensors.
            out_channels (int): Number of channels in the neck output tensors.
            num_reg_outs (int): Number of regression layer channels.
            num_classes (int): Number of classes.
        """
        # neck layers
        self.pruning = ME.MinkowskiPruning()
        for i in range(len(in_channels)):
            if i > 0:
                self.__setattr__(
                    f'up_block_{i}',
                    self._make_up_block(in_channels[i], in_channels[i - 1]))
            self.__setattr__(f'out_block_{i}',
                             self._make_block(in_channels[i], out_channels))

        # head layers
        self.conv_center = ME.MinkowskiConvolution(
            out_channels, 1, kernel_size=1, dimension=3)
        self.conv_reg = ME.MinkowskiConvolution(
            out_channels, num_reg_outs, kernel_size=1, dimension=3)
        self.conv_cls = ME.MinkowskiConvolution(
            out_channels, num_classes, kernel_size=1, bias=True, dimension=3)
        self.scales = nn.ModuleList(
            [Scale(1.) for _ in range(len(in_channels))])

    def init_weights(self):
        """Initialize weights."""
        nn.init.normal_(self.conv_center.kernel, std=.01)
        nn.init.normal_(self.conv_reg.kernel, std=.01)
        nn.init.normal_(self.conv_cls.kernel, std=.01)
        nn.init.constant_(self.conv_cls.bias, bias_init_with_prob(.01))

    def forward(self, x: List[Tensor]) -> Tuple[List[Tensor], ...]:
        """Forward pass.

        Args:
            x (list[Tensor]): Features from the backbone.

        Returns:
            Tuple[List[Tensor], ...]: Predictions of the head.
        """
        center_preds, bbox_preds, cls_preds, points = [], [], [], []
        inputs = x
        x = inputs[-1]
        prune_score = None
        for i in range(len(inputs) - 1, -1, -1):
            if i < len(inputs) - 1:
                x = self.__getattr__(f'up_block_{i + 1}')(x)
                x = inputs[i] + x
                x = self._prune(x, prune_score)
            out = self.__getattr__(f'out_block_{i}')(x)
            center_pred, bbox_pred, cls_pred, point, prune_score = \
                self._forward_single(out, self.scales[i])
            center_preds.append(center_pred)
            bbox_preds.append(bbox_pred)
            cls_preds.append(cls_pred)
            points.append(point)
        return center_preds[::-1], bbox_preds[::-1], cls_preds[::-1], \
            points[::-1]

    def _prune(self, x: SparseTensor, scores: SparseTensor) -> SparseTensor:
        """Prunes the tensor by score thresholding.

        Args:
            x (SparseTensor): Tensor to be pruned.
            scores (SparseTensor): Scores for thresholding.

        Returns:
            SparseTensor: Pruned tensor.
        """
        with torch.no_grad():
            coordinates = x.C.float()
            interpolated_scores = scores.features_at_coordinates(coordinates)
            prune_mask = interpolated_scores.new_zeros(
                (len(interpolated_scores)), dtype=torch.bool)
            for permutation in x.decomposition_permutations:
                score = interpolated_scores[permutation]
                mask = score.new_zeros((len(score)), dtype=torch.bool)
                topk = min(len(score), self.pts_prune_threshold)
                ids = torch.topk(score.squeeze(1), topk, sorted=False).indices
                mask[ids] = True
                prune_mask[permutation[mask]] = True
        x = self.pruning(x, prune_mask)
        return x
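# --- Illustration only, not part of fcaf3d_head.py: a standalone, runnable
# sketch of the per-scene top-k selection _prune() performs above;
# pts_prune_threshold is a made-up value. ---
import torch

score = torch.rand(10, 1)          # interpolated scores for one scene
pts_prune_threshold = 4
mask = torch.zeros(len(score), dtype=torch.bool)
topk = min(len(score), pts_prune_threshold)
ids = torch.topk(score.squeeze(1), topk, sorted=False).indices
mask[ids] = True                   # True marks locations kept after pruning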
def
_forward_single
(
self
,
x
:
SparseTensor
,
scale
:
Scale
)
->
Tuple
[
Tensor
,
...]:
"""Forward pass per level.
Args:
x (SparseTensor): Per level neck output tensor.
scale (mmcv.cnn.Scale): Per level multiplication weight.
Returns:
tuple[Tensor]: Per level head predictions.
"""
center_pred
=
self
.
conv_center
(
x
).
features
scores
=
self
.
conv_cls
(
x
)
cls_pred
=
scores
.
features
prune_scores
=
ME
.
SparseTensor
(
scores
.
features
.
max
(
dim
=
1
,
keepdim
=
True
).
values
,
coordinate_map_key
=
scores
.
coordinate_map_key
,
coordinate_manager
=
scores
.
coordinate_manager
)
reg_final
=
self
.
conv_reg
(
x
).
features
reg_distance
=
torch
.
exp
(
scale
(
reg_final
[:,
:
6
]))
reg_angle
=
reg_final
[:,
6
:]
bbox_pred
=
torch
.
cat
((
reg_distance
,
reg_angle
),
dim
=
1
)
center_preds
,
bbox_preds
,
cls_preds
,
points
=
[],
[],
[],
[]
for
permutation
in
x
.
decomposition_permutations
:
center_preds
.
append
(
center_pred
[
permutation
])
bbox_preds
.
append
(
bbox_pred
[
permutation
])
cls_preds
.
append
(
cls_pred
[
permutation
])
points
=
x
.
decomposed_coordinates
for
i
in
range
(
len
(
points
)):
points
[
i
]
=
points
[
i
]
*
self
.
voxel_size
return
center_preds
,
bbox_preds
,
cls_preds
,
points
,
prune_scores
    def _loss_by_feat_single(self, center_preds: List[Tensor],
                             bbox_preds: List[Tensor],
                             cls_preds: List[Tensor], points: List[Tensor],
                             gt_bboxes: BaseInstance3DBoxes,
                             gt_labels: Tensor,
                             input_meta: dict) -> Tuple[Tensor, ...]:
        """Loss function for a single sample.

        Args:
            center_preds (list[Tensor]): Centerness predictions for all
                levels.
            bbox_preds (list[Tensor]): Bbox predictions for all levels.
            cls_preds (list[Tensor]): Classification predictions for all
                levels.
            points (list[Tensor]): Final location coordinates for all levels.
            gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
            gt_labels (Tensor): Ground truth labels.
            input_meta (dict): Scene meta info.

        Returns:
            tuple[Tensor, ...]: Centerness, bbox, and classification loss
                values.
        """
        center_targets, bbox_targets, cls_targets = self.get_targets(
            points, gt_bboxes, gt_labels)

        center_preds = torch.cat(center_preds)
        bbox_preds = torch.cat(bbox_preds)
        cls_preds = torch.cat(cls_preds)
        points = torch.cat(points)

        # cls loss
        pos_inds = torch.nonzero(cls_targets >= 0).squeeze(1)
        n_pos = points.new_tensor(len(pos_inds))
        n_pos = max(reduce_mean(n_pos), 1.)
        cls_loss = self.cls_loss(cls_preds, cls_targets, avg_factor=n_pos)

        # bbox and centerness losses
        pos_center_preds = center_preds[pos_inds]
        pos_bbox_preds = bbox_preds[pos_inds]
        pos_center_targets = center_targets[pos_inds].unsqueeze(1)
        pos_bbox_targets = bbox_targets[pos_inds]
        # reduce_mean is outside if / else block to prevent deadlock
        center_denorm = max(
            reduce_mean(pos_center_targets.sum().detach()), 1e-6)
        if len(pos_inds) > 0:
            pos_points = points[pos_inds]
            center_loss = self.center_loss(
                pos_center_preds, pos_center_targets, avg_factor=n_pos)
            bbox_loss = self.bbox_loss(
                self._bbox_to_loss(
                    self._bbox_pred_to_bbox(pos_points, pos_bbox_preds)),
                self._bbox_to_loss(pos_bbox_targets),
                weight=pos_center_targets.squeeze(1),
                avg_factor=center_denorm)
        else:
            center_loss = pos_center_preds.sum()
            bbox_loss = pos_bbox_preds.sum()
        return center_loss, bbox_loss, cls_loss
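    # Note: the bbox loss is weighted by the centerness targets and
    # normalized by their (all-reduced) sum, so well-centered points
    # dominate the regression signal; the else branch only keeps the graph
    # alive with zero-valued sums when a sample has no positive points.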
    def loss_by_feat(self, center_preds: List[List[Tensor]],
                     bbox_preds: List[List[Tensor]],
                     cls_preds: List[List[Tensor]],
                     points: List[List[Tensor]],
                     batch_gt_instances_3d: InstanceList,
                     batch_input_metas: List[dict],
                     batch_gt_instances_ignore: OptInstanceList = None,
                     **kwargs) -> dict:
        """Loss function computed over multi-level features.

        Args:
            center_preds (list[list[Tensor]]): Centerness predictions for
                all scenes. The first list contains predictions from
                different levels. The second list contains predictions in
                a mini-batch.
            bbox_preds (list[list[Tensor]]): Bbox predictions for all
                scenes. The first list contains predictions from different
                levels. The second list contains predictions in a
                mini-batch.
            cls_preds (list[list[Tensor]]): Classification predictions for
                all scenes. The first list contains predictions from
                different levels. The second list contains predictions in
                a mini-batch.
            points (list[list[Tensor]]): Final location coordinates for all
                scenes. The first list contains predictions from different
                levels. The second list contains predictions in a
                mini-batch.
            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
                gt_instance_3d. It usually includes ``bboxes_3d``,
                ``labels_3d``, ``depths``, ``centers_2d`` and attributes.
            batch_input_metas (list[dict]): Meta information of each scene,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes``
                attribute data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: Centerness, bbox, and classification losses.
        """
        center_losses, bbox_losses, cls_losses = [], [], []
        for i in range(len(batch_input_metas)):
            center_loss, bbox_loss, cls_loss = self._loss_by_feat_single(
                center_preds=[x[i] for x in center_preds],
                bbox_preds=[x[i] for x in bbox_preds],
                cls_preds=[x[i] for x in cls_preds],
                points=[x[i] for x in points],
                input_meta=batch_input_metas[i],
                gt_bboxes=batch_gt_instances_3d[i].bboxes_3d,
                gt_labels=batch_gt_instances_3d[i].labels_3d)
            center_losses.append(center_loss)
            bbox_losses.append(bbox_loss)
            cls_losses.append(cls_loss)
        return dict(
            center_loss=torch.mean(torch.stack(center_losses)),
            bbox_loss=torch.mean(torch.stack(bbox_losses)),
            cls_loss=torch.mean(torch.stack(cls_losses)))
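    # Note: losses are computed per scene and averaged over the mini-batch
    # with torch.mean, matching the per-sample target assignment above.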
    def _predict_by_feat_single(self, center_preds: List[Tensor],
                                bbox_preds: List[Tensor],
                                cls_preds: List[Tensor],
                                points: List[Tensor],
                                input_meta: dict) -> InstanceData:
        """Generate boxes for a single sample.

        Args:
            center_preds (list[Tensor]): Centerness predictions for all
                levels.
            bbox_preds (list[Tensor]): Bbox predictions for all levels.
            cls_preds (list[Tensor]): Classification predictions for all
                levels.
            points (list[Tensor]): Final location coordinates for all levels.
            input_meta (dict): Scene meta info.

        Returns:
            InstanceData: Predicted bounding boxes, scores and labels.
        """
        mlvl_bboxes, mlvl_scores = [], []
        for center_pred, bbox_pred, cls_pred, point in zip(
                center_preds, bbox_preds, cls_preds, points):
            scores = cls_pred.sigmoid() * center_pred.sigmoid()
            max_scores, _ = scores.max(dim=1)

            if len(scores) > self.test_cfg.nms_pre > 0:
                _, ids = max_scores.topk(self.test_cfg.nms_pre)
                bbox_pred = bbox_pred[ids]
                scores = scores[ids]
                point = point[ids]

            bboxes = self._bbox_pred_to_bbox(point, bbox_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)

        bboxes = torch.cat(mlvl_bboxes)
        scores = torch.cat(mlvl_scores)
        bboxes, scores, labels = self._single_scene_multiclass_nms(
            bboxes, scores, input_meta)

        bboxes = input_meta['box_type_3d'](
            bboxes,
            box_dim=bboxes.shape[1],
            with_yaw=bboxes.shape[1] == 7,
            origin=(.5, .5, .5))

        results = InstanceData()
        results.bboxes_3d = bboxes
        results.scores_3d = scores
        results.labels_3d = labels
        return results
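    # Note: the detection score is the product of the sigmoid
    # classification score and the sigmoid centerness, mirroring the 2D
    # FCOS recipe, so off-center locations are suppressed before NMS.
    # Example: a class score of 0.9 with centerness 0.5 yields 0.45.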
    def predict_by_feat(self, center_preds: List[List[Tensor]],
                        bbox_preds: List[List[Tensor]],
                        cls_preds: List[List[Tensor]],
                        points: List[List[Tensor]],
                        batch_input_metas: List[dict],
                        **kwargs) -> List[InstanceData]:
        """Generate boxes for all scenes.

        Args:
            center_preds (list[list[Tensor]]): Centerness predictions for
                all scenes.
            bbox_preds (list[list[Tensor]]): Bbox predictions for all
                scenes.
            cls_preds (list[list[Tensor]]): Classification predictions for
                all scenes.
            points (list[list[Tensor]]): Final location coordinates for all
                scenes.
            batch_input_metas (list[dict]): Meta infos for all scenes.

        Returns:
            list[InstanceData]: Predicted bboxes, scores, and labels for
                all scenes.
        """
        results = []
        for i in range(len(batch_input_metas)):
            result = self._predict_by_feat_single(
                center_preds=[x[i] for x in center_preds],
                bbox_preds=[x[i] for x in bbox_preds],
                cls_preds=[x[i] for x in cls_preds],
                points=[x[i] for x in points],
                input_meta=batch_input_metas[i])
            results.append(result)
        return results
    @staticmethod
    def _bbox_to_loss(bbox: Tensor) -> Tensor:
        """Transform box to the axis-aligned or rotated IoU loss format.

        Args:
            bbox (Tensor): 3D box of shape (N, 6) or (N, 7).

        Returns:
            Tensor: Transformed 3D box of shape (N, 6) or (N, 7).
        """
        # rotated iou loss accepts (x, y, z, w, h, l, heading)
        if bbox.shape[-1] != 6:
            return bbox

        # axis-aligned case: x, y, z, w, h, l -> x1, y1, z1, x2, y2, z2
        return torch.stack(
            (bbox[..., 0] - bbox[..., 3] / 2, bbox[..., 1] - bbox[..., 4] / 2,
             bbox[..., 2] - bbox[..., 5] / 2, bbox[..., 0] + bbox[..., 3] / 2,
             bbox[..., 1] + bbox[..., 4] / 2, bbox[..., 2] + bbox[..., 5] / 2),
            dim=-1)
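    # Example: an axis-aligned box (x, y, z, w, h, l) = (1, 2, 3, 2, 4, 6)
    # becomes the corner form (0, 0, 0, 2, 4, 6) expected by the
    # axis-aligned IoU loss; 7-DoF rotated boxes pass through unchanged.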
    @staticmethod
    def _bbox_pred_to_bbox(points: Tensor, bbox_pred: Tensor) -> Tensor:
        """Transform predicted bbox parameters to bbox.

        Args:
            points (Tensor): Final locations of shape (N, 3).
            bbox_pred (Tensor): Predicted bbox parameters of shape (N, 6)
                or (N, 8).

        Returns:
            Tensor: Transformed 3D box of shape (N, 6) or (N, 7).
        """
        if bbox_pred.shape[0] == 0:
            return bbox_pred

        x_center = points[:, 0] + (bbox_pred[:, 1] - bbox_pred[:, 0]) / 2
        y_center = points[:, 1] + (bbox_pred[:, 3] - bbox_pred[:, 2]) / 2
        z_center = points[:, 2] + (bbox_pred[:, 5] - bbox_pred[:, 4]) / 2

        # dx_min, dx_max, dy_min, dy_max, dz_min, dz_max -> x, y, z, w, l, h
        base_bbox = torch.stack([
            x_center,
            y_center,
            z_center,
            bbox_pred[:, 0] + bbox_pred[:, 1],
            bbox_pred[:, 2] + bbox_pred[:, 3],
            bbox_pred[:, 4] + bbox_pred[:, 5],
        ], -1)

        # axis-aligned case
        if bbox_pred.shape[1] == 6:
            return base_bbox

        # rotated case: ..., sin(2a)ln(q), cos(2a)ln(q)
        scale = bbox_pred[:, 0] + bbox_pred[:, 1] + \
            bbox_pred[:, 2] + bbox_pred[:, 3]
        q = torch.exp(
            torch.sqrt(
                torch.pow(bbox_pred[:, 6], 2) + torch.pow(bbox_pred[:, 7], 2)))
        alpha = 0.5 * torch.atan2(bbox_pred[:, 6], bbox_pred[:, 7])
        return torch.stack(
            (x_center, y_center, z_center, scale / (1 + q),
             scale / (1 + q) * q, bbox_pred[:, 5] + bbox_pred[:, 4], alpha),
            dim=-1)
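    # Worked example for the rotated case: with bbox_pred[:, 6:8] = (0, ln 2),
    # q = exp(sqrt(0 + (ln 2)^2)) = 2 and alpha = 0.5 * atan2(0, ln 2) = 0,
    # so for scale = w + l = 6 the decoded size is w = 6 / (1 + 2) = 2 and
    # l = w * q = 4, i.e. q encodes the length-to-width aspect ratio.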
    @staticmethod
    def _get_face_distances(points: Tensor, boxes: Tensor) -> Tensor:
        """Calculate distances from point to box faces.

        Args:
            points (Tensor): Final locations of shape (N_points, N_boxes, 3).
            boxes (Tensor): 3D boxes of shape (N_points, N_boxes, 7).

        Returns:
            Tensor: Face distances of shape (N_points, N_boxes, 6),
                (dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).
        """
        shift = torch.stack(
            (points[..., 0] - boxes[..., 0], points[..., 1] - boxes[..., 1],
             points[..., 2] - boxes[..., 2]),
            dim=-1).permute(1, 0, 2)
        shift = rotation_3d_in_axis(
            shift, -boxes[0, :, 6], axis=2).permute(1, 0, 2)
        centers = boxes[..., :3] + shift
        dx_min = centers[..., 0] - boxes[..., 0] + boxes[..., 3] / 2
        dx_max = boxes[..., 0] + boxes[..., 3] / 2 - centers[..., 0]
        dy_min = centers[..., 1] - boxes[..., 1] + boxes[..., 4] / 2
        dy_max = boxes[..., 1] + boxes[..., 4] / 2 - centers[..., 1]
        dz_min = centers[..., 2] - boxes[..., 2] + boxes[..., 5] / 2
        dz_max = boxes[..., 2] + boxes[..., 5] / 2 - centers[..., 2]
        return torch.stack((dx_min, dx_max, dy_min, dy_max, dz_min, dz_max),
                           dim=-1)
    @staticmethod
    def _get_centerness(face_distances: Tensor) -> Tensor:
        """Compute point centerness w.r.t. the containing box.

        Args:
            face_distances (Tensor): Face distances of shape (B, N, 6),
                (dx_min, dx_max, dy_min, dy_max, dz_min, dz_max).

        Returns:
            Tensor: Centerness of shape (B, N).
        """
        x_dims = face_distances[..., [0, 1]]
        y_dims = face_distances[..., [2, 3]]
        z_dims = face_distances[..., [4, 5]]
        centerness_targets = x_dims.min(dim=-1)[0] / x_dims.max(dim=-1)[0] * \
            y_dims.min(dim=-1)[0] / y_dims.max(dim=-1)[0] * \
            z_dims.min(dim=-1)[0] / z_dims.max(dim=-1)[0]
        return torch.sqrt(centerness_targets)
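    # Note: centerness is the square root of the product of per-axis
    # min/max face-distance ratios: 1 at the exact box center, 0 on any
    # face, extending the 2D FCOS centerness definition to three axes.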
    @torch.no_grad()
    def get_targets(self, points: List[Tensor],
                    gt_bboxes: BaseInstance3DBoxes,
                    gt_labels: Tensor) -> Tuple[Tensor, ...]:
        """Compute targets for final locations for a single scene.

        Args:
            points (list[Tensor]): Final locations for all levels.
            gt_bboxes (BaseInstance3DBoxes): Ground truth boxes.
            gt_labels (Tensor): Ground truth labels.

        Returns:
            tuple[Tensor, ...]: Centerness, bbox and classification
                targets for all locations.
        """
        float_max = points[0].new_tensor(1e8)
        n_levels = len(points)
        levels = torch.cat([
            points[i].new_tensor(i).expand(len(points[i]))
            for i in range(len(points))
        ])
        points = torch.cat(points)
        gt_bboxes = gt_bboxes.to(points.device)
        n_points = len(points)
        n_boxes = len(gt_bboxes)
        volumes = gt_bboxes.volume.unsqueeze(0).expand(n_points, n_boxes)

        # condition 1: point inside box
        boxes = torch.cat((gt_bboxes.gravity_center, gt_bboxes.tensor[:, 3:]),
                          dim=1)
        boxes = boxes.expand(n_points, n_boxes, 7)
        points = points.unsqueeze(1).expand(n_points, n_boxes, 3)
        face_distances = self._get_face_distances(points, boxes)
        inside_box_condition = face_distances.min(dim=-1).values > 0

        # condition 2: positive points per level >= limit
        # calculate positive points per scale
        n_pos_points_per_level = []
        for i in range(n_levels):
            n_pos_points_per_level.append(
                torch.sum(inside_box_condition[levels == i], dim=0))
        # find best level
        n_pos_points_per_level = torch.stack(n_pos_points_per_level, dim=0)
        lower_limit_mask = n_pos_points_per_level < self.pts_assign_threshold
        lower_index = torch.argmax(lower_limit_mask.int(), dim=0) - 1
        lower_index = torch.where(lower_index < 0, 0, lower_index)
        all_upper_limit_mask = torch.all(
            torch.logical_not(lower_limit_mask), dim=0)
        best_level = torch.where(all_upper_limit_mask, n_levels - 1,
                                 lower_index)
        # keep only points with best level
        best_level = best_level.expand(n_points, n_boxes)
        levels = torch.unsqueeze(levels, 1).expand(n_points, n_boxes)
        level_condition = best_level == levels

        # condition 3: limit topk points per box by centerness
        centerness = self._get_centerness(face_distances)
        centerness = torch.where(inside_box_condition, centerness,
                                 torch.ones_like(centerness) * -1)
        centerness = torch.where(level_condition, centerness,
                                 torch.ones_like(centerness) * -1)
        top_centerness = torch.topk(
            centerness,
            min(self.pts_center_threshold + 1, len(centerness)),
            dim=0).values[-1]
        topk_condition = centerness > top_centerness.unsqueeze(0)

        # condition 4: min volume box per point
        volumes = torch.where(inside_box_condition, volumes, float_max)
        volumes = torch.where(level_condition, volumes, float_max)
        volumes = torch.where(topk_condition, volumes, float_max)
        min_volumes, min_inds = volumes.min(dim=1)

        center_targets = centerness[torch.arange(n_points), min_inds]
        bbox_targets = boxes[torch.arange(n_points), min_inds]
        if not gt_bboxes.with_yaw:
            bbox_targets = bbox_targets[:, :-1]
        cls_targets = gt_labels[min_inds]
        cls_targets = torch.where(min_volumes == float_max, -1, cls_targets)
        return center_targets, bbox_targets, cls_targets
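    # Note: target assignment chains four filters: (1) the point must lie
    # inside the box, (2) the box is matched to the last pyramid level that
    # still contains at least `pts_assign_threshold` of its points, (3) only
    # the `pts_center_threshold` most central points per box survive, and
    # (4) ties are broken by the smallest box volume; unassigned points get
    # the ignore label -1.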
    def _single_scene_multiclass_nms(self, bboxes: Tensor, scores: Tensor,
                                     input_meta: dict) -> Tuple[Tensor, ...]:
        """Multi-class NMS for a single scene.

        Args:
            bboxes (Tensor): Predicted boxes of shape (N_boxes, 6) or
                (N_boxes, 7).
            scores (Tensor): Predicted scores of shape (N_boxes, N_classes).
            input_meta (dict): Scene meta data.

        Returns:
            tuple[Tensor, ...]: Predicted bboxes, scores and labels.
        """
        num_classes = scores.shape[1]
        with_yaw = bboxes.shape[1] == 7
        nms_bboxes, nms_scores, nms_labels = [], [], []
        for i in range(num_classes):
            ids = scores[:, i] > self.test_cfg.score_thr
            if not ids.any():
                continue

            class_scores = scores[ids, i]
            class_bboxes = bboxes[ids]
            if with_yaw:
                nms_function = nms3d
            else:
                class_bboxes = torch.cat(
                    (class_bboxes, torch.zeros_like(class_bboxes[:, :1])),
                    dim=1)
                nms_function = nms3d_normal

            nms_ids = nms_function(class_bboxes, class_scores,
                                   self.test_cfg.iou_thr)
            nms_bboxes.append(class_bboxes[nms_ids])
            nms_scores.append(class_scores[nms_ids])
            nms_labels.append(
                bboxes.new_full(
                    class_scores[nms_ids].shape, i, dtype=torch.long))

        if len(nms_bboxes):
            nms_bboxes = torch.cat(nms_bboxes, dim=0)
            nms_scores = torch.cat(nms_scores, dim=0)
            nms_labels = torch.cat(nms_labels, dim=0)
        else:
            nms_bboxes = bboxes.new_zeros((0, bboxes.shape[1]))
            nms_scores = bboxes.new_zeros((0, ))
            nms_labels = bboxes.new_zeros((0, ))

        if not with_yaw:
            nms_bboxes = nms_bboxes[:, :6]

        return nms_bboxes, nms_scores, nms_labels
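    # Note: NMS runs independently per class; boxes without yaw are padded
    # with a zero heading column so nms3d_normal can be reused, and the
    # padding is stripped again before the boxes are returned.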
mmdet3d/models/dense_heads/fcos_mono3d_head.py
View file @
d7067e44
... ...
@@ -4,6 +4,7 @@ from typing import List, Optional, Sequence, Tuple
import numpy as np
import torch
from mmcv.cnn import Scale
from mmdet.models.utils import multi_apply, select_single_mlvl
from mmengine.model import normal_init
from mmengine.structures import InstanceData
from torch import Tensor
... ...
@@ -14,7 +15,6 @@ from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures import limit_period, points_img2cam, xywhr2xyxyr
from mmdet3d.utils import (ConfigType, InstanceList, OptConfigType,
                           OptInstanceList)
from mmdet.models.utils import multi_apply, select_single_mlvl
from .anchor_free_mono3d_head import AnchorFreeMono3DHead

RangeType = Sequence[Tuple[int, int]]
... ...