OpenDAS / mmdetection3d · Commits

Commit 6c03a971 (unverified)
Authored Oct 14, 2022 by Tai-Wang; committed by GitHub on Oct 14, 2022

Release v1.1.0rc1

Parents: 9611c2d0, ca42c312
Changes: 174
Showing 20 changed files with 1031 additions and 243 deletions (+1031 / -243)
mmdet3d/engine/hooks/benchmark_hook.py                      +38    -0
mmdet3d/engine/hooks/visualization_hook.py                  +34   -10
mmdet3d/evaluation/metrics/kitti_metric.py                   +4    -3
mmdet3d/evaluation/metrics/waymo_metric.py                 +292   -75
mmdet3d/models/builder.py                                    +1    -1
mmdet3d/models/data_preprocessors/data_preprocessor.py      +53   -29
mmdet3d/models/data_preprocessors/utils.py                  +65    -0
mmdet3d/models/dense_heads/base_3d_dense_head.py             +1    -1
mmdet3d/models/dense_heads/parta2_rpn_head.py                +4    -6
mmdet3d/models/dense_heads/point_rpn_head.py               +201   -71
mmdet3d/models/dense_heads/ssd_3d_head.py                    +2    -3
mmdet3d/models/detectors/__init__.py                        +23    -5
mmdet3d/models/detectors/base.py                             +1    -1
mmdet3d/models/detectors/dfm.py                            +235    -0
mmdet3d/models/detectors/dynamic_voxelnet.py                 +2    -1
mmdet3d/models/detectors/fcos_mono3d.py                      +3    -2
mmdet3d/models/detectors/groupfree3dnet.py                   +2    -2
mmdet3d/models/detectors/h3dnet.py                           +1    -1
mmdet3d/models/detectors/imvotenet.py                        +2    -2
mmdet3d/models/detectors/imvoxelnet.py                      +67   -30
mmdet3d/engine/hooks/benchmark_hook.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.hooks import Hook

from mmdet3d.registry import HOOKS


@HOOKS.register_module()
class BenchmarkHook(Hook):
    """A hook that logs the training speed of each epoch."""

    priority = 'NORMAL'

    def after_train_epoch(self, runner) -> None:
        """We use the average throughput in iterations of the entire training
        run and skip the first 50 iterations of each epoch to skip GPU warmup
        time.

        Args:
            runner (Runner): The runner of the training process.
        """
        message_hub = runner.message_hub
        max_iter_num = len(runner.train_dataloader)
        speed = message_hub.get_scalar('train/time').mean(max_iter_num - 50)
        message_hub.update_scalar('train/speed', speed)
        runner.logger.info(
            f'Training speed of epoch {runner.epoch + 1} is {speed} s/iter')

    def after_train(self, runner) -> None:
        """Log average training speed of entire training process.

        Args:
            runner (Runner): The runner of the training process.
        """
        message_hub = runner.message_hub
        avg_speed = message_hub.get_scalar('train/speed').mean()
        runner.logger.info(
            'Average training speed of entire training process '
            f'is {avg_speed} s/iter')
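For reference, a minimal usage sketch, assuming the standard MMEngine custom-hooks convention (nothing beyond the class above is implied by this commit):

    # Minimal sketch: enabling the hook from a config file, assuming the
    # usual MMEngine `custom_hooks` convention.
    custom_hooks = [dict(type='BenchmarkHook')]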
mmdet3d/engine/hooks/visualization_hook.py

@@ -4,6 +4,7 @@ import warnings
 from typing import Optional, Sequence
 
 import mmcv
+import numpy as np
 from mmengine.fileio import FileClient
 from mmengine.hooks import Hook
 from mmengine.runner import Runner

@@ -95,15 +96,27 @@ class Det3DVisualizationHook(Hook):
         # is visualized for each evaluation.
         total_curr_iter = runner.iter + batch_idx
 
+        data_input = dict()
         # Visualize only the first data
-        img_path = outputs[0].img_path
-        img_bytes = self.file_client.get(img_path)
-        img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+        if 'img_path' in outputs[0]:
+            img_path = outputs[0].img_path
+            img_bytes = self.file_client.get(img_path)
+            img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+            data_input['img'] = img
+
+        if 'lidar_path' in outputs[0]:
+            lidar_path = outputs[0].lidar_path
+            num_pts_feats = outputs[0].num_pts_feats
+            pts_bytes = self.file_client.get(lidar_path)
+            points = np.frombuffer(pts_bytes, dtype=np.float32)
+            points = points.reshape(-1, num_pts_feats)
+            data_input['points'] = points
 
         if total_curr_iter % self.interval == 0:
             self._visualizer.add_datasample(
-                osp.basename(img_path) if self.show else 'val_img',
-                img,
+                'val sample',
+                data_input,
                 data_sample=outputs[0],
                 show=self.show,
                 wait_time=self.wait_time,

@@ -135,9 +148,20 @@ class Det3DVisualizationHook(Hook):
         for data_sample in outputs:
             self._test_index += 1
 
-            img_path = data_sample.img_path
-            img_bytes = self.file_client.get(img_path)
-            img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+            data_input = dict()
+            if 'img_path' in data_sample:
+                img_path = data_sample.img_path
+                img_bytes = self.file_client.get(img_path)
+                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+                data_input['img'] = img
+
+            if 'lidar_path' in data_sample:
+                lidar_path = data_sample.lidar_path
+                num_pts_feats = data_sample.num_pts_feats
+                pts_bytes = self.file_client.get(lidar_path)
+                points = np.frombuffer(pts_bytes, dtype=np.float32)
+                points = points.reshape(-1, num_pts_feats)
+                data_input['points'] = points
 
             out_file = None
             if self.test_out_dir is not None:

@@ -145,8 +169,8 @@ class Det3DVisualizationHook(Hook):
                 out_file = osp.join(self.test_out_dir, out_file)
 
             self._visualizer.add_datasample(
-                osp.basename(img_path) if self.show else 'test_img',
-                img,
+                'test sample',
+                data_input,
                 data_sample=data_sample,
                 show=self.show,
                 wait_time=self.wait_time,
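For reference, a minimal standalone sketch of the point-loading pattern used in both hunks above; the temp path and the 4-feature (x, y, z, intensity) layout are illustrative assumptions, not something the hook fixes:

    # Sketch of the np.frombuffer + reshape pattern above, on a synthetic
    # .bin file; the path and 4-feature layout are illustrative only.
    import numpy as np

    num_pts_feats = 4
    np.random.rand(100, num_pts_feats).astype(np.float32).tofile(
        '/tmp/demo_points.bin')

    with open('/tmp/demo_points.bin', 'rb') as f:
        pts_bytes = f.read()
    points = np.frombuffer(pts_bytes, dtype=np.float32)
    points = points.reshape(-1, num_pts_feats)
    assert points.shape == (100, 4)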
mmdet3d/evaluation/metrics/kitti_metric.py

@@ -66,7 +66,8 @@ class KittiMetric(BaseMetric):
         self.default_cam_key = default_cam_key
         self.file_client_args = file_client_args
+        self.default_cam_key = default_cam_key
 
-        allowed_metrics = ['bbox', 'img_bbox', 'mAP']
+        allowed_metrics = ['bbox', 'img_bbox', 'mAP', 'LET_mAP']
         self.metrics = metric if isinstance(metric, list) else [metric]
         for metric in self.metrics:
             if metric not in allowed_metrics:

@@ -168,7 +169,7 @@ class KittiMetric(BaseMetric):
         """Compute the metrics from processed results.
 
         Args:
-            results (list): The processed results of each batch.
+            results (list): The processed results of the whole dataset.
 
         Returns:
             Dict[str, float]: The computed metrics. The keys are the names of

@@ -575,7 +576,7 @@ class KittiMetric(BaseMetric):
         box_preds = box_dict['bboxes_3d']
         scores = box_dict['scores_3d']
         labels = box_dict['labels_3d']
-        sample_idx = info['sample_id']
+        sample_idx = info['sample_idx']
         box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
         if len(box_preds) == 0:
mmdet3d/evaluation/metrics/waymo_metric.py

@@ -11,8 +11,9 @@ from mmengine.logging import MMLogger, print_log
 from mmdet3d.models.layers import box3d_multiclass_nms
 from mmdet3d.registry import METRICS
-from mmdet3d.structures import (Box3DMode, LiDARInstance3DBoxes,
-                                bbox3d2result, xywhr2xyxyr)
+from mmdet3d.structures import (Box3DMode, CameraInstance3DBoxes,
+                                LiDARInstance3DBoxes, bbox3d2result,
+                                points_cam2img, xywhr2xyxyr)
 from .kitti_metric import KittiMetric

@@ -27,7 +28,7 @@ class WaymoMetric(KittiMetric):
             Used for storing waymo evaluation programs.
         split (str): The split of the evaluation set.
         metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
+            Default to 'mAP'.
         pcd_limit_range (list): The range of point cloud used to
             filter invalid predicted boxes.
             Default to [0, -40, -3, 70.4, 40, 0.0].

@@ -54,13 +55,14 @@ class WaymoMetric(KittiMetric):
             'gpu'. Defaults to 'cpu'.
         file_client_args (dict): file client for reading gt in waymo format.
     """
+    num_cams = 5
 
     def __init__(self,
                  ann_file: str,
                  waymo_bin_file: str,
                  data_root: str,
                  split: str = 'training',
-                 metric: Union[str, List[str]] = 'bbox',
+                 metric: Union[str, List[str]] = 'mAP',
                  pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,

@@ -70,7 +72,6 @@ class WaymoMetric(KittiMetric):
                  use_pred_sample_idx: bool = False,
                  collect_device: str = 'cpu',
                  file_client_args: dict = dict(backend='disk')):
-
         self.waymo_bin_file = waymo_bin_file
         self.data_root = data_root
         self.split = split

@@ -92,7 +93,7 @@ class WaymoMetric(KittiMetric):
         """Compute the metrics from processed results.
 
         Args:
-            results (list): The processed results of each batch.
+            results (list): The processed results of the whole dataset.
 
         Returns:
             Dict[str, float]: The computed metrics. The keys are the names of

@@ -104,6 +105,35 @@ class WaymoMetric(KittiMetric):
         # load annotations
         self.data_infos = load(self.ann_file)['data_list']
         # different from kitti, waymo do not need to convert the ann file
+        # handle the mono3d task
+        if self.task == 'mono3d':
+            new_data_infos = []
+            for info in self.data_infos:
+                height = info['images'][self.default_cam_key]['height']
+                width = info['images'][self.default_cam_key]['width']
+                for (cam_key, img_info) in info['images'].items():
+                    camera_info = dict()
+                    camera_info['images'] = dict()
+                    camera_info['images'][cam_key] = img_info
+                    # TODO remove the check by updating the data info;
+                    if 'height' not in img_info:
+                        img_info['height'] = height
+                        img_info['width'] = width
+                    if 'cam_instances' in info \
+                            and cam_key in info['cam_instances']:
+                        camera_info['instances'] = \
+                            info['cam_instances'][cam_key]
+                    else:
+                        camera_info['instances'] = []
+                    camera_info['ego2global'] = info['ego2global']
+                    if 'image_sweeps' in info:
+                        camera_info['image_sweeps'] = info['image_sweeps']
+                    # TODO check if need to modify the sample id
+                    # TODO check when will use it except for evaluation.
+                    camera_info['sample_id'] = info['sample_id']
+                    new_data_infos.append(camera_info)
+            self.data_infos = new_data_infos
+
         if self.pklfile_prefix is None:
             eval_tmp_dir = tempfile.TemporaryDirectory()

@@ -120,65 +150,141 @@ class WaymoMetric(KittiMetric):
             submission_prefix=self.submission_prefix,
             classes=self.classes)
 
-        import subprocess
-        eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
-            f'compute_detection_metrics_main {pklfile_prefix}.bin ' + \
-            f'{self.waymo_bin_file}'
-        print(eval_str)
-        ret_bytes = subprocess.check_output(eval_str, shell=True)
-        ret_texts = ret_bytes.decode('utf-8')
-        print_log(ret_texts, logger=logger)
-
-        ap_dict = {
-            'Vehicle/L1 mAP': 0,
-            'Vehicle/L1 mAPH': 0,
-            'Vehicle/L2 mAP': 0,
-            'Vehicle/L2 mAPH': 0,
-            'Pedestrian/L1 mAP': 0,
-            'Pedestrian/L1 mAPH': 0,
-            'Pedestrian/L2 mAP': 0,
-            'Pedestrian/L2 mAPH': 0,
-            'Sign/L1 mAP': 0,
-            'Sign/L1 mAPH': 0,
-            'Sign/L2 mAP': 0,
-            'Sign/L2 mAPH': 0,
-            'Cyclist/L1 mAP': 0,
-            'Cyclist/L1 mAPH': 0,
-            'Cyclist/L2 mAP': 0,
-            'Cyclist/L2 mAPH': 0,
-            'Overall/L1 mAP': 0,
-            'Overall/L1 mAPH': 0,
-            'Overall/L2 mAP': 0,
-            'Overall/L2 mAPH': 0
-        }
-        mAP_splits = ret_texts.split('mAP ')
-        mAPH_splits = ret_texts.split('mAPH ')
-        for idx, key in enumerate(ap_dict.keys()):
-            split_idx = int(idx / 2) + 1
-            if idx % 2 == 0:  # mAP
-                ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
-            else:  # mAPH
-                ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
-        ap_dict['Overall/L1 mAP'] = \
-            (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
-             ap_dict['Cyclist/L1 mAP']) / 3
-        ap_dict['Overall/L1 mAPH'] = \
-            (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
-             ap_dict['Cyclist/L1 mAPH']) / 3
-        ap_dict['Overall/L2 mAP'] = \
-            (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
-             ap_dict['Cyclist/L2 mAP']) / 3
-        ap_dict['Overall/L2 mAPH'] = \
-            (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
-             ap_dict['Cyclist/L2 mAPH']) / 3
+        metric_dict = {}
+        for metric in self.metrics:
+            ap_dict = self.waymo_evaluate(
+                pklfile_prefix, metric=metric, logger=logger)
+            metric_dict[metric] = ap_dict
         if eval_tmp_dir is not None:
             eval_tmp_dir.cleanup()
 
         if tmp_dir is not None:
             tmp_dir.cleanup()
 
+        return metric_dict
+
+    def waymo_evaluate(self,
+                       pklfile_prefix: str,
+                       metric: str = None,
+                       logger: MMLogger = None) -> dict:
+        """Evaluation in Waymo protocol.
+
+        Args:
+            pklfile_prefix (str): The location that stored the prediction
+                results.
+            metric (str): Metric to be evaluated. Defaults to None.
+            logger (MMLogger, optional): Logger used for printing
+                related information during evaluation. Default: None.
+
+        Returns:
+            dict[str, float]: Results of each evaluation metric.
+        """
+        import subprocess
+
+        if metric == 'mAP':
+            eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
+                f'compute_detection_metrics_main {pklfile_prefix}.bin ' + \
+                f'{self.waymo_bin_file}'
+            print(eval_str)
+            ret_bytes = subprocess.check_output(eval_str, shell=True)
+            ret_texts = ret_bytes.decode('utf-8')
+            print_log(ret_texts, logger=logger)
+            ap_dict = {
+                'Vehicle/L1 mAP': 0,
+                'Vehicle/L1 mAPH': 0,
+                'Vehicle/L2 mAP': 0,
+                'Vehicle/L2 mAPH': 0,
+                'Pedestrian/L1 mAP': 0,
+                'Pedestrian/L1 mAPH': 0,
+                'Pedestrian/L2 mAP': 0,
+                'Pedestrian/L2 mAPH': 0,
+                'Sign/L1 mAP': 0,
+                'Sign/L1 mAPH': 0,
+                'Sign/L2 mAP': 0,
+                'Sign/L2 mAPH': 0,
+                'Cyclist/L1 mAP': 0,
+                'Cyclist/L1 mAPH': 0,
+                'Cyclist/L2 mAP': 0,
+                'Cyclist/L2 mAPH': 0,
+                'Overall/L1 mAP': 0,
+                'Overall/L1 mAPH': 0,
+                'Overall/L2 mAP': 0,
+                'Overall/L2 mAPH': 0
+            }
+            mAP_splits = ret_texts.split('mAP ')
+            mAPH_splits = ret_texts.split('mAPH ')
+            for idx, key in enumerate(ap_dict.keys()):
+                split_idx = int(idx / 2) + 1
+                if idx % 2 == 0:  # mAP
+                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
+                else:  # mAPH
+                    ap_dict[key] = float(
+                        mAPH_splits[split_idx].split(']')[0])
+            ap_dict['Overall/L1 mAP'] = \
+                (ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
+                 ap_dict['Cyclist/L1 mAP']) / 3
+            ap_dict['Overall/L1 mAPH'] = \
+                (ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
+                 ap_dict['Cyclist/L1 mAPH']) / 3
+            ap_dict['Overall/L2 mAP'] = \
+                (ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
+                 ap_dict['Cyclist/L2 mAP']) / 3
+            ap_dict['Overall/L2 mAPH'] = \
+                (ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
+                 ap_dict['Cyclist/L2 mAPH']) / 3
+        elif metric == 'LET_mAP':
+            eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
+                f'compute_detection_let_metrics_main {pklfile_prefix}.bin ' + \
+                f'{self.waymo_bin_file}'
+            print(eval_str)
+            ret_bytes = subprocess.check_output(eval_str, shell=True)
+            ret_texts = ret_bytes.decode('utf-8')
+            print_log(ret_texts, logger=logger)
+            ap_dict = {
+                'Vehicle mAPL': 0,
+                'Vehicle mAP': 0,
+                'Vehicle mAPH': 0,
+                'Pedestrian mAPL': 0,
+                'Pedestrian mAP': 0,
+                'Pedestrian mAPH': 0,
+                'Sign mAPL': 0,
+                'Sign mAP': 0,
+                'Sign mAPH': 0,
+                'Cyclist mAPL': 0,
+                'Cyclist mAP': 0,
+                'Cyclist mAPH': 0,
+                'Overall mAPL': 0,
+                'Overall mAP': 0,
+                'Overall mAPH': 0
+            }
+            mAPL_splits = ret_texts.split('mAPL ')
+            mAP_splits = ret_texts.split('mAP ')
+            mAPH_splits = ret_texts.split('mAPH ')
+            for idx, key in enumerate(ap_dict.keys()):
+                split_idx = int(idx / 3) + 1
+                if idx % 3 == 0:  # mAPL
+                    ap_dict[key] = float(
+                        mAPL_splits[split_idx].split(']')[0])
+                elif idx % 3 == 1:  # mAP
+                    ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
+                else:  # mAPH
+                    ap_dict[key] = float(
+                        mAPH_splits[split_idx].split(']')[0])
+            ap_dict['Overall mAPL'] = \
+                (ap_dict['Vehicle mAPL'] + ap_dict['Pedestrian mAPL'] +
+                 ap_dict['Cyclist mAPL']) / 3
+            ap_dict['Overall mAP'] = \
+                (ap_dict['Vehicle mAP'] + ap_dict['Pedestrian mAP'] +
+                 ap_dict['Cyclist mAP']) / 3
+            ap_dict['Overall mAPH'] = \
+                (ap_dict['Vehicle mAPH'] + ap_dict['Pedestrian mAPH'] +
+                 ap_dict['Cyclist mAPH']) / 3
+        return ap_dict
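To make the split-based parsing above concrete, a tiny sketch with a fabricated output line; real evaluator output has one such line per class and difficulty level:

    # Sketch of the split-based parsing above; the sample string is
    # fabricated for illustration, not real evaluator output.
    ret_texts = ('OBJECT_TYPE_TYPE_VEHICLE_LEVEL_1: [mAP 0.7] [mAPH 0.69]\n'
                 'OBJECT_TYPE_TYPE_VEHICLE_LEVEL_2: [mAP 0.6] [mAPH 0.59]\n')
    mAP_splits = ret_texts.split('mAP ')
    # Each piece after the first starts with the number, terminated by ']'.
    assert float(mAP_splits[1].split(']')[0]) == 0.7
    assert float(mAP_splits[2].split(']')[0]) == 0.6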
     def format_results(self,

@@ -254,7 +360,7 @@ class WaymoMetric(KittiMetric):
             for cam_idx in range(self.num_cams):
                 box_dict[key].append(box_dict_per_frame[cam_idx][key])
         # merge each elements
-        box_dict['sample_id'] = cam0_info['image_id']
+        box_dict['sample_idx'] = cam0_info['image_id']
         for key in ['bbox', 'box3d_lidar', 'scores', 'label_preds']:
             box_dict[key] = np.concatenate(box_dict[key])

@@ -284,14 +390,14 @@ class WaymoMetric(KittiMetric):
             nms_cfg.max_per_frame, nms_cfg)
         lidar_boxes3d = LiDARInstance3DBoxes(boxes3d)
         det = bbox3d2result(lidar_boxes3d, scores, labels)
-        box_preds_lidar = det['boxes_3d']
+        box_preds_lidar = det['bboxes_3d']
         scores = det['scores_3d']
         labels = det['labels_3d']
 
         # box_preds_camera is in the cam0 system
-        rect = cam0_info['calib']['R0_rect'].astype(np.float32)
-        Trv2c = cam0_info['calib']['Tr_velo_to_cam'].astype(np.float32)
+        lidar2cam = cam0_info['images'][self.default_cam_key]['lidar2img']
+        lidar2cam = np.array(lidar2cam).astype(np.float32)
         box_preds_camera = box_preds_lidar.convert_to(
-            Box3DMode.CAM, rect @ Trv2c, correct_yaw=True)
+            Box3DMode.CAM, np.linalg.inv(lidar2cam), correct_yaw=True)
 
         # Note: bbox is meaningless in final evaluation, set to 0
         merged_box_dict = dict(
             bbox=np.zeros([box_preds_lidar.tensor.shape[0], 4]),

@@ -299,7 +405,7 @@ class WaymoMetric(KittiMetric):
             box3d_lidar=box_preds_lidar.tensor.numpy(),
             scores=scores.numpy(),
             label_preds=labels.numpy(),
-            sample_idx=box_dict['sample_idx'],
+            sample_idx=box_dict['sample_id'],
         )
         return merged_box_dict

@@ -337,23 +443,31 @@ class WaymoMetric(KittiMetric):
             annos = []
             sample_idx = sample_id_list[idx]
             info = self.data_infos[sample_idx]
-            # Here default used 'CAM2' to compute metric. If you want to
-            # use another camera, please modify it.
-            image_shape = (info['images'][self.default_cam_key]['height'],
-                           info['images'][self.default_cam_key]['width'])
-            if self.task == 'mono3d':
+            if self.task == 'mono_det':
                 if idx % self.num_cams == 0:
                     box_dict_per_frame = []
-                    cam0_idx = idx
-                box_dict = self.convert_valid_bboxes(pred_dicts, info)
+                    cam0_key = list(info['images'].keys())[0]
+                    cam0_info = info
+                # Here in mono3d, we use the 'CAM_FRONT' "the first
+                # index in the camera" as the default image shape.
+                # If you want to another camera, please modify it.
+                image_shape = (info['images'][cam0_key]['height'],
+                               info['images'][cam0_key]['width'])
+                box_dict = self.convert_valid_bboxes(pred_dicts, info)
             else:
                 box_dict = self.convert_valid_bboxes(pred_dicts, info)
+                # Here default used 'CAM_FRONT' to compute metric.
+                # If you want to use another camera, please modify it.
+                image_shape = (info['images'][self.default_cam_key]['height'],
+                               info['images'][self.default_cam_key]['width'])
             if self.task == 'mono3d':
                 box_dict_per_frame.append(box_dict)
                 if (idx + 1) % self.num_cams != 0:
                     continue
-                box_dict = self.merge_multi_view_boxes(
-                    box_dict_per_frame, self.data_infos[cam0_idx])
+                box_dict = self.merge_multi_view_boxes(box_dict_per_frame,
+                                                       cam0_info)
             anno = {
                 'name': [],
                 'truncated': [],

@@ -444,3 +558,106 @@ class WaymoMetric(KittiMetric):
                 print(f'Result is saved to {out}.')
 
         return det_annos
+
+    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+        """Convert the predicted boxes into valid ones. Should handle the
+        different task mode (mono3d, mv3d, lidar), separately.
+
+        Args:
+            box_dict (dict): Box dictionaries to be converted.
+                - bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (torch.Tensor): Scores of boxes.
+                - labels_3d (torch.Tensor): Class labels of boxes.
+            info (dict): Data info.
+
+        Returns:
+            dict: Valid predicted boxes.
+                - bbox (np.ndarray): 2D bounding boxes.
+                - box3d_camera (np.ndarray): 3D bounding boxes in
+                  camera coordinate.
+                - box3d_lidar (np.ndarray): 3D bounding boxes in
+                  LiDAR coordinate.
+                - scores (np.ndarray): Scores of boxes.
+                - label_preds (np.ndarray): Class label predictions.
+                - sample_idx (int): Sample index.
+        """
+        # TODO: refactor this function
+        box_preds = box_dict['bboxes_3d']
+        scores = box_dict['scores_3d']
+        labels = box_dict['labels_3d']
+        sample_idx = info['sample_id']
+        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
+
+        if len(box_preds) == 0:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
+        # Here default used 'CAM2' to compute metric. If you want to
+        # use another camera, please modify it.
+        if self.task in ['mv3d', 'lidar']:
+            cam_key = self.default_cam_key
+        elif self.task == 'mono3d':
+            cam_key = list(info['images'].keys())[0]
+        else:
+            raise NotImplementedError
+
+        lidar2cam = np.array(
+            info['images'][cam_key]['lidar2cam']).astype(np.float32)
+        P2 = np.array(info['images'][cam_key]['cam2img']).astype(np.float32)
+        img_shape = (info['images'][cam_key]['height'],
+                     info['images'][cam_key]['width'])
+        P2 = box_preds.tensor.new_tensor(P2)
+
+        if isinstance(box_preds, LiDARInstance3DBoxes):
+            box_preds_camera = box_preds.convert_to(Box3DMode.CAM, lidar2cam)
+            box_preds_lidar = box_preds
+        elif isinstance(box_preds, CameraInstance3DBoxes):
+            box_preds_camera = box_preds
+            box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,
+                                                   np.linalg.inv(lidar2cam))
+
+        box_corners = box_preds_camera.corners
+        box_corners_in_image = points_cam2img(box_corners, P2)
+        # box_corners_in_image: [N, 8, 2]
+        minxy = torch.min(box_corners_in_image, dim=1)[0]
+        maxxy = torch.max(box_corners_in_image, dim=1)[0]
+        box_2d_preds = torch.cat([minxy, maxxy], dim=1)
+        # Post-processing
+        # check box_preds_camera
+        image_shape = box_preds.tensor.new_tensor(img_shape)
+        valid_cam_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
+                          (box_2d_preds[:, 1] < image_shape[0]) &
+                          (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))
+        # check box_preds_lidar
+        if self.task in ['lidar', 'mono3d']:
+            limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
+            valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
+                              (box_preds_lidar.center < limit_range[3:]))
+            valid_inds = valid_pcd_inds.all(-1)
+        elif self.task == 'mono3d':
+            valid_inds = valid_cam_inds
+
+        if valid_inds.sum() > 0:
+            return dict(
+                bbox=box_2d_preds[valid_inds, :].numpy(),
+                pred_box_type_3d=type(box_preds),
+                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
+                box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),
+                scores=scores[valid_inds].numpy(),
+                label_preds=labels[valid_inds].numpy(),
+                sample_idx=sample_idx)
+        else:
+            return dict(
+                bbox=np.zeros([0, 4]),
+                pred_box_type_3d=type(box_preds),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0]),
+                sample_idx=sample_idx)
mmdet3d/models/builder.py

@@ -92,7 +92,7 @@ def build_segmentor(cfg, train_cfg=None, test_cfg=None):
 def build_model(cfg, train_cfg=None, test_cfg=None):
-    """A function warpper for building 3D detector or segmentor according to
+    """A function wrapper for building 3D detector or segmentor according to
     cfg.
 
     Should be deprecated in the future.
mmdet3d/models/data_preprocessors/data_preprocessor.py

@@ -13,7 +13,7 @@ from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import OptConfigType
 from mmdet.models import DetDataPreprocessor
-from mmdet.models.utils.misc import samplelist_boxlist2tensor
+from .utils import multiview_img_stack_batch
 
 
 @MODELS.register_module()

@@ -75,7 +75,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                  seg_pad_value: int = 255,
                  bgr_to_rgb: bool = False,
                  rgb_to_bgr: bool = False,
-                 boxlist2tensor: bool = True,
+                 boxtype2tensor: bool = True,
                  batch_augments: Optional[List[dict]] = None):
         super().__init__(
             mean=mean,

@@ -88,7 +88,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             seg_pad_value=seg_pad_value,
             bgr_to_rgb=bgr_to_rgb,
             rgb_to_bgr=rgb_to_bgr,
-            boxlist2tensor=boxlist2tensor,
             batch_augments=batch_augments)
         self.voxel = voxel
         self.voxel_type = voxel_type

@@ -104,10 +103,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             ``BaseDataPreprocessor``.
 
         Args:
-            data (List[dict] | List[List[dict]]): data from dataloader.
-                The outer list always represent the batch size, when it is
-                a list[list[dict]], the inner list indicate test time
-                augmentation.
+            data (dict | List[dict]): data from dataloader.
+                The dict contains the whole batch data, when it is
+                a list[dict], the list indicate test time
+                augmentation.
 
             training (bool): Whether to enable training time augmentation.
                 Defaults to False.

@@ -144,7 +143,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         data = self.collate_data(data)
         inputs, data_samples = data['inputs'], data['data_samples']
-
         batch_inputs = dict()
 
         if 'points' in inputs:

@@ -169,9 +167,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                         'pad_shape': pad_shape
                     })
 
-            if self.boxlist2tensor:
+            if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor:
+                from mmdet.models.utils.misc import \
+                    samplelist_boxtype2tensor
+                samplelist_boxtype2tensor(data_samples)
+            elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor:
+                from mmdet.models.utils.misc import \
+                    samplelist_boxlist2tensor
                 samplelist_boxlist2tensor(data_samples)
 
             if self.pad_mask:
                 self.pad_gt_masks(data_samples)

@@ -185,6 +188,23 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
 
         return {'inputs': batch_inputs, 'data_samples': data_samples}
 
+    def preprocess_img(self, _batch_img):
+        # channel transform
+        if self._channel_conversion:
+            _batch_img = _batch_img[[2, 1, 0], ...]
+        # Convert to float after channel conversion to ensure
+        # efficiency
+        _batch_img = _batch_img.float()
+        # Normalization.
+        if self._enable_normalize:
+            if self.mean.shape[0] == 3:
+                assert _batch_img.dim() == 3 and _batch_img.shape[0] == 3, (
+                    'If the mean has 3 values, the input tensor '
+                    'should in shape of (3, H, W), but got the '
+                    f'tensor with shape {_batch_img.shape}')
+            _batch_img = (_batch_img - self.mean) / self.std
+        return _batch_img
+
     def collate_data(self, data: dict) -> dict:
         """Copying data to the target device and Performs normalization,
         padding and bgr2rgb conversion and stack based on

@@ -203,30 +223,30 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if 'img' in data['inputs']:
             _batch_imgs = data['inputs']['img']
             # Process data with `pseudo_collate`.
             if is_list_of(_batch_imgs, torch.Tensor):
                 batch_imgs = []
+                img_dim = _batch_imgs[0].dim()
                 for _batch_img in _batch_imgs:
-                    # channel transform
-                    if self._channel_conversion:
-                        _batch_img = _batch_img[[2, 1, 0], ...]
-                    # Convert to float after channel conversion to ensure
-                    # efficiency
-                    _batch_img = _batch_img.float()
-                    # Normalization.
-                    if self._enable_normalize:
-                        if self.mean.shape[0] == 3:
-                            assert _batch_img.dim() == 3 and \
-                                _batch_img.shape[0] == 3, (
-                                    'If the mean has 3 values, the input '
-                                    'tensor should in shape of (3, H, W), '
-                                    f'but got the tensor with shape '
-                                    f'{_batch_img.shape}')
-                        _batch_img = (_batch_img - self.mean) / self.std
+                    if img_dim == 3:  # standard img
+                        _batch_img = self.preprocess_img(_batch_img)
+                    elif img_dim == 4:
+                        _batch_img = [
+                            self.preprocess_img(_img) for _img in _batch_img
+                        ]
+                        _batch_img = torch.stack(_batch_img, dim=0)
                     batch_imgs.append(_batch_img)
 
                 # Pad and stack Tensor.
-                batch_imgs = stack_batch(batch_imgs, self.pad_size_divisor,
-                                         self.pad_value)
+                if img_dim == 3:
+                    batch_imgs = stack_batch(batch_imgs,
+                                             self.pad_size_divisor,
+                                             self.pad_value)
+                elif img_dim == 4:
+                    batch_imgs = multiview_img_stack_batch(
+                        batch_imgs, self.pad_size_divisor, self.pad_value)
 
             # Process data with `default_collate`.
             elif isinstance(_batch_imgs, torch.Tensor):
                 assert _batch_imgs.dim() == 4, (

@@ -270,6 +290,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if is_list_of(_batch_inputs, torch.Tensor):
             batch_pad_shape = []
             for ori_input in _batch_inputs:
+                if ori_input.dim() == 4:
+                    # means multiview input, select one of the
+                    # images to calculate the pad shape
+                    ori_input = ori_input[0]
                 pad_h = int(
                     np.ceil(ori_input.shape[1] /
                             self.pad_size_divisor)) * self.pad_size_divisor

@@ -293,7 +317,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         else:
             raise TypeError('Output of `cast_data` should be a list of dict '
                             'or a tuple with inputs and data_samples, but got'
                             f'{type(data)}: {data}')
 
         return batch_pad_shape
 
     @torch.no_grad()
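For intuition, a standalone sketch of the per-image preprocessing that preprocess_img factors out above (BGR-to-RGB channel swap, float cast, then mean/std normalization); the mean/std values are illustrative, not the project defaults:

    # Standalone sketch of the per-image preprocessing factored out above.
    # The mean/std values are illustrative assumptions.
    import torch

    mean = torch.tensor([123.675, 116.28, 103.53]).view(3, 1, 1)
    std = torch.tensor([58.395, 57.12, 57.375]).view(3, 1, 1)

    img_bgr = torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8)
    img = img_bgr[[2, 1, 0], ...].float()  # channel swap before float cast
    img = (img - mean) / std
    print(img.shape)  # torch.Size([3, 224, 224])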
mmdet3d/models/data_preprocessors/utils.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union

import torch
import torch.nn.functional as F


def multiview_img_stack_batch(
        tensor_list: List[torch.Tensor],
        pad_size_divisor: int = 1,
        pad_value: Union[int, float] = 0) -> torch.Tensor:
    """Compared to the stack_batch in mmengine.model.utils,
    multiview_img_stack_batch further handles the multiview images.

    See the diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0
    in line 47.

    Stack multiple tensors to form a batch and pad the tensor to the max
    shape using the right-bottom padding mode in these images. If
    ``pad_size_divisor > 0``, add padding to ensure the shape of each dim is
    divisible by ``pad_size_divisor``.

    Args:
        tensor_list (List[Tensor]): A list of tensors with the same dim.
        pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding
            to ensure the shape of each dim is divisible by
            ``pad_size_divisor``. This depends on the model, and many
            models need to be divisible by 32. Defaults to 1.
        pad_value (int, float): The padding value. Defaults to 0.

    Returns:
        Tensor: The n dim tensor.
    """
    assert isinstance(tensor_list, list), \
        f'Expected input type to be list, but got {type(tensor_list)}'
    assert tensor_list, '`tensor_list` could not be an empty list'
    assert len({tensor.ndim for tensor in tensor_list}) == 1, \
        f'Expected the dimensions of all tensors must be the same, ' \
        f'but got {[tensor.ndim for tensor in tensor_list]}'

    dim = tensor_list[0].dim()
    num_img = len(tensor_list)
    all_sizes: torch.Tensor = torch.Tensor(
        [tensor.shape for tensor in tensor_list])
    max_sizes = torch.ceil(
        torch.max(all_sizes, dim=0)[0] / pad_size_divisor) * pad_size_divisor
    padded_sizes = max_sizes - all_sizes
    # The first dim normally means channel, which should not be padded.
    padded_sizes[:, :-2] = 0
    if padded_sizes.sum() == 0:
        return torch.stack(tensor_list)

    # `pad` is the second argument of `F.pad`. If pad is (1, 2, 3, 4),
    # it means that padding the last dim with 1(left) 2(right), padding the
    # penultimate dim to 3(top) 4(bottom). The order of `pad` is opposite of
    # the `padded_sizes`. Therefore, the `padded_sizes` needs to be reversed,
    # and only odd index of pad should be assigned to keep padding "right"
    # and "bottom".
    pad = torch.zeros(num_img, 2 * dim, dtype=torch.int)
    pad[:, 1::2] = padded_sizes[:, range(dim - 1, -1, -1)]
    batch_tensor = []
    for idx, tensor in enumerate(tensor_list):
        batch_tensor.append(
            F.pad(tensor, tuple(pad[idx].tolist()), value=pad_value))
    return torch.stack(batch_tensor)
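A quick usage sketch with illustrative shapes: two multiview image tensors (num_views, C, H, W) with different spatial sizes get padded to a common, divisor-aligned shape before being stacked into (B, num_views, C, H', W'):

    # Usage sketch with illustrative shapes; only the last two dims are
    # padded (to the max size, rounded up to the divisor) before stacking.
    import torch

    from mmdet3d.models.data_preprocessors.utils import \
        multiview_img_stack_batch

    a = torch.rand(5, 3, 370, 1220)  # 5 camera views
    b = torch.rand(5, 3, 368, 1216)
    batch = multiview_img_stack_batch([a, b], pad_size_divisor=32)
    print(batch.shape)  # torch.Size([2, 5, 3, 384, 1248])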
mmdet3d/models/dense_heads/base_3d_dense_head.py

@@ -204,7 +204,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
             score_factors (list[Tensor], optional): Score factor for
                 all scale level, each is a 4D-tensor, has shape
                 (batch_size, num_priors * 1, H, W). Defaults to None.
-            batch_input_metas (list[dict], Optional): Batch image meta info.
+            batch_input_metas (list[dict], Optional): Batch inputs meta info.
                 Defaults to None.
             cfg (ConfigDict, optional): Test / postprocessing
                 configuration, if None, test_cfg would be used.
mmdet3d/models/dense_heads/parta2_rpn_head.py

@@ -183,8 +183,7 @@ class PartA2RPNHead(Anchor3DHead):
         result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                          mlvl_max_scores, mlvl_label_pred,
                                          mlvl_cls_score, mlvl_dir_scores,
-                                         score_thr, cfg.nms_post, cfg,
-                                         input_meta)
+                                         score_thr, cfg, input_meta)
         return result
 
     def loss_and_predict(self,

@@ -275,7 +274,7 @@ class PartA2RPNHead(Anchor3DHead):
                             mlvl_bboxes_for_nms: Tensor,
                             mlvl_max_scores: Tensor,
                             mlvl_label_pred: Tensor,
                             mlvl_cls_score: Tensor,
                             mlvl_dir_scores: Tensor,
-                            score_thr: int, max_num: int, cfg: ConfigDict,
+                            score_thr: int, cfg: ConfigDict,
                             input_meta: dict) -> Dict:
         """Class agnostic nms for single batch.

@@ -291,7 +290,6 @@ class PartA2RPNHead(Anchor3DHead):
             mlvl_dir_scores (torch.Tensor): Direction scores of
                 Multi-level bbox.
             score_thr (int): Score threshold.
-            max_num (int): Max number of bboxes after nms.
             cfg (:obj:`ConfigDict`): Training or testing config.
             input_meta (dict): Contain pcd and img's meta info.

@@ -339,9 +337,9 @@ class PartA2RPNHead(Anchor3DHead):
             scores = torch.cat(scores, dim=0)
             cls_scores = torch.cat(cls_scores, dim=0)
             labels = torch.cat(labels, dim=0)
-            if bboxes.shape[0] > max_num:
+            if bboxes.shape[0] > cfg.nms_post:
                 _, inds = scores.sort(descending=True)
-                inds = inds[:max_num]
+                inds = inds[:cfg.nms_post]
                 bboxes = bboxes[inds, :]
                 labels = labels[inds]
                 scores = scores[inds]
mmdet3d/models/dense_heads/point_rpn_head.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional, Tuple
+
 import torch
+from mmengine.model import BaseModule
+from mmengine.structures import InstanceData
+from torch import Tensor
 from torch import nn as nn
 
 from mmdet3d.models.builder import build_loss
 from mmdet3d.models.layers import nms_bev, nms_normal_bev
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet3d.structures import xywhr2xyxyr
-from mmdet3d.structures.bbox_3d import (DepthInstance3DBoxes,
+from mmdet3d.structures.bbox_3d import (BaseInstance3DBoxes,
+                                        DepthInstance3DBoxes,
                                         LiDARInstance3DBoxes)
+from mmdet3d.structures.det3d_data_sample import SampleList
+from mmdet3d.utils.typing import InstanceList
 from mmdet.models.utils import multi_apply

@@ -34,15 +40,15 @@ class PointRPNHead(BaseModule):
     """
 
     def __init__(self,
-                 num_classes,
-                 train_cfg,
-                 test_cfg,
-                 pred_layer_cfg=None,
-                 enlarge_width=0.1,
-                 cls_loss=None,
-                 bbox_loss=None,
-                 bbox_coder=None,
-                 init_cfg=None):
+                 num_classes: int,
+                 train_cfg: dict,
+                 test_cfg: dict,
+                 pred_layer_cfg: Optional[dict] = None,
+                 enlarge_width: float = 0.1,
+                 cls_loss: Optional[dict] = None,
+                 bbox_loss: Optional[dict] = None,
+                 bbox_coder: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None) -> None:
         super().__init__(init_cfg=init_cfg)
         self.num_classes = num_classes
         self.train_cfg = train_cfg

@@ -50,8 +56,8 @@ class PointRPNHead(BaseModule):
         self.enlarge_width = enlarge_width
 
         # build loss function
-        self.bbox_loss = build_loss(bbox_loss)
-        self.cls_loss = build_loss(cls_loss)
+        self.bbox_loss = MODELS.build(bbox_loss)
+        self.cls_loss = MODELS.build(cls_loss)
 
         # build box coder
         self.bbox_coder = TASK_UTILS.build(bbox_coder)

@@ -67,7 +73,8 @@ class PointRPNHead(BaseModule):
             input_channels=pred_layer_cfg.in_channels,
             output_channels=self._get_reg_out_channels())
 
-    def _make_fc_layers(self, fc_cfg, input_channels, output_channels):
+    def _make_fc_layers(self, fc_cfg: dict, input_channels: int,
+                        output_channels: int) -> nn.Sequential:
         """Make fully connect layers.
 
         Args:

@@ -102,7 +109,7 @@ class PointRPNHead(BaseModule):
         # torch.cos(yaw) (1), torch.sin(yaw) (1)
         return self.bbox_coder.code_size
 
-    def forward(self, feat_dict):
+    def forward(self, feat_dict: dict) -> Tuple[List[Tensor]]:
         """Forward pass.
 
         Args:

@@ -124,30 +131,35 @@ class PointRPNHead(BaseModule):
                                                 batch_size, -1,
                                                 self._get_reg_out_channels())
         return point_box_preds, point_cls_preds
 
-    def loss(self,
-             bbox_preds,
-             cls_preds,
-             points,
-             gt_bboxes_3d,
-             gt_labels_3d,
-             img_metas=None):
+    def loss_by_feat(
+            self,
+            bbox_preds: List[Tensor],
+            cls_preds: List[Tensor],
+            points: List[Tensor],
+            batch_gt_instances_3d: InstanceList,
+            batch_input_metas: Optional[List[dict]] = None,
+            batch_gt_instances_ignore: Optional[InstanceList] = None) -> Dict:
         """Compute loss.
 
         Args:
-            bbox_preds (dict): Predictions from forward of PointRCNN RPN_Head.
-            cls_preds (dict): Classification from forward of PointRCNN
-                RPN_Head.
+            bbox_preds (list[torch.Tensor]): Predictions from forward of
+                PointRCNN RPN_Head.
+            cls_preds (list[torch.Tensor]): Classification from forward of
+                PointRCNN RPN_Head.
             points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                bboxes of each sample.
-            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            img_metas (list[dict], Optional): Contain pcd and img's meta info.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances_3d. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.
+            batch_input_metas (list[dict]): Contain pcd and img's meta info.
+            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
+                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
+                data that is ignored during training and testing.
+                Defaults to None.
 
         Returns:
             dict: Losses of PointRCNN RPN module.
         """
-        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d)
+        targets = self.get_targets(points, batch_gt_instances_3d)
         (bbox_targets, mask_targets, positive_mask, negative_mask,
          box_loss_weights, point_targets) = targets

@@ -169,25 +181,25 @@ class PointRPNHead(BaseModule):
 
         return losses
 
-    def get_targets(self, points, gt_bboxes_3d, gt_labels_3d):
+    def get_targets(self, points: List[Tensor],
+                    batch_gt_instances_3d: InstanceList) -> Tuple[Tensor]:
         """Generate targets of PointRCNN RPN head.
 
         Args:
-            points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                bboxes of each batch.
-            gt_labels_3d (list[torch.Tensor]): Labels of each batch.
+            points (list[torch.Tensor]): Points in one batch.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances_3d. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.
 
         Returns:
             tuple[torch.Tensor]: Targets of PointRCNN RPN head.
         """
-        # find empty example
-        for index in range(len(gt_labels_3d)):
-            if len(gt_labels_3d[index]) == 0:
-                fake_box = gt_bboxes_3d[index].tensor.new_zeros(
-                    1, gt_bboxes_3d[index].tensor.shape[-1])
-                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
-                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
+        gt_labels_3d = [
+            instances.labels_3d for instances in batch_gt_instances_3d
+        ]
+        gt_bboxes_3d = [
+            instances.bboxes_3d for instances in batch_gt_instances_3d
+        ]
 
         (bbox_targets, mask_targets, positive_mask, negative_mask,
          point_targets) = multi_apply(
             self.get_targets_single, points,

@@ -202,7 +214,9 @@ class PointRPNHead(BaseModule):
         return (bbox_targets, mask_targets, positive_mask, negative_mask,
                 box_loss_weights, point_targets)
 
-    def get_targets_single(self, points, gt_bboxes_3d, gt_labels_3d):
+    def get_targets_single(self, points: Tensor,
+                           gt_bboxes_3d: BaseInstance3DBoxes,
+                           gt_labels_3d: Tensor) -> Tuple[Tensor]:
         """Generate targets of PointRCNN RPN head for single batch.
 
         Args:

@@ -243,24 +257,34 @@ class PointRPNHead(BaseModule):
         return (bbox_targets, mask_targets, positive_mask, negative_mask,
                 point_targets)
 
-    def get_bboxes(self,
-                   points,
-                   bbox_preds,
-                   cls_preds,
-                   input_metas,
-                   rescale=False):
+    def predict_by_feat(self, points: Tensor, bbox_preds: List[Tensor],
+                        cls_preds: List[Tensor],
+                        batch_input_metas: List[dict],
+                        cfg: Optional[dict]) -> InstanceList:
         """Generate bboxes from RPN head predictions.
 
         Args:
             points (torch.Tensor): Input points.
-            bbox_preds (dict): Regression predictions from PointRCNN head.
-            cls_preds (dict): Class scores predictions from PointRCNN head.
-            input_metas (list[dict]): Point cloud and image's meta info.
-            rescale (bool, optional): Whether to rescale bboxes.
-                Defaults to False.
+            bbox_preds (list[tensor]): Regression predictions from PointRCNN
+                head.
+            cls_preds (list[tensor]): Class scores predictions from PointRCNN
+                head.
+            batch_input_metas (list[dict]): Batch inputs meta info.
+            cfg (ConfigDict, optional): Test / postprocessing
+                configuration.
 
         Returns:
-            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
+            list[:obj:`InstanceData`]: Detection results of each sample
+            after the post process.
+            Each item usually contains following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instances, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
+              contains a tensor with shape (num_instances, C), where
+              C >= 7.
+            - cls_preds (torch.Tensor): Class score of each bbox.
         """
         sem_scores = cls_preds.sigmoid()
         obj_scores = sem_scores.max(-1)[0]

@@ -271,30 +295,40 @@ class PointRPNHead(BaseModule):
         for b in range(batch_size):
             bbox3d = self.bbox_coder.decode(bbox_preds[b],
                                             points[b, ..., :3],
                                             object_class[b])
+            mask = ~bbox3d.sum(dim=1).isinf()
             bbox_selected, score_selected, labels, cls_preds_selected = \
-                self.class_agnostic_nms(obj_scores[b], sem_scores[b], bbox3d,
-                                        points[b, ..., :3], input_metas[b])
-            bbox = input_metas[b]['box_type_3d'](
-                bbox_selected.clone(),
-                box_dim=bbox_selected.shape[-1],
-                with_yaw=True)
-            results.append((bbox, score_selected, labels, cls_preds_selected))
+                self.class_agnostic_nms(obj_scores[b][mask],
+                                        sem_scores[b][mask, :],
+                                        bbox3d[mask, :],
+                                        points[b, ..., :3][mask, :],
+                                        batch_input_metas[b], cfg.nms_cfg)
+            bbox_selected = batch_input_metas[b]['box_type_3d'](
+                bbox_selected, box_dim=bbox_selected.shape[-1])
+            result = InstanceData()
+            result.bboxes_3d = bbox_selected
+            result.scores_3d = score_selected
+            result.labels_3d = labels
+            result.cls_preds = cls_preds_selected
+            results.append(result)
         return results
 
-    def class_agnostic_nms(self, obj_scores, sem_scores, bbox, points,
-                           input_meta):
+    def class_agnostic_nms(self, obj_scores: Tensor, sem_scores: Tensor,
+                           bbox: Tensor, points: Tensor, input_meta: Dict,
+                           nms_cfg: Dict) -> Tuple[Tensor]:
         """Class agnostic nms.
 
         Args:
             obj_scores (torch.Tensor): Objectness score of bounding boxes.
             sem_scores (torch.Tensor): Semantic class score of bounding boxes.
             bbox (torch.Tensor): Predicted bounding boxes.
             points (torch.Tensor): Input points.
             input_meta (dict): Contain pcd and img's meta info.
+            nms_cfg (dict): NMS config dict.
 
         Returns:
             tuple[torch.Tensor]: Bounding boxes, scores and labels.
         """
-        nms_cfg = self.test_cfg.nms_cfg if not self.training \
-            else self.train_cfg.nms_cfg
         if nms_cfg.use_rotate_nms:
             nms_func = nms_bev
         else:

@@ -323,14 +357,14 @@ class PointRPNHead(BaseModule):
             bbox = bbox[nonempty_box_mask]
 
-        if self.test_cfg.score_thr is not None:
-            score_thr = self.test_cfg.score_thr
+        if nms_cfg.score_thr is not None:
+            score_thr = nms_cfg.score_thr
             keep = (obj_scores >= score_thr)
             obj_scores = obj_scores[keep]
             sem_scores = sem_scores[keep]
             bbox = bbox.tensor[keep]
 
-        if obj_scores.shape[0] > 0:
+        if bbox.tensor.shape[0] > 0:
             topk = min(nms_cfg.nms_pre, obj_scores.shape[0])
             obj_scores_nms, indices = torch.topk(obj_scores, k=topk)
             bbox_for_nms = xywhr2xyxyr(bbox[indices].bev)

@@ -343,15 +377,22 @@ class PointRPNHead(BaseModule):
             score_selected = obj_scores_nms[keep]
             cls_preds = sem_scores_nms[keep]
             labels = torch.argmax(cls_preds, -1)
+            if bbox_selected.shape[0] > nms_cfg.nms_post:
+                _, inds = score_selected.sort(descending=True)
+                inds = inds[:score_selected.nms_post]
+                bbox_selected = bbox_selected[inds, :]
+                labels = labels[inds]
+                score_selected = score_selected[inds]
+                cls_preds = cls_preds[inds, :]
         else:
             bbox_selected = bbox.tensor
             score_selected = obj_scores.new_zeros([0])
             labels = obj_scores.new_zeros([0])
             cls_preds = obj_scores.new_zeros([0, sem_scores.shape[-1]])
 
         return bbox_selected, score_selected, labels, cls_preds
 
-    def _assign_targets_by_points_inside(self, bboxes_3d, points):
+    def _assign_targets_by_points_inside(self, bboxes_3d: BaseInstance3DBoxes,
+                                         points: Tensor) -> Tuple[Tensor]:
         """Compute assignment by checking whether point is inside bbox.
 
         Args:

@@ -379,3 +420,92 @@ class PointRPNHead(BaseModule):
             raise NotImplementedError('Unsupported bbox type!')
 
         return points_mask, assignment
+
+    def predict(self, feats_dict: Dict,
+                batch_data_samples: SampleList) -> InstanceList:
+        """Perform forward propagation of the 3D detection head and predict
+        detection results on the features of the upstream network.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+
+        Returns:
+            list[:obj:`InstanceData`]: Detection results of each sample
+            after the post process.
+            Each item usually contains following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instances, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
+              contains a tensor with shape (num_instances, C), where
+              C >= 7.
+        """
+        batch_input_metas = [
+            data_samples.metainfo for data_samples in batch_data_samples
+        ]
+        raw_points = feats_dict.pop('raw_points')
+        bbox_preds, cls_preds = self(feats_dict)
+        proposal_cfg = self.test_cfg
+
+        proposal_list = self.predict_by_feat(
+            raw_points,
+            bbox_preds,
+            cls_preds,
+            cfg=proposal_cfg,
+            batch_input_metas=batch_input_metas)
+        feats_dict['points_cls_preds'] = cls_preds
+        return proposal_list
+
+    def loss_and_predict(self,
+                         feats_dict: Dict,
+                         batch_data_samples: SampleList,
+                         proposal_cfg: Optional[dict] = None,
+                         **kwargs) -> Tuple[dict, InstanceList]:
+        """Perform forward propagation of the head, then calculate loss and
+        predictions from the features and data samples.
+
+        Args:
+            feats_dict (dict): Contains features from the first stage.
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                samples. It usually includes information such as
+                `gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
+            proposal_cfg (ConfigDict, optional): Proposal config.
+
+        Returns:
+            tuple: the return value is a tuple contains:
+
+            - losses: (dict[str, Tensor]): A dictionary of loss components.
+            - predictions (list[:obj:`InstanceData`]): Detection
+              results of each sample after the post process.
+        """
+        batch_gt_instances_3d = []
+        batch_gt_instances_ignore = []
+        batch_input_metas = []
+        for data_sample in batch_data_samples:
+            batch_input_metas.append(data_sample.metainfo)
+            batch_gt_instances_3d.append(data_sample.gt_instances_3d)
+            batch_gt_instances_ignore.append(
+                data_sample.get('ignored_instances', None))
+        raw_points = feats_dict.pop('raw_points')
+        bbox_preds, cls_preds = self(feats_dict)
+
+        loss_inputs = (bbox_preds, cls_preds,
+                       raw_points) + (batch_gt_instances_3d,
+                                      batch_input_metas,
+                                      batch_gt_instances_ignore)
+        losses = self.loss_by_feat(*loss_inputs)
+
+        predictions = self.predict_by_feat(
+            raw_points,
+            bbox_preds,
+            cls_preds,
+            batch_input_metas=batch_input_metas,
+            cfg=proposal_cfg)
+        feats_dict['points_cls_preds'] = cls_preds
+        if predictions[0].bboxes_3d.tensor.isinf().any():
+            print(predictions)
+        return losses, predictions
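Since this commit replaces build_loss with registry calls here and in ssd_3d_head.py below, a brief sketch of the registry-based pattern; the loss type and weights are illustrative config values, and resolving a type registered in mmdet assumes the default scope has been initialized:

    # Sketch of the MODELS.build pattern that replaces build_loss above;
    # the config values are illustrative, and building a loss registered
    # in mmdet assumes the default scope is set up.
    from mmdet3d.registry import MODELS

    bbox_loss = MODELS.build(
        dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0))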
mmdet3d/models/dense_heads/ssd_3d_head.py

@@ -14,7 +14,6 @@ from mmdet3d.structures.bbox_3d import (DepthInstance3DBoxes,
                                         LiDARInstance3DBoxes,
                                         rotation_3d_in_axis)
 from mmdet.models.utils import multi_apply
-from ..builder import build_loss
 from .vote_head import VoteHead

@@ -76,8 +75,8 @@ class SSD3DHead(VoteHead):
             size_res_loss=size_res_loss,
             semantic_loss=None,
             init_cfg=init_cfg)
-        self.corner_loss = build_loss(corner_loss)
-        self.vote_loss = build_loss(vote_loss)
+        self.corner_loss = MODELS.build(corner_loss)
+        self.vote_loss = MODELS.build(vote_loss)
         self.num_candidates = vote_module_cfg['num_points']
 
     def _get_cls_out_channels(self) -> int:
mmdet3d/models/detectors/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from .base import Base3DDetector
 from .centerpoint import CenterPoint
+from .dfm import DfM
 from .dynamic_voxelnet import DynamicVoxelNet
 from .fcos_mono3d import FCOSMono3D
 from .groupfree3dnet import GroupFree3DNet
 from .h3dnet import H3DNet
 from .imvotenet import ImVoteNet
 from .imvoxelnet import ImVoxelNet
+from .multiview_dfm import MultiViewDfM
 from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
 from .mvx_two_stage import MVXTwoStageDetector
 from .parta2 import PartA2

@@ -19,9 +21,25 @@ from .votenet import VoteNet
 from .voxelnet import VoxelNet
 
 __all__ = [
-    'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
-    'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
-    'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
-    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',
-    'SASSD'
+    'Base3DDetector', 'DfM', 'VoxelNet', 'DynamicVoxelNet',
+    'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'MVXFasterRCNN',
+    'MultiViewDfM', 'PartA2', 'VoteNet', 'H3DNet', 'CenterPoint', 'SSD3DNet',
+    'ImVoteNet', 'SingleStageMono3DDetector', 'FCOSMono3D', 'ImVoxelNet',
+    'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D', 'SASSD',
 ]
mmdet3d/models/detectors/base.py

@@ -89,7 +89,7 @@ class Base3DDetector(BaseDetector):
             raise RuntimeError(f'Invalid mode "{mode}". '
                                'Only supports loss, predict and tensor mode')
 
-    def convert_to_datasample(
+    def add_pred_to_datasample(
         self,
         data_samples: SampleList,
         data_instances_3d: OptInstanceList = None,
mmdet3d/models/detectors/dfm.py
0 → 100644
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmdet3d.registry
import
MODELS
from
mmdet3d.structures.ops
import
bbox3d2result
from
mmdet3d.utils
import
ConfigType
from
mmdet.models.detectors
import
BaseDetector
from
..builder
import
build_backbone
,
build_head
,
build_neck
@
MODELS
.
register_module
()
class
DfM
(
BaseDetector
):
r
"""`Monocular 3D Object Detection with Depth from Motion.
<https://arxiv.org/abs/2207.12988>`_.
Args:
backbone (:obj:`ConfigDict` or dict): The backbone config.
neck (:obj:`ConfigDict` or dict): The neck config.
backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
config.
backbone_3d (:obj:`ConfigDict` or dict): The 3d backbone config.
neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
bbox_head_3d (:obj:`ConfigDict` or dict): The 3d bbox head config.
neck_2d (:obj:`ConfigDict` or dict, optional): The 2D neck config
for 2D object detection. Defaults to None.
bbox_head_2d (:obj:`ConfigDict` or dict, optional): The 2D bbox
head config for 2D object detection. Defaults to None.
depth_head_2d (:obj:`ConfigDict` or dict, optional): The 2D depth
head config for depth estimation in fov space. Defaults to None.
depth_head (:obj:`ConfigDict` or dict, optional): The depth head
config for depth estimation in 3D voxel projected to fov space .
train_cfg (:obj:`ConfigDict` or dict, optional): Config dict of
training hyper-parameters. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): Config dict of test
hyper-parameters. Defaults to None.
pretrained (:obj: `ConfigDict` or dict optional): The pretrained
config.
init_cfg (:obj:`ConfigDict` or dict, optional): The initialization
config. Defaults to None.
"""
def
__init__
(
self
,
backbone
:
ConfigType
,
neck
:
ConfigType
,
backbone_stereo
:
ConfigType
,
backbone_3d
:
ConfigType
,
neck_3d
:
ConfigType
,
bbox_head_3d
:
ConfigType
,
neck_2d
=
None
,
bbox_head_2d
=
None
,
depth_head_2d
=
None
,
depth_head
=
None
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
,
init_cfg
=
None
):
super
().
__init__
(
init_cfg
=
init_cfg
)
self
.
backbone
=
build_backbone
(
backbone
)
self
.
neck
=
build_neck
(
neck
)
if
backbone_stereo
is
not
None
:
backbone_stereo
.
update
(
cat_img_feature
=
self
.
neck
.
cat_img_feature
)
backbone_stereo
.
update
(
in_sem_channels
=
self
.
neck
.
sem_channels
[
-
1
])
self
.
backbone_stereo
=
build_backbone
(
backbone_stereo
)
assert
self
.
neck
.
cat_img_feature
==
\
self
.
backbone_stereo
.
cat_img_feature
assert
self
.
neck
.
sem_channels
[
-
1
]
==
self
.
backbone_stereo
.
in_sem_channels
if
backbone_3d
is
not
None
:
self
.
backbone_3d
=
build_backbone
(
backbone_3d
)
if
neck_3d
is
not
None
:
self
.
neck_3d
=
build_neck
(
neck_3d
)
if
neck_2d
is
not
None
:
self
.
neck_2d
=
build_neck
(
neck_2d
)
if
bbox_head_2d
is
not
None
:
self
.
bbox_head_2d
=
build_head
(
bbox_head_2d
)
if
depth_head_2d
is
not
None
:
self
.
depth_head_2d
=
build_head
(
depth_head_2d
)
if
depth_head
is
not
None
:
self
.
depth_head
=
build_head
(
depth_head
)
self
.
depth_samples
=
self
.
depth_head
.
depth_samples
self
.
train_cfg
=
train_cfg
self
.
test_cfg
=
test_cfg
bbox_head_3d
.
update
(
train_cfg
=
train_cfg
)
bbox_head_3d
.
update
(
test_cfg
=
test_cfg
)
self
.
bbox_head_3d
=
build_head
(
bbox_head_3d
)
@
property
def
with_backbone_3d
(
self
):
"""Whether the detector has a 3D backbone."""
return
hasattr
(
self
,
'backbone_3d'
)
and
self
.
backbone_3d
is
not
None
@
property
def
with_neck_3d
(
self
):
"""Whether the detector has a 3D neck."""
return
hasattr
(
self
,
'neck_3d'
)
and
self
.
neck_3d
is
not
None
@
property
def
with_neck_2d
(
self
):
"""Whether the detector has a 2D neck."""
return
hasattr
(
self
,
'neck_2d'
)
and
self
.
neck_2d
is
not
None
@
property
def
with_bbox_head_2d
(
self
):
"""Whether the detector has a 2D detection head."""
return
hasattr
(
self
,
'bbox_head_2d'
)
and
self
.
bbox_head_2d
is
not
None
@
property
def
with_depth_head_2d
(
self
):
"""Whether the detector has a image-based depth head."""
return
hasattr
(
self
,
'depth_head_2d'
)
and
self
.
depth_head_2d
is
not
None
@
property
def
with_depth_head
(
self
):
"""Whether the detector has a frustum-based depth head."""
return
hasattr
(
self
,
'depth_head'
)
and
self
.
depth_head
is
not
None

    def extract_feat(self, img, img_metas):
        """Feature extraction for perspective-view images.

        Args:
            img (torch.Tensor): Images of shape [B, N, C_in, H, W].
            img_metas (list): Image meta information. Each element corresponds
                to a group of images. len(img_metas) == B.

        Returns:
            torch.Tensor: BEV feature with shape [B, C_out, N_y, N_x].
        """
        # split input img into current and previous ones
        batch_size, N, C_in, H, W = img.shape
        cur_imgs = img[:, 0]
        prev_imgs = img[:, 1]
        # TODO: to support multiple prev imgs
        # 2D backbone for feature extraction
        cur_feats = self.backbone(cur_imgs)
        cur_feats = [cur_imgs] + list(cur_feats)
        prev_feats = self.backbone(prev_imgs)
        prev_feats = [prev_imgs] + list(prev_feats)
        # SPP module as the feature neck
        cur_stereo_feat, cur_sem_feat = self.neck(cur_feats)
        prev_stereo_feat, prev_sem_feat = self.neck(prev_feats)
        # derive cur2prevs
        cur_pose = torch.tensor(
            [img_meta['cam2global'] for img_meta in img_metas],
            device=img.device)[:, None, :, :]  # (B, 1, 4, 4)
        prev_poses = []
        for img_meta in img_metas:
            sweep_img_metas = img_meta['sweep_img_metas']
            prev_poses.append([
                sweep_img_meta['cam2global']
                for sweep_img_meta in sweep_img_metas
            ])
        prev_poses = torch.tensor(prev_poses, device=img.device)
        pad_prev_cam2global = torch.eye(4)[None, None].expand(
            batch_size, N - 1, 4, 4).to(img.device)
        pad_prev_cam2global[:, :, :prev_poses.shape[-2],
                            :prev_poses.shape[-1]] = prev_poses
        pad_cur_cam2global = torch.eye(4)[None, None].expand(
            batch_size, 1, 4, 4).to(img.device)
        pad_cur_cam2global[:, :, :cur_pose.shape[-2],
                           :cur_pose.shape[-1]] = cur_pose
        # (B, N-1, 4, 4) * (B, 1, 4, 4) -> (B, N-1, 4, 4)
        # torch.linalg.solve is faster and more numerically stable
        # than torch.matmul(torch.linalg.inv(A), B)
        # empirical results show that torch.linalg.solve can derive
        # almost the same result as np.linalg.inv,
        # while torch.linalg.inv can not
        cur2prevs = torch.linalg.solve(pad_prev_cam2global,
                                       pad_cur_cam2global)
        for meta_idx, img_meta in enumerate(img_metas):
            img_meta['cur2prevs'] = cur2prevs[meta_idx]
        # stereo backbone for depth estimation
        # volume_feat: (batch_size, Cv, Nz, Ny, Nx)
        volume_feat = self.backbone_stereo(cur_stereo_feat, prev_stereo_feat,
                                           img_metas, cur_sem_feat)
        # height compression
        _, Cv, Nz, Ny, Nx = volume_feat.shape
        bev_feat = volume_feat.view(batch_size, Cv * Nz, Ny, Nx)
        bev_feat_prehg, bev_feat = self.neck_3d(bev_feat)
        return bev_feat
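
The cur2prevs step above solves pad_prev_cam2global @ X = pad_cur_cam2global,
so X maps current-camera coordinates into each previous camera without forming
an explicit matrix inverse. A standalone sketch of the same identity on a
single pose pair (the poses are made up for illustration):

import torch

# hypothetical cam2global poses: the current camera sits 1 m ahead
prev_cam2global = torch.eye(4, dtype=torch.float64)
cur_cam2global = torch.eye(4, dtype=torch.float64)
cur_cam2global[0, 3] = 1.0

# solve(A, B) returns X with A @ X = B, i.e. X = inv(A) @ B
cur2prev = torch.linalg.solve(prev_cam2global, cur_cam2global)

# agrees with the explicit-inverse formulation it replaces
reference = torch.linalg.inv(prev_cam2global) @ cur_cam2global
assert torch.allclose(cur2prev, reference)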

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      depth_img=None,
                      **kwargs):
        """Forward function for training."""
        bev_feat = self.extract_feat(img, img_metas)
        outs = self.bbox_head_3d([bev_feat])
        losses = self.bbox_head_3d.loss(*outs, gt_bboxes_3d, gt_labels_3d,
                                        img_metas)
        # TODO: loss_dense_depth, loss_2d, loss_imitation
        return losses

    def forward_test(self, img, img_metas, **kwargs):
        """Forward of testing.

        Args:
            img (torch.Tensor): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        # not supporting aug_test for now
        return self.simple_test(img, img_metas)

    def simple_test(self, img, img_metas):
        """Simple inference forward without test time augmentation."""
        bev_feat = self.extract_feat(img, img_metas)
        # bbox_head takes a list of features from different levels as input,
        # so need [bev_feat]
        outs = self.bbox_head_3d([bev_feat])
        bbox_list = self.bbox_head_3d.get_bboxes(*outs, img_metas)
        bbox_results = [
            bbox3d2result(det_bboxes, det_scores, det_labels)
            for det_bboxes, det_scores, det_labels in bbox_list
        ]
        # add pseudo-lidar label to each pred_dict for post-processing
        for bbox_result in bbox_results:
            bbox_result['pseudo_lidar'] = True
        return bbox_results

    def aug_test(self, imgs, img_metas, **kwargs):
        """Test with augmentations.

        Args:
            imgs (list[torch.Tensor]): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        raise NotImplementedError
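
For reference, the height compression inside extract_feat is a pure reshape:
the vertical axis Nz of the stereo volume is folded into the channel axis
before the BEV neck. A tiny sketch with made-up sizes:

import torch

batch_size, Cv, Nz, Ny, Nx = 2, 32, 10, 104, 104  # illustrative sizes only
volume_feat = torch.rand(batch_size, Cv, Nz, Ny, Nx)

# (B, Cv, Nz, Ny, Nx) -> (B, Cv * Nz, Ny, Nx)
bev_feat = volume_feat.view(batch_size, Cv * Nz, Ny, Nx)
assert bev_feat.shape == (2, 320, 104, 104)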
mmdet3d/models/detectors/dynamic_voxelnet.py
View file @ 6c03a971
...
@@ -10,7 +10,8 @@ from .voxelnet import VoxelNet

 @MODELS.register_module()
 class DynamicVoxelNet(VoxelNet):
-    r"""VoxelNet using `dynamic voxelization <https://arxiv.org/abs/1910.06528>`_.
+    r"""VoxelNet using `dynamic voxelization
+    <https://arxiv.org/abs/1910.06528>`_.
     """

     def __init__(self,
...
mmdet3d/models/detectors/fcos_mono3d.py
View file @ 6c03a971
...
@@ -95,6 +95,7 @@ class FCOSMono3D(SingleStageMono3DDetector):
         x = self.extract_feat(batch_inputs_dict)
         results_list, results_list_2d = self.bbox_head.predict(
             x, batch_data_samples, rescale=rescale)
-        predictions = self.convert_to_datasample(batch_data_samples,
-                                                 results_list, results_list_2d)
+        predictions = self.add_pred_to_datasample(batch_data_samples,
+                                                  results_list,
+                                                  results_list_2d)
         return predictions
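
This hunk and the ones below apply the same mechanical rename: the detectors'
convert_to_datasample helper becomes add_pred_to_datasample with unchanged
arguments. A hedged sketch of the corresponding call-site update for downstream
code (detector stands for any of the classes touched in this commit):

# before this commit:
# predictions = detector.convert_to_datasample(batch_data_samples, results_list)

# after this commit:
predictions = detector.add_pred_to_datasample(batch_data_samples, results_list)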
mmdet3d/models/detectors/groupfree3dnet.py
View file @ 6c03a971
...
@@ -82,6 +82,6 @@ class GroupFree3DNet(SingleStage3DDetector):
         points = batch_inputs_dict['points']
         results_list = self.bbox_head.predict(points, x, batch_data_samples,
                                               **kwargs)
-        predictions = self.convert_to_datasample(batch_data_samples,
-                                                 results_list)
+        predictions = self.add_pred_to_datasample(batch_data_samples,
+                                                  results_list)
         return predictions
mmdet3d/models/detectors/h3dnet.py
View file @ 6c03a971
...
@@ -154,4 +154,4 @@ class H3DNet(TwoStage3DDetector):
             feats_dict, batch_data_samples, suffix='_optimized')
-        return self.convert_to_datasample(batch_data_samples, results_list)
+        return self.add_pred_to_datasample(batch_data_samples, results_list)
mmdet3d/models/detectors/imvotenet.py
View file @ 6c03a971
...
@@ -433,7 +433,7 @@ class ImVoteNet(Base3DDetector):
         if points is None:
             assert imgs is not None
             results_2d = self.predict_img_only(imgs, batch_data_samples)
-            return self.convert_to_datasample(
+            return self.add_pred_to_datasample(
                 batch_data_samples, data_instances_2d=results_2d)
         else:
...
@@ -488,7 +488,7 @@ class ImVoteNet(Base3DDetector):
                 batch_data_samples, rescale=True)
-        return self.convert_to_datasample(batch_data_samples, results_3d)
+        return self.add_pred_to_datasample(batch_data_samples, results_3d)

     def predict_img_only(self, imgs: Tensor,
...
mmdet3d/models/detectors/imvoxelnet.py
View file @ 6c03a971
...
@@ -2,16 +2,17 @@
 from typing import List, Tuple, Union

 import torch
+from mmengine.structures import InstanceData

+from mmdet3d.models.detectors import Base3DDetector
 from mmdet3d.models.layers.fusion_layers.point_fusion import point_sample
 from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet3d.structures.det3d_data_sample import SampleList
-from mmdet3d.utils import ConfigType, InstanceList, OptConfigType
-from mmdet.models.detectors import BaseDetector
+from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList


 @MODELS.register_module()
-class ImVoxelNet(BaseDetector):
+class ImVoxelNet(Base3DDetector):
     r"""`ImVoxelNet <https://arxiv.org/abs/2106.01178>`_.

     Args:
...
@@ -57,31 +58,6 @@ class ImVoxelNet(BaseDetector):
         self.train_cfg = train_cfg
         self.test_cfg = test_cfg

-    def convert_to_datasample(self, data_samples: SampleList,
-                              data_instances: InstanceList) -> SampleList:
-        """Convert results list to `Det3DDataSample`.
-
-        Args:
-            inputs (list[:obj:`Det3DDataSample`]): The input data.
-            data_instances (list[:obj:`InstanceData`]): 3D Detection
-                results of each image.
-
-        Returns:
-            list[:obj:`Det3DDataSample`]: 3D Detection results of the
-                input images. Each Det3DDataSample usually contain
-                'pred_instances_3d'. And the ``pred_instances_3d`` usually
-                contains following keys.
-
-                - scores_3d (Tensor): Classification scores, has a shape
-                  (num_instance, )
-                - labels_3d (Tensor): Labels of bboxes, has a shape
-                  (num_instances, ).
-                - bboxes_3d (Tensor): Contains a tensor with shape
-                  (num_instances, C) where C >= 7.
-        """
-        for data_sample, pred_instances_3d in zip(data_samples,
-                                                  data_instances):
-            data_sample.pred_instances_3d = pred_instances_3d
-        return data_samples
-
     def extract_feat(self, batch_inputs_dict: dict,
                      batch_data_samples: SampleList):
         """Extract 3d features from the backbone -> fpn -> 3d projection.
...
@@ -185,8 +161,8 @@ class ImVoxelNet(BaseDetector):
         """
         x = self.extract_feat(batch_inputs_dict, batch_data_samples)
         results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
-        predictions = self.convert_to_datasample(batch_data_samples,
-                                                 results_list)
+        predictions = self.add_pred_to_datasample(batch_data_samples,
+                                                  results_list)
         return predictions

     def _forward(self, batch_inputs_dict: dict,
                  batch_data_samples: SampleList,
...
@@ -209,3 +185,64 @@ class ImVoxelNet(BaseDetector):
         x = self.extract_feat(batch_inputs_dict, batch_data_samples)
         results = self.bbox_head.forward(x)
         return results
+
+    def convert_to_datasample(
+        self,
+        data_samples: SampleList,
+        data_instances_3d: OptInstanceList = None,
+        data_instances_2d: OptInstanceList = None,
+    ) -> SampleList:
+        """Convert results list to `Det3DDataSample`.
+
+        Subclasses could override it to be compatible with some
+        multi-modality 3D detectors.
+
+        Args:
+            data_samples (list[:obj:`Det3DDataSample`]): The input data.
+            data_instances_3d (list[:obj:`InstanceData`], optional): 3D
+                Detection results of each sample.
+            data_instances_2d (list[:obj:`InstanceData`], optional): 2D
+                Detection results of each sample.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input. Each Det3DDataSample usually contains
+            'pred_instances_3d'. And the ``pred_instances_3d`` normally
+            contains the following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instance, )
+            - labels_3d (Tensor): Labels of 3D bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (Tensor): Contains a tensor with shape
+              (num_instances, C) where C >= 7.
+
+            When some models produce image predictions, it should also
+            contain `pred_instances`, and the ``pred_instances`` normally
+            contains the following keys.
+
+            - scores (Tensor): Classification scores of image, has a shape
+              (num_instance, )
+            - labels (Tensor): Predicted labels of 2D bboxes, has a shape
+              (num_instances, ).
+            - bboxes (Tensor): Contains a tensor with shape
+              (num_instances, 4).
+        """
+        assert (data_instances_2d is not None) or \
+            (data_instances_3d is not None), \
+            'please pass at least one type of data_samples'
+
+        if data_instances_2d is None:
+            data_instances_2d = [
+                InstanceData() for _ in range(len(data_instances_3d))
+            ]
+        if data_instances_3d is None:
+            data_instances_3d = [
+                InstanceData() for _ in range(len(data_instances_2d))
+            ]
+
+        for i, data_sample in enumerate(data_samples):
+            data_sample.pred_instances_3d = data_instances_3d[i]
+            data_sample.pred_instances = data_instances_2d[i]
+        return data_samples
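
A minimal sketch of the padding behaviour the new method introduces: whichever
modality has no predictions is filled with empty InstanceData placeholders, so
every sample ends up carrying both prediction fields. ToySample stands in for
Det3DDataSample here; only InstanceData is the real mmengine class.

from mmengine.structures import InstanceData


class ToySample:
    """Stands in for Det3DDataSample in this sketch."""


data_samples = [ToySample(), ToySample()]
data_instances_3d = [InstanceData(), InstanceData()]  # e.g. from the 3D head
data_instances_2d = None  # this model produces no 2D predictions

if data_instances_2d is None:
    # pad the missing modality with empty containers
    data_instances_2d = [InstanceData() for _ in range(len(data_instances_3d))]

for i, data_sample in enumerate(data_samples):
    data_sample.pred_instances_3d = data_instances_3d[i]
    data_sample.pred_instances = data_instances_2d[i]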