Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
6c03a971
Unverified
Commit
6c03a971
authored
Oct 14, 2022
by
Tai-Wang
Committed by
GitHub
Oct 14, 2022
Browse files
Release v1.1.0rc1
Release v1.1.0rc1
parents
9611c2d0
ca42c312
Changes
174
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1031 additions
and
243 deletions
+1031
-243
mmdet3d/engine/hooks/benchmark_hook.py
mmdet3d/engine/hooks/benchmark_hook.py
+38
-0
mmdet3d/engine/hooks/visualization_hook.py
mmdet3d/engine/hooks/visualization_hook.py
+34
-10
mmdet3d/evaluation/metrics/kitti_metric.py
mmdet3d/evaluation/metrics/kitti_metric.py
+4
-3
mmdet3d/evaluation/metrics/waymo_metric.py
mmdet3d/evaluation/metrics/waymo_metric.py
+292
-75
mmdet3d/models/builder.py
mmdet3d/models/builder.py
+1
-1
mmdet3d/models/data_preprocessors/data_preprocessor.py
mmdet3d/models/data_preprocessors/data_preprocessor.py
+53
-29
mmdet3d/models/data_preprocessors/utils.py
mmdet3d/models/data_preprocessors/utils.py
+65
-0
mmdet3d/models/dense_heads/base_3d_dense_head.py
mmdet3d/models/dense_heads/base_3d_dense_head.py
+1
-1
mmdet3d/models/dense_heads/parta2_rpn_head.py
mmdet3d/models/dense_heads/parta2_rpn_head.py
+4
-6
mmdet3d/models/dense_heads/point_rpn_head.py
mmdet3d/models/dense_heads/point_rpn_head.py
+201
-71
mmdet3d/models/dense_heads/ssd_3d_head.py
mmdet3d/models/dense_heads/ssd_3d_head.py
+2
-3
mmdet3d/models/detectors/__init__.py
mmdet3d/models/detectors/__init__.py
+23
-5
mmdet3d/models/detectors/base.py
mmdet3d/models/detectors/base.py
+1
-1
mmdet3d/models/detectors/dfm.py
mmdet3d/models/detectors/dfm.py
+235
-0
mmdet3d/models/detectors/dynamic_voxelnet.py
mmdet3d/models/detectors/dynamic_voxelnet.py
+2
-1
mmdet3d/models/detectors/fcos_mono3d.py
mmdet3d/models/detectors/fcos_mono3d.py
+3
-2
mmdet3d/models/detectors/groupfree3dnet.py
mmdet3d/models/detectors/groupfree3dnet.py
+2
-2
mmdet3d/models/detectors/h3dnet.py
mmdet3d/models/detectors/h3dnet.py
+1
-1
mmdet3d/models/detectors/imvotenet.py
mmdet3d/models/detectors/imvotenet.py
+2
-2
mmdet3d/models/detectors/imvoxelnet.py
mmdet3d/models/detectors/imvoxelnet.py
+67
-30
No files found.
mmdet3d/engine/hooks/benchmark_hook.py
0 → 100644
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.hooks
import
Hook
from
mmdet3d.registry
import
HOOKS
@HOOKS.register_module()
class BenchmarkHook(Hook):
    """A hook that logs the training speed of each epoch."""

    priority = 'NORMAL'

    # Number of leading iterations of each epoch that are left out of the
    # per-epoch average so that warm-up time does not skew the measurement.
    warmup_iters = 50

    def after_train_epoch(self, runner) -> None:
        """Log the average iteration time of the epoch that just finished.

        We use the average throughput in iterations of the entire training
        run and skip the first ``warmup_iters`` iterations of each epoch to
        skip GPU warmup time.

        Args:
            runner (Runner): The runner of the training process.
        """
        message_hub = runner.message_hub
        max_iter_num = len(runner.train_dataloader)
        window = max_iter_num - self.warmup_iters
        if window <= 0:
            # The epoch is not longer than the warm-up period. A zero or
            # negative window would not select "the last N iterations", so
            # average over the whole epoch instead.
            window = max(max_iter_num, 1)
        speed = message_hub.get_scalar('train/time').mean(window)
        # Stored so that ``after_train`` can average over all epochs.
        message_hub.update_scalar('train/speed', speed)
        runner.logger.info(
            f'Training speed of epoch {runner.epoch + 1} is {speed} s/iter')

    def after_train(self, runner) -> None:
        """Log average training speed of entire training process.

        Args:
            runner (Runner): The runner of the training process.
        """
        message_hub = runner.message_hub
        avg_speed = message_hub.get_scalar('train/speed').mean()
        # The trailing space keeps the two concatenated literals from
        # running together ("processis").
        runner.logger.info('Average training speed of entire training process '
                           f'is {avg_speed} s/iter')
mmdet3d/engine/hooks/visualization_hook.py
View file @
6c03a971
...
@@ -4,6 +4,7 @@ import warnings
...
@@ -4,6 +4,7 @@ import warnings
from
typing
import
Optional
,
Sequence
from
typing
import
Optional
,
Sequence
import
mmcv
import
mmcv
import
numpy
as
np
from
mmengine.fileio
import
FileClient
from
mmengine.fileio
import
FileClient
from
mmengine.hooks
import
Hook
from
mmengine.hooks
import
Hook
from
mmengine.runner
import
Runner
from
mmengine.runner
import
Runner
...
@@ -95,15 +96,27 @@ class Det3DVisualizationHook(Hook):
...
@@ -95,15 +96,27 @@ class Det3DVisualizationHook(Hook):
# is visualized for each evaluation.
# is visualized for each evaluation.
total_curr_iter
=
runner
.
iter
+
batch_idx
total_curr_iter
=
runner
.
iter
+
batch_idx
data_input
=
dict
()
# Visualize only the first data
# Visualize only the first data
img_path
=
outputs
[
0
].
img_path
if
'img_path'
in
outputs
[
0
]:
img_bytes
=
self
.
file_client
.
get
(
img_path
)
img_path
=
outputs
[
0
].
img_path
img
=
mmcv
.
imfrombytes
(
img_bytes
,
channel_order
=
'rgb'
)
img_bytes
=
self
.
file_client
.
get
(
img_path
)
img
=
mmcv
.
imfrombytes
(
img_bytes
,
channel_order
=
'rgb'
)
data_input
[
'img'
]
=
img
if
'lidar_path'
in
outputs
[
0
]:
lidar_path
=
outputs
[
0
].
lidar_path
num_pts_feats
=
outputs
[
0
].
num_pts_feats
pts_bytes
=
self
.
file_client
.
get
(
lidar_path
)
points
=
np
.
frombuffer
(
pts_bytes
,
dtype
=
np
.
float32
)
points
=
points
.
reshape
(
-
1
,
num_pts_feats
)
data_input
[
'points'
]
=
points
if
total_curr_iter
%
self
.
interval
==
0
:
if
total_curr_iter
%
self
.
interval
==
0
:
self
.
_visualizer
.
add_datasample
(
self
.
_visualizer
.
add_datasample
(
osp
.
basename
(
img_path
)
if
self
.
show
else
'val_img
'
,
'val sample
'
,
img
,
data_input
,
data_sample
=
outputs
[
0
],
data_sample
=
outputs
[
0
],
show
=
self
.
show
,
show
=
self
.
show
,
wait_time
=
self
.
wait_time
,
wait_time
=
self
.
wait_time
,
...
@@ -135,9 +148,20 @@ class Det3DVisualizationHook(Hook):
...
@@ -135,9 +148,20 @@ class Det3DVisualizationHook(Hook):
for
data_sample
in
outputs
:
for
data_sample
in
outputs
:
self
.
_test_index
+=
1
self
.
_test_index
+=
1
img_path
=
data_sample
.
img_path
data_input
=
dict
()
img_bytes
=
self
.
file_client
.
get
(
img_path
)
if
'img_path'
in
data_sample
:
img
=
mmcv
.
imfrombytes
(
img_bytes
,
channel_order
=
'rgb'
)
img_path
=
data_sample
.
img_path
img_bytes
=
self
.
file_client
.
get
(
img_path
)
img
=
mmcv
.
imfrombytes
(
img_bytes
,
channel_order
=
'rgb'
)
data_input
[
'img'
]
=
img
if
'lidar_path'
in
data_sample
:
lidar_path
=
data_sample
.
lidar_path
num_pts_feats
=
data_sample
.
num_pts_feats
pts_bytes
=
self
.
file_client
.
get
(
lidar_path
)
points
=
np
.
frombuffer
(
pts_bytes
,
dtype
=
np
.
float32
)
points
=
points
.
reshape
(
-
1
,
num_pts_feats
)
data_input
[
'points'
]
=
points
out_file
=
None
out_file
=
None
if
self
.
test_out_dir
is
not
None
:
if
self
.
test_out_dir
is
not
None
:
...
@@ -145,8 +169,8 @@ class Det3DVisualizationHook(Hook):
...
@@ -145,8 +169,8 @@ class Det3DVisualizationHook(Hook):
out_file
=
osp
.
join
(
self
.
test_out_dir
,
out_file
)
out_file
=
osp
.
join
(
self
.
test_out_dir
,
out_file
)
self
.
_visualizer
.
add_datasample
(
self
.
_visualizer
.
add_datasample
(
osp
.
basename
(
img_path
)
if
self
.
show
else
'test_img
'
,
'test sample
'
,
img
,
data_input
,
data_sample
=
data_sample
,
data_sample
=
data_sample
,
show
=
self
.
show
,
show
=
self
.
show
,
wait_time
=
self
.
wait_time
,
wait_time
=
self
.
wait_time
,
...
...
mmdet3d/evaluation/metrics/kitti_metric.py
View file @
6c03a971
...
@@ -66,7 +66,8 @@ class KittiMetric(BaseMetric):
...
@@ -66,7 +66,8 @@ class KittiMetric(BaseMetric):
self
.
default_cam_key
=
default_cam_key
self
.
default_cam_key
=
default_cam_key
self
.
file_client_args
=
file_client_args
self
.
file_client_args
=
file_client_args
self
.
default_cam_key
=
default_cam_key
self
.
default_cam_key
=
default_cam_key
allowed_metrics
=
[
'bbox'
,
'img_bbox'
,
'mAP'
]
allowed_metrics
=
[
'bbox'
,
'img_bbox'
,
'mAP'
,
'LET_mAP'
]
self
.
metrics
=
metric
if
isinstance
(
metric
,
list
)
else
[
metric
]
self
.
metrics
=
metric
if
isinstance
(
metric
,
list
)
else
[
metric
]
for
metric
in
self
.
metrics
:
for
metric
in
self
.
metrics
:
if
metric
not
in
allowed_metrics
:
if
metric
not
in
allowed_metrics
:
...
@@ -168,7 +169,7 @@ class KittiMetric(BaseMetric):
...
@@ -168,7 +169,7 @@ class KittiMetric(BaseMetric):
"""Compute the metrics from processed results.
"""Compute the metrics from processed results.
Args:
Args:
results (list): The processed results of
each batch
.
results (list): The processed results of
the whole dataset
.
Returns:
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
Dict[str, float]: The computed metrics. The keys are the names of
...
@@ -575,7 +576,7 @@ class KittiMetric(BaseMetric):
...
@@ -575,7 +576,7 @@ class KittiMetric(BaseMetric):
box_preds
=
box_dict
[
'bboxes_3d'
]
box_preds
=
box_dict
[
'bboxes_3d'
]
scores
=
box_dict
[
'scores_3d'
]
scores
=
box_dict
[
'scores_3d'
]
labels
=
box_dict
[
'labels_3d'
]
labels
=
box_dict
[
'labels_3d'
]
sample_idx
=
info
[
'sample_id'
]
sample_idx
=
info
[
'sample_id
x
'
]
box_preds
.
limit_yaw
(
offset
=
0.5
,
period
=
np
.
pi
*
2
)
box_preds
.
limit_yaw
(
offset
=
0.5
,
period
=
np
.
pi
*
2
)
if
len
(
box_preds
)
==
0
:
if
len
(
box_preds
)
==
0
:
...
...
mmdet3d/evaluation/metrics/waymo_metric.py
View file @
6c03a971
...
@@ -11,8 +11,9 @@ from mmengine.logging import MMLogger, print_log
...
@@ -11,8 +11,9 @@ from mmengine.logging import MMLogger, print_log
from
mmdet3d.models.layers
import
box3d_multiclass_nms
from
mmdet3d.models.layers
import
box3d_multiclass_nms
from
mmdet3d.registry
import
METRICS
from
mmdet3d.registry
import
METRICS
from
mmdet3d.structures
import
(
Box3DMode
,
LiDARInstance3DBoxes
,
bbox3d2result
,
from
mmdet3d.structures
import
(
Box3DMode
,
CameraInstance3DBoxes
,
xywhr2xyxyr
)
LiDARInstance3DBoxes
,
bbox3d2result
,
points_cam2img
,
xywhr2xyxyr
)
from
.kitti_metric
import
KittiMetric
from
.kitti_metric
import
KittiMetric
...
@@ -27,7 +28,7 @@ class WaymoMetric(KittiMetric):
...
@@ -27,7 +28,7 @@ class WaymoMetric(KittiMetric):
Used for storing waymo evaluation programs.
Used for storing waymo evaluation programs.
split (str): The split of the evaluation set.
split (str): The split of the evaluation set.
metric (str | list[str]): Metrics to be evaluated.
metric (str | list[str]): Metrics to be evaluated.
Default to '
bbox
'.
Default to '
mAP
'.
pcd_limit_range (list): The range of point cloud used to
pcd_limit_range (list): The range of point cloud used to
filter invalid predicted boxes.
filter invalid predicted boxes.
Default to [0, -40, -3, 70.4, 40, 0.0].
Default to [0, -40, -3, 70.4, 40, 0.0].
...
@@ -54,13 +55,14 @@ class WaymoMetric(KittiMetric):
...
@@ -54,13 +55,14 @@ class WaymoMetric(KittiMetric):
'gpu'. Defaults to 'cpu'.
'gpu'. Defaults to 'cpu'.
file_client_args (dict): file client for reading gt in waymo format.
file_client_args (dict): file client for reading gt in waymo format.
"""
"""
num_cams
=
5
def
__init__
(
self
,
def
__init__
(
self
,
ann_file
:
str
,
ann_file
:
str
,
waymo_bin_file
:
str
,
waymo_bin_file
:
str
,
data_root
:
str
,
data_root
:
str
,
split
:
str
=
'training'
,
split
:
str
=
'training'
,
metric
:
Union
[
str
,
List
[
str
]]
=
'
bbox
'
,
metric
:
Union
[
str
,
List
[
str
]]
=
'
mAP
'
,
pcd_limit_range
:
List
[
float
]
=
[
-
85
,
-
85
,
-
5
,
85
,
85
,
5
],
pcd_limit_range
:
List
[
float
]
=
[
-
85
,
-
85
,
-
5
,
85
,
85
,
5
],
prefix
:
Optional
[
str
]
=
None
,
prefix
:
Optional
[
str
]
=
None
,
pklfile_prefix
:
str
=
None
,
pklfile_prefix
:
str
=
None
,
...
@@ -70,7 +72,6 @@ class WaymoMetric(KittiMetric):
...
@@ -70,7 +72,6 @@ class WaymoMetric(KittiMetric):
use_pred_sample_idx
:
bool
=
False
,
use_pred_sample_idx
:
bool
=
False
,
collect_device
:
str
=
'cpu'
,
collect_device
:
str
=
'cpu'
,
file_client_args
:
dict
=
dict
(
backend
=
'disk'
)):
file_client_args
:
dict
=
dict
(
backend
=
'disk'
)):
self
.
waymo_bin_file
=
waymo_bin_file
self
.
waymo_bin_file
=
waymo_bin_file
self
.
data_root
=
data_root
self
.
data_root
=
data_root
self
.
split
=
split
self
.
split
=
split
...
@@ -92,7 +93,7 @@ class WaymoMetric(KittiMetric):
...
@@ -92,7 +93,7 @@ class WaymoMetric(KittiMetric):
"""Compute the metrics from processed results.
"""Compute the metrics from processed results.
Args:
Args:
results (list): The processed results of
each batch
.
results (list): The processed results of
the whole dataset
.
Returns:
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
Dict[str, float]: The computed metrics. The keys are the names of
...
@@ -104,6 +105,35 @@ class WaymoMetric(KittiMetric):
...
@@ -104,6 +105,35 @@ class WaymoMetric(KittiMetric):
# load annotations
# load annotations
self
.
data_infos
=
load
(
self
.
ann_file
)[
'data_list'
]
self
.
data_infos
=
load
(
self
.
ann_file
)[
'data_list'
]
# different from kitti, waymo do not need to convert the ann file
# different from kitti, waymo do not need to convert the ann file
# handle the mono3d task
if
self
.
task
==
'mono3d'
:
new_data_infos
=
[]
for
info
in
self
.
data_infos
:
height
=
info
[
'images'
][
self
.
default_cam_key
][
'height'
]
width
=
info
[
'images'
][
self
.
default_cam_key
][
'width'
]
for
(
cam_key
,
img_info
)
in
info
[
'images'
].
items
():
camera_info
=
dict
()
camera_info
[
'images'
]
=
dict
()
camera_info
[
'images'
][
cam_key
]
=
img_info
# TODO remove the check by updating the data info;
if
'height'
not
in
img_info
:
img_info
[
'height'
]
=
height
img_info
[
'width'
]
=
width
if
'cam_instances'
in
info
\
and
cam_key
in
info
[
'cam_instances'
]:
camera_info
[
'instances'
]
=
info
[
'cam_instances'
][
cam_key
]
else
:
camera_info
[
'instances'
]
=
[]
camera_info
[
'ego2global'
]
=
info
[
'ego2global'
]
if
'image_sweeps'
in
info
:
camera_info
[
'image_sweeps'
]
=
info
[
'image_sweeps'
]
# TODO check if need to modify the sample id
# TODO check when will use it except for evaluation.
camera_info
[
'sample_id'
]
=
info
[
'sample_id'
]
new_data_infos
.
append
(
camera_info
)
self
.
data_infos
=
new_data_infos
if
self
.
pklfile_prefix
is
None
:
if
self
.
pklfile_prefix
is
None
:
eval_tmp_dir
=
tempfile
.
TemporaryDirectory
()
eval_tmp_dir
=
tempfile
.
TemporaryDirectory
()
...
@@ -120,65 +150,141 @@ class WaymoMetric(KittiMetric):
...
@@ -120,65 +150,141 @@ class WaymoMetric(KittiMetric):
submission_prefix
=
self
.
submission_prefix
,
submission_prefix
=
self
.
submission_prefix
,
classes
=
self
.
classes
)
classes
=
self
.
classes
)
import
subprocess
metric_dict
=
{}
eval_str
=
'mmdet3d/evaluation/functional/waymo_utils/'
+
\
for
metric
in
self
.
metrics
:
f
'compute_detection_metrics_main
{
pklfile_prefix
}
.bin '
+
\
ap_dict
=
self
.
waymo_evaluate
(
f
'
{
self
.
waymo_bin_file
}
'
pklfile_prefix
,
metric
=
metric
,
logger
=
logger
)
print
(
eval_str
)
metric_dict
[
metric
]
=
ap_dict
ret_bytes
=
subprocess
.
check_output
(
eval_str
,
shell
=
True
)
ret_texts
=
ret_bytes
.
decode
(
'utf-8'
)
print_log
(
ret_texts
,
logger
=
logger
)
ap_dict
=
{
'Vehicle/L1 mAP'
:
0
,
'Vehicle/L1 mAPH'
:
0
,
'Vehicle/L2 mAP'
:
0
,
'Vehicle/L2 mAPH'
:
0
,
'Pedestrian/L1 mAP'
:
0
,
'Pedestrian/L1 mAPH'
:
0
,
'Pedestrian/L2 mAP'
:
0
,
'Pedestrian/L2 mAPH'
:
0
,
'Sign/L1 mAP'
:
0
,
'Sign/L1 mAPH'
:
0
,
'Sign/L2 mAP'
:
0
,
'Sign/L2 mAPH'
:
0
,
'Cyclist/L1 mAP'
:
0
,
'Cyclist/L1 mAPH'
:
0
,
'Cyclist/L2 mAP'
:
0
,
'Cyclist/L2 mAPH'
:
0
,
'Overall/L1 mAP'
:
0
,
'Overall/L1 mAPH'
:
0
,
'Overall/L2 mAP'
:
0
,
'Overall/L2 mAPH'
:
0
}
mAP_splits
=
ret_texts
.
split
(
'mAP '
)
mAPH_splits
=
ret_texts
.
split
(
'mAPH '
)
mAP_splits
=
ret_texts
.
split
(
'mAP '
)
mAPH_splits
=
ret_texts
.
split
(
'mAPH '
)
for
idx
,
key
in
enumerate
(
ap_dict
.
keys
()):
split_idx
=
int
(
idx
/
2
)
+
1
if
idx
%
2
==
0
:
# mAP
ap_dict
[
key
]
=
float
(
mAP_splits
[
split_idx
].
split
(
']'
)[
0
])
else
:
# mAPH
ap_dict
[
key
]
=
float
(
mAPH_splits
[
split_idx
].
split
(
']'
)[
0
])
ap_dict
[
'Overall/L1 mAP'
]
=
\
(
ap_dict
[
'Vehicle/L1 mAP'
]
+
ap_dict
[
'Pedestrian/L1 mAP'
]
+
ap_dict
[
'Cyclist/L1 mAP'
])
/
3
ap_dict
[
'Overall/L1 mAPH'
]
=
\
(
ap_dict
[
'Vehicle/L1 mAPH'
]
+
ap_dict
[
'Pedestrian/L1 mAPH'
]
+
ap_dict
[
'Cyclist/L1 mAPH'
])
/
3
ap_dict
[
'Overall/L2 mAP'
]
=
\
(
ap_dict
[
'Vehicle/L2 mAP'
]
+
ap_dict
[
'Pedestrian/L2 mAP'
]
+
ap_dict
[
'Cyclist/L2 mAP'
])
/
3
ap_dict
[
'Overall/L2 mAPH'
]
=
\
(
ap_dict
[
'Vehicle/L2 mAPH'
]
+
ap_dict
[
'Pedestrian/L2 mAPH'
]
+
ap_dict
[
'Cyclist/L2 mAPH'
])
/
3
if
eval_tmp_dir
is
not
None
:
if
eval_tmp_dir
is
not
None
:
eval_tmp_dir
.
cleanup
()
eval_tmp_dir
.
cleanup
()
if
tmp_dir
is
not
None
:
if
tmp_dir
is
not
None
:
tmp_dir
.
cleanup
()
tmp_dir
.
cleanup
()
return
metric_dict
def waymo_evaluate(self,
                   pklfile_prefix: str,
                   metric: str = None,
                   logger: MMLogger = None) -> dict:
    """Evaluation in Waymo protocol.

    Runs the Waymo metric executable matching ``metric`` on
    ``{pklfile_prefix}.bin`` and ``self.waymo_bin_file``, logs its output and
    parses the printed scores.

    Args:
        pklfile_prefix (str): The location that stored the prediction
            results.
        metric (str): Metric to be evaluated, either ``'mAP'`` or
            ``'LET_mAP'``. Defaults to None.
        logger (MMLogger, optional): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        dict[str, float]: Results of each evaluation metric. The
        ``Overall`` entries are the mean over Vehicle, Pedestrian and
        Cyclist (Sign is not included).

    Raises:
        NotImplementedError: If ``metric`` is neither ``'mAP'`` nor
            ``'LET_mAP'``.
    """
    import subprocess

    object_types = ('Vehicle', 'Pedestrian', 'Sign', 'Cyclist', 'Overall')
    if metric == 'mAP':
        tool = 'compute_detection_metrics_main'
        metric_names = ('mAP', 'mAPH')
        ap_keys = [
            f'{obj}/{level} {name}' for obj in object_types
            for level in ('L1', 'L2') for name in metric_names
        ]
    elif metric == 'LET_mAP':
        tool = 'compute_detection_let_metrics_main'
        metric_names = ('mAPL', 'mAP', 'mAPH')
        ap_keys = [
            f'{obj} {name}' for obj in object_types for name in metric_names
        ]
    else:
        # Fail with a clear message instead of an unbound ``ap_dict``.
        raise NotImplementedError(
            f'metric {metric!r} is not supported by waymo_evaluate, '
            "expected 'mAP' or 'LET_mAP'")

    eval_str = 'mmdet3d/evaluation/functional/waymo_utils/' + \
        f'{tool} {pklfile_prefix}.bin ' + \
        f'{self.waymo_bin_file}'
    print(eval_str)
    # NOTE(review): the command is a shell string built from configured
    # paths; paths containing spaces or shell metacharacters will be
    # misinterpreted. Kept as-is to preserve the existing invocation.
    ret_bytes = subprocess.check_output(eval_str, shell=True)
    ret_texts = ret_bytes.decode('utf-8')
    print_log(ret_texts, logger=logger)

    # Presumably every score is printed as "<name> <value>]" and the k-th
    # occurrence of a name belongs to the k-th group of keys -- confirm
    # against the executable's output format.
    splits = {name: ret_texts.split(f'{name} ') for name in metric_names}
    num_names = len(metric_names)
    ap_dict = dict.fromkeys(ap_keys, 0)
    for idx, key in enumerate(ap_keys):
        split_idx = idx // num_names + 1
        name = metric_names[idx % num_names]
        ap_dict[key] = float(splits[name][split_idx].split(']')[0])

    # Replace the parsed "Overall" values by the mean over the three
    # evaluated object types.
    for key in ap_keys:
        if key.startswith('Overall'):
            suffix = key[len('Overall'):]
            ap_dict[key] = \
                (ap_dict['Vehicle' + suffix] +
                 ap_dict['Pedestrian' + suffix] +
                 ap_dict['Cyclist' + suffix]) / 3
    return ap_dict
def
format_results
(
self
,
def
format_results
(
self
,
...
@@ -254,7 +360,7 @@ class WaymoMetric(KittiMetric):
...
@@ -254,7 +360,7 @@ class WaymoMetric(KittiMetric):
for
cam_idx
in
range
(
self
.
num_cams
):
for
cam_idx
in
range
(
self
.
num_cams
):
box_dict
[
key
].
append
(
box_dict_per_frame
[
cam_idx
][
key
])
box_dict
[
key
].
append
(
box_dict_per_frame
[
cam_idx
][
key
])
# merge each elements
# merge each elements
box_dict
[
'sample_id'
]
=
cam0_info
[
'image_id'
]
box_dict
[
'sample_id
x
'
]
=
cam0_info
[
'image_id'
]
for
key
in
[
'bbox'
,
'box3d_lidar'
,
'scores'
,
'label_preds'
]:
for
key
in
[
'bbox'
,
'box3d_lidar'
,
'scores'
,
'label_preds'
]:
box_dict
[
key
]
=
np
.
concatenate
(
box_dict
[
key
])
box_dict
[
key
]
=
np
.
concatenate
(
box_dict
[
key
])
...
@@ -284,14 +390,14 @@ class WaymoMetric(KittiMetric):
...
@@ -284,14 +390,14 @@ class WaymoMetric(KittiMetric):
nms_cfg
.
max_per_frame
,
nms_cfg
)
nms_cfg
.
max_per_frame
,
nms_cfg
)
lidar_boxes3d
=
LiDARInstance3DBoxes
(
boxes3d
)
lidar_boxes3d
=
LiDARInstance3DBoxes
(
boxes3d
)
det
=
bbox3d2result
(
lidar_boxes3d
,
scores
,
labels
)
det
=
bbox3d2result
(
lidar_boxes3d
,
scores
,
labels
)
box_preds_lidar
=
det
[
'boxes_3d'
]
box_preds_lidar
=
det
[
'
b
boxes_3d'
]
scores
=
det
[
'scores_3d'
]
scores
=
det
[
'scores_3d'
]
labels
=
det
[
'labels_3d'
]
labels
=
det
[
'labels_3d'
]
# box_preds_camera is in the cam0 system
# box_preds_camera is in the cam0 system
rect
=
cam0_info
[
'
calib'
][
'R0_rect'
].
astype
(
np
.
float32
)
lidar2cam
=
cam0_info
[
'
images'
][
self
.
default_cam_key
][
'lidar2img'
]
Trv2c
=
cam0_info
[
'calib'
][
'Tr_velo_to_
cam
'
]
.
astype
(
np
.
float32
)
lidar2cam
=
np
.
array
(
lidar2
cam
)
.
astype
(
np
.
float32
)
box_preds_camera
=
box_preds_lidar
.
convert_to
(
box_preds_camera
=
box_preds_lidar
.
convert_to
(
Box3DMode
.
CAM
,
rect
@
Trv2c
,
correct_yaw
=
True
)
Box3DMode
.
CAM
,
np
.
linalg
.
inv
(
lidar2cam
)
,
correct_yaw
=
True
)
# Note: bbox is meaningless in final evaluation, set to 0
# Note: bbox is meaningless in final evaluation, set to 0
merged_box_dict
=
dict
(
merged_box_dict
=
dict
(
bbox
=
np
.
zeros
([
box_preds_lidar
.
tensor
.
shape
[
0
],
4
]),
bbox
=
np
.
zeros
([
box_preds_lidar
.
tensor
.
shape
[
0
],
4
]),
...
@@ -299,7 +405,7 @@ class WaymoMetric(KittiMetric):
...
@@ -299,7 +405,7 @@ class WaymoMetric(KittiMetric):
box3d_lidar
=
box_preds_lidar
.
tensor
.
numpy
(),
box3d_lidar
=
box_preds_lidar
.
tensor
.
numpy
(),
scores
=
scores
.
numpy
(),
scores
=
scores
.
numpy
(),
label_preds
=
labels
.
numpy
(),
label_preds
=
labels
.
numpy
(),
sample_idx
=
box_dict
[
'sample_id
x
'
],
sample_idx
=
box_dict
[
'sample_id'
],
)
)
return
merged_box_dict
return
merged_box_dict
...
@@ -337,23 +443,31 @@ class WaymoMetric(KittiMetric):
...
@@ -337,23 +443,31 @@ class WaymoMetric(KittiMetric):
annos
=
[]
annos
=
[]
sample_idx
=
sample_id_list
[
idx
]
sample_idx
=
sample_id_list
[
idx
]
info
=
self
.
data_infos
[
sample_idx
]
info
=
self
.
data_infos
[
sample_idx
]
# Here default used 'CAM2' to compute metric. If you want to
# use another camera, please modify it.
image_shape
=
(
info
[
'images'
][
self
.
default_cam_key
][
'height'
],
info
[
'images'
][
self
.
default_cam_key
][
'width'
])
if
self
.
task
==
'mono
3d
'
:
if
self
.
task
==
'mono
_det
'
:
if
idx
%
self
.
num_cams
==
0
:
if
idx
%
self
.
num_cams
==
0
:
box_dict_per_frame
=
[]
box_dict_per_frame
=
[]
cam0_idx
=
idx
cam0_key
=
list
(
info
[
'images'
].
keys
())[
0
]
box_dict
=
self
.
convert_valid_bboxes
(
pred_dicts
,
info
)
cam0_info
=
info
# Here in mono3d, we use the 'CAM_FRONT' "the first
# index in the camera" as the default image shape.
# If you want to use another camera, please modify it.
image_shape
=
(
info
[
'images'
][
cam0_key
][
'height'
],
info
[
'images'
][
cam0_key
][
'width'
])
box_dict
=
self
.
convert_valid_bboxes
(
pred_dicts
,
info
)
else
:
box_dict
=
self
.
convert_valid_bboxes
(
pred_dicts
,
info
)
# Here default used 'CAM_FRONT' to compute metric.
# If you want to use another camera, please modify it.
image_shape
=
(
info
[
'images'
][
self
.
default_cam_key
][
'height'
],
info
[
'images'
][
self
.
default_cam_key
][
'width'
])
if
self
.
task
==
'mono3d'
:
if
self
.
task
==
'mono3d'
:
box_dict_per_frame
.
append
(
box_dict
)
box_dict_per_frame
.
append
(
box_dict
)
if
(
idx
+
1
)
%
self
.
num_cams
!=
0
:
if
(
idx
+
1
)
%
self
.
num_cams
!=
0
:
continue
continue
box_dict
=
self
.
merge_multi_view_boxes
(
box_dict
=
self
.
merge_multi_view_boxes
(
box_dict_per_frame
,
box_dict_per_frame
,
self
.
data_infos
[
cam0_idx
])
cam0_info
)
anno
=
{
anno
=
{
'name'
:
[],
'name'
:
[],
'truncated'
:
[],
'truncated'
:
[],
...
@@ -444,3 +558,106 @@ class WaymoMetric(KittiMetric):
...
@@ -444,3 +558,106 @@ class WaymoMetric(KittiMetric):
print
(
f
'Result is saved to
{
out
}
.'
)
print
(
f
'Result is saved to
{
out
}
.'
)
return
det_annos
return
det_annos
def convert_valid_bboxes(self, box_dict: dict, info: dict):
    """Convert the predicted boxes into valid ones. Should handle the
    different task mode (mono3d, mv3d, lidar), separately.

    Args:
        box_dict (dict): Box dictionaries to be converted.

            - bboxes_3d (:obj:`LiDARInstance3DBoxes` or
              :obj:`CameraInstance3DBoxes`): 3D bounding boxes. Their yaw
              is limited in place.
            - scores_3d (torch.Tensor): Scores of boxes.
            - labels_3d (torch.Tensor): Class labels of boxes.
        info (dict): Data info. ``sample_id`` and the ``images`` entry of
            the selected camera (``lidar2cam``, ``cam2img``, ``height``,
            ``width``) are read.

    Returns:
        dict: Valid predicted boxes.

            - bbox (np.ndarray): 2D bounding boxes.
            - pred_box_type_3d (type): Type of the input 3D boxes.
            - box3d_camera (np.ndarray): 3D bounding boxes in
              camera coordinate.
            - box3d_lidar (np.ndarray): 3D bounding boxes in
              LiDAR coordinate.
            - scores (np.ndarray): Scores of boxes.
            - label_preds (np.ndarray): Class label predictions.
            - sample_idx (int): Sample index.

    Raises:
        NotImplementedError: If ``self.task`` is not one of ``'mv3d'``,
            ``'lidar'`` or ``'mono3d'``.
        TypeError: If the boxes are neither LiDAR nor camera boxes.
    """
    # TODO: refactor this function
    box_preds = box_dict['bboxes_3d']
    scores = box_dict['scores_3d']
    labels = box_dict['labels_3d']
    sample_idx = info['sample_id']
    box_preds.limit_yaw(offset=0.5, period=np.pi * 2)

    def _empty_result() -> dict:
        # Single definition of the "no valid box" result so that both
        # empty paths return the same keys and array shapes.
        return dict(
            bbox=np.zeros([0, 4]),
            pred_box_type_3d=type(box_preds),
            box3d_camera=np.zeros([0, 7]),
            box3d_lidar=np.zeros([0, 7]),
            scores=np.zeros([0]),
            label_preds=np.zeros([0]),
            sample_idx=sample_idx)

    if len(box_preds) == 0:
        return _empty_result()

    # mv3d / lidar use the default camera; in mono3d the first camera
    # stored in ``info['images']`` is used.
    # If you want to use another camera, please modify it.
    if self.task in ['mv3d', 'lidar']:
        cam_key = self.default_cam_key
    elif self.task == 'mono3d':
        cam_key = list(info['images'].keys())[0]
    else:
        raise NotImplementedError

    cam_info = info['images'][cam_key]
    lidar2cam = np.array(cam_info['lidar2cam']).astype(np.float32)
    P2 = np.array(cam_info['cam2img']).astype(np.float32)
    img_shape = (cam_info['height'], cam_info['width'])
    P2 = box_preds.tensor.new_tensor(P2)

    if isinstance(box_preds, LiDARInstance3DBoxes):
        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, lidar2cam)
        box_preds_lidar = box_preds
    elif isinstance(box_preds, CameraInstance3DBoxes):
        box_preds_camera = box_preds
        box_preds_lidar = box_preds.convert_to(Box3DMode.LIDAR,
                                               np.linalg.inv(lidar2cam))
    else:
        raise TypeError(
            f'Unsupported box type {type(box_preds).__name__}, expected '
            'LiDARInstance3DBoxes or CameraInstance3DBoxes')

    box_corners = box_preds_camera.corners
    box_corners_in_image = points_cam2img(box_corners, P2)
    # box_corners_in_image: [N, 8, 2]
    minxy = torch.min(box_corners_in_image, dim=1)[0]
    maxxy = torch.max(box_corners_in_image, dim=1)[0]
    box_2d_preds = torch.cat([minxy, maxxy], dim=1)

    # Post-processing
    if self.task in ['mv3d', 'lidar']:
        # check box_preds_lidar: keep boxes whose center lies strictly
        # inside the configured point cloud range.
        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
        valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
                          (box_preds_lidar.center < limit_range[3:]))
        valid_inds = valid_pcd_inds.all(-1)
    else:
        # mono3d (other tasks were rejected above).
        # check box_preds_camera: keep boxes whose projected 2D extent
        # overlaps the image.
        image_shape = box_preds.tensor.new_tensor(img_shape)
        valid_inds = ((box_2d_preds[:, 0] < image_shape[1]) &
                      (box_2d_preds[:, 1] < image_shape[0]) &
                      (box_2d_preds[:, 2] > 0) & (box_2d_preds[:, 3] > 0))

    if valid_inds.sum() > 0:
        return dict(
            bbox=box_2d_preds[valid_inds, :].numpy(),
            pred_box_type_3d=type(box_preds),
            box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
            box3d_lidar=box_preds_lidar[valid_inds].tensor.numpy(),
            scores=scores[valid_inds].numpy(),
            label_preds=labels[valid_inds].numpy(),
            sample_idx=sample_idx)
    else:
        return _empty_result()
mmdet3d/models/builder.py
View file @
6c03a971
...
@@ -92,7 +92,7 @@ def build_segmentor(cfg, train_cfg=None, test_cfg=None):
...
@@ -92,7 +92,7 @@ def build_segmentor(cfg, train_cfg=None, test_cfg=None):
def
build_model
(
cfg
,
train_cfg
=
None
,
test_cfg
=
None
):
def
build_model
(
cfg
,
train_cfg
=
None
,
test_cfg
=
None
):
"""A function w
a
rpper for building 3D detector or segmentor according to
"""A function wr
a
pper for building 3D detector or segmentor according to
cfg.
cfg.
Should be deprecated in the future.
Should be deprecated in the future.
...
...
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @
6c03a971
...
@@ -13,7 +13,7 @@ from torch.nn import functional as F
...
@@ -13,7 +13,7 @@ from torch.nn import functional as F
from
mmdet3d.registry
import
MODELS
from
mmdet3d.registry
import
MODELS
from
mmdet3d.utils
import
OptConfigType
from
mmdet3d.utils
import
OptConfigType
from
mmdet.models
import
DetDataPreprocessor
from
mmdet.models
import
DetDataPreprocessor
from
mmdet.models
.utils
.misc
import
samplelist_boxlist2tensor
from
.utils
import
multiview_img_stack_batch
@
MODELS
.
register_module
()
@
MODELS
.
register_module
()
...
@@ -75,7 +75,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -75,7 +75,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
seg_pad_value
:
int
=
255
,
seg_pad_value
:
int
=
255
,
bgr_to_rgb
:
bool
=
False
,
bgr_to_rgb
:
bool
=
False
,
rgb_to_bgr
:
bool
=
False
,
rgb_to_bgr
:
bool
=
False
,
box
list
2tensor
:
bool
=
True
,
box
type
2tensor
:
bool
=
True
,
batch_augments
:
Optional
[
List
[
dict
]]
=
None
):
batch_augments
:
Optional
[
List
[
dict
]]
=
None
):
super
().
__init__
(
super
().
__init__
(
mean
=
mean
,
mean
=
mean
,
...
@@ -88,7 +88,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -88,7 +88,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
seg_pad_value
=
seg_pad_value
,
seg_pad_value
=
seg_pad_value
,
bgr_to_rgb
=
bgr_to_rgb
,
bgr_to_rgb
=
bgr_to_rgb
,
rgb_to_bgr
=
rgb_to_bgr
,
rgb_to_bgr
=
rgb_to_bgr
,
boxlist2tensor
=
boxlist2tensor
,
batch_augments
=
batch_augments
)
batch_augments
=
batch_augments
)
self
.
voxel
=
voxel
self
.
voxel
=
voxel
self
.
voxel_type
=
voxel_type
self
.
voxel_type
=
voxel_type
...
@@ -104,10 +103,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -104,10 +103,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
``BaseDataPreprocessor``.
``BaseDataPreprocessor``.
Args:
Args:
data (
List[
dict
]
|
List[
List[dict]
]
): data from dataloader.
data (dict | List[dict]): data from dataloader.
The
outer list always represent th
e batch
size
, when it is
The
dict contains the whol
e batch
data
, when it is
a
list[
list[dict]
]
, the
inter
list indicate test time
a list[dict], the list indicate test time
augmentation.
augmentation.
training (bool): Whether to enable training time augmentation.
training (bool): Whether to enable training time augmentation.
Defaults to False.
Defaults to False.
...
@@ -144,7 +143,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -144,7 +143,6 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
data
=
self
.
collate_data
(
data
)
data
=
self
.
collate_data
(
data
)
inputs
,
data_samples
=
data
[
'inputs'
],
data
[
'data_samples'
]
inputs
,
data_samples
=
data
[
'inputs'
],
data
[
'data_samples'
]
batch_inputs
=
dict
()
batch_inputs
=
dict
()
if
'points'
in
inputs
:
if
'points'
in
inputs
:
...
@@ -169,9 +167,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -169,9 +167,14 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
'pad_shape'
:
pad_shape
'pad_shape'
:
pad_shape
})
})
if
self
.
boxlist2tensor
:
if
hasattr
(
self
,
'boxtype2tensor'
)
and
self
.
boxtype2tensor
:
from
mmdet.models.utils.misc
import
\
samplelist_boxtype2tensor
samplelist_boxtype2tensor
(
data_samples
)
elif
hasattr
(
self
,
'boxlist2tensor'
)
and
self
.
boxlist2tensor
:
from
mmdet.models.utils.misc
import
\
samplelist_boxlist2tensor
samplelist_boxlist2tensor
(
data_samples
)
samplelist_boxlist2tensor
(
data_samples
)
if
self
.
pad_mask
:
if
self
.
pad_mask
:
self
.
pad_gt_masks
(
data_samples
)
self
.
pad_gt_masks
(
data_samples
)
...
@@ -185,6 +188,23 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -185,6 +188,23 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
return
{
'inputs'
:
batch_inputs
,
'data_samples'
:
data_samples
}
return
{
'inputs'
:
batch_inputs
,
'data_samples'
:
data_samples
}
def preprocess_img(self, _batch_img):
    """Apply channel reordering, float conversion and normalization.

    Args:
        _batch_img (torch.Tensor): A single image tensor. When
            ``self.mean`` holds 3 values and normalization is enabled,
            it must have shape (3, H, W).

    Returns:
        torch.Tensor: The float image, channel-flipped when
        ``self._channel_conversion`` is set and normalized with
        ``self.mean`` / ``self.std`` when ``self._enable_normalize``
        is set.
    """
    # Reverse the channel order first (index [2, 1, 0] on the leading
    # axis) so the cheaper integer data is shuffled before the float cast.
    if self._channel_conversion:
        img = _batch_img[[2, 1, 0], ...]
    else:
        img = _batch_img
    img = img.float()

    # Nothing more to do when normalization is disabled.
    if not self._enable_normalize:
        return img

    # A 3-value mean can only be applied to a (3, H, W) image.
    if self.mean.shape[0] == 3:
        is_three_channel_image = img.dim() == 3 and img.shape[0] == 3
        assert is_three_channel_image, (
            'If the mean has 3 values, the input tensor '
            'should in shape of (3, H, W), but got the '
            f'tensor with shape {img.shape}')
    return (img - self.mean) / self.std
def
collate_data
(
self
,
data
:
dict
)
->
dict
:
def
collate_data
(
self
,
data
:
dict
)
->
dict
:
"""Copying data to the target device and Performs normalization、
"""Copying data to the target device and Performs normalization、
padding and bgr2rgb conversion and stack based on
padding and bgr2rgb conversion and stack based on
...
@@ -203,30 +223,30 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -203,30 +223,30 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
if
'img'
in
data
[
'inputs'
]:
if
'img'
in
data
[
'inputs'
]:
_batch_imgs
=
data
[
'inputs'
][
'img'
]
_batch_imgs
=
data
[
'inputs'
][
'img'
]
# Process data with `pseudo_collate`.
# Process data with `pseudo_collate`.
if
is_list_of
(
_batch_imgs
,
torch
.
Tensor
):
if
is_list_of
(
_batch_imgs
,
torch
.
Tensor
):
batch_imgs
=
[]
batch_imgs
=
[]
img_dim
=
_batch_imgs
[
0
].
dim
()
for
_batch_img
in
_batch_imgs
:
for
_batch_img
in
_batch_imgs
:
# channel transform
if
img_dim
==
3
:
# standard img
if
self
.
_channel_conversion
:
_batch_img
=
self
.
preprocess_img
(
_batch_img
)
_batch_img
=
_batch_img
[[
2
,
1
,
0
],
...]
elif
img_dim
==
4
:
# Convert to float after channel conversion to ensure
_batch_img
=
[
# efficiency
self
.
preprocess_img
(
_img
)
for
_img
in
_batch_img
_batch_img
=
_batch_img
.
float
()
]
# Normalization.
if
self
.
_enable_normalize
:
_batch_img
=
torch
.
stack
(
_batch_img
,
dim
=
0
)
if
self
.
mean
.
shape
[
0
]
==
3
:
assert
_batch_img
.
dim
(
)
==
3
and
_batch_img
.
shape
[
0
]
==
3
,
(
'If the mean has 3 values, the input tensor '
'should in shape of (3, H, W), but got the '
f
'tensor with shape
{
_batch_img
.
shape
}
'
)
_batch_img
=
(
_batch_img
-
self
.
mean
)
/
self
.
std
batch_imgs
.
append
(
_batch_img
)
batch_imgs
.
append
(
_batch_img
)
# Pad and stack Tensor.
# Pad and stack Tensor.
batch_imgs
=
stack_batch
(
batch_imgs
,
self
.
pad_size_divisor
,
if
img_dim
==
3
:
self
.
pad_value
)
batch_imgs
=
stack_batch
(
batch_imgs
,
self
.
pad_size_divisor
,
self
.
pad_value
)
elif
img_dim
==
4
:
batch_imgs
=
multiview_img_stack_batch
(
batch_imgs
,
self
.
pad_size_divisor
,
self
.
pad_value
)
# Process data with `default_collate`.
# Process data with `default_collate`.
elif
isinstance
(
_batch_imgs
,
torch
.
Tensor
):
elif
isinstance
(
_batch_imgs
,
torch
.
Tensor
):
assert
_batch_imgs
.
dim
()
==
4
,
(
assert
_batch_imgs
.
dim
()
==
4
,
(
...
@@ -270,6 +290,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -270,6 +290,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
if
is_list_of
(
_batch_inputs
,
torch
.
Tensor
):
if
is_list_of
(
_batch_inputs
,
torch
.
Tensor
):
batch_pad_shape
=
[]
batch_pad_shape
=
[]
for
ori_input
in
_batch_inputs
:
for
ori_input
in
_batch_inputs
:
if
ori_input
.
dim
()
==
4
:
# means multi-view input, select one of the
# images to calculate the pad shape
ori_input
=
ori_input
[
0
]
pad_h
=
int
(
pad_h
=
int
(
np
.
ceil
(
ori_input
.
shape
[
1
]
/
np
.
ceil
(
ori_input
.
shape
[
1
]
/
self
.
pad_size_divisor
))
*
self
.
pad_size_divisor
self
.
pad_size_divisor
))
*
self
.
pad_size_divisor
...
@@ -293,7 +317,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
...
@@ -293,7 +317,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
else
:
else
:
raise
TypeError
(
'Output of `cast_data` should be a list of dict '
raise
TypeError
(
'Output of `cast_data` should be a list of dict '
'or a tuple with inputs and data_samples, but got'
'or a tuple with inputs and data_samples, but got'
f
'
{
type
(
data
)
}
:
{
data
}
'
)
f
'
{
type
(
data
)
}
:
{
data
}
'
)
return
batch_pad_shape
return
batch_pad_shape
@
torch
.
no_grad
()
@
torch
.
no_grad
()
...
...
mmdet3d/models/data_preprocessors/utils.py
0 → 100644
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Union
import
torch
import
torch.nn.functional
as
F
def multiview_img_stack_batch(
        tensor_list: List[torch.Tensor],
        pad_size_divisor: int = 1,
        pad_value: Union[int, float] = 0) -> torch.Tensor:
    """Stack multi-view image tensors into a batch with right/bottom padding.

    Compared to ``stack_batch`` in ``mmengine.model.utils``, this function
    further handles multi-view images: only the last two dims are padded
    (``padded_sizes[:, :-2] = 0``) instead of every dim but the first
    (``padded_sizes[:, 0] = 0``).

    Each tensor is padded on the right/bottom of its last two dims up to
    the maximum size found in ``tensor_list``. If ``pad_size_divisor > 0``,
    that maximum size is additionally rounded up so it is divisible by
    ``pad_size_divisor``; otherwise the tensors are only padded to the
    common maximum size.

    Args:
        tensor_list (List[Tensor]): A list of tensors with the same dim.
        pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding
            to ensure the padded size is divisible by
            ``pad_size_divisor``. This depends on the model, and many
            models need to be divisible by 32. Defaults to 1.
        pad_value (int, float): The padding value. Defaults to 0.

    Returns:
        Tensor: The stacked tensor, with one more dim than each input.
    """
    assert isinstance(tensor_list, list), (
        f'Expected input type to be list, but got {type(tensor_list)}')
    assert tensor_list, '`tensor_list` could not be an empty list'
    assert len({tensor.ndim for tensor in tensor_list}) == 1, (
        f'Expected the dimensions of all tensors must be the same, '
        f'but got {[tensor.ndim for tensor in tensor_list]}')

    dim = tensor_list[0].dim()
    num_img = len(tensor_list)
    all_sizes: torch.Tensor = torch.Tensor(
        [tensor.shape for tensor in tensor_list])
    max_sizes = torch.max(all_sizes, dim=0)[0]
    # Only round up when a positive divisor is given; dividing by zero
    # would otherwise turn every size into inf/nan.
    if pad_size_divisor > 0:
        max_sizes = torch.ceil(
            max_sizes / pad_size_divisor) * pad_size_divisor
    padded_sizes = max_sizes - all_sizes
    # Only the last two dims are padded; all leading dims are left
    # untouched.
    padded_sizes[:, :-2] = 0
    if padded_sizes.sum() == 0:
        return torch.stack(tensor_list)
    # `pad` is the second argument of `F.pad`. If pad is (1, 2, 3, 4),
    # it means padding the last dim with 1(left) 2(right), and padding the
    # penultimate dim with 3(top) 4(bottom). The order of `pad` is opposite
    # to that of `padded_sizes`. Therefore, `padded_sizes` needs to be
    # reversed, and only the odd indices of pad are assigned to keep the
    # padding on the "right" and "bottom".
    pad = torch.zeros(num_img, 2 * dim, dtype=torch.int)
    pad[:, 1::2] = padded_sizes[:, range(dim - 1, -1, -1)]
    batch_tensor = [
        F.pad(tensor, tuple(pad[idx].tolist()), value=pad_value)
        for idx, tensor in enumerate(tensor_list)
    ]
    return torch.stack(batch_tensor)
mmdet3d/models/dense_heads/base_3d_dense_head.py
View file @
6c03a971
...
@@ -204,7 +204,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
...
@@ -204,7 +204,7 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
score_factors (list[Tensor], optional): Score factor for
score_factors (list[Tensor], optional): Score factor for
all scale level, each is a 4D-tensor, has shape
all scale level, each is a 4D-tensor, has shape
(batch_size, num_priors * 1, H, W). Defaults to None.
(batch_size, num_priors * 1, H, W). Defaults to None.
batch_input_metas (list[dict], Optional): Batch i
mage
meta info.
batch_input_metas (list[dict], Optional): Batch i
nputs
meta info.
Defaults to None.
Defaults to None.
cfg (ConfigDict, optional): Test / postprocessing
cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
configuration, if None, test_cfg would be used.
...
...
mmdet3d/models/dense_heads/parta2_rpn_head.py
View file @
6c03a971
...
@@ -183,8 +183,7 @@ class PartA2RPNHead(Anchor3DHead):
...
@@ -183,8 +183,7 @@ class PartA2RPNHead(Anchor3DHead):
result
=
self
.
class_agnostic_nms
(
mlvl_bboxes
,
mlvl_bboxes_for_nms
,
result
=
self
.
class_agnostic_nms
(
mlvl_bboxes
,
mlvl_bboxes_for_nms
,
mlvl_max_scores
,
mlvl_label_pred
,
mlvl_max_scores
,
mlvl_label_pred
,
mlvl_cls_score
,
mlvl_dir_scores
,
mlvl_cls_score
,
mlvl_dir_scores
,
score_thr
,
cfg
.
nms_post
,
cfg
,
score_thr
,
cfg
,
input_meta
)
input_meta
)
return
result
return
result
def
loss_and_predict
(
self
,
def
loss_and_predict
(
self
,
...
@@ -275,7 +274,7 @@ class PartA2RPNHead(Anchor3DHead):
...
@@ -275,7 +274,7 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_bboxes_for_nms
:
Tensor
,
mlvl_bboxes_for_nms
:
Tensor
,
mlvl_max_scores
:
Tensor
,
mlvl_label_pred
:
Tensor
,
mlvl_max_scores
:
Tensor
,
mlvl_label_pred
:
Tensor
,
mlvl_cls_score
:
Tensor
,
mlvl_dir_scores
:
Tensor
,
mlvl_cls_score
:
Tensor
,
mlvl_dir_scores
:
Tensor
,
score_thr
:
int
,
max_num
:
int
,
cfg
:
ConfigDict
,
score_thr
:
int
,
cfg
:
ConfigDict
,
input_meta
:
dict
)
->
Dict
:
input_meta
:
dict
)
->
Dict
:
"""Class agnostic nms for single batch.
"""Class agnostic nms for single batch.
...
@@ -291,7 +290,6 @@ class PartA2RPNHead(Anchor3DHead):
...
@@ -291,7 +290,6 @@ class PartA2RPNHead(Anchor3DHead):
mlvl_dir_scores (torch.Tensor): Direction scores of
mlvl_dir_scores (torch.Tensor): Direction scores of
Multi-level bbox.
Multi-level bbox.
score_thr (int): Score threshold.
score_thr (int): Score threshold.
max_num (int): Max number of bboxes after nms.
cfg (:obj:`ConfigDict`): Training or testing config.
cfg (:obj:`ConfigDict`): Training or testing config.
input_meta (dict): Contain pcd and img's meta info.
input_meta (dict): Contain pcd and img's meta info.
...
@@ -339,9 +337,9 @@ class PartA2RPNHead(Anchor3DHead):
...
@@ -339,9 +337,9 @@ class PartA2RPNHead(Anchor3DHead):
scores
=
torch
.
cat
(
scores
,
dim
=
0
)
scores
=
torch
.
cat
(
scores
,
dim
=
0
)
cls_scores
=
torch
.
cat
(
cls_scores
,
dim
=
0
)
cls_scores
=
torch
.
cat
(
cls_scores
,
dim
=
0
)
labels
=
torch
.
cat
(
labels
,
dim
=
0
)
labels
=
torch
.
cat
(
labels
,
dim
=
0
)
if
bboxes
.
shape
[
0
]
>
max_num
:
if
bboxes
.
shape
[
0
]
>
cfg
.
nms_post
:
_
,
inds
=
scores
.
sort
(
descending
=
True
)
_
,
inds
=
scores
.
sort
(
descending
=
True
)
inds
=
inds
[:
max_num
]
inds
=
inds
[:
cfg
.
nms_post
]
bboxes
=
bboxes
[
inds
,
:]
bboxes
=
bboxes
[
inds
,
:]
labels
=
labels
[
inds
]
labels
=
labels
[
inds
]
scores
=
scores
[
inds
]
scores
=
scores
[
inds
]
...
...
mmdet3d/models/dense_heads/point_rpn_head.py
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
from
mmengine.model
import
BaseModule
from
mmengine.model
import
BaseModule
from
mmengine.structures
import
InstanceData
from
torch
import
Tensor
from
torch
import
nn
as
nn
from
torch
import
nn
as
nn
from
mmdet3d.models.builder
import
build_loss
from
mmdet3d.models.layers
import
nms_bev
,
nms_normal_bev
from
mmdet3d.models.layers
import
nms_bev
,
nms_normal_bev
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet3d.structures
import
xywhr2xyxyr
from
mmdet3d.structures
import
xywhr2xyxyr
from
mmdet3d.structures.bbox_3d
import
(
DepthInstance3DBoxes
,
from
mmdet3d.structures.bbox_3d
import
(
BaseInstance3DBoxes
,
DepthInstance3DBoxes
,
LiDARInstance3DBoxes
)
LiDARInstance3DBoxes
)
from
mmdet3d.structures.det3d_data_sample
import
SampleList
from
mmdet3d.utils.typing
import
InstanceList
from
mmdet.models.utils
import
multi_apply
from
mmdet.models.utils
import
multi_apply
...
@@ -34,15 +40,15 @@ class PointRPNHead(BaseModule):
...
@@ -34,15 +40,15 @@ class PointRPNHead(BaseModule):
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
num_classes
,
num_classes
:
int
,
train_cfg
,
train_cfg
:
dict
,
test_cfg
,
test_cfg
:
dict
,
pred_layer_cfg
=
None
,
pred_layer_cfg
:
Optional
[
dict
]
=
None
,
enlarge_width
=
0.1
,
enlarge_width
:
float
=
0.1
,
cls_loss
=
None
,
cls_loss
:
Optional
[
dict
]
=
None
,
bbox_loss
=
None
,
bbox_loss
:
Optional
[
dict
]
=
None
,
bbox_coder
=
None
,
bbox_coder
:
Optional
[
dict
]
=
None
,
init_cfg
=
None
)
:
init_cfg
:
Optional
[
dict
]
=
None
)
->
None
:
super
().
__init__
(
init_cfg
=
init_cfg
)
super
().
__init__
(
init_cfg
=
init_cfg
)
self
.
num_classes
=
num_classes
self
.
num_classes
=
num_classes
self
.
train_cfg
=
train_cfg
self
.
train_cfg
=
train_cfg
...
@@ -50,8 +56,8 @@ class PointRPNHead(BaseModule):
...
@@ -50,8 +56,8 @@ class PointRPNHead(BaseModule):
self
.
enlarge_width
=
enlarge_width
self
.
enlarge_width
=
enlarge_width
# build loss function
# build loss function
self
.
bbox_loss
=
build
_loss
(
bbox_loss
)
self
.
bbox_loss
=
MODELS
.
build
(
bbox_loss
)
self
.
cls_loss
=
build
_loss
(
cls_loss
)
self
.
cls_loss
=
MODELS
.
build
(
cls_loss
)
# build box coder
# build box coder
self
.
bbox_coder
=
TASK_UTILS
.
build
(
bbox_coder
)
self
.
bbox_coder
=
TASK_UTILS
.
build
(
bbox_coder
)
...
@@ -67,7 +73,8 @@ class PointRPNHead(BaseModule):
...
@@ -67,7 +73,8 @@ class PointRPNHead(BaseModule):
input_channels
=
pred_layer_cfg
.
in_channels
,
input_channels
=
pred_layer_cfg
.
in_channels
,
output_channels
=
self
.
_get_reg_out_channels
())
output_channels
=
self
.
_get_reg_out_channels
())
def
_make_fc_layers
(
self
,
fc_cfg
,
input_channels
,
output_channels
):
def
_make_fc_layers
(
self
,
fc_cfg
:
dict
,
input_channels
:
int
,
output_channels
:
int
)
->
nn
.
Sequential
:
"""Make fully connect layers.
"""Make fully connect layers.
Args:
Args:
...
@@ -102,7 +109,7 @@ class PointRPNHead(BaseModule):
...
@@ -102,7 +109,7 @@ class PointRPNHead(BaseModule):
# torch.cos(yaw) (1), torch.sin(yaw) (1)
# torch.cos(yaw) (1), torch.sin(yaw) (1)
return
self
.
bbox_coder
.
code_size
return
self
.
bbox_coder
.
code_size
def
forward
(
self
,
feat_dict
)
:
def
forward
(
self
,
feat_dict
:
dict
)
->
Tuple
[
List
[
Tensor
]]
:
"""Forward pass.
"""Forward pass.
Args:
Args:
...
@@ -124,30 +131,35 @@ class PointRPNHead(BaseModule):
...
@@ -124,30 +131,35 @@ class PointRPNHead(BaseModule):
batch_size
,
-
1
,
self
.
_get_reg_out_channels
())
batch_size
,
-
1
,
self
.
_get_reg_out_channels
())
return
point_box_preds
,
point_cls_preds
return
point_box_preds
,
point_cls_preds
def
loss
(
self
,
def
loss_by_feat
(
bbox_preds
,
self
,
cls_preds
,
bbox_preds
:
List
[
Tensor
],
points
,
cls_preds
:
List
[
Tensor
],
gt_bboxes_3d
,
points
:
List
[
Tensor
],
gt_labels_3d
,
batch_gt_instances_3d
:
InstanceList
,
img_metas
=
None
):
batch_input_metas
:
Optional
[
List
[
dict
]]
=
None
,
batch_gt_instances_ignore
:
Optional
[
InstanceList
]
=
None
)
->
Dict
:
"""Compute loss.
"""Compute loss.
Args:
Args:
bbox_preds (dict): Predictions from forward of PointRCNN RPN_Head.
bbox_preds (list[torch.Tensor]): Predictions from forward of
cls_preds (dict): Classification from forward of PointRCNN
PointRCNN RPN_Head.
RPN_Head.
cls_preds (list[torch.Tensor]): Classification from forward of
PointRCNN RPN_Head.
points (list[torch.Tensor]): Input points.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
bboxes of each sample.
gt_instances_3d. It usually includes ``bboxes_3d`` and
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
``labels_3d`` attributes.
img_metas (list[dict], Optional): Contain pcd and img's meta info.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
data that is ignored during training and testing.
Defaults to None.
Defaults to None.
Returns:
Returns:
dict: Losses of PointRCNN RPN module.
dict: Losses of PointRCNN RPN module.
"""
"""
targets
=
self
.
get_targets
(
points
,
gt_bboxes_3d
,
gt_label
s_3d
)
targets
=
self
.
get_targets
(
points
,
batch_gt_instance
s_3d
)
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
box_loss_weights
,
point_targets
)
=
targets
box_loss_weights
,
point_targets
)
=
targets
...
@@ -169,25 +181,25 @@ class PointRPNHead(BaseModule):
...
@@ -169,25 +181,25 @@ class PointRPNHead(BaseModule):
return
losses
return
losses
def
get_targets
(
self
,
points
,
gt_bboxes_3d
,
gt_labels_3d
):
def
get_targets
(
self
,
points
:
List
[
Tensor
],
batch_gt_instances_3d
:
InstanceList
)
->
Tuple
[
Tensor
]:
"""Generate targets of PointRCNN RPN head.
"""Generate targets of PointRCNN RPN head.
Args:
Args:
points (list[torch.Tensor]): Points
of each
batch.
points (list[torch.Tensor]): Points
in one
batch.
gt_bbox
es_3d (list[:obj:`
Base
Instance
3DBoxes`]): Ground truth
batch_gt_instanc
es_3d (list[:obj:`Instance
Data`]): Batch of
bboxes of each batch.
gt_instances_3d. It usually includes ``bboxes_3d`` and
gt_
labels_3d
(list[torch.Tensor]): Labels of each batch
.
``
labels_3d
`` attributes
.
Returns:
Returns:
tuple[torch.Tensor]: Targets of PointRCNN RPN head.
tuple[torch.Tensor]: Targets of PointRCNN RPN head.
"""
"""
# find empty example
gt_labels_3d
=
[
for
index
in
range
(
len
(
gt_labels_3d
)):
instances
.
labels_3d
for
instances
in
batch_gt_instances_3d
if
len
(
gt_labels_3d
[
index
])
==
0
:
]
fake_box
=
gt_bboxes_3d
[
index
].
tensor
.
new_zeros
(
gt_bboxes_3d
=
[
1
,
gt_bboxes_3d
[
index
].
tensor
.
shape
[
-
1
])
instances
.
bboxes_3d
for
instances
in
batch_gt_instances_3d
gt_bboxes_3d
[
index
]
=
gt_bboxes_3d
[
index
].
new_box
(
fake_box
)
]
gt_labels_3d
[
index
]
=
gt_labels_3d
[
index
].
new_zeros
(
1
)
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
point_targets
)
=
multi_apply
(
self
.
get_targets_single
,
points
,
point_targets
)
=
multi_apply
(
self
.
get_targets_single
,
points
,
...
@@ -202,7 +214,9 @@ class PointRPNHead(BaseModule):
...
@@ -202,7 +214,9 @@ class PointRPNHead(BaseModule):
return
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
return
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
box_loss_weights
,
point_targets
)
box_loss_weights
,
point_targets
)
def
get_targets_single
(
self
,
points
,
gt_bboxes_3d
,
gt_labels_3d
):
def
get_targets_single
(
self
,
points
:
Tensor
,
gt_bboxes_3d
:
BaseInstance3DBoxes
,
gt_labels_3d
:
Tensor
)
->
Tuple
[
Tensor
]:
"""Generate targets of PointRCNN RPN head for single batch.
"""Generate targets of PointRCNN RPN head for single batch.
Args:
Args:
...
@@ -243,24 +257,34 @@ class PointRPNHead(BaseModule):
...
@@ -243,24 +257,34 @@ class PointRPNHead(BaseModule):
return
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
return
(
bbox_targets
,
mask_targets
,
positive_mask
,
negative_mask
,
point_targets
)
point_targets
)
def
get_bboxes
(
self
,
def
predict_by_feat
(
self
,
points
:
Tensor
,
bbox_preds
:
List
[
Tensor
],
points
,
cls_preds
:
List
[
Tensor
],
batch_input_metas
:
List
[
dict
],
bbox_preds
,
cfg
:
Optional
[
dict
])
->
InstanceList
:
cls_preds
,
input_metas
,
rescale
=
False
):
"""Generate bboxes from RPN head predictions.
"""Generate bboxes from RPN head predictions.
Args:
Args:
points (torch.Tensor): Input points.
points (torch.Tensor): Input points.
bbox_preds (dict): Regression predictions from PointRCNN head.
bbox_preds (list[tensor]): Regression predictions from PointRCNN
cls_preds (dict): Class scores predictions from PointRCNN head.
head.
input_metas (list[dict]): Point cloud and image's meta info.
cls_preds (list[tensor]): Class scores predictions from PointRCNN
rescale (bool, optional): Whether to rescale bboxes.
head.
Defaults to False.
batch_input_metas (list[dict]): Batch inputs meta info.
cfg (ConfigDict, optional): Test / postprocessing
configuration.
Returns:
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
list[:obj:`InstanceData`]: Detection results of each sample
after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
contains a tensor with shape (num_instances, C), where
C >= 7.
- cls_preds (torch.Tensor): Class score of each bbox.
"""
"""
sem_scores
=
cls_preds
.
sigmoid
()
sem_scores
=
cls_preds
.
sigmoid
()
obj_scores
=
sem_scores
.
max
(
-
1
)[
0
]
obj_scores
=
sem_scores
.
max
(
-
1
)[
0
]
...
@@ -271,30 +295,40 @@ class PointRPNHead(BaseModule):
...
@@ -271,30 +295,40 @@ class PointRPNHead(BaseModule):
for
b
in
range
(
batch_size
):
for
b
in
range
(
batch_size
):
bbox3d
=
self
.
bbox_coder
.
decode
(
bbox_preds
[
b
],
points
[
b
,
...,
:
3
],
bbox3d
=
self
.
bbox_coder
.
decode
(
bbox_preds
[
b
],
points
[
b
,
...,
:
3
],
object_class
[
b
])
object_class
[
b
])
mask
=
~
bbox3d
.
sum
(
dim
=
1
).
isinf
()
bbox_selected
,
score_selected
,
labels
,
cls_preds_selected
=
\
bbox_selected
,
score_selected
,
labels
,
cls_preds_selected
=
\
self
.
class_agnostic_nms
(
obj_scores
[
b
],
sem_scores
[
b
],
bbox3d
,
self
.
class_agnostic_nms
(
obj_scores
[
b
][
mask
],
points
[
b
,
...,
:
3
],
input_metas
[
b
])
sem_scores
[
b
][
mask
,
:],
bbox
=
input_metas
[
b
][
'box_type_3d'
](
bbox3d
[
mask
,
:],
bbox_selected
.
clone
(),
points
[
b
,
...,
:
3
][
mask
,
:],
box_dim
=
bbox_selected
.
shape
[
-
1
],
batch_input_metas
[
b
],
with_yaw
=
True
)
cfg
.
nms_cfg
)
results
.
append
((
bbox
,
score_selected
,
labels
,
cls_preds_selected
))
bbox_selected
=
batch_input_metas
[
b
][
'box_type_3d'
](
bbox_selected
,
box_dim
=
bbox_selected
.
shape
[
-
1
])
result
=
InstanceData
()
result
.
bboxes_3d
=
bbox_selected
result
.
scores_3d
=
score_selected
result
.
labels_3d
=
labels
result
.
cls_preds
=
cls_preds_selected
results
.
append
(
result
)
return
results
return
results
def
class_agnostic_nms
(
self
,
obj_scores
,
sem_scores
,
bbox
,
points
,
def
class_agnostic_nms
(
self
,
obj_scores
:
Tensor
,
sem_scores
:
Tensor
,
input_meta
):
bbox
:
Tensor
,
points
:
Tensor
,
input_meta
:
Dict
,
nms_cfg
:
Dict
)
->
Tuple
[
Tensor
]:
"""Class agnostic nms.
"""Class agnostic nms.
Args:
Args:
obj_scores (torch.Tensor): Objectness score of bounding boxes.
obj_scores (torch.Tensor): Objectness score of bounding boxes.
sem_scores (torch.Tensor): Semantic class score of bounding boxes.
sem_scores (torch.Tensor): Semantic class score of bounding boxes.
bbox (torch.Tensor): Predicted bounding boxes.
bbox (torch.Tensor): Predicted bounding boxes.
points (torch.Tensor): Input points.
input_meta (dict): Contain pcd and img's meta info.
nms_cfg (dict): NMS config dict.
Returns:
Returns:
tuple[torch.Tensor]: Bounding boxes, scores and labels.
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
"""
nms_cfg
=
self
.
test_cfg
.
nms_cfg
if
not
self
.
training
\
else
self
.
train_cfg
.
nms_cfg
if
nms_cfg
.
use_rotate_nms
:
if
nms_cfg
.
use_rotate_nms
:
nms_func
=
nms_bev
nms_func
=
nms_bev
else
:
else
:
...
@@ -323,14 +357,14 @@ class PointRPNHead(BaseModule):
...
@@ -323,14 +357,14 @@ class PointRPNHead(BaseModule):
bbox
=
bbox
[
nonempty_box_mask
]
bbox
=
bbox
[
nonempty_box_mask
]
if
self
.
test
_cfg
.
score_thr
is
not
None
:
if
nms
_cfg
.
score_thr
is
not
None
:
score_thr
=
self
.
test
_cfg
.
score_thr
score_thr
=
nms
_cfg
.
score_thr
keep
=
(
obj_scores
>=
score_thr
)
keep
=
(
obj_scores
>=
score_thr
)
obj_scores
=
obj_scores
[
keep
]
obj_scores
=
obj_scores
[
keep
]
sem_scores
=
sem_scores
[
keep
]
sem_scores
=
sem_scores
[
keep
]
bbox
=
bbox
.
tensor
[
keep
]
bbox
=
bbox
.
tensor
[
keep
]
if
obj_scores
.
shape
[
0
]
>
0
:
if
bbox
.
tensor
.
shape
[
0
]
>
0
:
topk
=
min
(
nms_cfg
.
nms_pre
,
obj_scores
.
shape
[
0
])
topk
=
min
(
nms_cfg
.
nms_pre
,
obj_scores
.
shape
[
0
])
obj_scores_nms
,
indices
=
torch
.
topk
(
obj_scores
,
k
=
topk
)
obj_scores_nms
,
indices
=
torch
.
topk
(
obj_scores
,
k
=
topk
)
bbox_for_nms
=
xywhr2xyxyr
(
bbox
[
indices
].
bev
)
bbox_for_nms
=
xywhr2xyxyr
(
bbox
[
indices
].
bev
)
...
@@ -343,15 +377,22 @@ class PointRPNHead(BaseModule):
...
@@ -343,15 +377,22 @@ class PointRPNHead(BaseModule):
score_selected
=
obj_scores_nms
[
keep
]
score_selected
=
obj_scores_nms
[
keep
]
cls_preds
=
sem_scores_nms
[
keep
]
cls_preds
=
sem_scores_nms
[
keep
]
labels
=
torch
.
argmax
(
cls_preds
,
-
1
)
labels
=
torch
.
argmax
(
cls_preds
,
-
1
)
if bbox_selected.shape[0] > nms_cfg.nms_post:
    # Keep only the `nms_post` highest-scoring boxes. The limit lives on
    # `nms_cfg`; `score_selected` is a score tensor and has no
    # `nms_post` attribute.
    _, inds = score_selected.sort(descending=True)
    inds = inds[:nms_cfg.nms_post]
    bbox_selected = bbox_selected[inds, :]
    labels = labels[inds]
    score_selected = score_selected[inds]
    cls_preds = cls_preds[inds, :]
else
:
else
:
bbox_selected
=
bbox
.
tensor
bbox_selected
=
bbox
.
tensor
score_selected
=
obj_scores
.
new_zeros
([
0
])
score_selected
=
obj_scores
.
new_zeros
([
0
])
labels
=
obj_scores
.
new_zeros
([
0
])
labels
=
obj_scores
.
new_zeros
([
0
])
cls_preds
=
obj_scores
.
new_zeros
([
0
,
sem_scores
.
shape
[
-
1
]])
cls_preds
=
obj_scores
.
new_zeros
([
0
,
sem_scores
.
shape
[
-
1
]])
return
bbox_selected
,
score_selected
,
labels
,
cls_preds
return
bbox_selected
,
score_selected
,
labels
,
cls_preds
def
_assign_targets_by_points_inside
(
self
,
bboxes_3d
,
points
):
def
_assign_targets_by_points_inside
(
self
,
bboxes_3d
:
BaseInstance3DBoxes
,
points
:
Tensor
)
->
Tuple
[
Tensor
]:
"""Compute assignment by checking whether point is inside bbox.
"""Compute assignment by checking whether point is inside bbox.
Args:
Args:
...
@@ -379,3 +420,92 @@ class PointRPNHead(BaseModule):
...
@@ -379,3 +420,92 @@ class PointRPNHead(BaseModule):
raise
NotImplementedError
(
'Unsupported bbox type!'
)
raise
NotImplementedError
(
'Unsupported bbox type!'
)
return
points_mask
,
assignment
return
points_mask
,
assignment
def predict(self, feats_dict: Dict,
            batch_data_samples: SampleList) -> InstanceList:
    """Run the head on upstream features and decode detection results.

    The ``'raw_points'`` entry is removed from ``feats_dict`` before the
    forward pass, and the classification predictions are written back
    under ``'points_cls_preds'``.

    Args:
        feats_dict (dict): Contains features from the first stage. Must
            hold a ``'raw_points'`` entry; it is modified in place.
        batch_data_samples (List[:obj:`Det3DDataSample`]): The data
            samples. Only their ``metainfo`` is read here.

    Returns:
        list[:obj:`InstanceData`]: Detection results of each sample
        after the post process, as returned by ``predict_by_feat``
        called with ``self.test_cfg``.
    """
    metas = []
    for sample in batch_data_samples:
        metas.append(sample.metainfo)

    # The raw points are consumed by the decoding step only, so they are
    # taken out of the feature dict before it is fed to the head.
    points = feats_dict.pop('raw_points')
    box_out, cls_out = self(feats_dict)

    results = self.predict_by_feat(
        points,
        box_out,
        cls_out,
        batch_input_metas=metas,
        cfg=self.test_cfg)
    feats_dict['points_cls_preds'] = cls_out
    return results
def loss_and_predict(self,
                     feats_dict: Dict,
                     batch_data_samples: SampleList,
                     proposal_cfg: Optional[dict] = None,
                     **kwargs) -> Tuple[dict, InstanceList]:
    """Perform forward propagation of the head, then calculate loss and
    predictions from the features and data samples.

    The ``'raw_points'`` entry is removed from ``feats_dict`` before the
    forward pass, and the classification predictions are written back
    under ``'points_cls_preds'``.

    Args:
        feats_dict (dict): Contains features from the first stage. Must
            hold a ``'raw_points'`` entry; it is modified in place.
        batch_data_samples (List[:obj:`Det3DDataSample`]): The data
            samples. ``metainfo``, ``gt_instances_3d`` and the optional
            ``ignored_instances`` of each sample are read.
        proposal_cfg (ConfigDict, optional): Proposal config, forwarded
            unchanged to ``predict_by_feat``.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        tuple: the return value is a tuple contains:

        - losses: (dict[str, Tensor]): A dictionary of loss components.
        - predictions (list[:obj:`InstanceData`]): Detection
          results of each sample after the post process.
    """
    import warnings

    batch_gt_instances_3d = []
    batch_gt_instances_ignore = []
    batch_input_metas = []
    for data_sample in batch_data_samples:
        batch_input_metas.append(data_sample.metainfo)
        batch_gt_instances_3d.append(data_sample.gt_instances_3d)
        batch_gt_instances_ignore.append(
            data_sample.get('ignored_instances', None))

    raw_points = feats_dict.pop('raw_points')
    bbox_preds, cls_preds = self(feats_dict)

    losses = self.loss_by_feat(bbox_preds, cls_preds, raw_points,
                               batch_gt_instances_3d, batch_input_metas,
                               batch_gt_instances_ignore)

    # NOTE(review): `proposal_cfg=None` is forwarded as-is; confirm that
    # callers always pass a config if `predict_by_feat` requires one.
    predictions = self.predict_by_feat(
        raw_points,
        bbox_preds,
        cls_preds,
        batch_input_metas=batch_input_metas,
        cfg=proposal_cfg)
    feats_dict['points_cls_preds'] = cls_preds

    # Report non-finite proposals through the warnings machinery instead
    # of dumping the predictions to stdout; an empty prediction list is
    # tolerated.
    if any(pred.bboxes_3d.tensor.isinf().any() for pred in predictions):
        warnings.warn(
            'Inf values found in the 3D boxes predicted by the RPN head.',
            stacklevel=2)
    return losses, predictions
mmdet3d/models/dense_heads/ssd_3d_head.py
View file @
6c03a971
...
@@ -14,7 +14,6 @@ from mmdet3d.structures.bbox_3d import (DepthInstance3DBoxes,
...
@@ -14,7 +14,6 @@ from mmdet3d.structures.bbox_3d import (DepthInstance3DBoxes,
LiDARInstance3DBoxes
,
LiDARInstance3DBoxes
,
rotation_3d_in_axis
)
rotation_3d_in_axis
)
from
mmdet.models.utils
import
multi_apply
from
mmdet.models.utils
import
multi_apply
from
..builder
import
build_loss
from
.vote_head
import
VoteHead
from
.vote_head
import
VoteHead
...
@@ -76,8 +75,8 @@ class SSD3DHead(VoteHead):
...
@@ -76,8 +75,8 @@ class SSD3DHead(VoteHead):
size_res_loss
=
size_res_loss
,
size_res_loss
=
size_res_loss
,
semantic_loss
=
None
,
semantic_loss
=
None
,
init_cfg
=
init_cfg
)
init_cfg
=
init_cfg
)
self
.
corner_loss
=
build
_loss
(
corner_loss
)
self
.
corner_loss
=
MODELS
.
build
(
corner_loss
)
self
.
vote_loss
=
build
_loss
(
vote_loss
)
self
.
vote_loss
=
MODELS
.
build
(
vote_loss
)
self
.
num_candidates
=
vote_module_cfg
[
'num_points'
]
self
.
num_candidates
=
vote_module_cfg
[
'num_points'
]
def
_get_cls_out_channels
(
self
)
->
int
:
def
_get_cls_out_channels
(
self
)
->
int
:
...
...
mmdet3d/models/detectors/__init__.py
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
.base
import
Base3DDetector
from
.base
import
Base3DDetector
from
.centerpoint
import
CenterPoint
from
.centerpoint
import
CenterPoint
from
.dfm
import
DfM
from
.dynamic_voxelnet
import
DynamicVoxelNet
from
.dynamic_voxelnet
import
DynamicVoxelNet
from
.fcos_mono3d
import
FCOSMono3D
from
.fcos_mono3d
import
FCOSMono3D
from
.groupfree3dnet
import
GroupFree3DNet
from
.groupfree3dnet
import
GroupFree3DNet
from
.h3dnet
import
H3DNet
from
.h3dnet
import
H3DNet
from
.imvotenet
import
ImVoteNet
from
.imvotenet
import
ImVoteNet
from
.imvoxelnet
import
ImVoxelNet
from
.imvoxelnet
import
ImVoxelNet
from
.multiview_dfm
import
MultiViewDfM
from
.mvx_faster_rcnn
import
DynamicMVXFasterRCNN
,
MVXFasterRCNN
from
.mvx_faster_rcnn
import
DynamicMVXFasterRCNN
,
MVXFasterRCNN
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.mvx_two_stage
import
MVXTwoStageDetector
from
.parta2
import
PartA2
from
.parta2
import
PartA2
...
@@ -19,9 +21,25 @@ from .votenet import VoteNet
...
@@ -19,9 +21,25 @@ from .votenet import VoteNet
from
.voxelnet
import
VoxelNet
from
.voxelnet
import
VoxelNet
__all__
=
[
__all__
=
[
'Base3DDetector'
,
'VoxelNet'
,
'DynamicVoxelNet'
,
'MVXTwoStageDetector'
,
'Base3DDetector'
,
'DynamicMVXFasterRCNN'
,
'MVXFasterRCNN'
,
'PartA2'
,
'VoteNet'
,
'H3DNet'
,
'DfM'
,
'CenterPoint'
,
'SSD3DNet'
,
'ImVoteNet'
,
'SingleStageMono3DDetector'
,
'VoxelNet'
,
'FCOSMono3D'
,
'ImVoxelNet'
,
'GroupFree3DNet'
,
'PointRCNN'
,
'SMOKEMono3D'
,
'DynamicVoxelNet'
,
'SASSD'
'MVXTwoStageDetector'
,
'DynamicMVXFasterRCNN'
,
'MVXFasterRCNN'
,
'MultiViewDfM'
,
'PartA2'
,
'VoteNet'
,
'H3DNet'
,
'CenterPoint'
,
'SSD3DNet'
,
'ImVoteNet'
,
'SingleStageMono3DDetector'
,
'FCOSMono3D'
,
'ImVoxelNet'
,
'GroupFree3DNet'
,
'PointRCNN'
,
'SMOKEMono3D'
,
'SASSD'
,
]
]
mmdet3d/models/detectors/base.py
View file @
6c03a971
...
@@ -89,7 +89,7 @@ class Base3DDetector(BaseDetector):
...
@@ -89,7 +89,7 @@ class Base3DDetector(BaseDetector):
raise
RuntimeError
(
f
'Invalid mode "
{
mode
}
". '
raise
RuntimeError
(
f
'Invalid mode "
{
mode
}
". '
'Only supports loss, predict and tensor mode'
)
'Only supports loss, predict and tensor mode'
)
def
convert
_to_datasample
(
def
add_pred
_to_datasample
(
self
,
self
,
data_samples
:
SampleList
,
data_samples
:
SampleList
,
data_instances_3d
:
OptInstanceList
=
None
,
data_instances_3d
:
OptInstanceList
=
None
,
...
...
mmdet3d/models/detectors/dfm.py
0 → 100644
View file @
6c03a971
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmdet3d.registry
import
MODELS
from
mmdet3d.structures.ops
import
bbox3d2result
from
mmdet3d.utils
import
ConfigType
from
mmdet.models.detectors
import
BaseDetector
from
..builder
import
build_backbone
,
build_head
,
build_neck
@MODELS.register_module()
class DfM(BaseDetector):
    r"""`Monocular 3D Object Detection with Depth from Motion.
    <https://arxiv.org/abs/2207.12988>`_.

    Args:
        backbone (:obj:`ConfigDict` or dict): The backbone config.
        neck (:obj:`ConfigDict` or dict): The neck config.
        backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
            config.
        backbone_3d (:obj:`ConfigDict` or dict): The 3D backbone config.
        neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
        bbox_head_3d (:obj:`ConfigDict` or dict): The 3D bbox head config.
        neck_2d (:obj:`ConfigDict` or dict, optional): The 2D neck config
            for 2D object detection. Defaults to None.
        bbox_head_2d (:obj:`ConfigDict` or dict, optional): The 2D bbox
            head config for 2D object detection. Defaults to None.
        depth_head_2d (:obj:`ConfigDict` or dict, optional): The 2D depth
            head config for depth estimation in fov space. Defaults to None.
        depth_head (:obj:`ConfigDict` or dict, optional): The depth head
            config for depth estimation in 3D voxel projected to fov space.
            Defaults to None.
        train_cfg (:obj:`ConfigDict` or dict, optional): Config dict of
            training hyper-parameters. Defaults to None.
        test_cfg (:obj:`ConfigDict` or dict, optional): Config dict of test
            hyper-parameters. Defaults to None.
        pretrained (:obj:`ConfigDict` or dict, optional): The pretrained
            config. Not read by this class. Defaults to None.
        init_cfg (:obj:`ConfigDict` or dict, optional): The initialization
            config. Defaults to None.
    """

    def __init__(self,
                 backbone: ConfigType,
                 neck: ConfigType,
                 backbone_stereo: ConfigType,
                 backbone_3d: ConfigType,
                 neck_3d: ConfigType,
                 bbox_head_3d: ConfigType,
                 neck_2d=None,
                 bbox_head_2d=None,
                 depth_head_2d=None,
                 depth_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.backbone = build_backbone(backbone)
        self.neck = build_neck(neck)
        if backbone_stereo is not None:
            # The stereo backbone must agree with the neck on these two
            # settings, so they are copied from the built neck into the
            # stereo backbone config before it is built.
            backbone_stereo.update(cat_img_feature=self.neck.cat_img_feature)
            backbone_stereo.update(in_sem_channels=self.neck.sem_channels[-1])
            self.backbone_stereo = build_backbone(backbone_stereo)
            assert self.neck.cat_img_feature == \
                self.backbone_stereo.cat_img_feature
            assert self.neck.sem_channels[
                -1] == self.backbone_stereo.in_sem_channels
        if backbone_3d is not None:
            self.backbone_3d = build_backbone(backbone_3d)
        if neck_3d is not None:
            self.neck_3d = build_neck(neck_3d)
        if neck_2d is not None:
            self.neck_2d = build_neck(neck_2d)
        if bbox_head_2d is not None:
            self.bbox_head_2d = build_head(bbox_head_2d)
        if depth_head_2d is not None:
            self.depth_head_2d = build_head(depth_head_2d)
        if depth_head is not None:
            self.depth_head = build_head(depth_head)
            self.depth_samples = self.depth_head.depth_samples
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # The 3D head receives the train/test configs through its own config
        # dict (note: this mutates the dict passed in by the caller).
        bbox_head_3d.update(train_cfg=train_cfg)
        bbox_head_3d.update(test_cfg=test_cfg)
        self.bbox_head_3d = build_head(bbox_head_3d)

    @property
    def with_backbone_3d(self):
        """Whether the detector has a 3D backbone."""
        return hasattr(self, 'backbone_3d') and self.backbone_3d is not None

    @property
    def with_neck_3d(self):
        """Whether the detector has a 3D neck."""
        return hasattr(self, 'neck_3d') and self.neck_3d is not None

    @property
    def with_neck_2d(self):
        """Whether the detector has a 2D neck."""
        return hasattr(self, 'neck_2d') and self.neck_2d is not None

    @property
    def with_bbox_head_2d(self):
        """Whether the detector has a 2D detection head."""
        return hasattr(self, 'bbox_head_2d') and self.bbox_head_2d is not None

    @property
    def with_depth_head_2d(self):
        """Whether the detector has a image-based depth head."""
        return hasattr(self,
                       'depth_head_2d') and self.depth_head_2d is not None

    @property
    def with_depth_head(self):
        """Whether the detector has a frustum-based depth head."""
        return hasattr(self, 'depth_head') and self.depth_head is not None

    def extract_feat(self, img, img_metas):
        """Feature extraction for perspective-view images.

        As a side effect, the relative pose from the current frame to each
        previous frame is stored in every ``img_meta`` under the key
        ``'cur2prevs'``.

        Args:
            img (torch.Tensor): Images of shape [B, N, C_in, H, W].
            img_metas (list): Image meta information. Each element corresponds
                to a group of images. len(img_metas) == B. Each element must
                provide ``'cam2global'`` and ``'sweep_img_metas'`` (whose
                items also provide ``'cam2global'``).

        Returns:
            torch.Tensor: bev feature with shape [B, C_out, N_y, N_x].
        """
        # split input img into current and previous ones
        # (the 5-way unpack also enforces a 5-D input)
        batch_size, N, _, _, _ = img.shape
        cur_imgs = img[:, 0]
        prev_imgs = img[:, 1]  # TODO: to support multiple prev imgs
        # 2D backbone for feature extraction
        cur_feats = self.backbone(cur_imgs)
        cur_feats = [cur_imgs] + list(cur_feats)
        prev_feats = self.backbone(prev_imgs)
        prev_feats = [prev_imgs] + list(prev_feats)
        # SPP module as the feature neck
        cur_stereo_feat, cur_sem_feat = self.neck(cur_feats)
        prev_stereo_feat, _ = self.neck(prev_feats)
        # derive cur2prevs
        cur_pose = torch.tensor(
            [img_meta['cam2global'] for img_meta in img_metas],
            device=img.device)[:, None, :, :]  # (B, 1, 4, 4)
        prev_poses = []
        for img_meta in img_metas:
            sweep_img_metas = img_meta['sweep_img_metas']
            prev_poses.append([
                sweep_img_meta['cam2global']
                for sweep_img_meta in sweep_img_metas
            ])
        prev_poses = torch.tensor(prev_poses, device=img.device)
        # Pad the poses into 4x4 identity matrices. ``repeat`` is used instead
        # of ``expand`` because ``expand`` returns a stride-0 view, and
        # writing into such a view in place raises a RuntimeError whenever
        # ``.to(device)`` does not copy (e.g. when running on CPU).
        identity = torch.eye(4, device=img.device)
        pad_prev_cam2global = identity.repeat(batch_size, N - 1, 1, 1)
        pad_prev_cam2global[:, :, :prev_poses.shape[-2], :prev_poses.
                            shape[-1]] = prev_poses
        pad_cur_cam2global = identity.repeat(batch_size, 1, 1, 1)
        pad_cur_cam2global[:, :, :cur_pose.shape[-2], :cur_pose.
                           shape[-1]] = cur_pose
        # (B, N-1, 4, 4) * (B, 1, 4, 4) -> (B, N-1, 4, 4)
        # torch.linalg.solve is faster and more numerically stable
        # than torch.matmul(torch.linalg.inv(A), B)
        # empirical results show that torch.linalg.solve can derive
        # almost the same result with np.linalg.inv
        # while torch.linalg.inv can not
        cur2prevs = torch.linalg.solve(pad_prev_cam2global, pad_cur_cam2global)
        for meta_idx, img_meta in enumerate(img_metas):
            img_meta['cur2prevs'] = cur2prevs[meta_idx]
        # stereo backbone for depth estimation
        # volume_feat: (batch_size, Cv, Nz, Ny, Nx)
        volume_feat = self.backbone_stereo(cur_stereo_feat, prev_stereo_feat,
                                           img_metas, cur_sem_feat)
        # height compression
        _, Cv, Nz, Ny, Nx = volume_feat.shape
        bev_feat = volume_feat.view(batch_size, Cv * Nz, Ny, Nx)
        # neck_3d returns two features; only the second one is used
        _, bev_feat = self.neck_3d(bev_feat)
        return bev_feat

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      depth_img=None,
                      **kwargs):
        """Forward function for training.

        Args:
            img (torch.Tensor): Input images, forwarded to
                :meth:`extract_feat`.
            img_metas (list): Image metas.
            gt_bboxes_3d: Ground-truth 3D boxes, passed to the 3D head loss.
            gt_labels_3d: Ground-truth 3D labels, passed to the 3D head loss.
            depth_img (optional): Currently unused. Defaults to None.

        Returns:
            The losses returned by ``bbox_head_3d.loss``.
        """
        bev_feat = self.extract_feat(img, img_metas)
        outs = self.bbox_head_3d([bev_feat])
        losses = self.bbox_head_3d.loss(*outs, gt_bboxes_3d, gt_labels_3d,
                                        img_metas)
        # TODO: loss_dense_depth, loss_2d, loss_imitation
        return losses

    def forward_test(self, img, img_metas, **kwargs):
        """Forward of testing.

        Args:
            img (torch.Tensor): Input images of shape [B, N, C_in, H, W]
                (the shape unpacked by :meth:`extract_feat`).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        # not supporting aug_test for now
        return self.simple_test(img, img_metas)

    def simple_test(self, img, img_metas):
        """Simple inference forward without test time augmentation.

        Args:
            img (torch.Tensor): Input images, forwarded to
                :meth:`extract_feat`.
            img_metas (list): Image metas.

        Returns:
            list[dict]: One result per element of the head's bbox list, each
            tagged with ``'pseudo_lidar': True``.
        """
        bev_feat = self.extract_feat(img, img_metas)
        # bbox_head takes a list of feature from different levels as input
        # so need [bev_feat]
        outs = self.bbox_head_3d([bev_feat])
        bbox_list = self.bbox_head_3d.get_bboxes(*outs, img_metas)
        bbox_results = [
            bbox3d2result(det_bboxes, det_scores, det_labels)
            for det_bboxes, det_scores, det_labels in bbox_list
        ]
        # add pseudo-lidar label to each pred_dict for post-processing
        for bbox_result in bbox_results:
            bbox_result['pseudo_lidar'] = True
        return bbox_results

    def aug_test(self, imgs, img_metas, **kwargs):
        """Test with augmentations.

        Not implemented; always raises ``NotImplementedError``.

        Args:
            imgs (list[torch.Tensor]): Input images.
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        raise NotImplementedError
mmdet3d/models/detectors/dynamic_voxelnet.py
View file @
6c03a971
...
@@ -10,7 +10,8 @@ from .voxelnet import VoxelNet
...
@@ -10,7 +10,8 @@ from .voxelnet import VoxelNet
@
MODELS
.
register_module
()
@
MODELS
.
register_module
()
class
DynamicVoxelNet
(
VoxelNet
):
class
DynamicVoxelNet
(
VoxelNet
):
r
"""VoxelNet using `dynamic voxelization <https://arxiv.org/abs/1910.06528>`_.
r
"""VoxelNet using `dynamic voxelization
<https://arxiv.org/abs/1910.06528>`_.
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
...
...
mmdet3d/models/detectors/fcos_mono3d.py
View file @
6c03a971
...
@@ -95,6 +95,7 @@ class FCOSMono3D(SingleStageMono3DDetector):
...
@@ -95,6 +95,7 @@ class FCOSMono3D(SingleStageMono3DDetector):
x
=
self
.
extract_feat
(
batch_inputs_dict
)
x
=
self
.
extract_feat
(
batch_inputs_dict
)
results_list
,
results_list_2d
=
self
.
bbox_head
.
predict
(
results_list
,
results_list_2d
=
self
.
bbox_head
.
predict
(
x
,
batch_data_samples
,
rescale
=
rescale
)
x
,
batch_data_samples
,
rescale
=
rescale
)
predictions
=
self
.
convert_to_datasample
(
batch_data_samples
,
predictions
=
self
.
add_pred_to_datasample
(
batch_data_samples
,
results_list
,
results_list_2d
)
results_list
,
results_list_2d
)
return
predictions
return
predictions
mmdet3d/models/detectors/groupfree3dnet.py
View file @
6c03a971
...
@@ -82,6 +82,6 @@ class GroupFree3DNet(SingleStage3DDetector):
...
@@ -82,6 +82,6 @@ class GroupFree3DNet(SingleStage3DDetector):
points
=
batch_inputs_dict
[
'points'
]
points
=
batch_inputs_dict
[
'points'
]
results_list
=
self
.
bbox_head
.
predict
(
points
,
x
,
batch_data_samples
,
results_list
=
self
.
bbox_head
.
predict
(
points
,
x
,
batch_data_samples
,
**
kwargs
)
**
kwargs
)
predictions
=
self
.
convert
_to_datasample
(
batch_data_samples
,
predictions
=
self
.
add_pred
_to_datasample
(
batch_data_samples
,
results_list
)
results_list
)
return
predictions
return
predictions
mmdet3d/models/detectors/h3dnet.py
View file @
6c03a971
...
@@ -154,4 +154,4 @@ class H3DNet(TwoStage3DDetector):
...
@@ -154,4 +154,4 @@ class H3DNet(TwoStage3DDetector):
feats_dict
,
feats_dict
,
batch_data_samples
,
batch_data_samples
,
suffix
=
'_optimized'
)
suffix
=
'_optimized'
)
return
self
.
convert
_to_datasample
(
batch_data_samples
,
results_list
)
return
self
.
add_pred
_to_datasample
(
batch_data_samples
,
results_list
)
mmdet3d/models/detectors/imvotenet.py
View file @
6c03a971
...
@@ -433,7 +433,7 @@ class ImVoteNet(Base3DDetector):
...
@@ -433,7 +433,7 @@ class ImVoteNet(Base3DDetector):
if
points
is
None
:
if
points
is
None
:
assert
imgs
is
not
None
assert
imgs
is
not
None
results_2d
=
self
.
predict_img_only
(
imgs
,
batch_data_samples
)
results_2d
=
self
.
predict_img_only
(
imgs
,
batch_data_samples
)
return
self
.
convert
_to_datasample
(
return
self
.
add_pred
_to_datasample
(
batch_data_samples
,
data_instances_2d
=
results_2d
)
batch_data_samples
,
data_instances_2d
=
results_2d
)
else
:
else
:
...
@@ -488,7 +488,7 @@ class ImVoteNet(Base3DDetector):
...
@@ -488,7 +488,7 @@ class ImVoteNet(Base3DDetector):
batch_data_samples
,
batch_data_samples
,
rescale
=
True
)
rescale
=
True
)
return
self
.
convert
_to_datasample
(
batch_data_samples
,
results_3d
)
return
self
.
add_pred
_to_datasample
(
batch_data_samples
,
results_3d
)
def
predict_img_only
(
self
,
def
predict_img_only
(
self
,
imgs
:
Tensor
,
imgs
:
Tensor
,
...
...
mmdet3d/models/detectors/imvoxelnet.py
View file @
6c03a971
...
@@ -2,16 +2,17 @@
...
@@ -2,16 +2,17 @@
from
typing
import
List
,
Tuple
,
Union
from
typing
import
List
,
Tuple
,
Union
import
torch
import
torch
from
mmengine.structures
import
InstanceData
from
mmdet3d.models.detectors
import
Base3DDetector
from
mmdet3d.models.layers.fusion_layers.point_fusion
import
point_sample
from
mmdet3d.models.layers.fusion_layers.point_fusion
import
point_sample
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet3d.structures.det3d_data_sample
import
SampleList
from
mmdet3d.structures.det3d_data_sample
import
SampleList
from
mmdet3d.utils
import
ConfigType
,
InstanceList
,
OptConfigType
from
mmdet3d.utils
import
ConfigType
,
OptConfigType
,
OptInstanceList
from
mmdet.models.detectors
import
BaseDetector
@
MODELS
.
register_module
()
@
MODELS
.
register_module
()
class
ImVoxelNet
(
BaseDetector
):
class
ImVoxelNet
(
Base
3D
Detector
):
r
"""`ImVoxelNet <https://arxiv.org/abs/2106.01178>`_.
r
"""`ImVoxelNet <https://arxiv.org/abs/2106.01178>`_.
Args:
Args:
...
@@ -57,31 +58,6 @@ class ImVoxelNet(BaseDetector):
...
@@ -57,31 +58,6 @@ class ImVoxelNet(BaseDetector):
self
.
train_cfg
=
train_cfg
self
.
train_cfg
=
train_cfg
self
.
test_cfg
=
test_cfg
self
.
test_cfg
=
test_cfg
def
convert_to_datasample
(
self
,
data_samples
:
SampleList
,
data_instances
:
InstanceList
)
->
SampleList
:
""" Convert results list to `Det3DDataSample`.
Args:
inputs (list[:obj:`Det3DDataSample`]): The input data.
data_instances (list[:obj:`InstanceData`]): 3D Detection
results of each image.
Returns:
list[:obj:`Det3DDataSample`]: 3D Detection results of the
input images. Each Det3DDataSample usually contain
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
for
data_sample
,
pred_instances_3d
in
zip
(
data_samples
,
data_instances
):
data_sample
.
pred_instances_3d
=
pred_instances_3d
return
data_samples
def
extract_feat
(
self
,
batch_inputs_dict
:
dict
,
def
extract_feat
(
self
,
batch_inputs_dict
:
dict
,
batch_data_samples
:
SampleList
):
batch_data_samples
:
SampleList
):
"""Extract 3d features from the backbone -> fpn -> 3d projection.
"""Extract 3d features from the backbone -> fpn -> 3d projection.
...
@@ -185,8 +161,8 @@ class ImVoxelNet(BaseDetector):
...
@@ -185,8 +161,8 @@ class ImVoxelNet(BaseDetector):
"""
"""
x
=
self
.
extract_feat
(
batch_inputs_dict
,
batch_data_samples
)
x
=
self
.
extract_feat
(
batch_inputs_dict
,
batch_data_samples
)
results_list
=
self
.
bbox_head
.
predict
(
x
,
batch_data_samples
,
**
kwargs
)
results_list
=
self
.
bbox_head
.
predict
(
x
,
batch_data_samples
,
**
kwargs
)
predictions
=
self
.
convert
_to_datasample
(
batch_data_samples
,
predictions
=
self
.
add_pred
_to_datasample
(
batch_data_samples
,
results_list
)
results_list
)
return
predictions
return
predictions
def
_forward
(
self
,
batch_inputs_dict
:
dict
,
batch_data_samples
:
SampleList
,
def
_forward
(
self
,
batch_inputs_dict
:
dict
,
batch_data_samples
:
SampleList
,
...
@@ -209,3 +185,64 @@ class ImVoxelNet(BaseDetector):
...
@@ -209,3 +185,64 @@ class ImVoxelNet(BaseDetector):
x
=
self
.
extract_feat
(
batch_inputs_dict
,
batch_data_samples
)
x
=
self
.
extract_feat
(
batch_inputs_dict
,
batch_data_samples
)
results
=
self
.
bbox_head
.
forward
(
x
)
results
=
self
.
bbox_head
.
forward
(
x
)
return
results
return
results
def convert_to_datasample(
    self,
    data_samples: SampleList,
    data_instances_3d: OptInstanceList = None,
    data_instances_2d: OptInstanceList = None,
) -> SampleList:
    """Attach per-sample prediction results to the given data samples.

    Subclasses may override this to suit multi-modality 3D detectors.

    Args:
        data_samples (list[:obj:`Det3DDataSample`]): The input data.
        data_instances_3d (list[:obj:`InstanceData`], optional): 3D
            detection results of each sample.
        data_instances_2d (list[:obj:`InstanceData`], optional): 2D
            detection results of each sample.

    Returns:
        list[:obj:`Det3DDataSample`]: The same ``data_samples`` list, with
        ``pred_instances_3d`` and ``pred_instances`` set on every element.
        ``pred_instances_3d`` normally contains the following keys.

        - scores_3d (Tensor): Classification scores, has a shape
          (num_instance, )
        - labels_3d (Tensor): Labels of 3D bboxes, has a shape
          (num_instances, ).
        - bboxes_3d (Tensor): Contains a tensor with shape
          (num_instances, C) where C >=7.

        When a model also predicts on images, ``pred_instances`` normally
        contains the following keys.

        - scores (Tensor): Classification scores of image, has a shape
          (num_instance, )
        - labels (Tensor): Predict Labels of 2D bboxes, has a shape
          (num_instances, ).
        - bboxes (Tensor): Contains a tensor with shape
          (num_instances, 4).
    """
    has_2d = data_instances_2d is not None
    has_3d = data_instances_3d is not None
    assert has_2d or has_3d, \
        'please pass at least one type of data_samples'

    # Whichever modality is missing is filled with empty placeholders, one
    # per entry of the modality that was provided.
    if not has_2d:
        data_instances_2d = [
            InstanceData() for _ in range(len(data_instances_3d))
        ]
    if not has_3d:
        data_instances_3d = [
            InstanceData() for _ in range(len(data_instances_2d))
        ]

    for idx, sample in enumerate(data_samples):
        sample.pred_instances_3d = data_instances_3d[idx]
        sample.pred_instances = data_instances_2d[idx]
    return data_samples
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment