Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
c9ad3605
Commit
c9ad3605
authored
Jun 21, 2022
by
jshilong
Committed by
ChaimZhu
Jul 20, 2022
Browse files
[Refactor]New version VoteNet
parent
db44cc50
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
792 additions
and
411 deletions
+792
-411
configs/_base_/datasets/scannet-3d-18class.py
configs/_base_/datasets/scannet-3d-18class.py
+35
-44
configs/_base_/default_runtime.py
configs/_base_/default_runtime.py
+0
-1
configs/_base_/models/votenet.py
configs/_base_/models/votenet.py
+10
-10
configs/_base_/schedules/cyclic_20e.py
configs/_base_/schedules/cyclic_20e.py
+40
-15
configs/_base_/schedules/cyclic_40e.py
configs/_base_/schedules/cyclic_40e.py
+41
-21
configs/_base_/schedules/schedule_3x.py
configs/_base_/schedules/schedule_3x.py
+21
-5
configs/votenet/votenet_8x8_scannet-3d-18class.py
configs/votenet/votenet_8x8_scannet-3d-18class.py
+1
-3
mmdet3d/core/data_structures/det3d_data_sample.py
mmdet3d/core/data_structures/det3d_data_sample.py
+2
-0
mmdet3d/core/evaluation/indoor_eval.py
mmdet3d/core/evaluation/indoor_eval.py
+6
-13
mmdet3d/core/post_processing/merge_augs.py
mmdet3d/core/post_processing/merge_augs.py
+2
-2
mmdet3d/datasets/det3d_dataset.py
mmdet3d/datasets/det3d_dataset.py
+16
-3
mmdet3d/datasets/pipelines/formating.py
mmdet3d/datasets/pipelines/formating.py
+46
-7
mmdet3d/datasets/pipelines/loading.py
mmdet3d/datasets/pipelines/loading.py
+6
-0
mmdet3d/datasets/pipelines/test_time_aug.py
mmdet3d/datasets/pipelines/test_time_aug.py
+17
-19
mmdet3d/metrics/__init__.py
mmdet3d/metrics/__init__.py
+2
-1
mmdet3d/metrics/indoor_metric.py
mmdet3d/metrics/indoor_metric.py
+92
-0
mmdet3d/models/data_preprocessors/data_preprocessor.py
mmdet3d/models/data_preprocessors/data_preprocessor.py
+28
-6
mmdet3d/models/dense_heads/vote_head.py
mmdet3d/models/dense_heads/vote_head.py
+284
-174
mmdet3d/models/detectors/base.py
mmdet3d/models/detectors/base.py
+29
-11
mmdet3d/models/detectors/votenet.py
mmdet3d/models/detectors/votenet.py
+114
-76
No files found.
configs/_base_/datasets/scannet-3d-18class.py
View file @
c9ad3605
# dataset settings
dataset_type
=
'ScanNetDataset'
data_root
=
'./data/scannet/'
class_names
=
(
'cabinet'
,
'bed'
,
'chair'
,
'sofa'
,
'table'
,
'door'
,
'window'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
metainfo
=
dict
(
CLASSES
=
(
'cabinet'
,
'bed'
,
'chair'
,
'sofa'
,
'table'
,
'door'
,
'window'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
))
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
...
...
@@ -35,9 +37,8 @@ train_pipeline = [
rot_range
=
[
-
0.087266
,
0.087266
],
scale_ratio_range
=
[
1.0
,
1.0
],
shift_height
=
True
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'
Collect3D
'
,
type
=
'
Pack3DDetInputs
'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
...
...
@@ -68,61 +69,51 @@ test_pipeline = [
flip_ratio_bev_horizontal
=
0.5
,
flip_ratio_bev_vertical
=
0.5
),
dict
(
type
=
'PointSample'
,
num_points
=
40000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
coord_type
=
'DEPTH'
,
shift_height
=
False
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'GlobalAlignment'
,
rotation_axis
=
2
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]),
dict
(
type
=
'Pack3DDetInputs'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
train_dataloader
=
dict
(
batch_size
=
8
,
num_workers
=
4
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
True
),
dataset
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
ann_file
=
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
,
metainfo
=
metainfo
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'Depth'
)),
val
=
dict
(
box_type_3d
=
'Depth'
)))
val_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
),
test
=
dict
(
box_type_3d
=
'Depth'
))
test_dataloader
=
dict
(
batch_size
=
1
,
num_workers
=
1
,
sampler
=
dict
(
type
=
'DefaultSampler'
,
shuffle
=
False
),
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
ann_file
=
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
metainfo
=
metainfo
,
test_mode
=
True
,
box_type_3d
=
'Depth'
))
evaluat
ion
=
dict
(
pipeline
=
eval_pipeline
)
val_evaluator
=
dict
(
type
=
'IndoorMetric'
)
test_
evaluat
or
=
val_evaluator
configs/_base_/default_runtime.py
View file @
c9ad3605
default_scope
=
'mmdet3d'
default_hooks
=
dict
(
optimizer
=
dict
(
type
=
'OptimizerHook'
,
grad_clip
=
None
),
timer
=
dict
(
type
=
'IterTimerHook'
),
logger
=
dict
(
type
=
'LoggerHook'
,
interval
=
50
),
param_scheduler
=
dict
(
type
=
'ParamSchedulerHook'
),
...
...
configs/_base_/models/votenet.py
View file @
c9ad3605
model
=
dict
(
type
=
'VoteNet'
,
data_preprocessor
=
dict
(
type
=
'Det3DDataPreprocessor'
),
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
4
,
...
...
@@ -40,10 +41,8 @@ model = dict(
normalize_xyz
=
True
),
pred_layer_cfg
=
dict
(
in_channels
=
128
,
shared_conv_channels
=
(
128
,
128
),
bias
=
True
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
type
=
'
mmdet.
CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
loss_weight
=
5.0
),
...
...
@@ -54,20 +53,21 @@ model = dict(
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
type
=
'
mmdet.
SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
type
=
'mmdet.SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
type
=
'
mmdet.
CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)),
# model training and testing settings
train_cfg
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
),
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
e
=
'vote'
),
test_cfg
=
dict
(
sample_mod
=
'seed'
,
sample_mod
e
=
'seed'
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
per_class_proposal
=
True
))
configs/_base_/schedules/cyclic_20e.py
View file @
c9ad3605
...
...
@@ -3,22 +3,47 @@
# interval to be 20. Please change the interval accordingly if you do not
# use a default schedule.
# optimizer
lr
=
1e-4
iter_num_in_epoch
=
3712
# This schedule is mainly used by models on nuScenes dataset
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
1e-4
,
weight_decay
=
0.01
)
# max_norm=10 is better for SECOND
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.01
),
clip_grad
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning rate
param_scheduler
=
[
dict
(
type
=
'CosineAnnealingLR'
,
T_max
=
8
*
iter_num_in_epoch
,
eta_min
=
lr
*
10
,
by_epoch
=
False
,
begin
=
0
,
end
=
8
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingLR'
,
T_max
=
12
*
iter_num_in_epoch
,
eta_min
=
lr
*
1e-4
,
by_epoch
=
False
,
begin
=
8
*
iter_num_in_epoch
,
end
=
20
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingBetas'
,
T_max
=
8
*
iter_num_in_epoch
,
eta_min
=
0.85
/
0.95
,
by_epoch
=
False
,
begin
=
0
,
end
=
8
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingBetas'
,
T_max
=
12
*
iter_num_in_epoch
,
eta_min
=
1
,
by_epoch
=
False
,
begin
=
8
*
iter_num_in_epoch
,
end
=
20
*
iter_num_in_epoch
)
]
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
20
)
train_cfg
=
dict
(
by_epoch
=
True
,
max_epochs
=
20
)
val_cfg
=
dict
(
interval
=
1
)
test_cfg
=
dict
()
configs/_base_/schedules/cyclic_40e.py
View file @
c9ad3605
# The schedule is usually used by models trained on KITTI dataset
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, than go to 0.0018*1e-4
lr
=
0.0018
iter_num_in_epoch
=
3712
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
),
clip_grad
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
# learning rate
param_scheduler
=
[
dict
(
type
=
'CosineAnnealingLR'
,
T_max
=
16
*
iter_num_in_epoch
,
eta_min
=
lr
*
10
,
by_epoch
=
False
,
begin
=
0
,
end
=
16
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingLR'
,
T_max
=
24
*
iter_num_in_epoch
,
eta_min
=
lr
*
1e-4
,
by_epoch
=
False
,
begin
=
16
*
iter_num_in_epoch
,
end
=
40
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingBetas'
,
T_max
=
16
*
iter_num_in_epoch
,
eta_min
=
0.85
/
0.95
,
by_epoch
=
False
,
begin
=
0
,
end
=
16
*
iter_num_in_epoch
),
dict
(
type
=
'CosineAnnealingBetas'
,
T_max
=
24
*
iter_num_in_epoch
,
eta_min
=
1
,
by_epoch
=
False
,
begin
=
16
*
iter_num_in_epoch
,
end
=
40
*
iter_num_in_epoch
)
]
# Runtime settings,training schedule for 40e
# Although the max_epochs is 40, this schedule is usually used we
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be Nx40
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
40
)
train_cfg
=
dict
(
by_epoch
=
True
,
max_epochs
=
40
)
val_cfg
=
dict
(
interval
=
1
)
test_cfg
=
dict
()
configs/_base_/schedules/schedule_3x.py
View file @
c9ad3605
...
...
@@ -2,8 +2,24 @@
# This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet
lr
=
0.008
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
24
,
32
])
# runtime settings
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
36
)
optim_wrapper
=
dict
(
type
=
'OptimWrapper'
,
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
weight_decay
=
0.01
),
clip_grad
=
dict
(
max_norm
=
10
,
norm_type
=
2
),
)
# training schedule for 1x
train_cfg
=
dict
(
type
=
'EpochBasedTrainLoop'
,
max_epochs
=
36
,
val_interval
=
1
)
val_cfg
=
dict
(
type
=
'ValLoop'
)
test_cfg
=
dict
(
type
=
'TestLoop'
)
# learning rate
param_scheduler
=
[
dict
(
type
=
'MultiStepLR'
,
begin
=
0
,
end
=
36
,
by_epoch
=
True
,
milestones
=
[
24
,
32
],
gamma
=
0.1
)
]
configs/votenet/votenet_8x8_scannet-3d-18class.py
View file @
c9ad3605
...
...
@@ -31,6 +31,4 @@ model = dict(
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]])))
# yapf:disable
log_config
=
dict
(
interval
=
30
)
# yapf:enable
default_hooks
=
dict
(
logger
=
dict
(
type
=
'LoggerHook'
,
interval
=
30
))
mmdet3d/core/data_structures/det3d_data_sample.py
View file @
c9ad3605
...
...
@@ -51,6 +51,8 @@ class Det3DDataSample(DetDataSample):
panoptic segmentation.
- ``pred_pts_panoptic_seg``(PixelData): Predicted of point cloud
panoptic segmentation.
- ``eval_ann_info``(dict): Raw annotation, which will be passed to
evaluator and do the online evaluation.
Examples:
>>> from mmengine.data import InstanceData, PixelData
...
...
mmdet3d/core/evaluation/indoor_eval.py
View file @
c9ad3605
...
...
@@ -205,7 +205,6 @@ def indoor_eval(gt_annos,
metric
,
label2cat
,
logger
=
None
,
box_type_3d
=
None
,
box_mode_3d
=
None
):
"""Indoor Evaluation.
...
...
@@ -217,11 +216,11 @@ def indoor_eval(gt_annos,
includes the following keys
- labels_3d (torch.Tensor): Labels of boxes.
- boxes_3d (:obj:`BaseInstance3DBoxes`):
-
b
boxes_3d (:obj:`BaseInstance3DBoxes`):
3D bounding boxes in Depth coordinate.
- scores_3d (torch.Tensor): Scores of boxes.
metric (list[float]): IoU thresholds for computing average precisions.
label2cat (
dict
): Map from label to category.
label2cat (
tuple
): Map from label to category.
logger (logging.Logger | str, optional): The way to print the mAP
summary. See `mmdet.utils.print_log()` for details. Default: None.
...
...
@@ -236,7 +235,7 @@ def indoor_eval(gt_annos,
det_anno
=
dt_annos
[
img_id
]
for
i
in
range
(
len
(
det_anno
[
'labels_3d'
])):
label
=
det_anno
[
'labels_3d'
].
numpy
()[
i
]
bbox
=
det_anno
[
'boxes_3d'
].
convert_to
(
box_mode_3d
)[
i
]
bbox
=
det_anno
[
'
b
boxes_3d'
].
convert_to
(
box_mode_3d
)[
i
]
score
=
det_anno
[
'scores_3d'
].
numpy
()[
i
]
if
label
not
in
pred
:
pred
[
int
(
label
)]
=
{}
...
...
@@ -250,15 +249,9 @@ def indoor_eval(gt_annos,
# parse gt annotations
gt_anno
=
gt_annos
[
img_id
]
if
gt_anno
[
'gt_num'
]
!=
0
:
gt_boxes
=
box_type_3d
(
gt_anno
[
'gt_boxes_upright_depth'
],
box_dim
=
gt_anno
[
'gt_boxes_upright_depth'
].
shape
[
-
1
],
origin
=
(
0.5
,
0.5
,
0.5
)).
convert_to
(
box_mode_3d
)
labels_3d
=
gt_anno
[
'class'
]
else
:
gt_boxes
=
box_type_3d
(
np
.
array
([],
dtype
=
np
.
float32
))
labels_3d
=
np
.
array
([],
dtype
=
np
.
int64
)
gt_boxes
=
gt_anno
[
'gt_bboxes_3d'
]
labels_3d
=
gt_anno
[
'gt_labels_3d'
]
for
i
in
range
(
len
(
labels_3d
)):
label
=
labels_3d
[
i
]
...
...
mmdet3d/core/post_processing/merge_augs.py
View file @
c9ad3605
...
...
@@ -51,7 +51,7 @@ def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
aug_labels
=
torch
.
cat
(
recovered_labels
,
dim
=
0
)
# TODO: use a more elegent way to deal with nms
if
test_cfg
.
use_rotate_nms
:
if
test_cfg
.
get
(
'
use_rotate_nms
'
,
False
)
:
nms_func
=
nms_bev
else
:
nms_func
=
nms_normal_bev
...
...
@@ -83,7 +83,7 @@ def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
merged_labels
=
torch
.
cat
(
merged_labels
,
dim
=
0
)
_
,
order
=
merged_scores
.
sort
(
0
,
descending
=
True
)
num
=
min
(
test_cfg
.
max_num
,
len
(
aug_bboxes
))
num
=
min
(
test_cfg
.
get
(
'
max_num
'
,
500
)
,
len
(
aug_bboxes
))
order
=
order
[:
num
]
merged_bboxes
=
merged_bboxes
[
order
]
...
...
mmdet3d/datasets/det3d_dataset.py
View file @
c9ad3605
...
...
@@ -47,10 +47,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
filter_empty_gt (bool
, optional
): Whether to filter the data with
filter_empty_gt (bool): Whether to filter the data with
empty GT. Defaults to True.
test_mode (bool
, optional
): Whether the dataset is in test mode.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
load_eval_anns (bool): Whether to load annotations
in test_mode, the annotation will be save in
`eval_ann_infos`, which can be use in Evaluator.
file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`.
"""
def
__init__
(
self
,
...
...
@@ -63,11 +68,13 @@ class Det3DDataset(BaseDataset):
box_type_3d
:
dict
=
'LiDAR'
,
filter_empty_gt
:
bool
=
True
,
test_mode
:
bool
=
False
,
load_eval_anns
=
True
,
file_client_args
:
dict
=
dict
(
backend
=
'disk'
),
**
kwargs
):
# init file client
self
.
file_client
=
mmcv
.
FileClient
(
**
file_client_args
)
self
.
filter_empty_gt
=
filter_empty_gt
self
.
load_eval_anns
=
load_eval_anns
_default_modality_keys
=
(
'use_lidar'
,
'use_camera'
)
if
modality
is
None
:
modality
=
dict
()
...
...
@@ -82,7 +89,6 @@ class Det3DDataset(BaseDataset):
f
', `use_camera`) for
{
self
.
__class__
.
__name__
}
'
)
self
.
box_type_3d
,
self
.
box_mode_3d
=
get_box_type
(
box_type_3d
)
if
metainfo
is
not
None
and
'CLASSES'
in
metainfo
:
# we allow to train on subset of self.METAINFO['CLASSES']
# map unselected labels to -1
...
...
@@ -101,6 +107,10 @@ class Det3DDataset(BaseDataset):
}
self
.
label_mapping
[
-
1
]
=
-
1
# can be accessed by other component in runner
metainfo
[
'box_type_3d'
]
=
box_type_3d
metainfo
[
'label_mapping'
]
=
self
.
label_mapping
super
().
__init__
(
ann_file
=
ann_file
,
metainfo
=
metainfo
,
...
...
@@ -221,7 +231,10 @@ class Det3DDataset(BaseDataset):
self
.
data_prefix
.
get
(
'img'
,
''
),
img_info
[
'img_path'
])
if
not
self
.
test_mode
:
# used in traing
info
[
'ann_info'
]
=
self
.
parse_ann_info
(
info
)
if
self
.
test_mode
and
self
.
load_eval_anns
:
info
[
'eval_ann_info'
]
=
self
.
parse_ann_info
(
info
)
return
info
...
...
mmdet3d/datasets/pipelines/formating.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Union
import
numpy
as
np
from
mmcv
import
BaseTransform
from
mmcv.transforms
import
to_tensor
...
...
@@ -45,14 +47,16 @@ class Pack3DDetInputs(BaseTransform):
key
=
key
[
3
:]
return
key
def
transform
(
self
,
results
:
dict
)
->
dict
:
"""Method to pack the input data.
def
transform
(
self
,
results
:
Union
[
dict
,
List
[
dict
]])
->
Union
[
dict
,
List
[
dict
]]:
"""Method to pack the input data. when the value in this dict is a
list, it usually is in Augmentations Testing.
Args:
results (dict): Result dict from the data pipeline.
results (dict
| list[dict]
): Result dict from the data pipeline.
Returns:
dict:
dict
| List[dict]
:
- 'inputs' (dict): The forward data of models. It usually contains
following keys:
...
...
@@ -63,12 +67,41 @@ class Pack3DDetInputs(BaseTransform):
- 'data_sample' (obj:`Det3DDataSample`): The annotation info of the
sample.
"""
packed_results
=
dict
()
# augtest
if
isinstance
(
results
,
list
):
pack_results
=
[]
for
single_result
in
results
:
pack_results
.
append
(
self
.
pack_single_results
(
single_result
))
return
pack_results
# norm training and simple testing
elif
isinstance
(
results
,
dict
):
return
self
.
pack_single_results
(
results
)
else
:
raise
NotImplementedError
def
pack_single_results
(
self
,
results
):
"""Method to pack the single input data. when the value in this dict is
a list, it usually is in Augmentations Testing.
Args:
results (dict): Result dict from the data pipeline.
Returns:
dict: A dict contains
- 'inputs' (dict): The forward data of models. It usually contains
following keys:
- points
- img
- 'data_sample' (obj:`Det3DDataSample`): The annotation info of the
sample.
"""
# Format 3D data
if
'points'
in
results
:
assert
isinstance
(
results
[
'points'
],
BasePoints
)
results
[
'points'
]
=
results
[
'points'
].
tensor
if
isinstance
(
results
[
'points'
],
BasePoints
)
:
results
[
'points'
]
=
results
[
'points'
].
tensor
if
'img'
in
results
:
if
isinstance
(
results
[
'img'
],
list
):
...
...
@@ -134,6 +167,12 @@ class Pack3DDetInputs(BaseTransform):
data_sample
.
gt_instances_3d
=
gt_instances_3d
data_sample
.
gt_instances
=
gt_instances
data_sample
.
seg_data
=
seg_data
if
'eval_ann_info'
in
results
:
data_sample
.
eval_ann_info
=
results
[
'eval_ann_info'
]
else
:
data_sample
.
eval_ann_info
=
None
packed_results
=
dict
()
packed_results
[
'data_sample'
]
=
data_sample
packed_results
[
'inputs'
]
=
inputs
...
...
mmdet3d/datasets/pipelines/loading.py
View file @
c9ad3605
...
...
@@ -684,6 +684,9 @@ class LoadAnnotations3D(LoadAnnotations):
pts_instance_mask_path
,
dtype
=
np
.
int64
)
results
[
'pts_instance_mask'
]
=
pts_instance_mask
# 'eval_ann_info' will be passed to evaluator
if
'eval_ann_info'
in
results
:
results
[
'eval_ann_info'
][
'pts_instance_mask'
]
=
pts_instance_mask
return
results
def
_load_semantic_seg_3d
(
self
,
results
:
dict
)
->
dict
:
...
...
@@ -710,6 +713,9 @@ class LoadAnnotations3D(LoadAnnotations):
pts_semantic_mask_path
,
dtype
=
np
.
int64
)
results
[
'pts_semantic_mask'
]
=
pts_semantic_mask
# 'eval_ann_info' will be passed to evaluator
if
'eval_ann_info'
in
results
:
results
[
'eval_ann_info'
][
'pts_semantic_mask'
]
=
pts_semantic_mask
return
results
def
transform
(
self
,
results
:
dict
)
->
dict
:
...
...
mmdet3d/datasets/pipelines/test_time_aug.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
from
copy
import
deepcopy
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
import
mmcv
from
mmcv
import
BaseTransform
from
mmengine.dataset
import
Compose
from
mmdet3d.registry
import
TRANSFORMS
from
.compose
import
Compose
@
TRANSFORMS
.
register_module
()
class
MultiScaleFlipAug3D
(
object
):
class
MultiScaleFlipAug3D
(
BaseTransform
):
"""Test-time augmentation with multiple scales and flipping.
Args:
...
...
@@ -33,13 +35,13 @@ class MultiScaleFlipAug3D(object):
"""
def
__init__
(
self
,
transforms
,
img_scale
,
pts_scale_ratio
,
flip
=
False
,
flip_direction
=
'horizontal'
,
pcd_horizontal_flip
=
False
,
pcd_vertical_flip
=
False
)
:
transforms
:
List
[
dict
]
,
img_scale
:
Optional
[
Union
[
Tuple
[
int
],
List
[
Tuple
[
int
]]]]
,
pts_scale_ratio
:
Union
[
float
,
List
[
float
]]
,
flip
:
bool
=
False
,
flip_direction
:
str
=
'horizontal'
,
pcd_horizontal_flip
:
bool
=
False
,
pcd_vertical_flip
:
bool
=
False
)
->
None
:
self
.
transforms
=
Compose
(
transforms
)
self
.
img_scale
=
img_scale
if
isinstance
(
img_scale
,
list
)
else
[
img_scale
]
...
...
@@ -65,17 +67,17 @@ class MultiScaleFlipAug3D(object):
warnings
.
warn
(
'flip has no effect when RandomFlip is not in transforms'
)
def
__call__
(
self
,
results
)
:
def
transform
(
self
,
results
:
Dict
)
->
List
[
Dict
]
:
"""Call function to augment common fields in results.
Args:
results (dict): Result dict contains the data to augment.
Returns:
dict: The
result dic
t contains the data that is augmented with
List[
dict
]
: The
lis
t contains the data that is augmented with
different scales and flips.
"""
aug_data
=
[]
aug_data
_list
=
[]
# modified from `flip_aug = [False, True] if self.flip else [False]`
# to reduce unnecessary scenes when using double flip augmentation
...
...
@@ -104,13 +106,9 @@ class MultiScaleFlipAug3D(object):
_results
[
'pcd_vertical_flip'
]
=
\
pcd_vertical_flip
data
=
self
.
transforms
(
_results
)
aug_data
.
append
(
data
)
# list of dict to dict of list
aug_data_dict
=
{
key
:
[]
for
key
in
aug_data
[
0
]}
for
data
in
aug_data
:
for
key
,
val
in
data
.
items
():
aug_data_dict
[
key
].
append
(
val
)
return
aug_data_dict
aug_data_list
.
append
(
data
)
return
aug_data_list
def
__repr__
(
self
):
"""str: Return a string that describes the module."""
...
...
mmdet3d/metrics/__init__.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
.indoor_metric
import
IndoorMetric
# noqa: F401,F403
from
.kitti_metric
import
KittiMetric
# noqa: F401,F403
__all_
=
[
'KittiMetric'
]
__all_
=
[
'KittiMetric'
,
'IndoorMetric'
]
mmdet3d/metrics/indoor_metric.py
0 → 100644
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Dict
,
List
,
Optional
,
Sequence
from
mmengine.evaluator
import
BaseMetric
from
mmengine.logging
import
MMLogger
from
mmdet3d.core
import
get_box_type
,
indoor_eval
from
mmdet3d.registry
import
METRICS
@
METRICS
.
register_module
()
class
IndoorMetric
(
BaseMetric
):
"""Kitti evaluation metric.
Args:
iou_thr (list[float]): List of iou threshold when calculate the
metric. Defaults to [0.25, 0.5].
collect_device (str, optional): Device name used for collecting
results from different ranks during distributed training.
Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
prefix (str): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Default: None
"""
def
__init__
(
self
,
iou_thr
:
List
[
float
]
=
[
0.25
,
0.5
],
collect_device
:
str
=
'cpu'
,
prefix
:
Optional
[
str
]
=
None
,
**
kwargs
):
super
(
IndoorMetric
,
self
).
__init__
(
prefix
=
prefix
,
collect_device
=
collect_device
)
self
.
iou_thr
=
iou_thr
def
process
(
self
,
data_batch
:
Sequence
[
dict
],
predictions
:
Sequence
[
dict
])
->
None
:
"""Process one batch of data samples and predictions.
The processed results should be stored in ``self.results``,
which will be used to compute the metrics when all batches
have been processed.
Args:
data_batch (Sequence[dict]): A batch of data
from the dataloader.
predictions (Sequence[dict]): A batch of outputs from
the model.
"""
batch_eval_anns
=
[
item
[
'data_sample'
][
'eval_ann_info'
]
for
item
in
data_batch
]
for
eval_ann
,
pred_dict
in
zip
(
batch_eval_anns
,
predictions
):
pred_3d
=
pred_dict
[
'pred_instances_3d'
]
cpu_pred_3d
=
dict
()
for
k
,
v
in
pred_3d
.
items
():
if
hasattr
(
v
,
'to'
):
cpu_pred_3d
[
k
]
=
v
.
to
(
'cpu'
)
else
:
cpu_pred_3d
[
k
]
=
v
self
.
results
.
append
((
eval_ann
,
cpu_pred_3d
))
def
compute_metrics
(
self
,
results
:
list
)
->
Dict
[
str
,
float
]:
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
logger
:
MMLogger
=
MMLogger
.
get_current_instance
()
ann_infos
=
[]
pred_results
=
[]
for
eval_ann
,
sinlge_pred_results
in
results
:
ann_infos
.
append
(
eval_ann
)
pred_results
.
append
(
sinlge_pred_results
)
box_type_3d
,
box_mode_3d
=
get_box_type
(
self
.
dataset_meta
[
'box_type_3d'
])
ret_dict
=
indoor_eval
(
ann_infos
,
pred_results
,
self
.
iou_thr
,
self
.
dataset_meta
[
'CLASSES'
],
logger
=
logger
,
box_mode_3d
=
box_mode_3d
)
return
ret_dict
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
numbers
import
Number
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
from
typing
import
List
,
Optional
,
Sequence
,
Tuple
,
Union
import
numpy
as
np
from
mmengine.data
import
BaseDataElement
...
...
@@ -66,19 +66,41 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
batch_augments
=
batch_augments
)
def
forward
(
self
,
data
:
Sequence
[
dict
],
training
:
bool
=
False
)
->
Tuple
[
Dict
,
Optional
[
list
]]:
data
:
List
[
Union
[
dict
,
List
[
dict
]]],
training
:
bool
=
False
)
->
Tuple
[
Union
[
dict
,
List
[
dict
]],
Optional
[
list
]]:
"""Perform normalization、padding and bgr2rgb conversion based on
``BaseDataPreprocessor``.
Args:
data (Sequence[dict]): data sampled from dataloader.
data (List[dict] | List[List[dict]]): data from dataloader.
The outer list always represent the batch size, when it is
a list[list[dict]], the inter list indicate test time
augmentation.
training (bool): Whether to enable training time augmentation.
Returns:
Tuple[Dict, Optional[list]]: Data in the same format as the
model input.
Tuple[Dict, Optional[list]] |
Tuple[List[Dict], Optional[list[list]]]:
Data in the same format as the model input.
"""
if
isinstance
(
data
[
0
],
list
):
num_augs
=
len
(
data
[
0
])
aug_batch_data
=
[]
aug_batch_data_sample
=
[]
for
aug_id
in
range
(
num_augs
):
single_aug_batch_data
,
\
single_aug_batch_data_sample
=
self
.
simple_process
(
[
item
[
aug_id
]
for
item
in
data
],
training
)
aug_batch_data
.
append
(
single_aug_batch_data
)
aug_batch_data_sample
.
append
(
single_aug_batch_data_sample
)
return
aug_batch_data
,
aug_batch_data_sample
else
:
return
self
.
simple_process
(
data
,
training
)
def
simple_process
(
self
,
data
:
Sequence
[
dict
],
training
:
bool
=
False
):
inputs_dict
,
batch_data_samples
=
self
.
collate_data
(
data
)
if
'points'
in
inputs_dict
[
0
].
keys
():
...
...
mmdet3d/models/dense_heads/vote_head.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
Dict
,
List
,
Optional
,
Union
import
numpy
as
np
import
torch
from
mmcv.ops
import
furthest_point_sample
from
mmcv.runner
import
BaseModule
,
force_fp32
from
mmengine
import
ConfigDict
,
InstanceData
from
torch.nn
import
functional
as
F
from
mmdet3d.core.post_processing
import
aligned_3d_nms
from
mmdet3d.models.builder
import
build_loss
from
mmdet3d.models.losses
import
chamfer_distance
from
mmdet3d.models.model_utils
import
VoteModule
from
mmdet3d.ops
import
build_sa_module
from
mmdet3d.registry
import
MODELS
from
mmdet.core
import
build_bbox_coder
,
multi_apply
from
mmdet3d.registry
import
MODELS
,
TASK_UTILS
from
mmdet.core.utils
import
multi_apply
from
...core
import
Det3DDataSample
from
.base_conv_bbox_head
import
BaseConvBboxHead
...
...
@@ -21,66 +24,76 @@ class VoteHead(BaseModule):
Args:
num_classes (int): The number of class.
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
test_cfg (dict): Config for testing.
vote_module_cfg (dict): Config of VoteModule for point-wise votes.
vote_aggregation_cfg (dict): Config of vote aggregation layer.
pred_layer_cfg (dict): Config of classfication and regression
prediction layers.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
center_loss (dict): Config of center loss.
dir_class_loss (dict): Config of direction classification loss.
dir_res_loss (dict): Config of direction residual regression loss.
size_class_loss (dict): Config of size classification loss.
size_res_loss (dict): Config of size residual regression loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
bbox_coder (ConfigDict, dict): Bbox coder for encoding and
decoding boxes. Defaults to None.
train_cfg (dict, optional): Config for training. Defaults to None.
test_cfg (dict, optional): Config for testing. Defaults to None.
vote_module_cfg (dict, optional): Config of VoteModule for
point-wise votes. Defaults to None.
vote_aggregation_cfg (dict, optional): Config of vote
aggregation layer. Defaults to None.
pred_layer_cfg (dict, optional): Config of classification
and regression prediction layers. Defaults to None.
objectness_loss (dict, optional): Config of objectness loss.
Defaults to None.
center_loss (dict, optional): Config of center loss.
Defaults to None.
dir_class_loss (dict, optional): Config of direction
classification loss. Defaults to None.
dir_res_loss (dict, optional): Config of direction
residual regression loss. Defaults to None.
size_class_loss (dict, optional): Config of size
classification loss. Defaults to None.
size_res_loss (dict, optional): Config of size
residual regression loss. Defaults to None.
semantic_loss (dict, optional): Config of point-wise
semantic segmentation loss. Defaults to None.
iou_loss (dict, optional): Config of IOU loss for
regression. Defaults to None.
init_cfg (dict, optional): Config of model weight
initialization. Defaults to None.
"""
def
__init__
(
self
,
num_classes
,
bbox_coder
,
train_cfg
=
None
,
test_cfg
=
None
,
vote_module_cfg
=
None
,
vote_aggregation_cfg
=
None
,
pred_layer_cfg
=
None
,
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
None
,
center_loss
=
None
,
dir_class_loss
=
None
,
dir_res_loss
=
None
,
size_class_loss
=
None
,
size_res_loss
=
None
,
semantic_loss
=
None
,
iou_loss
=
None
,
init_cfg
=
None
):
num_classes
:
int
,
bbox_coder
:
Union
[
ConfigDict
,
dict
],
train_cfg
:
Optional
[
dict
]
=
None
,
test_cfg
:
Optional
[
dict
]
=
None
,
vote_module_cfg
:
Optional
[
dict
]
=
None
,
vote_aggregation_cfg
:
Optional
[
dict
]
=
None
,
pred_layer_cfg
:
Optional
[
dict
]
=
None
,
objectness_loss
:
Optional
[
dict
]
=
None
,
center_loss
:
Optional
[
dict
]
=
None
,
dir_class_loss
:
Optional
[
dict
]
=
None
,
dir_res_loss
:
Optional
[
dict
]
=
None
,
size_class_loss
:
Optional
[
dict
]
=
None
,
size_res_loss
:
Optional
[
dict
]
=
None
,
semantic_loss
:
Optional
[
dict
]
=
None
,
iou_loss
:
Optional
[
dict
]
=
None
,
init_cfg
:
Optional
[
dict
]
=
None
):
super
(
VoteHead
,
self
).
__init__
(
init_cfg
=
init_cfg
)
self
.
num_classes
=
num_classes
self
.
train_cfg
=
train_cfg
self
.
test_cfg
=
test_cfg
self
.
gt_per_seed
=
vote_module_cfg
[
'gt_per_seed'
]
self
.
num_proposal
=
vote_aggregation_cfg
[
'num_point'
]
self
.
objectness
_loss
=
build_loss
(
objectness_loss
)
self
.
center
_loss
=
build_loss
(
center_loss
)
self
.
dir_res
_loss
=
build_loss
(
dir_res_loss
)
self
.
dir_class
_loss
=
build_loss
(
dir_class_loss
)
self
.
size_res
_loss
=
build_loss
(
size_res_loss
)
self
.
loss_
objectness
=
MODELS
.
build
(
objectness_loss
)
self
.
loss_
center
=
MODELS
.
build
(
center_loss
)
self
.
loss_
dir_res
=
MODELS
.
build
(
dir_res_loss
)
self
.
loss_
dir_class
=
MODELS
.
build
(
dir_class_loss
)
self
.
loss_
size_res
=
MODELS
.
build
(
size_res_loss
)
if
size_class_loss
is
not
None
:
self
.
size_class_loss
=
build
_loss
(
size_class_loss
)
self
.
size_class_loss
=
MODELS
.
build
(
size_class_loss
)
if
semantic_loss
is
not
None
:
self
.
semantic_loss
=
build
_loss
(
semantic_loss
)
self
.
semantic_loss
=
MODELS
.
build
(
semantic_loss
)
if
iou_loss
is
not
None
:
self
.
iou_loss
=
build
_loss
(
iou_loss
)
self
.
iou_loss
=
MODELS
.
build
(
iou_loss
)
else
:
self
.
iou_loss
=
None
self
.
bbox_coder
=
build_bbox_coder
(
bbox_coder
)
self
.
bbox_coder
=
TASK_UTILS
.
build
(
bbox_coder
)
self
.
num_sizes
=
self
.
bbox_coder
.
num_sizes
self
.
num_dir_bins
=
self
.
bbox_coder
.
num_dir_bins
...
...
@@ -94,6 +107,15 @@ class VoteHead(BaseModule):
num_cls_out_channels
=
self
.
_get_cls_out_channels
(),
num_reg_out_channels
=
self
.
_get_reg_out_channels
())
@
property
def
sample_mode
(
self
):
if
self
.
training
:
sample_mode
=
self
.
train_cfg
.
sample_mode
else
:
sample_mode
=
self
.
test_cfg
.
sample_mode
assert
sample_mode
in
[
'vote'
,
'seed'
,
'random'
,
'spec'
]
return
sample_mode
def
_get_cls_out_channels
(
self
):
"""Return the channel number of classification outputs."""
# Class numbers (k) + objectness (2)
...
...
@@ -106,16 +128,18 @@ class VoteHead(BaseModule):
# size class+residual(num_sizes*4)
return
3
+
self
.
num_dir_bins
*
2
+
self
.
num_sizes
*
4
def
_extract_input
(
self
,
feat_dict
)
:
def
_extract_input
(
self
,
feat_dict
:
dict
)
->
tuple
:
"""Extract inputs from features dictionary.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
torch.Tensor: Coordinates of input points.
torch.Tensor: Features of input points.
torch.Tensor: Indices of input points.
tuple[Tensor]: Arrage as following three tensor.
- Coordinates of input points.
- Features of input points.
- Indices of input points.
"""
# for imvotenet
...
...
@@ -133,7 +157,77 @@ class VoteHead(BaseModule):
return
seed_points
,
seed_features
,
seed_indices
def
forward
(
self
,
feat_dict
,
sample_mod
):
def
predict
(
self
,
points
:
List
[
torch
.
Tensor
],
feats_dict
:
Dict
[
str
,
torch
.
Tensor
],
batch_data_samples
:
List
[
Det3DDataSample
],
rescale
=
True
,
**
kwargs
)
->
List
[
InstanceData
]:
"""
Args:
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Features from FPN or backbone..
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes meta information of data.
rescale (bool): Whether rescale the resutls to
the original scale.
Returns:
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3d Bounding boxes and corresponding
scores and labels.
"""
preds_dict
=
self
(
feats_dict
)
batch_size
=
len
(
batch_data_samples
)
batch_input_metas
=
[]
for
batch_index
in
range
(
batch_size
):
metainfo
=
batch_data_samples
[
batch_index
].
metainfo
batch_input_metas
.
append
(
metainfo
)
results_list
=
self
.
predict_by_feat
(
points
,
preds_dict
,
batch_input_metas
,
rescale
=
rescale
,
**
kwargs
)
return
results_list
def
loss
(
self
,
points
:
List
[
torch
.
Tensor
],
feats_dict
:
Dict
[
str
,
torch
.
Tensor
],
batch_data_samples
:
List
[
Det3DDataSample
],
**
kwargs
)
->
dict
:
"""
Args:
points (list[tensor]): Points cloud of multiple samples.
feats_dict (dict): Predictions from backbone or FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each sample and
corresponding annotations.
Returns:
dict: A dictionary of loss components.
"""
preds_dict
=
self
.
forward
(
feats_dict
)
batch_gt_instance_3d
=
[]
batch_gt_instances_ignore
=
[]
batch_input_metas
=
[]
batch_pts_semantic_mask
=
[]
batch_pts_instance_mask
=
[]
for
data_sample
in
batch_data_samples
:
batch_input_metas
.
append
(
data_sample
.
metainfo
)
batch_gt_instance_3d
.
append
(
data_sample
.
gt_instances_3d
)
batch_gt_instances_ignore
.
append
(
data_sample
.
get
(
'ignored_instances'
,
None
))
batch_pts_semantic_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_semantic_mask'
,
None
))
batch_pts_instance_mask
.
append
(
data_sample
.
seg_data
.
get
(
'pts_instance_mask'
,
None
))
loss_inputs
=
(
points
,
preds_dict
,
batch_gt_instance_3d
)
losses
=
self
.
loss_by_feat
(
*
loss_inputs
,
batch_pts_semantic_mask
=
batch_pts_semantic_mask
,
batch_pts_instance_mask
=
batch_pts_instance_mask
,
batch_input_metas
=
batch_input_metas
,
batch_gt_instances_ignore
=
batch_gt_instances_ignore
)
return
losses
def
forward
(
self
,
feat_dict
:
dict
)
->
dict
:
"""Forward pass.
Note:
...
...
@@ -146,13 +240,10 @@ class VoteHead(BaseModule):
Args:
feat_dict (dict): Feature dict from backbone.
sample_mod (str): Sample mode for vote aggregation layer.
valid modes are "vote", "seed", "random" and "spec".
Returns:
dict: Predictions of vote head.
"""
assert
sample_mod
in
[
'vote'
,
'seed'
,
'random'
,
'spec'
]
seed_points
,
seed_features
,
seed_indices
=
self
.
_extract_input
(
feat_dict
)
...
...
@@ -168,11 +259,11 @@ class VoteHead(BaseModule):
vote_offset
=
vote_offset
)
# 2. aggregate vote_points
if
sample_mod
==
'vote'
:
if
self
.
sample_mod
e
==
'vote'
:
# use fps in vote_aggregation
aggregation_inputs
=
dict
(
points_xyz
=
vote_points
,
features
=
vote_features
)
elif
sample_mod
==
'seed'
:
elif
self
.
sample_mod
e
==
'seed'
:
# FPS on seed and choose the votes corresponding to the seeds
sample_indices
=
furthest_point_sample
(
seed_points
,
self
.
num_proposal
)
...
...
@@ -180,7 +271,7 @@ class VoteHead(BaseModule):
points_xyz
=
vote_points
,
features
=
vote_features
,
indices
=
sample_indices
)
elif
sample_mod
==
'random'
:
elif
self
.
sample_mod
e
==
'random'
:
# Random sampling from the votes
batch_size
,
num_seed
=
seed_points
.
shape
[:
2
]
sample_indices
=
seed_points
.
new_tensor
(
...
...
@@ -190,7 +281,7 @@ class VoteHead(BaseModule):
points_xyz
=
vote_points
,
features
=
vote_features
,
indices
=
sample_indices
)
elif
sample_mod
==
'spec'
:
elif
self
.
sample_mod
e
==
'spec'
:
# Specify the new center in vote_aggregation
aggregation_inputs
=
dict
(
points_xyz
=
seed_points
,
...
...
@@ -198,7 +289,7 @@ class VoteHead(BaseModule):
target_xyz
=
vote_points
)
else
:
raise
NotImplementedError
(
f
'Sample mode
{
sample_mod
}
is not supported!'
)
f
'Sample mode
{
self
.
sample_mod
e
}
is not supported!'
)
vote_aggregation_ret
=
self
.
vote_aggregation
(
**
aggregation_inputs
)
aggregated_points
,
features
,
aggregated_indices
=
vote_aggregation_ret
...
...
@@ -214,45 +305,42 @@ class VoteHead(BaseModule):
decode_res
=
self
.
bbox_coder
.
split_pred
(
cls_predictions
,
reg_predictions
,
aggregated_points
)
results
.
update
(
decode_res
)
return
results
@
force_fp32
(
apply_to
=
(
'bbox_preds'
,
))
def
loss
(
self
,
bbox_preds
,
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
img_metas
=
None
,
gt_bboxes_ignore
=
None
,
ret_target
=
False
):
def
loss_by_feat
(
self
,
points
:
List
[
torch
.
Tensor
],
bbox_preds_dict
:
dict
,
batch_gt_instances_3d
:
List
[
InstanceData
],
batch_pts_semantic_mask
:
Optional
[
List
[
torch
.
Tensor
]]
=
None
,
batch_pts_instance_mask
:
Optional
[
List
[
torch
.
Tensor
]]
=
None
,
ret_target
:
bool
=
False
,
**
kwargs
)
->
dict
:
"""Compute loss.
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding.
ret_target (Bool): Return targets or not.
bbox_preds_dict (dict): Predictions from forward of vote head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic mask
of points cloud. Defaults to None.
batch_pts_semantic_mask (list[tensor]): Instance mask
of points cloud. Defaults to None.
batch_input_metas (list[dict]): Contain pcd and img's meta info.
ret_target (bool): Return targets or not.
Returns:
dict: Losses of Votenet.
"""
targets
=
self
.
get_targets
(
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
,
pts_instance_mask
,
bbox_preds
)
targets
=
self
.
get_targets
(
points
,
bbox_preds_dict
,
batch_gt_instances_3d
,
batch_pts_semantic_mask
,
batch_pts_instance_mask
)
(
vote_targets
,
vote_target_masks
,
size_class_targets
,
size_res_targets
,
dir_class_targets
,
dir_res_targets
,
center_targets
,
assigned_center_targets
,
mask_targets
,
valid_gt_masks
,
...
...
@@ -260,28 +348,28 @@ class VoteHead(BaseModule):
valid_gt_weights
)
=
targets
# calculate vote loss
vote_loss
=
self
.
vote_module
.
get_loss
(
bbox_preds
[
'seed_points'
],
bbox_preds
[
'vote_points'
],
bbox_preds
[
'seed_indices'
],
vote_loss
=
self
.
vote_module
.
get_loss
(
bbox_preds
_dict
[
'seed_points'
],
bbox_preds
_dict
[
'vote_points'
],
bbox_preds
_dict
[
'seed_indices'
],
vote_target_masks
,
vote_targets
)
# calculate objectness loss
objectness_loss
=
self
.
objectness
_loss
(
bbox_preds
[
'obj_scores'
].
transpose
(
2
,
1
),
objectness_loss
=
self
.
loss_
objectness
(
bbox_preds
_dict
[
'obj_scores'
].
transpose
(
2
,
1
),
objectness_targets
,
weight
=
objectness_weights
)
# calculate center loss
source2target_loss
,
target2source_loss
=
self
.
center
_loss
(
bbox_preds
[
'center'
],
source2target_loss
,
target2source_loss
=
self
.
loss_
center
(
bbox_preds
_dict
[
'center'
],
center_targets
,
src_weight
=
box_loss_weights
,
dst_weight
=
valid_gt_weights
)
center_loss
=
source2target_loss
+
target2source_loss
# calculate direction class loss
dir_class_loss
=
self
.
dir_class
_loss
(
bbox_preds
[
'dir_class'
].
transpose
(
2
,
1
),
dir_class_loss
=
self
.
loss_
dir_class
(
bbox_preds
_dict
[
'dir_class'
].
transpose
(
2
,
1
),
dir_class_targets
,
weight
=
box_loss_weights
)
...
...
@@ -291,13 +379,13 @@ class VoteHead(BaseModule):
(
batch_size
,
proposal_num
,
self
.
num_dir_bins
))
heading_label_one_hot
.
scatter_
(
2
,
dir_class_targets
.
unsqueeze
(
-
1
),
1
)
dir_res_norm
=
torch
.
sum
(
bbox_preds
[
'dir_res_norm'
]
*
heading_label_one_hot
,
-
1
)
dir_res_loss
=
self
.
dir_res
_loss
(
bbox_preds
_dict
[
'dir_res_norm'
]
*
heading_label_one_hot
,
-
1
)
dir_res_loss
=
self
.
loss_
dir_res
(
dir_res_norm
,
dir_res_targets
,
weight
=
box_loss_weights
)
# calculate size class loss
size_class_loss
=
self
.
size_class_loss
(
bbox_preds
[
'size_class'
].
transpose
(
2
,
1
),
bbox_preds
_dict
[
'size_class'
].
transpose
(
2
,
1
),
size_class_targets
,
weight
=
box_loss_weights
)
...
...
@@ -308,17 +396,17 @@ class VoteHead(BaseModule):
one_hot_size_targets_expand
=
one_hot_size_targets
.
unsqueeze
(
-
1
).
repeat
(
1
,
1
,
1
,
3
).
contiguous
()
size_residual_norm
=
torch
.
sum
(
bbox_preds
[
'size_res_norm'
]
*
one_hot_size_targets_expand
,
2
)
bbox_preds
_dict
[
'size_res_norm'
]
*
one_hot_size_targets_expand
,
2
)
box_loss_weights_expand
=
box_loss_weights
.
unsqueeze
(
-
1
).
repeat
(
1
,
1
,
3
)
size_res_loss
=
self
.
size_res
_loss
(
size_res_loss
=
self
.
loss_
size_res
(
size_residual_norm
,
size_res_targets
,
weight
=
box_loss_weights_expand
)
# calculate semantic loss
semantic_loss
=
self
.
semantic_loss
(
bbox_preds
[
'sem_scores'
].
transpose
(
2
,
1
),
bbox_preds
_dict
[
'sem_scores'
].
transpose
(
2
,
1
),
mask_targets
,
weight
=
box_loss_weights
)
...
...
@@ -334,7 +422,7 @@ class VoteHead(BaseModule):
if
self
.
iou_loss
:
corners_pred
=
self
.
bbox_coder
.
decode_corners
(
bbox_preds
[
'center'
],
size_residual_norm
,
bbox_preds
_dict
[
'center'
],
size_residual_norm
,
one_hot_size_targets_expand
)
corners_target
=
self
.
bbox_coder
.
decode_corners
(
assigned_center_targets
,
size_res_targets
,
...
...
@@ -348,25 +436,26 @@ class VoteHead(BaseModule):
return
losses
def
get_targets
(
self
,
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
bbox_preds
=
None
):
def
get_targets
(
self
,
points
,
bbox_preds
:
dict
=
None
,
batch_gt_instances_3d
:
List
[
InstanceData
]
=
None
,
batch_pts_semantic_mask
:
List
[
torch
.
Tensor
]
=
None
,
batch_pts_instance_mask
:
List
[
torch
.
Tensor
]
=
None
,
):
"""Generate targets of vote head.
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
multiple images.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
multiple images.
Returns:
tuple[torch.Tensor]: Targets of vote head.
...
...
@@ -374,40 +463,46 @@ class VoteHead(BaseModule):
# find empty example
valid_gt_masks
=
list
()
gt_num
=
list
()
for
index
in
range
(
len
(
gt_labels_3d
)):
if
len
(
gt_labels_3d
[
index
])
==
0
:
fake_box
=
gt_bboxes_3d
[
index
].
tensor
.
new_zeros
(
1
,
gt_bboxes_3d
[
index
].
tensor
.
shape
[
-
1
])
gt_bboxes_3d
[
index
]
=
gt_bboxes_3d
[
index
].
new_box
(
fake_box
)
gt_labels_3d
[
index
]
=
gt_labels_3d
[
index
].
new_zeros
(
1
)
valid_gt_masks
.
append
(
gt_labels_3d
[
index
].
new_zeros
(
1
))
batch_gt_labels_3d
=
[
gt_instances_3d
.
labels_3d
for
gt_instances_3d
in
batch_gt_instances_3d
]
batch_gt_bboxes_3d
=
[
gt_instances_3d
.
bboxes_3d
for
gt_instances_3d
in
batch_gt_instances_3d
]
for
index
in
range
(
len
(
batch_gt_labels_3d
)):
if
len
(
batch_gt_labels_3d
[
index
])
==
0
:
fake_box
=
batch_gt_bboxes_3d
[
index
].
tensor
.
new_zeros
(
1
,
batch_gt_bboxes_3d
[
index
].
tensor
.
shape
[
-
1
])
batch_gt_bboxes_3d
[
index
]
=
batch_gt_bboxes_3d
[
index
].
new_box
(
fake_box
)
batch_gt_labels_3d
[
index
]
=
batch_gt_labels_3d
[
index
].
new_zeros
(
1
)
valid_gt_masks
.
append
(
batch_gt_labels_3d
[
index
].
new_zeros
(
1
))
gt_num
.
append
(
1
)
else
:
valid_gt_masks
.
append
(
gt_labels_3d
[
index
].
new_ones
(
gt_labels_3d
[
index
].
shape
))
gt_num
.
append
(
gt_labels_3d
[
index
].
shape
[
0
])
valid_gt_masks
.
append
(
batch_
gt_labels_3d
[
index
].
new_ones
(
batch_
gt_labels_3d
[
index
].
shape
))
gt_num
.
append
(
batch_
gt_labels_3d
[
index
].
shape
[
0
])
max_gt_num
=
max
(
gt_num
)
if
pts_semantic_mask
is
None
:
pts_semantic_mask
=
[
None
for
i
in
range
(
len
(
gt_labels_3d
))]
pts_instance_mask
=
[
None
for
i
in
range
(
len
(
gt_labels_3d
))]
aggregated_points
=
[
bbox_preds
[
'aggregated_points'
][
i
]
for
i
in
range
(
len
(
gt_labels_3d
))
for
i
in
range
(
len
(
batch_
gt_labels_3d
))
]
(
vote_targets
,
vote_target_masks
,
size_class_targets
,
size_res_targets
,
dir_class_targets
,
dir_res_targets
,
center_targets
,
assigned_center_targets
,
mask_targets
,
objectness_targets
,
objectness_masks
)
=
multi_apply
(
self
.
get_targets_single
,
points
,
gt_bboxes_3d
,
gt_label
s_3d
,
pts_semantic_mask
,
pts_instance
_mask
,
aggregated_points
)
assigned_center_targets
,
mask_targets
,
objectness_targets
,
objectness_masks
)
=
multi_apply
(
self
.
_get_targets_single
,
points
,
batch_gt_bboxe
s_3d
,
batch_gt_labels_3d
,
batch_pts_semantic
_mask
,
batch_pts_instance_mask
,
aggregated_points
)
# pad targets as original code of votenet.
for
index
in
range
(
len
(
gt_labels_3d
)):
pad_num
=
max_gt_num
-
gt_labels_3d
[
index
].
shape
[
0
]
for
index
in
range
(
len
(
batch_
gt_labels_3d
)):
pad_num
=
max_gt_num
-
batch_
gt_labels_3d
[
index
].
shape
[
0
]
center_targets
[
index
]
=
F
.
pad
(
center_targets
[
index
],
(
0
,
0
,
0
,
pad_num
))
valid_gt_masks
[
index
]
=
F
.
pad
(
valid_gt_masks
[
index
],
(
0
,
pad_num
))
...
...
@@ -437,13 +532,13 @@ class VoteHead(BaseModule):
valid_gt_masks
,
objectness_targets
,
objectness_weights
,
box_loss_weights
,
valid_gt_weights
)
def
get_targets_single
(
self
,
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
aggregated_points
=
None
):
def
_
get_targets_single
(
self
,
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
aggregated_points
=
None
):
"""Generate targets of vote head for single batch.
Args:
...
...
@@ -501,7 +596,6 @@ class VoteHead(BaseModule):
vote_targets
=
points
.
new_zeros
([
num_points
,
3
])
vote_target_masks
=
points
.
new_zeros
([
num_points
],
dtype
=
torch
.
long
)
for
i
in
torch
.
unique
(
pts_instance_mask
):
indices
=
torch
.
nonzero
(
pts_instance_mask
==
i
,
as_tuple
=
False
).
squeeze
(
-
1
)
...
...
@@ -561,47 +655,63 @@ class VoteHead(BaseModule):
dir_res_targets
,
center_targets
,
assigned_center_targets
,
mask_targets
.
long
(),
objectness_targets
,
objectness_masks
)
def
get_bboxes
(
self
,
points
,
bbox_preds
,
input_metas
,
rescale
=
Fals
e
,
use_nms
=
True
)
:
def
predict_by_feat
(
self
,
points
:
List
[
torch
.
Tensor
]
,
bbox_preds
_dict
:
dict
,
batch_input_metas
:
List
[
dict
]
,
use_nms
:
bool
=
Tru
e
,
**
kwargs
)
->
List
[
InstanceData
]
:
"""Generate bboxes from vote head predictions.
Args:
points (torch.Tensor): Input points.
bbox_preds (dict): Predictions from vote head.
input_metas (list[dict]):
Point cloud and image's meta info.
rescale (bool): Whether to rescale bboxes
.
points (
List[
torch.Tensor
]
): Input points
of multiple samples
.
bbox_preds
_dict
(dict): Predictions from vote head.
batch_
input_metas (list[dict]):
Each item
contains the meta information of each sample
.
use_nms (bool): Whether to apply NMS, skip nms postprocessing
while using vote head in rpn stage.
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData cantains 3d Bounding boxes and corresponding
scores and labels.
"""
# decode boxes
obj_scores
=
F
.
softmax
(
bbox_preds
[
'obj_scores'
],
dim
=-
1
)[...,
-
1
]
sem_scores
=
F
.
softmax
(
bbox_preds
[
'sem_scores'
],
dim
=-
1
)
bbox3d
=
self
.
bbox_coder
.
decode
(
bbox_preds
)
if
use_nms
:
batch_size
=
bbox3d
.
shape
[
0
]
results
=
list
()
for
b
in
range
(
batch_size
):
stack_points
=
torch
.
stack
(
points
)
obj_scores
=
F
.
softmax
(
bbox_preds_dict
[
'obj_scores'
],
dim
=-
1
)[...,
-
1
]
sem_scores
=
F
.
softmax
(
bbox_preds_dict
[
'sem_scores'
],
dim
=-
1
)
bbox3d
=
self
.
bbox_coder
.
decode
(
bbox_preds_dict
)
batch_size
=
bbox3d
.
shape
[
0
]
results_list
=
list
()
for
b
in
range
(
batch_size
):
temp_results
=
InstanceData
()
if
use_nms
:
bbox_selected
,
score_selected
,
labels
=
\
self
.
multiclass_nms_single
(
obj_scores
[
b
],
sem_scores
[
b
],
bbox3d
[
b
],
points
[
b
,
...,
:
3
],
input_metas
[
b
])
bbox
=
input_metas
[
b
][
'box_type_3d'
](
self
.
multiclass_nms_single
(
obj_scores
[
b
],
sem_scores
[
b
],
bbox3d
[
b
],
stack_points
[
b
,
...,
:
3
],
batch_input_metas
[
b
])
bbox
=
batch_input_metas
[
b
][
'box_type_3d'
](
bbox_selected
,
box_dim
=
bbox_selected
.
shape
[
-
1
],
with_yaw
=
self
.
bbox_coder
.
with_rot
)
temp_results
.
bboxes_3d
=
bbox
temp_results
.
scores_3d
=
score_selected
temp_results
.
labels_3d
=
labels
results_list
.
append
(
temp_results
)
else
:
bbox
=
batch_input_metas
[
b
][
'box_type_3d'
](
bbox_selected
,
box_dim
=
bbox_selected
.
shape
[
-
1
],
with_yaw
=
self
.
bbox_coder
.
with_rot
)
results
.
append
((
bbox
,
score_selected
,
labels
))
temp_results
.
bboxes_3d
=
bbox
temp_results
.
obj_scores_3d
=
obj_scores
[
b
]
temp_results
.
sem_scores_3d
=
obj_scores
[
b
]
results_list
.
append
(
temp_results
)
return
results
else
:
return
bbox3d
return
results_list
def
multiclass_nms_single
(
self
,
obj_scores
,
sem_scores
,
bbox
,
points
,
input_meta
):
...
...
mmdet3d/models/detectors/base.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
from
typing
import
List
,
Union
from
mmdet3d.core
import
Det3DDataSample
from
mmdet3d.core.utils
import
(
ForwardResults
,
InstanceList
,
OptConfigType
,
OptMultiConfig
,
OptSampleList
,
SampleList
)
...
...
@@ -24,8 +26,8 @@ class Base3DDetector(BaseDetector):
super
().
__init__
(
data_preprocessor
=
data_processor
,
init_cfg
=
init_cfg
)
def
forward
(
self
,
batch_inputs_dict
:
dict
,
batch_
data_samples
:
OptSampleList
=
None
,
inputs
:
Union
[
dict
,
List
[
dict
]]
,
data_samples
:
OptSampleList
=
None
,
mode
:
str
=
'tensor'
,
**
kwargs
)
->
ForwardResults
:
"""The unified entry for a forward process in both training and test.
...
...
@@ -43,10 +45,19 @@ class Base3DDetector(BaseDetector):
optimizer updating, which are done in the :meth:`train_step`.
Args:
batch_inputs (torch.Tensor): The input tensor with shape
(N, C, ...) in general.
batch_data_samples (list[:obj:`DetDataSample`], optional): The
annotation data of every samples. Defaults to None.
inputs (dict | list[dict]): When it is a list[dict], the
outer list indicate the test time augmentation. Each
dict contains batch inputs
which include 'points' and 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor): Image tensor has shape (B, C, H, W).
data_samples (list[:obj:`DetDataSample`],
list[list[:obj:`DetDataSample`]], optional): The
annotation data of every samples. When it is a list[list], the
outer list indicate the test time augmentation, and the
inter list indicate the batch. Otherwise, the list simply
indicate the batch. Defaults to None.
mode (str): Return what kind of value. Defaults to 'tensor'.
Returns:
...
...
@@ -57,13 +68,20 @@ class Base3DDetector(BaseDetector):
- If ``mode="loss"``, return a dict of tensor.
"""
if
mode
==
'loss'
:
return
self
.
loss
(
batch_inputs_dict
,
batch_
data_samples
,
**
kwargs
)
return
self
.
loss
(
inputs
,
data_samples
,
**
kwargs
)
elif
mode
==
'predict'
:
return
self
.
predict
(
batch_inputs_dict
,
batch_data_samples
,
**
kwargs
)
if
isinstance
(
data_samples
[
0
],
list
):
# aug test
assert
len
(
data_samples
[
0
])
==
1
,
'Only support '
\
'batch_size 1 '
\
'in mmdet3d when '
\
'do the test'
\
'time augmentation.'
return
self
.
aug_test
(
inputs
,
data_samples
,
**
kwargs
)
else
:
return
self
.
predict
(
inputs
,
data_samples
,
**
kwargs
)
elif
mode
==
'tensor'
:
return
self
.
_forward
(
batch_inputs_dict
,
batch_data_samples
,
**
kwargs
)
return
self
.
_forward
(
inputs
,
data_samples
,
**
kwargs
)
else
:
raise
RuntimeError
(
f
'Invalid mode "
{
mode
}
". '
'Only supports loss, predict and tensor mode'
)
...
...
mmdet3d/models/detectors/votenet.py
View file @
c9ad3605
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
typing
import
Dict
,
List
,
Optional
,
Union
from
mmdet3d.core
import
bbox3d2result
,
merge_aug_bboxes_3d
from
mmengine
import
InstanceData
from
torch
import
Tensor
from
mmdet3d.core
import
Det3DDataSample
,
merge_aug_bboxes_3d
from
mmdet3d.registry
import
MODELS
from
.single_stage
import
SingleStage3DDetector
@MODELS.register_module()
class VoteNet(SingleStage3DDetector):
    r"""`VoteNet <https://arxiv.org/pdf/1904.09664.pdf>`_ for 3D detection.

    Args:
        backbone (dict): Config dict of detector's backbone.
        bbox_head (dict, optional): Config dict of box head. Defaults to None.
        train_cfg (dict, optional): Config dict of training hyper-parameters.
            Defaults to None.
        test_cfg (dict, optional): Config dict of test hyper-parameters.
            Defaults to None.
        init_cfg (dict, optional): The config to control the initialization.
            Defaults to None.
        data_preprocessor (dict or ConfigDict, optional): The pre-process
            config of :class:`BaseDataPreprocessor`. It usually includes,
            ``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
            Defaults to None.
    """

    def __init__(self,
                 backbone: dict,
                 bbox_head: Optional[dict] = None,
                 train_cfg: Optional[dict] = None,
                 test_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None,
                 data_preprocessor: Optional[dict] = None,
                 **kwargs) -> None:
        super(VoteNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg,
            data_preprocessor=data_preprocessor,
            **kwargs)

    def loss(self, batch_inputs_dict: Dict[str, Union[List, Tensor]],
             batch_data_samples: List[Det3DDataSample],
             **kwargs) -> Dict[str, Tensor]:
        """Calculate losses from a batch of inputs and data samples.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                the 'points' key.

                - points (list[torch.Tensor]): Point cloud of each sample.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instances_3d`.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        # NOTE(review): the original annotated the return as
        # List[Det3DDataSample]; the method actually returns the loss dict
        # produced by the head, so the annotation is corrected above.
        feat_dict = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        losses = self.bbox_head.loss(points, feat_dict, batch_data_samples,
                                     **kwargs)
        return losses

    def predict(self, batch_inputs_dict: Dict[str, Optional[Tensor]],
                batch_data_samples: List[Det3DDataSample],
                **kwargs) -> List[Det3DDataSample]:
        """Predict results from a batch of inputs and data samples with
        post-processing.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                the 'points' key.

                - points (list[torch.Tensor]): Point cloud of each sample.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instances_3d`.

        Returns:
            list[:obj:`Det3DDataSample`]: Detection results of the
            input samples. Each Det3DDataSample usually contains
            'pred_instances_3d'. And the ``pred_instances_3d`` usually
            contains following keys.

            - scores_3d (Tensor): Classification scores, has a shape
              (num_instances, )
            - labels_3d (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
              contains a tensor with shape (num_instances, 7).
        """
        feats_dict = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        results_list = self.bbox_head.predict(points, feats_dict,
                                              batch_data_samples, **kwargs)
        # Pack the per-sample InstanceData predictions back into the
        # corresponding Det3DDataSample containers.
        data_3d_samples = self.convert_to_datasample(results_list)
        return data_3d_samples

    def aug_test(self, aug_inputs_list: List[dict],
                 aug_data_samples: List[List[dict]], **kwargs):
        """Test with augmentation.

        Batch size is always 1 when doing the test-time augmentation.

        Args:
            aug_inputs_list (List[dict]): The list indicates the same data
                under different augmentations.
            aug_data_samples (List[List[dict]]): The outer list indicates
                different augmentations, and the inner list indicates the
                batch size.

        Returns:
            list[:obj:`Det3DDataSample`]: Detection results of the merged
            augmented samples.
        """
        num_augs = len(aug_inputs_list)
        if num_augs == 1:
            # No real augmentation; fall back to the plain predict path.
            return self.predict(aug_inputs_list[0], aug_data_samples[0])

        batch_size = len(aug_data_samples[0])
        assert batch_size == 1, ('Only support batch_size 1 in mmdet3d '
                                 'when doing the test-time augmentation.')
        multi_aug_results = []
        for aug_id in range(num_augs):
            batch_inputs_dict = aug_inputs_list[aug_id]
            batch_data_samples = aug_data_samples[aug_id]
            feats_dict = self.extract_feat(batch_inputs_dict)
            points = batch_inputs_dict['points']
            results_list = self.bbox_head.predict(points, feats_dict,
                                                  batch_data_samples,
                                                  **kwargs)
            # Batch size is asserted to be 1, so keep the single result.
            multi_aug_results.append(results_list[0])

        # BUG FIX: the original indexed ``aug_data_samples[aug_id]`` here,
        # reusing the stale loop variable from the loop above, so every
        # entry of ``aug_input_metas_list`` came from the *last*
        # augmentation. Use the current index instead.
        aug_input_metas_list = []
        for aug_index in range(num_augs):
            metainfo = aug_data_samples[aug_index][0].metainfo
            aug_input_metas_list.append(metainfo)

        aug_results_list = [item.to_dict() for item in multi_aug_results]
        # After merging, bboxes will be rescaled to the original image size.
        merged_results_dict = merge_aug_bboxes_3d(aug_results_list,
                                                  aug_input_metas_list,
                                                  self.bbox_head.test_cfg)

        merged_results = InstanceData(**merged_results_dict)
        data_3d_samples = self.convert_to_datasample([merged_results])
        return data_3d_samples
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment