Commit f63a62b8 (mmdetection3d), authored Jul 19, 2022 by zhangshilong, committed by ChaimZhu on Jul 20, 2022
[Refactor] ImVoteNet
parent edb6b369
Showing 20 changed files with 798 additions and 722 deletions
configs/_base_/datasets/sunrgbd-3d-10class.py  +35 -40
configs/_base_/models/imvotenet_image.py  +15 -4
configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py  +55 -40
configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py  +34 -74
mmdet3d/datasets/det3d_dataset.py  +13 -10
mmdet3d/datasets/kitti_dataset.py  +2 -1
mmdet3d/datasets/pipelines/formating.py  +43 -6
mmdet3d/datasets/pipelines/loading.py  +50 -28
mmdet3d/datasets/sunrgbd_dataset.py  +53 -7
mmdet3d/metrics/indoor_metric.py  +92 -1
mmdet3d/models/dense_heads/vote_head.py  +8 -7
mmdet3d/models/detectors/base.py  +11 -3
mmdet3d/models/detectors/imvotenet.py  +198 -480
mmdet3d/models/fusion_layers/coord_transform.py  +2 -2
mmdet3d/models/fusion_layers/vote_fusion.py  +0 -2
tests/data/sunrgbd/sunrgbd_infos.pkl  +0 -0
tests/test_models/test_detectors/test_h3d.py  +51 -0
tests/test_models/test_detectors/test_imvotenet.py  +88 -0
tests/utils/model_utils.py  +26 -2
tools/data_converter/update_infos_to_v2.py  +22 -15
configs/_base_/datasets/sunrgbd-3d-10class.py

@@ -2,6 +2,9 @@ dataset_type = 'SUNRGBDDataset'
 data_root = 'data/sunrgbd/'
 class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
                'night_stand', 'bookshelf', 'bathtub')
+metainfo = dict(CLASSES=class_names)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -21,8 +24,9 @@ train_pipeline = [
         scale_ratio_range=[0.85, 1.15],
         shift_height=True),
     dict(type='PointSample', num_points=20000),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(
@@ -47,61 +51,52 @@ test_pipeline = [
             sync_2d=False,
             flip_ratio_bev_horizontal=0.5,
         ),
-        dict(type='PointSample', num_points=20000),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+        dict(type='PointSample', num_points=20000)
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=16,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=16,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'sunrgbd_infos_train.pkl',
+            ann_file='sunrgbd_infos_train.pkl',
             pipeline=train_pipeline,
-            classes=class_names,
             filter_empty_gt=False,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'sunrgbd_infos_val.pkl',
+        ann_file='sunrgbd_infos_val.pkl',
         pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
         test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'sunrgbd_infos_val.pkl',
+        ann_file='sunrgbd_infos_val.pkl',
         pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
         test_mode=True,
         box_type_3d='Depth'))
-evaluation = dict(pipeline=eval_pipeline)
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
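Editor's note: the change above follows the general 1.x migration pattern, where the monolithic `data` dict splits into per-split dataloaders and the `evaluation` hook becomes explicit evaluators. A minimal sketch (not part of this commit) of how such a config is consumed, assuming mmengine is installed:

# Hypothetical snippet: load the refactored config and inspect the new keys.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/sunrgbd-3d-10class.py')
print(cfg.train_dataloader.batch_size)      # 16, formerly data.samples_per_gpu
print(cfg.val_dataloader.dataset.ann_file)  # 'sunrgbd_infos_val.pkl', now relative to data_root
print(cfg.val_evaluator)                    # dict(type='IndoorMetric')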
configs/_base_/models/imvotenet_image.py

 model = dict(
     type='ImVoteNet',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        # use caffe img_norm
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     img_backbone=dict(
-        type='ResNet',
+        type='mmdet.ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
@@ -10,11 +17,12 @@ model = dict(
         norm_eval=True,
         style='caffe'),
     img_neck=dict(
-        type='FPN',
+        type='mmdet.FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         num_outs=5),
     img_rpn_head=dict(
+        _scope_='mmdet',
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
@@ -31,6 +39,7 @@ model = dict(
             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
     img_roi_head=dict(
+        _scope_='mmdet',
         type='StandardRoIHead',
         bbox_roi_extractor=dict(
             type='SingleRoIExtractor',
@@ -56,6 +65,7 @@ model = dict(
     train_cfg=dict(
         img_rpn=dict(
             assigner=dict(
+                _scope_='mmdet',
                 type='MaxIoUAssigner',
                 pos_iou_thr=0.7,
                 neg_iou_thr=0.3,
@@ -63,7 +73,7 @@ model = dict(
                 match_low_quality=True,
                 ignore_iof_thr=-1),
             sampler=dict(
-                type='RandomSampler',
+                type='mmdet.RandomSampler',
                 num=256,
                 pos_fraction=0.5,
                 neg_pos_ub=-1,
@@ -80,6 +90,7 @@ model = dict(
             min_bbox_size=0),
         img_rcnn=dict(
             assigner=dict(
+                _scope_='mmdet',
                 type='MaxIoUAssigner',
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.5,
@@ -87,7 +98,7 @@ model = dict(
                 match_low_quality=False,
                 ignore_iof_thr=-1),
             sampler=dict(
-                type='RandomSampler',
+                type='mmdet.RandomSampler',
                 num=512,
                 pos_fraction=0.25,
                 neg_pos_ub=-1,
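Editor's note: the `mmdet.` prefixes and `_scope_='mmdet'` entries above route registry lookups to mmdetection's registries instead of mmdet3d's own. A rough illustration of the idea, assuming mmengine's scoped `Registry` behaves as sketched here (the registry names are illustrative, not this commit's code):

# Illustrative only: how a scope-prefixed type string selects a registry.
from mmengine.registry import Registry

ROOT = Registry('model')
MMDET = Registry('model', parent=ROOT, scope='mmdet')
MMDET3D = Registry('model', parent=ROOT, scope='mmdet3d')

@MMDET.register_module()
class ResNet:
    def __init__(self, depth):
        self.depth = depth

# From mmdet3d's registry, 'mmdet.ResNet' walks up to the root registry
# and then down into the 'mmdet' scope before resolving the class name.
model = MMDET3D.build(dict(type='mmdet.ResNet', depth=50))
print(type(model).__name__, model.depth)  # ResNet 50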
configs/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py

@@ -3,56 +3,71 @@ _base_ = [
     '../_base_/models/imvotenet_image.py'
 ]
-# use caffe img_norm
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)

 train_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(
-        type='Resize',
-        img_scale=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),
-                   (1333, 576), (1333, 600)],
-        multiscale_mode='value',
-        keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox=True,
+        with_label=True,
+        with_bbox_3d=False,
+        with_label_3d=False),
+    dict(
+        type='RandomChoiceResize',
+        scales=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),
+                (1333, 576), (1333, 600)],
+        keep_ratio=True),
+    dict(type='RandomFlip', prob=0.5),
+    dict(
+        type='Pack3DDetInputs',
+        keys=['img', 'gt_bboxes', 'gt_bboxes_labels']),
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 600),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
+    # online evaluation
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox=True,
+        with_label=True,
+        with_bbox_3d=False,
+        with_label_3d=False),
+    dict(type='Resize', scale=(1333, 600), keep_ratio=True),
+    dict(
+        type='Pack3DDetInputs',
+        keys=(['img']),
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(times=1, dataset=dict(pipeline=train_pipeline)),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=1,
+        dataset=dict(pipeline=train_pipeline)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=8, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
 # learning rate
-optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=0.001,
-    step=[6])
-runner = dict(type='EpochBasedRunner', max_epochs=8)
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=8,
+        by_epoch=True,
+        milestones=[6],
+        gamma=0.1)
+]
+val_evaluator = dict(type='Indoor2DMetric')
 # optimizer
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
 load_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'  # noqa
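Editor's note: the `param_scheduler` pair above replaces the old `lr_config`: a linear warmup over the first 500 iterations followed by a step decay at epoch 6. A plain-Python sketch of the multiplier it produces (illustrative, not OpenMMLab code):

def lr_at(iter_in_run: int, epoch: int, base_lr: float = 0.01) -> float:
    """Approximate learning rate under the two schedulers above."""
    factor = 1.0
    if iter_in_run < 500:
        # LinearLR: ramp the factor from 0.001 to 1.0 over the first 500 iters
        factor *= 0.001 + (1.0 - 0.001) * iter_in_run / 500
    if epoch >= 6:
        # MultiStepLR: multiply by gamma=0.1 once epoch 6 is reached
        factor *= 0.1
    return base_lr * factor

print(lr_at(0, 0))      # 1e-05, start of warmup
print(lr_at(500, 0))    # 0.01, warmup finished
print(lr_at(10000, 7))  # 0.001, after the step at epoch 6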
configs/imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py

@@ -7,10 +7,6 @@ _base_ = [
 class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
                'night_stand', 'bookshelf', 'bathtub')
-# use caffe img_norm
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)

 model = dict(
     pts_backbone=dict(
         type='PointNet2SASSG',
@@ -48,10 +44,8 @@ model = dict(
                       [0.76584, 1.398258, 0.472728]]),
         pred_layer_cfg=dict(
             in_channels=128, shared_conv_channels=(128, 128), bias=True),
-        conv_cfg=dict(type='Conv1d'),
-        norm_cfg=dict(type='BN1d'),
         objectness_loss=dict(
-            type='CrossEntropyLoss',
+            type='mmdet.CrossEntropyLoss',
             class_weight=[0.2, 0.8],
             reduction='sum',
             loss_weight=5.0),
@@ -62,15 +56,23 @@ model = dict(
             loss_src_weight=10.0,
             loss_dst_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum',
+            loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum',
+            loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
+            type='mmdet.SmoothL1Loss', reduction='sum',
+            loss_weight=10.0 / 3.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum',
+            loss_weight=1.0)),
     joint=dict(
         vote_module_cfg=dict(
             in_channels=512,
@@ -154,11 +156,11 @@ model = dict(
     # model training and testing settings
     train_cfg=dict(
         pts=dict(
-            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')),
+            pos_distance_thr=0.3, neg_distance_thr=0.6,
+            sample_mode='vote')),
     test_cfg=dict(
         img_rcnn=dict(score_thr=0.1),
         pts=dict(
-            sample_mod='seed',
+            sample_mode='seed',
             nms_thr=0.25,
             score_thr=0.05,
             per_class_proposal=True)))
@@ -171,12 +173,13 @@ train_pipeline = [
         load_dim=6,
         use_dim=[0, 1, 2]),
     dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations3D'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize', img_scale=(1333, 600), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.0),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox=True,
+        with_label=True,
+        with_bbox_3d=True,
+        with_label_3d=True),
+    dict(type='Resize', scale=(1333, 600), keep_ratio=True),
     dict(
         type='RandomFlip3D',
         sync_2d=False,
@@ -188,15 +191,13 @@ train_pipeline = [
         scale_ratio_range=[0.85, 1.15],
         shift_height=True),
     dict(type='PointSample', num_points=20000),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(
-        type='Collect3D',
-        keys=[
-            'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
-            'gt_labels_3d'
-        ])
+    dict(
+        type='Pack3DDetInputs',
+        keys=([
+            'img', 'gt_bboxes', 'gt_bboxes_labels', 'points',
+            'gt_bboxes_3d', 'gt_labels_3d'
+        ]))
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(
@@ -205,56 +206,15 @@ test_pipeline = [
         shift_height=True,
         load_dim=6,
         use_dim=[0, 1, 2]),
-    dict(
-        type='MultiScaleFlipAug3D',
-        img_scale=(1333, 600),
-        pts_scale_ratio=1,
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip', flip_ratio=0.0),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='GlobalRotScaleTrans',
-                rot_range=[0, 0],
-                scale_ratio_range=[1., 1.],
-                translation_std=[0, 0, 0]),
-            dict(
-                type='RandomFlip3D',
-                sync_2d=False,
-                flip_ratio_bev_horizontal=0.5,
-            ),
-            dict(type='PointSample', num_points=20000),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img', 'points'])
-        ]),
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['img', 'points'])
+    dict(type='Resize', scale=(1333, 600), keep_ratio=True),
+    dict(type='PointSample', num_points=20000),
+    dict(type='Pack3DDetInputs', keys=['img', 'points'])
 ]
-data = dict(
-    train=dict(dataset=dict(pipeline=train_pipeline)),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
-evaluation = dict(pipeline=eval_pipeline)
+train_dataloader = dict(
+    dataset=dict(dataset=dict(pipeline=train_pipeline)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # may also use your own pre-trained image branch
 load_from = 'https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth'  # noqa
mmdet3d/datasets/det3d_dataset.py

@@ -178,10 +178,11 @@ class Det3DDataset(BaseDataset):
             dict | None: Processed `ann_info`
         """
         # add s or gt prefix for most keys after concat
+        # we only process 3d annotations here, the corresponding
+        # 2d annotation process is in the `LoadAnnotations3D`
+        # in `pipelines`
         name_mapping = {
             'bbox_label': 'gt_labels',
             'bbox_label_3d': 'gt_labels_3d',
             'bbox': 'gt_bboxes',
             'bbox_3d': 'gt_bboxes_3d',
             'depth': 'depths',
             'center_2d': 'centers_2d',
@@ -196,16 +197,18 @@ class Det3DDataset(BaseDataset):
         keys = list(instances[0].keys())
         ann_info = dict()
         for ann_name in keys:
-            temp_anns = [item[ann_name] for item in instances]
-            # map the original dataset label to training label
-            if 'label' in ann_name:
-                temp_anns = [
-                    self.label_mapping[item] for item in temp_anns
-                ]
-            temp_anns = np.array(temp_anns)
             if ann_name in name_mapping:
-                ann_name = name_mapping[ann_name]
-            ann_info[ann_name] = temp_anns
+                temp_anns = [item[ann_name] for item in instances]
+                # map the original dataset label to training label
+                if 'label' in ann_name:
+                    temp_anns = [
+                        self.label_mapping[item] for item in temp_anns
+                    ]
+                temp_anns = np.array(temp_anns)
+                ann_name = name_mapping[ann_name]
+                ann_info[ann_name] = temp_anns
         ann_info['instances'] = info['instances']
         return ann_info

     def parse_data_info(self, info: dict) -> dict:
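Editor's note: the reworked loop only materializes keys that appear in `name_mapping`, so stray per-instance fields no longer leak into `ann_info`. A standalone sketch of the behavior (toy data, not the dataset class itself):

import numpy as np

name_mapping = {'bbox_label_3d': 'gt_labels_3d', 'bbox_3d': 'gt_bboxes_3d'}
instances = [
    {'bbox_3d': [0, 0, 0, 1, 1, 1], 'bbox_label_3d': 3, 'ignored': 'x'},
    {'bbox_3d': [1, 1, 1, 2, 2, 2], 'bbox_label_3d': 7, 'ignored': 'y'},
]

ann_info = {}
for ann_name in instances[0].keys():
    if ann_name in name_mapping:  # keys outside the mapping are skipped
        temp = np.array([item[ann_name] for item in instances])
        ann_info[name_mapping[ann_name]] = temp

print(sorted(ann_info))  # ['gt_bboxes_3d', 'gt_labels_3d']; 'ignored' dropped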
mmdet3d/datasets/kitti_dataset.py

@@ -87,7 +87,8 @@ class KittiDataset(Det3DDataset):
         if 'plane' in info:
             # convert ground plane to velodyne coordinates
             plane = np.array(info['plane'])
-            lidar2cam = np.array(info['images']['CAM2']['lidar2cam'])
+            lidar2cam = np.array(
+                info['images']['CAM2']['lidar2cam'], dtype=np.float32)
             reverse = np.linalg.inv(lidar2cam)

             (plane_norm_cam, plane_off_cam) = (plane[:3],
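Editor's note: pinning `dtype=np.float32` matters because values deserialized from the info `.pkl` may arrive as Python lists, which numpy promotes to float64 by default. A small demonstration (toy matrix, not KITTI data):

import numpy as np

lidar2cam = [[1.0, 0.0, 0.0, 0.0],
             [0.0, 1.0, 0.0, 0.0],
             [0.0, 0.0, 1.0, 0.0],
             [0.0, 0.0, 0.0, 1.0]]

print(np.array(lidar2cam).dtype)                    # float64 by default
print(np.array(lidar2cam, dtype=np.float32).dtype)  # float32, matching the
                                                    # rest of the pipeline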
mmdet3d/datasets/pipelines/formating.py

 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Union
+from typing import List, Sequence, Union

+import mmcv
 import numpy as np
+import torch
 from mmcv import BaseTransform
-from mmcv.transforms import to_tensor
 from mmengine import InstanceData
+from numpy import dtype

 from mmdet3d.core import Det3DDataSample, PointData
 from mmdet3d.core.bbox import BaseInstance3DBoxes
@@ -12,6 +14,38 @@ from mmdet3d.core.points import BasePoints
 from mmdet3d.registry import TRANSFORMS


+def to_tensor(
+    data: Union[torch.Tensor, np.ndarray, Sequence, int, float]
+) -> torch.Tensor:
+    """Convert objects of various python types to :obj:`torch.Tensor`.
+
+    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
+    :class:`Sequence`, :class:`int` and :class:`float`.
+
+    Args:
+        data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data
+            to be converted.
+
+    Returns:
+        torch.Tensor: the converted data.
+    """
+    if isinstance(data, torch.Tensor):
+        return data
+    elif isinstance(data, np.ndarray):
+        if data.dtype is dtype('float64'):
+            data = data.astype(np.float32)
+        return torch.from_numpy(data)
+    elif isinstance(data, Sequence) and not mmcv.is_str(data):
+        return torch.tensor(data)
+    elif isinstance(data, int):
+        return torch.LongTensor([data])
+    elif isinstance(data, float):
+        return torch.FloatTensor([data])
+    else:
+        raise TypeError(f'type {type(data)} cannot be converted to tensor.')
+
+
 @TRANSFORMS.register_module()
 class Pack3DDetInputs(BaseTransform):
     INPUTS_KEYS = ['points', 'img']
@@ -20,7 +54,7 @@ class Pack3DDetInputs(BaseTransform):
     ]
     INSTANCEDATA_2D_KEYS = [
         'gt_bboxes',
-        'gt_labels',
+        'gt_bboxes_labels',
     ]
     SEG_KEYS = [
@@ -121,8 +155,8 @@ class Pack3DDetInputs(BaseTransform):
         for key in [
                 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
-                'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
-                'pts_semantic_mask', 'centers_2d', 'depths'
+                'gt_bboxes_labels', 'attr_labels', 'pts_instance_mask',
+                'pts_semantic_mask', 'centers_2d', 'depths', 'gt_labels_3d'
         ]:
             if key not in results:
                 continue
@@ -159,7 +193,10 @@ class Pack3DDetInputs(BaseTransform):
             elif key in self.INSTANCEDATA_3D_KEYS:
                 gt_instances_3d[self._remove_prefix(key)] = results[key]
             elif key in self.INSTANCEDATA_2D_KEYS:
-                gt_instances[self._remove_prefix(key)] = results[key]
+                if key == 'gt_bboxes_labels':
+                    gt_instances['labels'] = results[key]
+                else:
+                    gt_instances[self._remove_prefix(key)] = results[key]
             elif key in self.SEG_KEYS:
                 gt_pts_seg[self._remove_prefix(key)] = results[key]
             else:
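Editor's note: a quick sanity check of the vendored `to_tensor` helper, assuming the function defined in the hunk above is in scope and torch/numpy are installed:

import numpy as np
import torch

# float64 arrays are silently downcast to float32 before conversion
arr = np.zeros((2, 3), dtype=np.float64)
t = to_tensor(arr)           # the helper defined above
print(t.dtype)               # torch.float32

print(to_tensor(5).dtype)    # torch.int64 (LongTensor)
print(to_tensor(0.5).dtype)  # torch.float32 (FloatTensor)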
mmdet3d/datasets/pipelines/loading.py

@@ -632,34 +632,6 @@ class LoadAnnotations3D(LoadAnnotations):
         self.with_seg_3d = with_seg_3d
         self.seg_3d_dtype = seg_3d_dtype

-    def _load_bboxes(self, results: dict) -> None:
-        """Private function to load bounding box annotations.
-
-        Rewrite `_load_bboxes` since mmdet3d uses `parse_anno_info` in
-        datasets.
-
-        Args:
-            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
-
-        Returns:
-            dict: The dict contains loaded bounding box annotations.
-        """
-        results['gt_bboxes'] = results['ann_info']['gt_bboxes']
-
-    def _load_labels(self, results: dict) -> None:
-        """Private function to load label annotations.
-
-        Rewrite `_load_labels` since mmdet3d uses `parse_anno_info` in
-        datasets.
-
-        Args:
-            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
-
-        Returns:
-            dict: The dict contains loaded label annotations.
-        """
-        results['gt_labels'] = results['ann_info']['gt_labels']
-
     def _load_bboxes_3d(self, results: dict) -> dict:
         """Private function to move the 3D bounding box annotation from
         `ann_info` field to the root of `results`.
@@ -769,6 +741,56 @@ class LoadAnnotations3D(LoadAnnotations):
             results['eval_ann_info']['pts_semantic_mask'] = pts_semantic_mask
         return results

+    def _load_bboxes(self, results: dict) -> None:
+        """Private function to load bounding box annotations.
+
+        The only difference is that it removes the processing for
+        `ignore_flag`.
+
+        Args:
+            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+
+        Returns:
+            dict: The dict contains loaded bounding box annotations.
+        """
+        gt_bboxes = []
+        for instance in results['instances']:
+            gt_bboxes.append(instance['bbox'])
+        if len(gt_bboxes) == 0:
+            results['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
+        else:
+            results['gt_bboxes'] = np.array(
+                gt_bboxes, dtype=np.float32).reshape((-1, 4))
+        if self.denorm_bbox:
+            bbox_num = results['gt_bboxes'].shape[0]
+            if bbox_num != 0:
+                h, w = results['img_shape']
+                results['gt_bboxes'][:, 0::2] *= w
+                results['gt_bboxes'][:, 1::2] *= h
+        if 'eval_ann_info' in results:
+            results['eval_ann_info']['gt_bboxes'] = results['gt_bboxes']
+
+    def _load_labels(self, results: dict) -> None:
+        """Private function to load label annotations.
+
+        Args:
+            results (dict): Result dict from :obj:``mmcv.BaseDataset``.
+
+        Returns:
+            dict: The dict contains loaded label annotations.
+        """
+        gt_bboxes_labels = []
+        for instance in results['instances']:
+            gt_bboxes_labels.append(instance['bbox_label'])
+        if len(gt_bboxes_labels) == 0:
+            results['gt_bboxes_labels'] = np.zeros((0, ), dtype=np.int64)
+        else:
+            results['gt_bboxes_labels'] = np.array(
+                gt_bboxes_labels, dtype=np.int64)
+        if 'eval_ann_info' in results:
+            results['eval_ann_info']['gt_bboxes_labels'] = \
+                results['gt_bboxes_labels']
+
     def transform(self, results: dict) -> dict:
         """Function to load multiple types annotations.
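Editor's note: the new loaders read directly from `results['instances']` instead of a pre-parsed `ann_info`. A toy run of the same logic (a standalone re-implementation for illustration, not the transform class):

import numpy as np

results = {'instances': [{'bbox': [10, 20, 50, 60], 'bbox_label': 2},
                         {'bbox': [0, 0, 5, 5], 'bbox_label': 0}]}

gt_bboxes = np.array([inst['bbox'] for inst in results['instances']],
                     dtype=np.float32).reshape((-1, 4))
gt_labels = np.array([inst['bbox_label'] for inst in results['instances']],
                     dtype=np.int64)
print(gt_bboxes.shape, gt_labels)  # (2, 4) [2 0]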
mmdet3d/datasets/sunrgbd_dataset.py

 # Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
 from typing import Callable, List, Optional, Union

 import numpy as np
@@ -22,13 +24,15 @@ class SUNRGBDDataset(Det3DDataset):
         ann_file (str): Path of annotation file.
         metainfo (dict, optional): Meta information for dataset, such as class
             information. Defaults to None.
-        data_prefix (dict, optional): Prefix for data. Defaults to
+        data_prefix (dict): Prefix for data. Defaults to
             `dict(pts='points', img='sunrgbd_trainval')`.
         pipeline (list[dict], optional): Pipeline used for data processing.
             Defaults to None.
         modality (dict, optional): Modality to specify the sensor data used
             as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
-        box_type_3d (str, optional): Type of 3D box of this dataset.
+        default_cam_key (str): The default camera name adopted.
+            Defaults to 'CAM0'.
+        box_type_3d (str): Type of 3D box of this dataset.
             Based on the `box_type_3d`, the dataset will encapsulate the box
             to its original format then converted them to `box_type_3d`.
             Defaults to 'Depth' in this dataset. Available options includes
@@ -36,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
             - 'LiDAR': Box in LiDAR coordinates.
             - 'Depth': Box in depth coordinates, usually for indoor dataset.
             - 'Camera': Box in camera coordinates.
-        filter_empty_gt (bool, optional): Whether to filter empty GT.
+        filter_empty_gt (bool): Whether to filter empty GT.
             Defaults to True.
-        test_mode (bool, optional): Whether the dataset is in test mode.
+        test_mode (bool): Whether the dataset is in test mode.
             Defaults to False.
     """
     METAINFO = {
@@ -51,8 +55,9 @@ class SUNRGBDDataset(Det3DDataset):
                  ann_file: str,
                  metainfo: Optional[dict] = None,
                  data_prefix: dict = dict(
-                     pts='points', img='sunrgbd_trainval'),
+                     pts='points', img='sunrgbd_trainval/image'),
                  pipeline: List[Union[dict, Callable]] = [],
+                 default_cam_key: str = 'CAM0',
                  modality=dict(use_camera=True, use_lidar=True),
                  box_type_3d: str = 'Depth',
                  filter_empty_gt: bool = True,
@@ -64,6 +69,7 @@ class SUNRGBDDataset(Det3DDataset):
             metainfo=metainfo,
             data_prefix=data_prefix,
             pipeline=pipeline,
+            default_cam_key=default_cam_key,
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
@@ -73,6 +79,47 @@ class SUNRGBDDataset(Det3DDataset):
             'use_lidar' in self.modality
         assert self.modality['use_camera'] or self.modality['use_lidar']

+    def parse_data_info(self, info: dict) -> dict:
+        """Process the raw data info.
+
+        Convert all relative paths of needed modality data files to
+        absolute paths, and process the `instances` field to `ann_info`
+        in the training stage.
+
+        Args:
+            info (dict): Raw info dict.
+
+        Returns:
+            dict: Has `ann_info` in the training stage, with all paths
+            converted to absolute paths.
+        """
+        if self.modality['use_lidar']:
+            info['lidar_points']['lidar_path'] = \
+                osp.join(
+                    self.data_prefix.get('pts', ''),
+                    info['lidar_points']['lidar_path'])
+
+        if self.modality['use_camera']:
+            for cam_id, img_info in info['images'].items():
+                if 'img_path' in img_info:
+                    img_info['img_path'] = osp.join(
+                        self.data_prefix.get('img', ''),
+                        img_info['img_path'])
+            if self.default_cam_key is not None:
+                info['img_path'] = \
+                    info['images'][self.default_cam_key]['img_path']
+                info['depth2img'] = np.array(
+                    info['images'][self.default_cam_key]['depth2img'],
+                    dtype=np.float32)
+
+        if not self.test_mode:
+            # used in training
+            info['ann_info'] = self.parse_ann_info(info)
+        if self.test_mode and self.load_eval_anns:
+            info['eval_ann_info'] = self.parse_ann_info(info)
+
+        return info
+
     def parse_ann_info(self, info: dict) -> dict:
         """Process the `instances` in data info to `ann_info`
@@ -83,12 +130,11 @@ class SUNRGBDDataset(Det3DDataset):
             dict: Processed `ann_info`
         """
         ann_info = super().parse_ann_info(info)
-        # empty gt
+        # process data without any annotations
         if ann_info is None:
             ann_info = dict()
             ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
             ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
         # to target box structure
         ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
             ann_info['gt_bboxes_3d'],
mmdet3d/metrics/indoor_metric.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+from typing import Dict, List, Optional, Sequence
+
+import numpy as np
 from mmengine.evaluator import BaseMetric
 from mmengine.logging import MMLogger

 from mmdet3d.core import get_box_type, indoor_eval
 from mmdet3d.registry import METRICS
+from mmdet.core import eval_map


 @METRICS.register_module()
 class IndoorMetric(BaseMetric):
-    """Kitti evaluation metric.
+    """Indoor scene evaluation metric.

     Args:
         iou_thr (list[float]): List of iou threshold when calculate the
@@ -90,3 +93,91 @@ class IndoorMetric(BaseMetric):
             box_mode_3d=box_mode_3d)
         return ret_dict

+
+@METRICS.register_module()
+class Indoor2DMetric(BaseMetric):
+    """Indoor 2D predictions evaluation metric.
+
+    Args:
+        iou_thr (list[float]): List of iou threshold when calculate the
+            metric. Defaults to [0.5].
+        collect_device (str, optional): Device name used for collecting
+            results from different ranks during distributed training.
+            Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
+        prefix (str): The prefix that will be added in the metric
+            names to disambiguate homonymous metrics of different evaluators.
+            If prefix is not provided in the argument, self.default_prefix
+            will be used instead. Defaults to None.
+    """
+
+    def __init__(self,
+                 iou_thr: List[float] = [0.5],
+                 collect_device: str = 'cpu',
+                 prefix: Optional[str] = None,
+                 **kwargs):
+        super(Indoor2DMetric, self).__init__(
+            prefix=prefix, collect_device=collect_device)
+        self.iou_thr = iou_thr
+
+    def process(self, data_batch: Sequence[dict],
+                predictions: Sequence[dict]) -> None:
+        """Process one batch of data samples and predictions.
+
+        The processed results should be stored in ``self.results``, which
+        will be used to compute the metrics when all batches have been
+        processed.
+
+        Args:
+            data_batch (Sequence[dict]): A batch of data from the dataloader.
+            predictions (Sequence[dict]): A batch of outputs from the model.
+        """
+        batch_eval_anns = [
+            item['data_sample']['eval_ann_info'] for item in data_batch
+        ]
+        for eval_ann, pred_dict in zip(batch_eval_anns, predictions):
+            pred = pred_dict['pred_instances']
+            ann = dict(
+                labels=eval_ann['gt_bboxes_labels'],
+                bboxes=eval_ann['gt_bboxes'])
+
+            pred_bboxes = pred['bboxes'].cpu().numpy()
+            pred_scores = pred['scores'].cpu().numpy()
+            pred_labels = pred['labels'].cpu().numpy()
+
+            dets = []
+            for label in range(len(self.dataset_meta['CLASSES'])):
+                index = np.where(pred_labels == label)[0]
+                pred_bbox_scores = np.hstack(
+                    [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
+                dets.append(pred_bbox_scores)
+
+            self.results.append((ann, dets))
+
+    def compute_metrics(self, results: list) -> Dict[str, float]:
+        """Compute the metrics from processed results.
+
+        Args:
+            results (list): The processed results of each batch.
+
+        Returns:
+            Dict[str, float]: The computed metrics. The keys are the names
+            of the metrics, and the values are corresponding results.
+        """
+        logger: MMLogger = MMLogger.get_current_instance()
+        annotations, preds = zip(*results)
+        eval_results = OrderedDict()
+        iou_thr_2d = (self.iou_thr, ) if isinstance(self.iou_thr,
+                                                    float) else self.iou_thr
+        for iou_thr_2d_single in iou_thr_2d:
+            mean_ap, _ = eval_map(
+                preds,
+                annotations,
+                scale_ranges=None,
+                iou_thr=iou_thr_2d_single,
+                dataset=self.dataset_meta['CLASSES'],
+                logger=logger)
+            eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
+        return eval_results
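Editor's note: wiring the new metric into a config is a one-liner; a hedged sketch (the `iou_thr` override below is illustrative, not this commit's defaults):

# In a config file: evaluate 2D detections at two IoU thresholds.
val_evaluator = dict(type='Indoor2DMetric', iou_thr=[0.25, 0.5])
test_evaluator = val_evaluator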
mmdet3d/models/dense_heads/vote_head.py

@@ -754,15 +754,16 @@ class VoteHead(BaseModule):
         batch_size = bbox3d.shape[0]
         results_list = list()
         if use_nms:
-            for b in range(batch_size):
+            for batch_index in range(batch_size):
                 temp_results = InstanceData()
                 bbox_selected, score_selected, labels = \
-                    self.multiclass_nms_single(obj_scores[b], sem_scores[b],
-                                               bbox3d[b],
-                                               stack_points[b, ..., :3],
-                                               batch_input_metas[b])
-                bbox = batch_input_metas[b]['box_type_3d'](
+                    self.multiclass_nms_single(
+                        obj_scores[batch_index], sem_scores[batch_index],
+                        bbox3d[batch_index],
+                        stack_points[batch_index, ..., :3],
+                        batch_input_metas[batch_index])
+                bbox = batch_input_metas[batch_index]['box_type_3d'](
                     bbox_selected,
                     box_dim=bbox_selected.shape[-1],
                     with_yaw=self.bbox_coder.with_rot)
mmdet3d/models/detectors/base.py

 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Union
+from typing import List, Optional, Union

 from mmengine import InstanceData
@@ -91,8 +91,8 @@ class Base3DDetector(BaseDetector):
     def convert_to_datasample(
         self,
-        results_list_3d: InstanceList,
-        results_list_2d: InstanceList = None,
+        results_list_3d: Optional[InstanceList] = None,
+        results_list_2d: Optional[InstanceList] = None,
     ) -> SampleList:
         """Convert results list to `Det3DDataSample`.
@@ -128,10 +128,18 @@ class Base3DDetector(BaseDetector):
         """
         data_sample_list = []
+        assert (results_list_2d is not None) or \
+            (results_list_3d is not None), \
+            'please pass at least one type of results_list'
+        if results_list_2d is None:
+            results_list_2d = [
+                InstanceData() for _ in range(len(results_list_3d))
+            ]
+        if results_list_3d is None:
+            results_list_3d = [
+                InstanceData() for _ in range(len(results_list_2d))
+            ]
         for i in range(len(results_list_3d)):
             result = Det3DDataSample()
             result.pred_instances_3d = results_list_3d[i]
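Editor's note: with both arguments now optional, callers can pack 2D-only or 3D-only results. A minimal sketch assuming `mmengine.InstanceData` as imported above (the `detector` name is hypothetical):

import torch
from mmengine import InstanceData

# 2D-only results, as produced by the image-branch pretrain path
res2d = InstanceData()
res2d.bboxes = torch.rand(3, 4)
res2d.scores = torch.rand(3)
res2d.labels = torch.randint(0, 10, (3, ))

# detector.convert_to_datasample(results_list_2d=[res2d]) would now pair
# each 2D result with an empty 3D InstanceData instead of raising.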
mmdet3d/models/detectors/imvotenet.py
View file @
f63a62b8
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
import
copy
from
typing
import
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Union
import
numpy
as
np
import
torch
from
mmengine
import
InstanceData
from
torch
import
Tensor
from
mmdet3d.core
import
bbox3d2result
,
merge_aug_bboxes_3d
from
mmdet3d.core
import
Det3DDataSample
from
mmdet3d.models.utils
import
MLP
from
mmdet3d.registry
import
MODELS
from
.base
import
Base3DDetector
def
sample_valid_seeds
(
mask
,
num_sampled_seed
=
1024
)
:
def
sample_valid_seeds
(
mask
:
Tensor
,
num_sampled_seed
:
int
=
1024
)
->
Tensor
:
r
"""Randomly sample seeds from all imvotes.
Modified from `<https://github.com/facebookresearch/imvotenet/blob/a8856345146bacf29a57266a2f0b874406fd8823/models/imvotenet.py#L26>`_
...
...
@@ -54,26 +56,34 @@ def sample_valid_seeds(mask, num_sampled_seed=1024):
@
MODELS
.
register_module
()
class
ImVoteNet
(
Base3DDetector
):
r
"""`ImVoteNet <https://arxiv.org/abs/2001.10692>`_ for 3D detection."""
r
"""`ImVoteNet <https://arxiv.org/abs/2001.10692>`_ for 3D detection.
ImVoteNet is based on fusing 2D votes in images and 3D votes in point
clouds, which explicitly extract both geometric and semantic features
from the 2D images. It leverage camera parameters to lift these
features to 3D. A multi-tower training scheme also improve the synergy
of 2D-3D feature fusion.
"""
def
__init__
(
self
,
pts_backbone
=
None
,
pts_bbox_heads
=
None
,
pts_neck
=
None
,
img_backbone
=
None
,
img_neck
=
None
,
img_roi_head
=
None
,
img_rpn_head
=
None
,
img_mlp
=
None
,
freeze_img_branch
=
False
,
fusion_layer
=
None
,
num_sampled_seed
=
None
,
train_cfg
=
None
,
test_cfg
=
None
,
pretrained
=
None
,
init_cfg
=
None
)
:
super
(
ImVoteNet
,
self
).
__init__
(
init_cfg
=
init_cfg
)
pts_backbone
:
Optional
[
dict
]
=
None
,
pts_bbox_heads
:
Optional
[
dict
]
=
None
,
pts_neck
:
Optional
[
dict
]
=
None
,
img_backbone
:
Optional
[
dict
]
=
None
,
img_neck
:
Optional
[
dict
]
=
None
,
img_roi_head
:
Optional
[
dict
]
=
None
,
img_rpn_head
:
Optional
[
dict
]
=
None
,
img_mlp
:
Optional
[
dict
]
=
None
,
freeze_img_branch
:
bool
=
False
,
fusion_layer
:
Optional
[
dict
]
=
None
,
num_sampled_seed
:
Optional
[
dict
]
=
None
,
train_cfg
:
Optional
[
dict
]
=
None
,
test_cfg
:
Optional
[
dict
]
=
None
,
init_cfg
:
Optional
[
dict
]
=
None
,
**
kwargs
)
->
None
:
super
(
ImVoteNet
,
self
).
__init__
(
init_cfg
=
init_cfg
,
**
kwargs
)
# point branch
if
pts_backbone
is
not
None
:
...
...
@@ -137,35 +147,8 @@ class ImVoteNet(Base3DDetector):
self
.
train_cfg
=
train_cfg
self
.
test_cfg
=
test_cfg
if
pretrained
is
None
:
img_pretrained
=
None
pts_pretrained
=
None
elif
isinstance
(
pretrained
,
dict
):
img_pretrained
=
pretrained
.
get
(
'img'
,
None
)
pts_pretrained
=
pretrained
.
get
(
'pts'
,
None
)
else
:
raise
ValueError
(
f
'pretrained should be a dict, got
{
type
(
pretrained
)
}
'
)
if
self
.
with_img_backbone
:
if
img_pretrained
is
not
None
:
warnings
.
warn
(
'DeprecationWarning: pretrained is a deprecated '
'key, please consider using init_cfg.'
)
self
.
img_backbone
.
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
img_pretrained
)
if
self
.
with_img_roi_head
:
if
img_pretrained
is
not
None
:
warnings
.
warn
(
'DeprecationWarning: pretrained is a deprecated '
'key, please consider using init_cfg.'
)
self
.
img_roi_head
.
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
img_pretrained
)
if
self
.
with_pts_backbone
:
if
img_pretrained
is
not
None
:
warnings
.
warn
(
'DeprecationWarning: pretrained is a deprecated '
'key, please consider using init_cfg.'
)
self
.
pts_backbone
.
init_cfg
=
dict
(
type
=
'Pretrained'
,
checkpoint
=
pts_pretrained
)
def
_forward
(
self
):
raise
NotImplementedError
def
freeze_img_branch_params
(
self
):
"""Freeze all image branch parameters."""
...
...
@@ -267,28 +250,14 @@ class ImVoteNet(Base3DDetector):
"""Just to inherit from abstract method."""
pass
def
extract_img_feat
(
self
,
img
)
:
def
extract_img_feat
(
self
,
img
:
Tensor
)
->
Sequence
[
Tensor
]
:
"""Directly extract features from the img backbone+neck."""
x
=
self
.
img_backbone
(
img
)
if
self
.
with_img_neck
:
x
=
self
.
img_neck
(
x
)
return
x
def
extract_img_feats
(
self
,
imgs
):
"""Extract features from multiple images.
Args:
imgs (list[torch.Tensor]): A list of images. The images are
augmented from the same image but in different ways.
Returns:
list[torch.Tensor]: Features of different images
"""
assert
isinstance
(
imgs
,
list
)
return
[
self
.
extract_img_feat
(
img
)
for
img
in
imgs
]
def
extract_pts_feat
(
self
,
pts
):
def
extract_pts_feat
(
self
,
pts
:
Tensor
)
->
Tuple
[
Tensor
]:
"""Extract features of points."""
x
=
self
.
pts_backbone
(
pts
)
if
self
.
with_pts_neck
:
...
...
@@ -300,151 +269,93 @@ class ImVoteNet(Base3DDetector):
return
(
seed_points
,
seed_features
,
seed_indices
)
def
extract_pts_feats
(
self
,
pts
):
"""Extract features of points from multiple samples."""
assert
isinstance
(
pts
,
list
)
return
[
self
.
extract_pts_feat
(
pt
)
for
pt
in
pts
]
@
torch
.
no_grad
()
def
extract_bboxes_2d
(
self
,
img
,
img_metas
,
train
=
True
,
bboxes_2d
=
None
,
**
kwargs
):
"""Extract bounding boxes from 2d detector.
Args:
img (torch.Tensor): of shape (N, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_metas (list[dict]): Image meta info.
train (bool): train-time or not.
bboxes_2d (list[torch.Tensor]): provided 2d bboxes,
not supported yet.
Return:
list[torch.Tensor]: a list of processed 2d bounding boxes.
def
loss
(
self
,
batch_inputs_dict
:
Dict
[
str
,
Union
[
List
,
Tensor
]],
batch_data_samples
:
List
[
Det3DDataSample
],
**
kwargs
)
->
List
[
Det3DDataSample
]:
"""
if
bboxes_2d
is
None
:
x
=
self
.
extract_img_feat
(
img
)
proposal_list
=
self
.
img_rpn_head
.
simple_test_rpn
(
x
,
img_metas
)
rets
=
self
.
img_roi_head
.
simple_test
(
x
,
proposal_list
,
img_metas
,
rescale
=
False
)
rets_processed
=
[]
for
ret
in
rets
:
tmp
=
np
.
concatenate
(
ret
,
axis
=
0
)
sem_class
=
img
.
new_zeros
((
len
(
tmp
)))
start
=
0
for
i
,
bboxes
in
enumerate
(
ret
):
sem_class
[
start
:
start
+
len
(
bboxes
)]
=
i
start
+=
len
(
bboxes
)
ret
=
img
.
new_tensor
(
tmp
)
# append class index
ret
=
torch
.
cat
([
ret
,
sem_class
[:,
None
]],
dim
=-
1
)
inds
=
torch
.
argsort
(
ret
[:,
4
],
descending
=
True
)
ret
=
ret
.
index_select
(
0
,
inds
)
# drop half bboxes during training for better generalization
if
train
:
rand_drop
=
torch
.
randperm
(
len
(
ret
))[:(
len
(
ret
)
+
1
)
//
2
]
rand_drop
=
torch
.
sort
(
rand_drop
)[
0
]
ret
=
ret
[
rand_drop
]
rets_processed
.
append
(
ret
.
float
())
return
rets_processed
else
:
rets_processed
=
[]
for
ret
in
bboxes_2d
:
if
len
(
ret
)
>
0
and
train
:
rand_drop
=
torch
.
randperm
(
len
(
ret
))[:(
len
(
ret
)
+
1
)
//
2
]
rand_drop
=
torch
.
sort
(
rand_drop
)[
0
]
ret
=
ret
[
rand_drop
]
rets_processed
.
append
(
ret
.
float
())
return
rets_processed
def
forward_train
(
self
,
points
=
None
,
img
=
None
,
img_metas
=
None
,
gt_bboxes
=
None
,
gt_labels
=
None
,
gt_bboxes_ignore
=
None
,
gt_masks
=
None
,
proposals
=
None
,
bboxes_2d
=
None
,
gt_bboxes_3d
=
None
,
gt_labels_3d
=
None
,
pts_semantic_mask
=
None
,
pts_instance_mask
=
None
,
**
kwargs
):
"""Forwarding of train for image branch pretrain or stage 2 train.
Args:
points (list[torch.Tensor]): Points of each batch.
img (torch.Tensor): of shape (N, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_metas (list[dict]): list of image and point cloud meta info
dict. For example, keys include 'ori_shape', 'img_norm_cfg',
and 'transformation_3d_flow'. For details on the values of
the keys see `mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[torch.Tensor]): Ground truth bboxes for each image
with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[torch.Tensor]): class indices for each
2d bounding box.
gt_bboxes_ignore (list[torch.Tensor]): specify which
2d bounding boxes can be ignored when computing the loss.
gt_masks (torch.Tensor): true segmentation masks for each
2d bbox, used if the architecture supports a segmentation task.
proposals: override rpn proposals (2d) with custom proposals.
Use when `with_rpn` is False.
bboxes_2d (list[torch.Tensor]): provided 2d bboxes,
not supported yet.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes.
gt_labels_3d (list[torch.Tensor]): gt class labels for 3d bboxes.
pts_semantic_mask (list[torch.Tensor]): point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): point-wise instance
label of each batch.
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs` keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (list[torch.Tensor]): Image tensor with shape
(N, C, H ,W).
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`.
Returns:
dict[str,
torch.
Tensor]:
a
dictionary of loss components.
dict[str, Tensor]:
A
dictionary of loss components.
"""
imgs
=
batch_inputs_dict
.
get
(
'imgs'
,
None
)
points
=
batch_inputs_dict
.
get
(
'points'
,
None
)
if
points
is
None
:
x
=
self
.
extract_img_feat
(
img
)
x
=
self
.
extract_img_feat
(
img
s
)
losses
=
dict
()
# RPN forward and loss
if
self
.
with_img_rpn
:
proposal_cfg
=
self
.
train_cfg
.
get
(
'img_rpn_proposal'
,
self
.
test_cfg
.
img_rpn
)
rpn_losses
,
proposal_list
=
self
.
img_rpn_head
.
forward_train
(
x
,
img_metas
,
gt_bboxes
,
gt_labels
=
None
,
gt_bboxes_ignore
=
gt_bboxes_ignore
,
proposal_cfg
=
proposal_cfg
)
rpn_data_samples
=
copy
.
deepcopy
(
batch_data_samples
)
# set cat_id of gt_labels to 0 in RPN
for
data_sample
in
rpn_data_samples
:
data_sample
.
gt_instances
.
labels
=
\
torch
.
zeros_like
(
data_sample
.
gt_instances
.
labels
)
rpn_losses
,
rpn_results_list
=
\
self
.
img_rpn_head
.
loss_and_predict
(
x
,
rpn_data_samples
,
proposal_cfg
=
proposal_cfg
,
**
kwargs
)
# avoid get same name with roi_head loss
keys
=
rpn_losses
.
keys
()
for
key
in
keys
:
if
'loss'
in
key
and
'rpn'
not
in
key
:
rpn_losses
[
f
'rpn_
{
key
}
'
]
=
rpn_losses
.
pop
(
key
)
losses
.
update
(
rpn_losses
)
else
:
proposal_list
=
proposals
roi_losses
=
self
.
img_roi_head
.
forward_train
(
x
,
img_metas
,
proposal_list
,
gt_bboxes
,
gt_labels
,
gt_bboxes_ignore
,
gt_masks
,
**
kwargs
)
assert
batch_data_samples
[
0
].
get
(
'proposals'
,
None
)
is
not
None
# use pre-defined proposals in InstanceData for
# the second stage
# to extract ROI features.
rpn_results_list
=
[
data_sample
.
proposals
for
data_sample
in
batch_data_samples
]
roi_losses
=
self
.
img_roi_head
.
loss
(
x
,
rpn_results_list
,
batch_data_samples
,
**
kwargs
)
losses
.
update
(
roi_losses
)
return
losses
else
:
bboxes_2d
=
self
.
extract_bboxes_2d
(
img
,
img_metas
,
bboxes_2d
=
bboxes_2d
,
**
kwargs
)
with
torch
.
no_grad
():
results_2d
=
self
.
predict_img_only
(
batch_inputs_dict
[
'imgs'
],
batch_data_samples
,
rescale
=
False
)
# tensor with shape (n, 6), the 6 arrange
# as [x1, x2, y1, y2, score, label]
pred_bboxes_with_label_list
=
[]
for
single_results
in
results_2d
:
cat_preds
=
torch
.
cat
(
(
single_results
.
bboxes
,
single_results
.
scores
[:,
None
],
single_results
.
labels
[:,
None
]),
dim
=-
1
)
cat_preds
=
cat_preds
[
torch
.
argsort
(
cat_preds
[:,
4
],
descending
=
True
)]
# drop half bboxes during training for better generalization
if
self
.
training
:
rand_drop
=
torch
.
randperm
(
len
(
cat_preds
))[:(
len
(
cat_preds
)
+
1
)
//
2
]
rand_drop
=
torch
.
sort
(
rand_drop
)[
0
]
cat_preds
=
cat_preds
[
rand_drop
]
points
=
torch
.
stack
(
points
)
seeds_3d
,
seed_3d_features
,
seed_indices
=
\
self
.
extract_pts_feat
(
points
)
pred_bboxes_with_label_list
.
append
(
cat_preds
)
img_features
,
masks
=
self
.
fusion_layer
(
img
,
bboxes_2d
,
seeds_3d
,
img_metas
)
stack_points
=
torch
.
stack
(
points
)
seeds_3d
,
seed_3d_features
,
seed_indices
=
\
self
.
extract_pts_feat
(
stack_points
)
img_metas
=
[
item
.
metainfo
for
item
in
batch_data_samples
]
img_features
,
masks
=
self
.
fusion_layer
(
imgs
,
pred_bboxes_with_label_list
,
seeds_3d
,
img_metas
)
inds
=
sample_valid_seeds
(
masks
,
self
.
num_sampled_seed
)
batch_size
,
img_feat_size
=
img_features
.
shape
[:
2
]
...
...
@@ -476,27 +387,13 @@ class ImVoteNet(Base3DDetector):
seed_features
=
img_features
,
seed_indices
=
seed_indices
)
loss_inputs
=
(
points
,
gt_bboxes_3d
,
gt_labels_3d
,
pts_semantic_mask
,
pts_instance_mask
,
img_metas
)
bbox_preds_joints
=
self
.
pts_bbox_head_joint
(
feat_dict_joint
,
self
.
train_cfg
.
pts
.
sample_mod
)
bbox_preds_pts
=
self
.
pts_bbox_head_pts
(
feat_dict_pts
,
self
.
train_cfg
.
pts
.
sample_mod
)
bbox_preds_img
=
self
.
pts_bbox_head_img
(
feat_dict_img
,
self
.
train_cfg
.
pts
.
sample_mod
)
losses_towers
=
[]
losses_joint
=
self
.
pts_bbox_head_joint
.
loss
(
bbox_preds_joints
,
*
loss_inputs
,
gt_bboxes_ignore
=
gt_bboxes_ignore
)
losses_pts
=
self
.
pts_bbox_head_pts
.
loss
(
bbox_preds_pts
,
*
loss_inputs
,
gt_bboxes_ignore
=
gt_bboxes_ignore
)
losses_img
=
self
.
pts_bbox_head_img
.
loss
(
bbox_preds_img
,
*
loss_inputs
,
gt_bboxes_ignore
=
gt_bboxes_ignore
)
points
,
feat_dict_joint
,
batch_data_samples
)
losses_pts
=
self
.
pts_bbox_head_pts
.
loss
(
points
,
feat_dict_pts
,
batch_data_samples
)
losses_img
=
self
.
pts_bbox_head_img
.
loss
(
points
,
feat_dict_img
,
batch_data_samples
)
losses_towers
.
append
(
losses_joint
)
losses_towers
.
append
(
losses_pts
)
losses_towers
.
append
(
losses_img
)
...
...
@@ -516,267 +413,50 @@ class ImVoteNet(Base3DDetector):
return
combined_losses
def
forward_test
(
self
,
points
=
None
,
img_metas
=
None
,
img
=
None
,
bboxes_2d
=
None
,
**
kwargs
):
"""Forwarding of test for image branch pretrain or stage 2 train.
def
predict
(
self
,
batch_inputs_dict
:
Dict
[
str
,
Optional
[
Tensor
]],
batch_data_samples
:
List
[
Det3DDataSample
],
**
kwargs
)
->
List
[
Det3DDataSample
]:
"""Forward of testing.
Args:
points (list[list[torch.Tensor]], optional): the outer
list indicates test-time augmentations and the inner
list contains all points in the batch, where each Tensor
should have a shape NxC. Defaults to None.
img_metas (list[list[dict]], optional): the outer list
indicates test-time augs (multiscale, flip, etc.)
and the inner list indicates images in a batch.
Defaults to None.
img (list[list[torch.Tensor]], optional): the outer
list indicates test-time augmentations and inner Tensor
should have a shape NxCxHxW, which contains all images
in the batch. Defaults to None. Defaults to None.
bboxes_2d (list[list[torch.Tensor]], optional):
Provided 2d bboxes, not supported yet. Defaults to None.
Returns:
list[list[torch.Tensor]]|list[dict]: Predicted 2d or 3d boxes.
batch_inputs_dict (dict): The model input dict which include
'points' and 'imgs keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (list[torch.Tensor]): Tensor of Images.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`.
"""
points
=
batch_inputs_dict
.
get
(
'points'
,
None
)
imgs
=
batch_inputs_dict
.
get
(
'imgs'
,
None
)
if
points
is
None
:
for
var
,
name
in
[(
img
,
'img'
),
(
img_metas
,
'img_metas'
)]:
if
not
isinstance
(
var
,
list
):
raise
TypeError
(
f
'
{
name
}
must be a list, but got
{
type
(
var
)
}
'
)
num_augs
=
len
(
img
)
if
num_augs
!=
len
(
img_metas
):
raise
ValueError
(
f
'num of augmentations (
{
len
(
img
)
}
) '
f
'!= num of image meta (
{
len
(
img_metas
)
}
)'
)
if
num_augs
==
1
:
# proposals (List[List[Tensor]]): the outer list indicates
# test-time augs (multiscale, flip, etc.) and the inner list
# indicates images in a batch.
# The Tensor should have a shape Px4, where P is the number of
# proposals.
if
'proposals'
in
kwargs
:
kwargs
[
'proposals'
]
=
kwargs
[
'proposals'
][
0
]
return
self
.
simple_test_img_only
(
img
=
img
[
0
],
img_metas
=
img_metas
[
0
],
**
kwargs
)
else
:
assert
img
[
0
].
size
(
0
)
==
1
,
'aug test does not support '
\
'inference with batch size '
\
f
'
{
img
[
0
].
size
(
0
)
}
'
# TODO: support test augmentation for predefined proposals
assert
'proposals'
not
in
kwargs
return
self
.
aug_test_img_only
(
img
=
img
,
img_metas
=
img_metas
,
**
kwargs
)
assert
imgs
is
not
None
results_2d
=
self
.
predict_img_only
(
imgs
,
batch_data_samples
)
return
self
.
convert_to_datasample
(
results_list_2d
=
results_2d
)
else
:
for
var
,
name
in
[(
points
,
'points'
),
(
img_metas
,
'img_metas'
)]:
if
not
isinstance
(
var
,
list
):
raise
TypeError
(
'{} must be a list, but got {}'
.
format
(
name
,
type
(
var
)))
num_augs
=
len
(
points
)
if
num_augs
!=
len
(
img_metas
):
raise
ValueError
(
'num of augmentations ({}) != num of image meta ({})'
.
format
(
len
(
points
),
len
(
img_metas
)))
if
num_augs
==
1
:
return
self
.
simple_test
(
points
[
0
],
img_metas
[
0
],
img
[
0
],
bboxes_2d
=
bboxes_2d
[
0
]
if
bboxes_2d
is
not
None
else
None
,
**
kwargs
)
else
:
return
self
.
aug_test
(
points
,
img_metas
,
img
,
bboxes_2d
,
**
kwargs
)
def
simple_test_img_only
(
self
,
img
,
img_metas
,
proposals
=
None
,
rescale
=
False
):
r
"""Test without augmentation, image network pretrain. May refer to
`<https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py>`_.
Args:
img (torch.Tensor): Should have a shape NxCxHxW, which contains
all images in the batch.
img_metas (list[dict]):
proposals (list[Tensor], optional): override rpn proposals
with custom proposals. Defaults to None.
rescale (bool, optional): Whether or not rescale bboxes to the
original shape of input image. Defaults to False.
Returns:
list[list[torch.Tensor]]: Predicted 2d boxes.
"""
# noqa: E501
assert
self
.
with_img_bbox
,
'Img bbox head must be implemented.'
assert
self
.
with_img_backbone
,
'Img backbone must be implemented.'
assert
self
.
with_img_rpn
,
'Img rpn must be implemented.'
assert
self
.
with_img_roi_head
,
'Img roi head must be implemented.'
x
=
self
.
extract_img_feat
(
img
)
if
proposals
is
None
:
proposal_list
=
self
.
img_rpn_head
.
simple_test_rpn
(
x
,
img_metas
)
else
:
proposal_list
=
proposals
ret
=
self
.
img_roi_head
.
simple_test
(
x
,
proposal_list
,
img_metas
,
rescale
=
rescale
)
return
ret
def
simple_test
(
self
,
points
=
None
,
img_metas
=
None
,
img
=
None
,
bboxes_2d
=
None
,
rescale
=
False
,
**
kwargs
):
"""Test without augmentation, stage 2.
Args:
points (list[torch.Tensor], optional): Elements in the list
should have a shape NxC, the list indicates all point-clouds
in the batch. Defaults to None.
img_metas (list[dict], optional): List indicates
images in a batch. Defaults to None.
img (torch.Tensor, optional): Should have a shape NxCxHxW,
which contains all images in the batch. Defaults to None.
bboxes_2d (list[torch.Tensor], optional):
Provided 2d bboxes, not supported yet. Defaults to None.
rescale (bool, optional): Whether or not rescale bboxes.
Defaults to False.
Returns:
list[dict]: Predicted 3d boxes.
"""
bboxes_2d
=
self
.
extract_bboxes_2d
(
img
,
img_metas
,
train
=
False
,
bboxes_2d
=
bboxes_2d
,
**
kwargs
)
points
=
torch
.
stack
(
points
)
seeds_3d
,
seed_3d_features
,
seed_indices
=
\
self
.
extract_pts_feat
(
points
)
img_features
,
masks
=
self
.
fusion_layer
(
img
,
bboxes_2d
,
seeds_3d
,
img_metas
)
inds
=
sample_valid_seeds
(
masks
,
self
.
num_sampled_seed
)
batch_size
,
img_feat_size
=
img_features
.
shape
[:
2
]
pts_feat_size
=
seed_3d_features
.
shape
[
1
]
inds_img
=
inds
.
view
(
batch_size
,
1
,
-
1
).
expand
(
-
1
,
img_feat_size
,
-
1
)
img_features
=
img_features
.
gather
(
-
1
,
inds_img
)
inds
=
inds
%
inds
.
shape
[
1
]
inds_seed_xyz
=
inds
.
view
(
batch_size
,
-
1
,
1
).
expand
(
-
1
,
-
1
,
3
)
seeds_3d
=
seeds_3d
.
gather
(
1
,
inds_seed_xyz
)
inds_seed_feats
=
inds
.
view
(
batch_size
,
1
,
-
1
).
expand
(
-
1
,
pts_feat_size
,
-
1
)
seed_3d_features
=
seed_3d_features
.
gather
(
-
1
,
inds_seed_feats
)
seed_indices
=
seed_indices
.
gather
(
1
,
inds
)
img_features
=
self
.
img_mlp
(
img_features
)
fused_features
=
torch
.
cat
([
seed_3d_features
,
img_features
],
dim
=
1
)
feat_dict
=
dict
(
seed_points
=
seeds_3d
,
seed_features
=
fused_features
,
seed_indices
=
seed_indices
)
bbox_preds
=
self
.
pts_bbox_head_joint
(
feat_dict
,
self
.
test_cfg
.
pts
.
sample_mod
)
bbox_list
=
self
.
pts_bbox_head_joint
.
get_bboxes
(
points
,
bbox_preds
,
img_metas
,
rescale
=
rescale
)
bbox_results
=
[
bbox3d2result
(
bboxes
,
scores
,
labels
)
for
bboxes
,
scores
,
labels
in
bbox_list
]
return
bbox_results
def
aug_test_img_only
(
self
,
img
,
img_metas
,
rescale
=
False
):
r
"""Test function with augmentation, image network pretrain. May refer
to `<https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/two_stage.py>`_.
Args:
img (list[list[torch.Tensor]], optional): the outer
list indicates test-time augmentations and inner Tensor
should have a shape NxCxHxW, which contains all images
in the batch. Defaults to None. Defaults to None.
img_metas (list[list[dict]], optional): the outer list
indicates test-time augs (multiscale, flip, etc.)
and the inner list indicates images in a batch.
Defaults to None.
rescale (bool, optional): Whether or not rescale bboxes to the
original shape of input image. If rescale is False, then
returned bboxes and masks will fit the scale of imgs[0].
Defaults to None.
Returns:
list[list[torch.Tensor]]: Predicted 2d boxes.
"""
# noqa: E501
assert
self
.
with_img_bbox
,
'Img bbox head must be implemented.'
assert
self
.
with_img_backbone
,
'Img backbone must be implemented.'
assert
self
.
with_img_rpn
,
'Img rpn must be implemented.'
assert
self
.
with_img_roi_head
,
'Img roi head must be implemented.'
x
=
self
.
extract_img_feats
(
img
)
proposal_list
=
self
.
img_rpn_head
.
aug_test_rpn
(
x
,
img_metas
)
return
self
.
img_roi_head
.
aug_test
(
x
,
proposal_list
,
img_metas
,
rescale
=
rescale
)
    def aug_test(self,
                 points=None,
                 img_metas=None,
                 imgs=None,
                 bboxes_2d=None,
                 rescale=False,
                 **kwargs):
        """Test function with augmentation, stage 2.

        Args:
            points (list[list[torch.Tensor]], optional): the outer
                list indicates test-time augmentations and the inner
                list contains all points in the batch, where each Tensor
                should have a shape NxC. Defaults to None.
            img_metas (list[list[dict]], optional): the outer list
                indicates test-time augs (multiscale, flip, etc.)
                and the inner list indicates images in a batch.
                Defaults to None.
            imgs (list[list[torch.Tensor]], optional): the outer
                list indicates test-time augmentations and inner Tensor
                should have a shape NxCxHxW, which contains all images
                in the batch. Defaults to None.
            bboxes_2d (list[list[torch.Tensor]], optional):
                Provided 2d bboxes, not supported yet. Defaults to None.
            rescale (bool, optional): Whether or not to rescale bboxes.
                Defaults to False.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        points_cat = [torch.stack(pts) for pts in points]
        feats = self.extract_pts_feats(points_cat, img_metas)

        # only support aug_test for one sample
        aug_bboxes = []
        for x, pts_cat, img_meta, bbox_2d, img in zip(feats, points_cat,
                                                      img_metas, bboxes_2d,
                                                      imgs):
            bbox_2d = self.extract_bboxes_2d(
                img, img_metas, train=False, bboxes_2d=bbox_2d, **kwargs)
            seeds_3d, seed_3d_features, seed_indices = x
        results_2d = self.predict_img_only(
            batch_inputs_dict['imgs'], batch_data_samples, rescale=False)

        # tensor with shape (n, 6), the 6 columns arranged as
        # [x1, y1, x2, y2, score, label]
        pred_bboxes_with_label_list = []
        for single_results in results_2d:
            cat_preds = torch.cat(
                (single_results.bboxes, single_results.scores[:, None],
                 single_results.labels[:, None]),
                dim=-1)
            cat_preds = cat_preds[torch.argsort(
                cat_preds[:, 4], descending=True)]
            pred_bboxes_with_label_list.append(cat_preds)

        stack_points = torch.stack(points)
        seeds_3d, seed_3d_features, seed_indices = \
            self.extract_pts_feat(stack_points)
            img_features, masks = self.fusion_layer(img, bbox_2d, seeds_3d,
                                                    img_metas)
        img_features, masks = self.fusion_layer(
            imgs, pred_bboxes_with_label_list, seeds_3d,
            [item.metainfo for item in batch_data_samples])
        inds = sample_valid_seeds(masks, self.num_sampled_seed)
        batch_size, img_feat_size = img_features.shape[:2]
...
...
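As a quick, self-contained illustration of the 2D-box packing step in the hunk above (toy values, not dataset output): each image's detections are concatenated into an (n, 6) tensor [x1, y1, x2, y2, score, label] and sorted by score, so the fusion layer sees the most confident boxes first.

import torch

bboxes = torch.tensor([[0., 0., 4., 4.], [1., 1., 3., 3.]])
scores = torch.tensor([0.2, 0.9])
labels = torch.tensor([5., 2.])

# Pack per-image detections into (n, 6) and sort by the score column.
cat_preds = torch.cat((bboxes, scores[:, None], labels[:, None]), dim=-1)
cat_preds = cat_preds[torch.argsort(cat_preds[:, 4], descending=True)]
print(cat_preds)  # the row with score 0.9 comes first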
@@ -800,19 +480,57 @@ class ImVoteNet(Base3DDetector):
            seed_points=seeds_3d,
            seed_features=fused_features,
            seed_indices=seed_indices)
        bbox_preds = self.pts_bbox_head_joint(feat_dict,
                                              self.test_cfg.pts.sample_mod)
        bbox_list = self.pts_bbox_head_joint.get_bboxes(
            pts_cat, bbox_preds, img_metas, rescale=rescale)
        bbox_list = [
            dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
            for bboxes, scores, labels in bbox_list
        ]
        results_3d = self.pts_bbox_head_joint.predict(
            batch_inputs_dict['points'], feat_dict, batch_data_samples,
            rescale=True)
        return self.convert_to_datasample(results_3d)

    def predict_img_only(self,
                         imgs: Tensor,
                         batch_data_samples: List[Det3DDataSample],
                         rescale: bool = True) -> List[InstanceData]:
        """Predict results from a batch of imgs with post-processing.

        Args:
            imgs (Tensor): Input images with shape (N, C, H, W).
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
            rescale (bool): Whether to rescale the results.
                Defaults to True.

        Returns:
            list[:obj:`InstanceData`]: Detection results of the input
            images, usually containing the following keys.

            - scores (Tensor): Classification scores, has a shape
              (num_instance, ).
            - labels (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes (Tensor): Has a shape (num_instances, 4), with the
              last dimension of 4 arranged as (x1, y1, x2, y2).
        """
        assert self.with_img_bbox, 'Img bbox head must be implemented.'
        assert self.with_img_backbone, 'Img backbone must be implemented.'
        assert self.with_img_rpn, 'Img rpn must be implemented.'
        assert self.with_img_roi_head, 'Img roi head must be implemented.'

        x = self.extract_img_feat(imgs)

        # If there are no pre-defined proposals, use the RPN to get proposals.
        if batch_data_samples[0].get('proposals', None) is None:
            rpn_results_list = self.img_rpn_head.predict(
                x, batch_data_samples, rescale=False)
        else:
            rpn_results_list = [
                data_sample.proposals for data_sample in batch_data_samples
            ]
            aug_bboxes.append(bbox_list[0])

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
                                            self.bbox_head.test_cfg)
        results_list = self.img_roi_head.predict(
            x, rpn_results_list, batch_data_samples, rescale=rescale)

        return [merged_bboxes]
        return results_list
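A hedged usage sketch of the new predict_img_only path, assuming a built ImVoteNet `model` and a batch already run through its data_preprocessor; the wrapper name `demo_2d_predict` is hypothetical:

import torch


def demo_2d_predict(model, batch_inputs_dict, batch_data_samples):
    # Run only the 2D branch; each returned InstanceData holds one
    # image's boxes, scores and labels in (x1, y1, x2, y2) format.
    with torch.no_grad():
        results = model.predict_img_only(
            batch_inputs_dict['imgs'], batch_data_samples, rescale=True)
    for res in results:
        print(res.bboxes.shape, res.scores.shape, res.labels.shape)
    return results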
mmdet3d/models/fusion_layers/coord_transform.py
View file @
f63a62b8
...
...
@@ -105,8 +105,8 @@ def extract_2d_info(img_meta, tensor):
"""
img_shape
=
img_meta
[
'img_shape'
]
ori_shape
=
img_meta
[
'ori_shape'
]
img_h
,
img_w
,
_
=
img_shape
ori_h
,
ori_w
,
_
=
ori_shape
img_h
,
img_w
=
img_shape
ori_h
,
ori_w
=
ori_shape
img_scale_factor
=
(
tensor
.
new_tensor
(
img_meta
[
'scale_factor'
][:
2
])
...
...
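The hunk above drops the third element from the unpacking because, after the refactor, the 'img_shape' and 'ori_shape' metainfo entries are assumed to be (h, w) pairs rather than (h, w, c) triples; a minimal illustration with made-up toy values:

# Toy metainfo in the new (h, w) convention; the numbers are invented.
img_meta = dict(
    img_shape=(480, 640),
    ori_shape=(530, 730),
    scale_factor=(640 / 730, 480 / 530))
img_h, img_w = img_meta['img_shape']
ori_h, ori_w = img_meta['ori_shape']
print(img_h, img_w, ori_h, ori_w)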
mmdet3d/models/fusion_layers/vote_fusion.py
View file @
f63a62b8
...
...
@@ -45,8 +45,6 @@ class VoteFusion(nn.Module):
        seed_num = seed_3d_depth.shape[0]
        img_shape = img_meta['img_shape']
        img_h, img_w, _ = img_shape

        # first reverse the data transformations
        xyz_depth = apply_3d_transformation(
            seed_3d_depth, 'DEPTH', img_meta, reverse=True)
...
...
tests/data/sunrgbd/sunrgbd_infos.pkl
View file @
f63a62b8
No preview for this file type
tests/test_models/test_detectors/test_h3d.py
0 → 100644
View file @
f63a62b8
import unittest

import torch
from mmengine import DefaultScope

from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
                                     _get_detector_cfg, _setup_seed)


class TestH3D(unittest.TestCase):

    def test_h3dnet(self):
        import mmdet3d.models
        assert hasattr(mmdet3d.models, 'H3DNet')
        DefaultScope.get_instance('test_H3DNet', scope_name='mmdet3d')
        _setup_seed(0)
        voxel_net_cfg = _get_detector_cfg(
            'h3dnet/h3dnet_3x8_scannet-3d-18class.py')
        model = MODELS.build(voxel_net_cfg)
        num_gt_instance = 5
        data = [
            _create_detector_inputs(
                num_gt_instance=num_gt_instance,
                points_feat_dim=4,
                bboxes_3d_type='depth',
                with_pts_semantic_mask=True,
                with_pts_instance_mask=True)
        ]

        if torch.cuda.is_available():
            model = model.cuda()
            # test simple_test
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)

            # save the memory
            with torch.no_grad():
                losses = model.forward(
                    batch_inputs, data_samples, mode='loss')
            self.assertGreater(losses['vote_loss'], 0)
            self.assertGreater(losses['objectness_loss'], 0)
            self.assertGreater(losses['center_loss'], 0)
tests/test_models/test_detectors/test_imvotenet.py
0 → 100644
View file @
f63a62b8
import unittest

import torch
from mmengine import DefaultScope

from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
                                     _get_detector_cfg, _setup_seed)


class TestImvoteNet(unittest.TestCase):

    def test_imvotenet_only_img(self):
        import mmdet3d.models
        assert hasattr(mmdet3d.models, 'ImVoteNet')
        DefaultScope.get_instance('test_imvotenet_img', scope_name='mmdet3d')
        _setup_seed(0)
        votenet_net_cfg = _get_detector_cfg(
            'imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py'
        )
        model = MODELS.build(votenet_net_cfg)

        data = [
            _create_detector_inputs(
                with_points=False, with_img=True, img_size=128)
        ]

        if torch.cuda.is_available():
            model = model.cuda()
            # test simple_test
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes', results[0].pred_instances)
            self.assertIn('scores', results[0].pred_instances)
            self.assertIn('labels', results[0].pred_instances)

            # save the memory
            with torch.no_grad():
                losses = model.forward(
                    batch_inputs, data_samples, mode='loss')
            self.assertGreater(sum(losses['loss_rpn_cls']), 0)
            self.assertGreater(losses['loss_cls'], 0)
            self.assertGreater(losses['loss_bbox'], 0)

    def test_imvotenet(self):
        import mmdet3d.models
        assert hasattr(mmdet3d.models, 'ImVoteNet')
        DefaultScope.get_instance('test_imvotenet', scope_name='mmdet3d')
        _setup_seed(0)
        votenet_net_cfg = _get_detector_cfg(
            'imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py')
        model = MODELS.build(votenet_net_cfg)

        data = [
            _create_detector_inputs(
                with_points=True,
                with_img=True,
                img_size=128,
                bboxes_3d_type='depth')
        ]

        if torch.cuda.is_available():
            model = model.cuda()
            # test simple_test
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)

            # save the memory
            with torch.no_grad():
                losses = model.forward(
                    batch_inputs, data_samples, mode='loss')
            self.assertGreater(losses['vote_loss'], 0)
            self.assertGreater(losses['objectness_loss'], 0)
            self.assertGreater(losses['semantic_loss'], 0)
tests/utils/model_utils.py
View file @
f63a62b8
...
...
@@ -75,6 +75,7 @@ def _get_detector_cfg(fname):
def _create_detector_inputs(seed=0,
                            with_points=True,
                            with_img=False,
                            img_size=10,
                            num_gt_instance=20,
                            num_points=10,
                            points_feat_dim=4,
...
...
@@ -90,23 +91,46 @@ def _create_detector_inputs(seed=0,
        'depth': DepthInstance3DBoxes,
        'cam': CameraInstance3DBoxes
    }
    meta_info = dict()
    meta_info['depth2img'] = np.array(
        [[5.23289349e+02, 3.68831943e+02, 6.10469439e+01],
         [1.09560138e+02, 1.97404735e+02, -5.47377738e+02],
         [1.25930002e-02, 9.92229998e-01, -1.23769999e-01]])
    meta_info['lidar2img'] = np.array(
        [[5.23289349e+02, 3.68831943e+02, 6.10469439e+01],
         [1.09560138e+02, 1.97404735e+02, -5.47377738e+02],
         [1.25930002e-02, 9.92229998e-01, -1.23769999e-01]])
    if with_points:
        points = torch.rand([num_points, points_feat_dim])
    else:
        points = None
    if with_img:
        img = torch.rand(3, 10, 10)
        img = torch.rand(3, img_size, img_size)
        meta_info['img_shape'] = (img_size, img_size)
        meta_info['ori_shape'] = (img_size, img_size)
        meta_info['scale_factor'] = np.array([1., 1.])
    else:
        img = None
    inputs_dict = dict(img=img, points=points)
    gt_instance_3d = InstanceData()
    gt_instance_3d.bboxes_3d = bbox_3d_class[bboxes_3d_type](
        torch.rand([num_gt_instance, gt_bboxes_dim]), box_dim=gt_bboxes_dim)
    gt_instance_3d.labels_3d = torch.randint(0, num_classes,
                                             [num_gt_instance])
    data_sample = Det3DDataSample(
        metainfo=dict(box_type_3d=bbox_3d_class[bboxes_3d_type]))
    data_sample.set_metainfo(meta_info)
    data_sample.gt_instances_3d = gt_instance_3d
    gt_instance = InstanceData()
    gt_instance.labels = torch.randint(0, num_classes, [num_gt_instance])
    gt_instance.bboxes = torch.rand(num_gt_instance, 4)
    gt_instance.bboxes[:, 2:] = gt_instance.bboxes[:, :2] + \
        gt_instance.bboxes[:, 2:]
    data_sample.gt_instances = gt_instance
    data_sample.gt_pts_seg = PointData()
    if with_pts_instance_mask:
        pts_instance_mask = torch.randint(0, num_gt_instance, [num_points])
...
...
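The in-place box fix-up in _create_detector_inputs above guarantees well-formed corners for the random 2D ground truths; a minimal standalone check of that (x1, y1, w, h) -> (x1, y1, x2, y2) trick:

import torch

# Columns start as x1, y1, w, h in [0, 1); adding the top-left corner to
# the size columns guarantees x2 >= x1 and y2 >= y1.
boxes = torch.rand(4, 4)
boxes[:, 2:] = boxes[:, :2] + boxes[:, 2:]  # columns now: x1, y1, x2, y2
assert (boxes[:, 2:] >= boxes[:, :2]).all()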
tools/data_converter/update_infos_to_v2.py
View file @
f63a62b8
...
...
@@ -652,21 +652,28 @@ def update_sunrgbd_infos(pkl_path, out_dir):
        temp_data_info['images']['CAM0']['width'] = w
        anns = ori_info_dict['annos']
        num_instances = len(anns['name'])
        ignore_class_name = set()
        instance_list = []
        for instance_id in range(num_instances):
            empty_instance = get_empty_instance()
            empty_instance['bbox_3d'] = anns['gt_boxes_upright_depth'][
                instance_id].tolist()
            if anns['name'][instance_id] in METAINFO['CLASSES']:
                empty_instance['bbox_label_3d'] = METAINFO['CLASSES'].index(
                    anns['name'][instance_id])
            else:
                ignore_class_name.add(anns['name'][instance_id])
                empty_instance['bbox_label_3d'] = -1
            empty_instance = clear_instance_unused_keys(empty_instance)
            instance_list.append(empty_instance)
        if anns['gt_num'] == 0:
            instance_list = []
        else:
            num_instances = len(anns['name'])
            ignore_class_name = set()
            instance_list = []
            for instance_id in range(num_instances):
                empty_instance = get_empty_instance()
                empty_instance['bbox_3d'] = anns['gt_boxes_upright_depth'][
                    instance_id].tolist()
                empty_instance['bbox'] = anns['bbox'][instance_id].tolist()
                if anns['name'][instance_id] in METAINFO['CLASSES']:
                    empty_instance['bbox_label_3d'] = METAINFO[
                        'CLASSES'].index(anns['name'][instance_id])
                    empty_instance['bbox_label'] = empty_instance[
                        'bbox_label_3d']
                else:
                    ignore_class_name.add(anns['name'][instance_id])
                    empty_instance['bbox_label_3d'] = -1
                    empty_instance['bbox_label'] = -1
                empty_instance = clear_instance_unused_keys(empty_instance)
                instance_list.append(empty_instance)
        temp_data_info['instances'] = instance_list
        temp_data_info, _ = clear_data_info_unused_keys(temp_data_info)
        converted_list.append(temp_data_info)
...
...
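The class-to-label mapping rule in update_sunrgbd_infos can be sketched standalone; CLASSES here is a toy subset for illustration, not the dataset's full list:

# Names found in the class tuple map to their index; unknown names map
# to -1 and are collected for reporting, mirroring the loop above.
CLASSES = ('bed', 'table', 'chair')
ignore_class_name = set()


def map_label(name):
    if name in CLASSES:
        return CLASSES.index(name)
    ignore_class_name.add(name)
    return -1


assert map_label('table') == 1
assert map_label('lamp') == -1 and 'lamp' in ignore_class_name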