"...git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "e65e472698b8987398816ee83ea9dbcba1f713f3"
Commit 9ebb75da authored by jshilong, committed by ChaimZhu

[refactor]Groupfree3d

parent b496f579
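This commit ports the GroupFree3D configs, datasets and detector to the MMEngine-style interfaces used by mmdetection3d dev-1.x (dataloader dicts, optim_wrapper, param_scheduler, train/val/test loops, Pack3DDetInputs, and the loss/predict entry points). A minimal sketch of driving one of the refactored configs end to end, assuming an mmdetection3d checkout at this commit; the config path is taken from the unit test added at the end of this diff, and the work_dir is a placeholder.

from mmengine.config import Config
from mmengine.runner import Runner

# Load the refactored config (path as referenced by the new unit test below).
cfg = Config.fromfile(
    'configs/groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
cfg.work_dir = './work_dirs/groupfree3d_refactor'  # placeholder output directory

# The Runner builds the model, dataloaders, optim_wrapper, param_scheduler and
# loops (train_cfg/val_cfg/test_cfg) directly from the config keys in this diff.
runner = Runner.from_cfg(cfg)
runner.train()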
 model = dict(
     type='GroupFree3DNet',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     backbone=dict(
         type='PointNet2SASSG',
         in_channels=3,
@@ -38,33 +39,36 @@ model = dict(
         pred_layer_cfg=dict(
             in_channels=288, shared_conv_channels=(288, 288), bias=True),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=1.0,
+            reduction='sum',
+            loss_weight=10.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     # model training and testing settings
-    train_cfg=dict(sample_mod='kps'),
+    train_cfg=dict(sample_mode='kps'),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -35,34 +35,37 @@ model = dict(
             [1.1511526, 1.0546296, 0.49706793],
             [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
             beta=1.0 / 9.0,
             reduction='sum',
             loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -102,9 +108,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -134,52 +139,60 @@ test_pipeline = [
                 flip_ratio_bev_horizontal=0.5,
                 flip_ratio_bev_vertical=0.5),
             dict(type='PointSample', num_points=50000),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ])
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
             pipeline=train_pipeline,
             filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
         custom_keys={
             'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -191,9 +204,21 @@ optimizer = dict(
             'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
         }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
@@ -34,34 +34,37 @@ model = dict(
             [1.1511526, 1.0546296, 0.49706793],
             [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
            beta=1.0 / 9.0,
            reduction='sum',
            loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -101,9 +107,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -133,52 +138,60 @@ test_pipeline = [
                 flip_ratio_bev_horizontal=0.5,
                 flip_ratio_bev_vertical=0.5),
             dict(type='PointSample', num_points=50000),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ])
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
             pipeline=train_pipeline,
             filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
         custom_keys={
             'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -190,9 +203,21 @@ optimizer = dict(
             'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
         }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
@@ -50,34 +50,37 @@ model = dict(
             [1.1511526, 1.0546296, 0.49706793],
             [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
            beta=1.0 / 9.0,
            reduction='sum',
            loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -117,9 +123,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -149,52 +154,60 @@ test_pipeline = [
                 flip_ratio_bev_horizontal=0.5,
                 flip_ratio_bev_vertical=0.5),
             dict(type='PointSample', num_points=50000),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ])
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
             pipeline=train_pipeline,
             filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -206,9 +219,21 @@ optimizer = dict(
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
@@ -51,34 +51,37 @@ model = dict(
             [1.1511526, 1.0546296, 0.49706793],
             [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
            beta=1.0 / 9.0,
            reduction='sum',
            loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -118,9 +124,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -150,52 +155,60 @@ test_pipeline = [
                 flip_ratio_bev_horizontal=0.5,
                 flip_ratio_bev_vertical=0.5),
             dict(type='PointSample', num_points=50000),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ])
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
             pipeline=train_pipeline,
             filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -207,9 +220,21 @@ optimizer = dict(
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
_base_ = [
'../_base_/datasets/scannet-3d-18class.py', '../_base_/models/h3dnet.py',
'../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
rpn_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]])),
roi_head=dict(
bbox_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]))))
train_dataloader = dict(
batch_size=3,
num_workers=2,
)
# yapf:disable
default_hooks = dict(
logger=dict(type='LoggerHook', interval=30)
)
# yapf:enable
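As a quick sanity check of the new-style composition, the merged config above can be inspected with MMEngine's Config. A minimal sketch follows; the filename is an assumption (this diff does not show where the file lives under configs/).

from mmengine.config import Config

# Placeholder path; substitute the actual filename of this H3DNet config.
cfg = Config.fromfile('configs/h3dnet/h3dnet_scannet-3d-18class.py')
print(cfg.model.rpn_head.num_classes)     # 18, overridden by this file
print(cfg.train_dataloader.batch_size)    # 3, overridden by this file
print(cfg.default_hooks.logger.interval)  # 30, from the logger hook above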
@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
                    self.data_prefix.get('pts', ''),
                    info['lidar_points']['lidar_path'])
+            info['lidar_path'] = info['lidar_points']['lidar_path']
+
         if self.modality['use_camera']:
             for cam_id, img_info in info['images'].items():
                 if 'img_path' in img_info:
@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
         """
         ann_info = super().parse_ann_info(info)
         if ann_info is None:
+            ann_info = dict()
             # empty instance
             ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
             ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
     def __init__(
         self,
         keys: dict,
-        meta_keys: dict = ('filename', 'ori_shape', 'img_shape', 'lidar2img',
+        meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
                            'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
                            'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                            'pcd_trans', 'sample_idx', 'pcd_scale_factor',
-                           'pcd_rotation', 'pcd_rotation_angle',
-                           'pts_filename', 'transformation_3d_flow',
-                           'trans_mat', 'affine_aug')):
+                           'pcd_rotation', 'pcd_rotation_angle', 'lidar_path',
+                           'transformation_3d_flow', 'trans_mat',
+                           'affine_aug')):
         self.keys = keys
         self.meta_keys = meta_keys
@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
         ann_info = super().parse_ann_info(info)
         # empty gt
         if ann_info is None:
+            ann_info = dict()
             ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
             ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
         # to target box structure
 # Copyright (c) OpenMMLab. All rights reserved.
-from collections import OrderedDict
-from os import path as osp
 from typing import Callable, List, Optional, Union

-from mmdet3d.core import show_multi_modality_result, show_result
+import numpy as np
+
 from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet3d.registry import DATASETS
-from mmdet.core import eval_map
 from .det3d_dataset import Det3DDataset
-from .pipelines import Compose


 @DATASETS.register_module()
@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
             dict: Processed `ann_info`
         """
         ann_info = super().parse_ann_info(info)
+        # empty gt
+        if ann_info is None:
+            ann_info = dict()
+            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
+            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
         # to target box structure
         ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
             ann_info['gt_bboxes_3d'],
             origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         return ann_info
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
if self.modality['use_camera']:
pipeline.insert(0, dict(type='LoadImageFromFile'))
return Compose(pipeline)
# TODO fix this
def show(self, results, out_dir, show=True, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
data_info = self.data_infos[i]
pts_path = data_info['pts_path']
file_name = osp.split(pts_path)[-1].split('.')[0]
points, img_metas, img = self._extract_data(
i, pipeline, ['points', 'img_metas', 'img'])
# scale colors to [0, 255]
points = points.numpy()
points[:, 3:] *= 255
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
pred_bboxes = result['boxes_3d'].tensor.numpy()
show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
file_name, show)
# multi-modality visualization
if self.modality['use_camera']:
img = img.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
pred_bboxes = DepthInstance3DBoxes(
pred_bboxes, origin=(0.5, 0.5, 0))
gt_bboxes = DepthInstance3DBoxes(
gt_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
gt_bboxes,
pred_bboxes,
None,
out_dir,
file_name,
box_mode='depth',
img_metas=img_metas,
show=show)
def evaluate(self,
results,
metric=None,
iou_thr=(0.25, 0.5),
iou_thr_2d=(0.5, ),
logger=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluate.
Evaluation in indoor protocol.
Args:
results (list[dict]): List of results.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
iou_thr (list[float], optional): AP IoU thresholds for 3D
evaluation. Default: (0.25, 0.5).
iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
evaluation. Default: (0.5, ).
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict: Evaluation results.
"""
# evaluate 3D detection performance
if isinstance(results[0], dict):
return super().evaluate(results, metric, iou_thr, logger, show,
out_dir, pipeline)
# evaluate 2D detection performance
else:
eval_results = OrderedDict()
annotations = [self.get_ann_info(i) for i in range(len(self))]
iou_thr_2d = (iou_thr_2d) if isinstance(iou_thr_2d,
float) else iou_thr_2d
for iou_thr_2d_single in iou_thr_2d:
mean_ap, _ = eval_map(
results,
annotations,
scale_ranges=None,
iou_thr=iou_thr_2d_single,
dataset=self.CLASSES,
logger=logger)
eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
return eval_results
@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             # image tensor.
             inputs_dict = [{
                 k: v.to(self._device)
-                for k, v in _data['inputs'].items()
+                for k, v in _data['inputs'].items() if v is not None
             } for _data in data]

             batch_data_samples: List[BaseDataElement] = []
@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
             batch_gt_instances_ignore.append(
                 data_sample.get('ignored_instances', None))
             batch_pts_semantic_mask.append(
-                data_sample.seg_data.get('pts_semantic_mask', None))
+                data_sample.gt_pts_seg.get('pts_semantic_mask', None))
             batch_pts_instance_mask.append(
-                data_sample.seg_data.get('pts_instance_mask', None))
+                data_sample.gt_pts_seg.get('pts_instance_mask', None))

         loss_inputs = (points, preds_dict, batch_gt_instance_3d)
         losses = self.loss_by_feat(
@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
                 gt_instances. It usually includes ``bboxes`` and ``labels``
                 attributes.
             batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
-                multiple images.
+                point clouds. Defaults to None.
             batch_pts_instance_mask (list[tensor]): Instance gt mask for
-                multiple images.
+                point clouds. Defaults to None.

         Returns:
             tuple[torch.Tensor]: Targets of vote head.
 # Copyright (c) OpenMMLab. All rights reserved.
-import torch
-
-from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
 from mmdet3d.registry import MODELS
+from ...core import SampleList
 from .single_stage import SingleStage3DDetector
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
                  bbox_head=None,
                  train_cfg=None,
                  test_cfg=None,
-                 pretrained=None):
+                 init_cfg=None,
+                 **kwargs):
         super(GroupFree3DNet, self).__init__(
             backbone=backbone,
             bbox_head=bbox_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            pretrained=pretrained)
+            init_cfg=init_cfg,
+            **kwargs)

-    def forward_train(self,
-                      points,
-                      img_metas,
-                      gt_bboxes_3d,
-                      gt_labels_3d,
-                      pts_semantic_mask=None,
-                      pts_instance_mask=None,
-                      gt_bboxes_ignore=None):
-        """Forward of training.
+    def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
+             **kwargs) -> dict:
+        """Calculate losses from a batch of inputs dict and data samples.

         Args:
-            points (list[torch.Tensor]): Points of each batch.
-            img_metas (list): Image metas.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
-            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
-            pts_semantic_mask (list[torch.Tensor]): point-wise semantic
-                label of each batch.
-            pts_instance_mask (list[torch.Tensor]): point-wise instance
-                label of each batch.
-            gt_bboxes_ignore (list[torch.Tensor]): Specify
-                which bounding.
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'imgs' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                Samples. It usually includes information such as
+                `gt_instance_3d`, `gt_pts_seg`.

         Returns:
-            dict[str: torch.Tensor]: Losses.
+            dict: A dictionary of loss components.
         """
-        # TODO: refactor votenet series to reduce redundant codes.
-        points_cat = torch.stack(points)
-
-        x = self.extract_feat(points_cat)
-        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
-        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
-                       pts_instance_mask, img_metas)
-        losses = self.bbox_head.loss(
-            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
+        x = self.extract_feat(batch_inputs_dict)
+        points = batch_inputs_dict['points']
+        losses = self.bbox_head.loss(points, x, batch_data_samples, **kwargs)
         return losses

-    def simple_test(self, points, img_metas, imgs=None, rescale=False):
-        """Forward of testing.
+    def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
+                **kwargs) -> SampleList:
+        """Predict results from a batch of inputs and data samples with post-
+        processing.

         Args:
-            points (list[torch.Tensor]): Points of each sample.
-            img_metas (list): Image metas.
-            rescale (bool): Whether to rescale results.
-        Returns:
-            list: Predicted 3d boxes.
-        """
-        points_cat = torch.stack(points)
-        x = self.extract_feat(points_cat)
-        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
-        bbox_list = self.bbox_head.get_bboxes(
-            points_cat, bbox_preds, img_metas, rescale=rescale)
-        bbox_results = [
-            bbox3d2result(bboxes, scores, labels)
-            for bboxes, scores, labels in bbox_list
-        ]
-        return bbox_results
-
-    def aug_test(self, points, img_metas, imgs=None, rescale=False):
-        """Test with augmentation."""
-        points_cat = [torch.stack(pts) for pts in points]
-        feats = self.extract_feats(points_cat, img_metas)
-
-        # only support aug_test for one sample
-        aug_bboxes = []
-        for x, pts_cat, img_meta in zip(feats, points_cat, img_metas):
-            bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
-            bbox_list = self.bbox_head.get_bboxes(
-                pts_cat, bbox_preds, img_meta, rescale=rescale)
-            bbox_list = [
-                dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
-                for bboxes, scores, labels in bbox_list
-            ]
-            aug_bboxes.append(bbox_list[0])
-
-        # after merging, bboxes will be rescaled to the original image size
-        merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
-                                            self.bbox_head.test_cfg)
-
-        return [merged_bboxes]
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'imgs' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.
+
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                Samples. It usually includes information such as
+                `gt_instance_3d`, `gt_pts_seg`.
+            rescale (bool): Whether to rescale the results.
+                Defaults to True.
+
+        Returns:
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input images. Each Det3DDataSample usually contain
+            'pred_instances_3d'. And the ``pred_instances_3d`` usually
+            contains following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+                (num_instance, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+                (num_instances, ).
+            - bboxes_3d (Tensor): Contains a tensor with shape
+                (num_instances, C) where C >=7.
+        """
+        x = self.extract_feat(batch_inputs_dict)
+        points = batch_inputs_dict['points']
+        results_list = self.bbox_head.predict(points, x, batch_data_samples,
+                                              **kwargs)
+        predictions = self.convert_to_datasample(results_list)
+        return predictions
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestGroupfree3d(unittest.TestCase):
def test_groupfree3d(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'GroupFree3DNet')
DefaultScope.get_instance('test_groupfree3d', scope_name='mmdet3d')
_setup_seed(0)
voxel_net_cfg = _get_detector_cfg(
'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
model = MODELS.build(voxel_net_cfg)
num_gt_instance = 5
data = [
_create_detector_inputs(
num_gt_instance=num_gt_instance,
points_feat_dim=3,
with_pts_semantic_mask=True,
with_pts_instance_mask=True)
]
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
# save the memory
with torch.no_grad():
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(losses['sampling_objectness_loss'], 0)
self.assertGreater(losses['proposal.objectness_loss'], 0)
self.assertGreater(losses['s0.objectness_loss'], 0)
self.assertGreater(losses['s1.size_res_loss'], 0)
self.assertGreater(losses['s4.size_class_loss'], 0)
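A minimal sketch for running the new test in isolation; the module path below is an assumption about where this file lives in the repository layout and should be adjusted accordingly.

import pytest

# Assumed location of the test added above; adjust to the actual path.
pytest.main(['-q', 'tests/test_models/test_detectors/test_groupfree3d.py'])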
@@ -7,7 +7,7 @@ import numpy as np
 import torch
 from mmengine import InstanceData

-from mmdet3d.core import Det3DDataSample, LiDARInstance3DBoxes
+from mmdet3d.core import Det3DDataSample, LiDARInstance3DBoxes, PointData


 def _setup_seed(seed):
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
     return model


-def _create_detector_inputs(seed=0,
-                            with_points=True,
-                            with_img=False,
-                            num_gt_instance=20,
-                            points_feat_dim=4,
-                            gt_bboxes_dim=7,
-                            num_classes=3):
+def _create_detector_inputs(
+    seed=0,
+    with_points=True,
+    with_img=False,
+    num_gt_instance=20,
+    num_points=10,
+    points_feat_dim=4,
+    num_classes=3,
+    gt_bboxes_dim=7,
+    with_pts_semantic_mask=False,
+    with_pts_instance_mask=False,
+):
     _setup_seed(seed)
-    inputs_dict = dict()
     if with_points:
-        points = torch.rand([3, points_feat_dim])
-        inputs_dict['points'] = points
+        points = torch.rand([num_points, points_feat_dim])
+    else:
+        points = None
     if with_img:
         img = torch.rand(3, 10, 10)
-        inputs_dict['img'] = img
+    else:
+        img = None
+    inputs_dict = dict(img=img, points=points)
     gt_instance_3d = InstanceData()
     gt_instance_3d.bboxes_3d = LiDARInstance3DBoxes(
         torch.rand([num_gt_instance, gt_bboxes_dim]), box_dim=gt_bboxes_dim)
@@ -94,5 +100,12 @@ def _create_detector_inputs(seed=0,
     data_sample = Det3DDataSample(
         metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
     data_sample.gt_instances_3d = gt_instance_3d
-    data_sample.seg_data = dict()
+    data_sample.gt_pts_seg = PointData()
+    if with_pts_instance_mask:
+        pts_instance_mask = torch.randint(0, num_gt_instance, [num_points])
+        data_sample.gt_pts_seg['pts_instance_mask'] = pts_instance_mask
+    if with_pts_semantic_mask:
+        pts_semantic_mask = torch.randint(0, num_classes, [num_points])
+        data_sample.gt_pts_seg['pts_semantic_mask'] = pts_semantic_mask
     return dict(inputs=inputs_dict, data_sample=data_sample)