Commit 9ebb75da authored by jshilong, committed by ChaimZhu

[refactor] GroupFree3D

parent b496f579
 model = dict(
     type='GroupFree3DNet',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     backbone=dict(
         type='PointNet2SASSG',
         in_channels=3,
@@ -38,33 +39,36 @@ model = dict(
         pred_layer_cfg=dict(
             in_channels=288, shared_conv_channels=(288, 288), bias=True),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=1.0,
+            reduction='sum',
+            loss_weight=10.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     # model training and testing settings
-    train_cfg=dict(sample_mod='kps'),
+    train_cfg=dict(sample_mode='kps'),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
...
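Note on the `mmdet.` prefix introduced above: in the MMEngine-based 1.x series, registries are scoped per project, so a `scope.Name` type string tells the mmdet3d registry to resolve the class from the parent mmdet scope instead of its own. A minimal sketch of how one of these loss configs is built (`MODELS` is the same registry imported elsewhere in this commit):

from mmdet3d.registry import MODELS

# 'mmdet.FocalLoss' routes the lookup to the mmdet-scoped registry;
# a bare 'FocalLoss' would be searched in mmdet3d's own MODELS registry.
loss_cfg = dict(
    type='mmdet.FocalLoss',
    use_sigmoid=True,
    gamma=2.0,
    alpha=0.25,
    loss_weight=8.0)
loss_sampling_objectness = MODELS.build(loss_cfg)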
@@ -35,34 +35,37 @@ model = dict(
                         [1.1511526, 1.0546296, 0.49706793],
                         [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
             beta=1.0 / 9.0,
             reduction='sum',
             loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
+
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -102,9 +108,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -134,52 +139,60 @@ test_pipeline = [
             flip_ratio_bev_horizontal=0.5,
             flip_ratio_bev_vertical=0.5),
         dict(type='PointSample', num_points=50000),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
             pipeline=train_pipeline,
             filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
         pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
         test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
         pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
         test_mode=True,
         box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
+
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
         custom_keys={
             'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -191,9 +204,21 @@ optimizer = dict(
             'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
         }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
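The runtime migration above follows one fixed pattern: the standalone `optimizer`, `optimizer_config` (gradient clipping) and `lr_config` fields of 0.x are folded into `optim_wrapper` and `param_scheduler`, and the epoch budget moves from `runner` into `train_cfg`. A condensed sketch of the mapping, with values copied from this diff (in the old config the AdamW type came from the inherited schedule file; making it explicit here is an assumption consistent with the new side):

# 0.x style (removed)
optimizer = dict(type='AdamW', lr=0.006, weight_decay=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[56, 68])
runner = dict(type='EpochBasedRunner', max_epochs=80)

# 1.x style (added)
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.006, weight_decay=0.0005),
    clip_grad=dict(max_norm=0.1, norm_type=2))
param_scheduler = [
    dict(type='MultiStepLR', begin=0, end=80, by_epoch=True,
         milestones=[56, 68], gamma=0.1)
]
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)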
@@ -34,34 +34,37 @@ model = dict(
                         [1.1511526, 1.0546296, 0.49706793],
                         [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
             beta=1.0 / 9.0,
             reduction='sum',
             loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
+
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -101,9 +107,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -133,52 +138,60 @@ test_pipeline = [
             flip_ratio_bev_horizontal=0.5,
             flip_ratio_bev_vertical=0.5),
         dict(type='PointSample', num_points=50000),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
+
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -190,9 +203,21 @@ optimizer = dict(
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
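The `classes=class_names` to `metainfo=metainfo` change repeated in these dataset configs reflects MMEngine's `BaseDataset`, which carries dataset-level meta information as a single dict. Note also that `ann_file` is now given relative to `data_root` rather than concatenated onto it. A minimal sketch of the new wiring (the concrete dataset type stands in for `dataset_type` above):

class_names = ('cabinet', 'bed', 'chair')  # truncated for illustration
metainfo = dict(CLASSES=class_names)

dataset = dict(
    type='ScanNetDataset',  # stands in for dataset_type
    data_root='data/scannet/',  # illustrative value
    ann_file='scannet_infos_val.pkl',  # now relative to data_root
    metainfo=metainfo)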
@@ -50,34 +50,37 @@ model = dict(
                         [1.1511526, 1.0546296, 0.49706793],
                         [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
             beta=1.0 / 9.0,
             reduction='sum',
             loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
+
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -117,9 +123,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -149,52 +154,60 @@ test_pipeline = [
             flip_ratio_bev_horizontal=0.5,
             flip_ratio_bev_vertical=0.5),
         dict(type='PointSample', num_points=50000),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
+
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -206,9 +219,21 @@ optimizer = dict(
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
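Likewise, the monolithic `data=dict(samples_per_gpu=..., workers_per_gpu=..., train=..., val=..., test=...)` block becomes three explicit dataloaders, each owning its batch size, worker count and sampler. A reduced sketch of the training side under the same RepeatDataset wrapping used above (dataset values are illustrative):

base_dataset = dict(
    type='ScanNetDataset',  # stands in for dataset_type
    data_root='data/scannet/',  # illustrative value
    ann_file='scannet_infos_train.pkl',
    pipeline=[])  # train_pipeline elided

train_dataloader = dict(
    batch_size=8,   # was samples_per_gpu
    num_workers=4,  # was workers_per_gpu
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(type='RepeatDataset', times=5, dataset=base_dataset))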
@@ -51,34 +51,37 @@ model = dict(
                         [1.1511526, 1.0546296, 0.49706793],
                         [0.47535285, 0.49249494, 0.5802117]]),
         sampling_objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=8.0),
         objectness_loss=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
         center_loss=dict(
-            type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss',
+            beta=0.04,
+            reduction='sum',
+            loss_weight=10.0),
         dir_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         dir_res_loss=dict(
-            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
+            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
         size_class_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
         size_res_loss=dict(
-            type='SmoothL1Loss',
+            type='mmdet.SmoothL1Loss',
             beta=1.0 / 9.0,
             reduction='sum',
             loss_weight=10.0 / 9.0),
         semantic_loss=dict(
-            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
     test_cfg=dict(
-        sample_mod='kps',
+        sample_mode='kps',
         nms_thr=0.25,
         score_thr=0.0,
         per_class_proposal=True,
@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
                'bookshelf', 'picture', 'counter', 'desk', 'curtain',
                'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
                'garbagebin')
+metainfo = dict(CLASSES=class_names)
+
 train_pipeline = [
     dict(
         type='LoadPointsFromFile',
@@ -118,9 +124,8 @@ train_pipeline = [
         type='GlobalRotScaleTrans',
         rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0]),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
             'pts_instance_mask'
@@ -150,52 +155,60 @@ test_pipeline = [
             flip_ratio_bev_horizontal=0.5,
             flip_ratio_bev_vertical=0.5),
         dict(type='PointSample', num_points=50000),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=8,
+    num_workers=4,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
-            ann_file=data_root + 'scannet_infos_train.pkl',
+            ann_file='scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
-            classes=class_names,
+            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='Depth')),
-    val=dict(
+            box_type_3d='Depth')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
-        box_type_3d='Depth'),
-    test=dict(
+        box_type_3d='Depth'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-        ann_file=data_root + 'scannet_infos_val.pkl',
+        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
-        classes=class_names,
+        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
+val_evaluator = dict(type='IndoorMetric')
+test_evaluator = val_evaluator
+
 # optimizer
 lr = 0.006
-optimizer = dict(
-    lr=lr,
-    weight_decay=0.0005,
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
+    clip_grad=dict(max_norm=0.1, norm_type=2),
     paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
@@ -207,9 +220,21 @@ optimizer = dict(
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))
-optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
-lr_config = dict(policy='step', warmup=None, step=[56, 68])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=80)
-checkpoint_config = dict(interval=1, max_keep_ckpts=10)
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=80,
+        by_epoch=True,
+        milestones=[56, 68],
+        gamma=0.1)
+]
+
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+default_hooks = dict(
+    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
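One more pattern repeated in every config above: the pipeline tail migration collapses the 0.x formatting/collection pair into a single packing transform. A sketch of the before/after with a shortened key list:

# 0.x: format then collect
old_tail = [
    dict(type='DefaultFormatBundle3D', class_names=('chair', 'table')),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]

# 1.x: one transform packs both inputs and data samples
new_tail = [
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]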
_base_ = [
'../_base_/datasets/scannet-3d-18class.py', '../_base_/models/h3dnet.py',
'../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
rpn_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]])),
roi_head=dict(
bbox_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]))))
train_dataloader = dict(
batch_size=3,
num_workers=2,
)
# yapf:disable
default_hooks = dict(
logger=dict(type='LoggerHook', interval=30)
)
# yapf:enable
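This config only carries deltas; everything else comes from the four `_base_` files, which MMEngine's Config merges recursively. A quick way to inspect the merged result (the file path below is hypothetical):

from mmengine.config import Config

cfg = Config.fromfile('configs/h3dnet/<this_config>.py')  # hypothetical path
# Keys set in this file win over base values, e.g. the batch size:
assert cfg.train_dataloader.batch_size == 3
# Unset keys (num_workers defaults, schedules, runtime) are inherited
# unchanged from the _base_ files.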
@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
                 self.data_prefix.get('pts', ''),
                 info['lidar_points']['lidar_path'])
+            info['lidar_path'] = info['lidar_points']['lidar_path']
+
         if self.modality['use_camera']:
             for cam_id, img_info in info['images'].items():
                 if 'img_path' in img_info:
...
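The added line mirrors the nested LiDAR path at the top level of `info`. This pairs with the `Pack3DDetInputs` change later in this commit, where `lidar_path` replaces `pts_filename` in the default `meta_keys`, so the packer can pick the path up directly. A toy illustration with a made-up sample:

info = {'lidar_points': {'lidar_path': 'points/scene0000_00.bin'}}  # made-up
info['lidar_path'] = info['lidar_points']['lidar_path']
# 'lidar_path' is now visible to meta-key collection downstream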
@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
         """
         ann_info = super().parse_ann_info(info)
         if ann_info is None:
+            ann_info = dict()
             # empty instance
             ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
             ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
...
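Without the added `ann_info = dict()`, the item assignments that follow would fail on frames with no instances, since the base-class `parse_ann_info` returns None in that case. The fallback in isolation:

import numpy as np

ann_info = None  # what super().parse_ann_info(info) returns for empty frames
if ann_info is None:
    ann_info = dict()
    # zero-length arrays keep downstream code shape-safe (7 box params in KITTI)
    ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
    ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)

The same guard is added to the ScanNet and SUN RGB-D datasets below, with (0, 6)-shaped boxes for the rotation-free indoor case.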
@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
     def __init__(
             self,
             keys: dict,
-            meta_keys: dict = ('filename', 'ori_shape', 'img_shape', 'lidar2img',
+            meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
                                'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
                                'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
                                'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                                'pcd_trans', 'sample_idx', 'pcd_scale_factor',
-                               'pcd_rotation', 'pcd_rotation_angle',
-                               'pts_filename', 'transformation_3d_flow',
-                               'trans_mat', 'affine_aug')):
+                               'pcd_rotation', 'pcd_rotation_angle', 'lidar_path',
+                               'transformation_3d_flow', 'trans_mat',
+                               'affine_aug')):
         self.keys = keys
         self.meta_keys = meta_keys
...
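The default `meta_keys` now use the renamed info fields: `img_path` instead of `filename` and `lidar_path` instead of `pts_filename`. Overriding the tuple at construction works as before; a minimal sketch (the import path follows the dev-1.x layout and is an assumption):

from mmdet3d.datasets.transforms import Pack3DDetInputs  # path is an assumption

pack = Pack3DDetInputs(
    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
    meta_keys=('lidar_path', 'sample_idx', 'box_type_3d', 'box_mode_3d'))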
@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
         ann_info = super().parse_ann_info(info)
         # empty gt
         if ann_info is None:
+            ann_info = dict()
             ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
             ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
         # to target box structure
...
 # Copyright (c) OpenMMLab. All rights reserved.
-from collections import OrderedDict
-from os import path as osp
 from typing import Callable, List, Optional, Union
 
-from mmdet3d.core import show_multi_modality_result, show_result
+import numpy as np
+
 from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet3d.registry import DATASETS
-from mmdet.core import eval_map
 from .det3d_dataset import Det3DDataset
-from .pipelines import Compose
 
 
 @DATASETS.register_module()
@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
             dict: Processed `ann_info`
         """
         ann_info = super().parse_ann_info(info)
+        # empty gt
+        if ann_info is None:
+            ann_info = dict()
+            ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
+            ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
         # to target box structure
         ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
             ann_info['gt_bboxes_3d'],
             origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
 
         return ann_info
-
-    def _build_default_pipeline(self):
-        """Build the default pipeline for this dataset."""
-        pipeline = [
-            dict(
-                type='LoadPointsFromFile',
-                coord_type='DEPTH',
-                shift_height=False,
-                load_dim=6,
-                use_dim=[0, 1, 2]),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=self.CLASSES,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ]
-        if self.modality['use_camera']:
-            pipeline.insert(0, dict(type='LoadImageFromFile'))
-        return Compose(pipeline)
-
-    # TODO fix this
-    def show(self, results, out_dir, show=True, pipeline=None):
-        """Results visualization.
-
-        Args:
-            results (list[dict]): List of bounding boxes results.
-            out_dir (str): Output directory of visualization result.
-            show (bool): Visualize the results online.
-            pipeline (list[dict], optional): raw data loading for showing.
-                Default: None.
-        """
-        assert out_dir is not None, 'Expect out_dir, got none.'
-        pipeline = self._get_pipeline(pipeline)
-        for i, result in enumerate(results):
-            data_info = self.data_infos[i]
-            pts_path = data_info['pts_path']
-            file_name = osp.split(pts_path)[-1].split('.')[0]
-            points, img_metas, img = self._extract_data(
-                i, pipeline, ['points', 'img_metas', 'img'])
-            # scale colors to [0, 255]
-            points = points.numpy()
-            points[:, 3:] *= 255
-
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
-            pred_bboxes = result['boxes_3d'].tensor.numpy()
-            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
-                        file_name, show)
-
-            # multi-modality visualization
-            if self.modality['use_camera']:
-                img = img.numpy()
-                # need to transpose channel to first dim
-                img = img.transpose(1, 2, 0)
-                pred_bboxes = DepthInstance3DBoxes(
-                    pred_bboxes, origin=(0.5, 0.5, 0))
-                gt_bboxes = DepthInstance3DBoxes(
-                    gt_bboxes, origin=(0.5, 0.5, 0))
-                show_multi_modality_result(
-                    img,
-                    gt_bboxes,
-                    pred_bboxes,
-                    None,
-                    out_dir,
-                    file_name,
-                    box_mode='depth',
-                    img_metas=img_metas,
-                    show=show)
-
-    def evaluate(self,
-                 results,
-                 metric=None,
-                 iou_thr=(0.25, 0.5),
-                 iou_thr_2d=(0.5, ),
-                 logger=None,
-                 show=False,
-                 out_dir=None,
-                 pipeline=None):
-        """Evaluate.
-
-        Evaluation in indoor protocol.
-
-        Args:
-            results (list[dict]): List of results.
-            metric (str | list[str], optional): Metrics to be evaluated.
-                Default: None.
-            iou_thr (list[float], optional): AP IoU thresholds for 3D
-                evaluation. Default: (0.25, 0.5).
-            iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
-                evaluation. Default: (0.5, ).
-            show (bool, optional): Whether to visualize.
-                Default: False.
-            out_dir (str, optional): Path to save the visualization results.
-                Default: None.
-            pipeline (list[dict], optional): raw data loading for showing.
-                Default: None.
-
-        Returns:
-            dict: Evaluation results.
-        """
-        # evaluate 3D detection performance
-        if isinstance(results[0], dict):
-            return super().evaluate(results, metric, iou_thr, logger, show,
-                                    out_dir, pipeline)
-        # evaluate 2D detection performance
-        else:
-            eval_results = OrderedDict()
-            annotations = [self.get_ann_info(i) for i in range(len(self))]
-            iou_thr_2d = (iou_thr_2d) if isinstance(iou_thr_2d,
-                                                    float) else iou_thr_2d
-            for iou_thr_2d_single in iou_thr_2d:
-                mean_ap, _ = eval_map(
-                    results,
-                    annotations,
-                    scale_ranges=None,
-                    iou_thr=iou_thr_2d_single,
-                    dataset=self.CLASSES,
-                    logger=logger)
-                eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
-            return eval_results
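The removed `_build_default_pipeline`, `show` and `evaluate` methods are not replaced inside the dataset: in 1.x, evaluation moves to standalone evaluators wired into the val/test loops, and visualization moves to engine-level hooks, so nothing dataset-specific remains beyond annotation parsing. The replacement on the config side is the evaluator pair already shown in the diffs above:

# evaluator config replacing Dataset.evaluate(), as in the configs above
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator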
@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             # image tensor.
             inputs_dict = [{
                 k: v.to(self._device)
-                for k, v in _data['inputs'].items()
+                for k, v in _data['inputs'].items() if v is not None
             } for _data in data]
 
             batch_data_samples: List[BaseDataElement] = []
...
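The added `if v is not None` guard matters for mixed-modality batches: collation can leave None placeholders (for example, no image tensor in a point-cloud-only sample), and calling `.to()` on None would raise. The filter in isolation, with a toy sample:

import torch

_data = {'inputs': {'points': torch.zeros(4, 3), 'img': None}}  # toy sample
device = 'cpu'
inputs = {k: v.to(device) for k, v in _data['inputs'].items() if v is not None}
assert 'img' not in inputs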
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import copy import copy
from typing import Dict, List, Optional, Tuple
import numpy as np import numpy as np
import torch import torch
from mmcv import ConfigDict
from mmcv.cnn import ConvModule, xavier_init from mmcv.cnn import ConvModule, xavier_init
from mmcv.cnn.bricks.transformer import (build_positional_encoding, from mmcv.cnn.bricks.transformer import (build_positional_encoding,
build_transformer_layer) build_transformer_layer)
from mmcv.ops import PointsSampler as Points_Sampler from mmcv.ops import PointsSampler as Points_Sampler
from mmcv.ops import gather_points from mmcv.ops import gather_points
from mmcv.runner import BaseModule, force_fp32 from mmcv.runner import BaseModule
from mmengine import InstanceData
from torch import Tensor
from torch import nn as nn from torch import nn as nn
from torch.nn import functional as F from torch.nn import functional as F
from mmdet3d.core.post_processing import aligned_3d_nms from mmdet3d.core.post_processing import aligned_3d_nms
from mmdet3d.registry import MODELS from mmdet3d.registry import MODELS
from mmdet.core import build_bbox_coder, multi_apply from mmdet.core import build_bbox_coder, multi_apply
from ..builder import build_loss from ...core import BaseInstance3DBoxes, Det3DDataSample, SampleList
from .base_conv_bbox_head import BaseConvBboxHead from .base_conv_bbox_head import BaseConvBboxHead
EPS = 1e-6 EPS = 1e-6
...@@ -38,12 +40,12 @@ class PointsObjClsModule(BaseModule): ...@@ -38,12 +40,12 @@ class PointsObjClsModule(BaseModule):
""" """
def __init__(self, def __init__(self,
in_channel, in_channel: int,
num_convs=3, num_convs: int = 3,
conv_cfg=dict(type='Conv1d'), conv_cfg: dict = dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'), norm_cfg: dict = dict(type='BN1d'),
act_cfg=dict(type='ReLU'), act_cfg: dict = dict(type='ReLU'),
init_cfg=None): init_cfg: Optional[dict] = None):
super().__init__(init_cfg=init_cfg) super().__init__(init_cfg=init_cfg)
conv_channels = [in_channel for _ in range(num_convs - 1)] conv_channels = [in_channel for _ in range(num_convs - 1)]
conv_channels.append(1) conv_channels.append(1)
...@@ -85,11 +87,12 @@ class GeneralSamplingModule(nn.Module): ...@@ -85,11 +87,12 @@ class GeneralSamplingModule(nn.Module):
Sampling points with given index. Sampling points with given index.
""" """
def forward(self, xyz, features, sample_inds): def forward(self, xyz: Tensor, features: Tensor,
sample_inds: Tensor) -> Tuple[Tensor]:
"""Forward pass. """Forward pass.
Args: Args:
xyz: (B, N, 3) the coordinates of the features. xyz (Tensor): (B, N, 3) the coordinates of the features.
features (Tensor): (B, C, N) features to sample. features (Tensor): (B, C, N) features to sample.
sample_inds (Tensor): (B, M) the given index, sample_inds (Tensor): (B, M) the given index,
where M is the number of points. where M is the number of points.
...@@ -118,56 +121,61 @@ class GroupFree3DHead(BaseModule): ...@@ -118,56 +121,61 @@ class GroupFree3DHead(BaseModule):
decoding boxes. decoding boxes.
num_decoder_layers (int): The number of transformer decoder layers. num_decoder_layers (int): The number of transformer decoder layers.
transformerlayers (dict): Config for transformer decoder. transformerlayers (dict): Config for transformer decoder.
train_cfg (dict): Config for training. train_cfg (dict, optional): Config for training.
test_cfg (dict): Config for testing. test_cfg (dict, optional): Config for testing.
num_proposal (int): The number of initial sampling candidates. num_proposal (int): The number of initial sampling candidates.
pred_layer_cfg (dict): Config of classfication and regression pred_layer_cfg (dict, optional): Config of classfication and regression
prediction layers. prediction layers.
size_cls_agnostic (bool): Whether the predicted size is class-agnostic. size_cls_agnostic (bool): Whether the predicted size is class-agnostic.
gt_per_seed (int): the number of candidate instance each point belongs gt_per_seed (int): the number of candidate instance each point belongs
to. to.
sampling_objectness_loss (dict): Config of initial sampling sampling_objectness_loss (dict, optional): Config of initial sampling
objectness loss. objectness loss.
objectness_loss (dict): Config of objectness loss. objectness_loss (dict, optional): Config of objectness loss.
center_loss (dict): Config of center loss. center_loss (dict, optional): Config of center loss.
dir_class_loss (dict): Config of direction classification loss. dir_class_loss (dict, optional): Config of direction classification
dir_res_loss (dict): Config of direction residual regression loss. loss.
size_class_loss (dict): Config of size classification loss. dir_res_loss (dict, optional): Config of direction residual
size_res_loss (dict): Config of size residual regression loss. regression loss.
size_reg_loss (dict): Config of class-agnostic size regression loss. size_class_loss (dict, optional): Config of size classification loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss. size_res_loss (dict, optional): Config of size residual
regression loss.
size_reg_loss (dict, optional): Config of class-agnostic size
regression loss.
semantic_loss (dict, optional): Config of point-wise semantic
segmentation loss.
""" """
def __init__(self, def __init__(self,
num_classes, num_classes: int,
in_channels, in_channels: int,
bbox_coder, bbox_coder: dict,
num_decoder_layers, num_decoder_layers: int,
transformerlayers, transformerlayers: dict,
decoder_self_posembeds=dict( decoder_self_posembeds: dict = dict(
type='ConvBNPositionalEncoding', type='ConvBNPositionalEncoding',
input_channel=6, input_channel=6,
num_pos_feats=288), num_pos_feats=288),
decoder_cross_posembeds=dict( decoder_cross_posembeds: dict = dict(
type='ConvBNPositionalEncoding', type='ConvBNPositionalEncoding',
input_channel=3, input_channel=3,
num_pos_feats=288), num_pos_feats=288),
train_cfg=None, train_cfg: Optional[dict] = None,
test_cfg=None, test_cfg: Optional[dict] = None,
num_proposal=128, num_proposal: int = 128,
pred_layer_cfg=None, pred_layer_cfg: Optional[dict] = None,
size_cls_agnostic=True, size_cls_agnostic: bool = True,
gt_per_seed=3, gt_per_seed: int = 3,
sampling_objectness_loss=None, sampling_objectness_loss: Optional[dict] = None,
objectness_loss=None, objectness_loss: Optional[dict] = None,
center_loss=None, center_loss: Optional[dict] = None,
dir_class_loss=None, dir_class_loss: Optional[dict] = None,
dir_res_loss=None, dir_res_loss: Optional[dict] = None,
size_class_loss=None, size_class_loss: Optional[dict] = None,
size_res_loss=None, size_res_loss: Optional[dict] = None,
size_reg_loss=None, size_reg_loss: Optional[dict] = None,
semantic_loss=None, semantic_loss: Optional[dict] = None,
init_cfg=None): init_cfg: Optional[dict] = None):
super(GroupFree3DHead, self).__init__(init_cfg=init_cfg) super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes self.num_classes = num_classes
self.train_cfg = train_cfg self.train_cfg = train_cfg
...@@ -179,7 +187,7 @@ class GroupFree3DHead(BaseModule): ...@@ -179,7 +187,7 @@ class GroupFree3DHead(BaseModule):
self.gt_per_seed = gt_per_seed self.gt_per_seed = gt_per_seed
# Transformer decoder layers # Transformer decoder layers
if isinstance(transformerlayers, ConfigDict): if isinstance(transformerlayers, dict):
transformerlayers = [ transformerlayers = [
copy.deepcopy(transformerlayers) copy.deepcopy(transformerlayers)
for _ in range(num_decoder_layers) for _ in range(num_decoder_layers)
...@@ -239,17 +247,17 @@ class GroupFree3DHead(BaseModule): ...@@ -239,17 +247,17 @@ class GroupFree3DHead(BaseModule):
num_cls_out_channels=self._get_cls_out_channels(), num_cls_out_channels=self._get_cls_out_channels(),
num_reg_out_channels=self._get_reg_out_channels())) num_reg_out_channels=self._get_reg_out_channels()))
self.sampling_objectness_loss = build_loss(sampling_objectness_loss) self.loss_sampling_objectness = MODELS.build(sampling_objectness_loss)
self.objectness_loss = build_loss(objectness_loss) self.loss_objectness = MODELS.build(objectness_loss)
self.center_loss = build_loss(center_loss) self.loss_center = MODELS.build(center_loss)
self.dir_res_loss = build_loss(dir_res_loss) self.loss_dir_res = MODELS.build(dir_res_loss)
self.dir_class_loss = build_loss(dir_class_loss) self.loss_dir_class = MODELS.build(dir_class_loss)
self.semantic_loss = build_loss(semantic_loss) self.loss_semantic = MODELS.build(semantic_loss)
if self.size_cls_agnostic: if self.size_cls_agnostic:
self.size_reg_loss = build_loss(size_reg_loss) self.loss_size_reg = MODELS.build(size_reg_loss)
else: else:
self.size_res_loss = build_loss(size_res_loss) self.loss_size_res = MODELS.build(size_res_loss)
self.size_class_loss = build_loss(size_class_loss) self.loss_size_class = MODELS.build(size_class_loss)
def init_weights(self): def init_weights(self):
"""Initialize weights of transformer decoder in GroupFree3DHead.""" """Initialize weights of transformer decoder in GroupFree3DHead."""
...@@ -279,16 +287,18 @@ class GroupFree3DHead(BaseModule): ...@@ -279,16 +287,18 @@ class GroupFree3DHead(BaseModule):
else: else:
return 3 + self.num_dir_bins * 2 + self.num_sizes * 4 return 3 + self.num_dir_bins * 2 + self.num_sizes * 4
def _extract_input(self, feat_dict): def _extract_input(self, feat_dict: dict) -> Tuple[Tensor]:
"""Extract inputs from features dictionary. """Extract inputs from features dictionary.
Args: Args:
feat_dict (dict): Feature dict from backbone. feat_dict (dict): Feature dict from backbone.
Returns: Returns:
torch.Tensor: Coordinates of input points. Tuple[Tensor]:
torch.Tensor: Features of input points.
torch.Tensor: Indices of input points. - seed_points (Tensor): Coordinates of input points.
- seed_features (Tensor): Features of input points.
- seed_indices (Tensor): Indices of input points.
""" """
seed_points = feat_dict['fp_xyz'][-1] seed_points = feat_dict['fp_xyz'][-1]
...@@ -297,7 +307,20 @@ class GroupFree3DHead(BaseModule): ...@@ -297,7 +307,20 @@ class GroupFree3DHead(BaseModule):
return seed_points, seed_features, seed_indices return seed_points, seed_features, seed_indices
def forward(self, feat_dict, sample_mod): @property
def sample_mode(self):
"""
Returns:
str: Sample mode for initial candidates sampling.
"""
if self.training:
sample_mode = self.train_cfg.sample_mode
else:
sample_mode = self.test_cfg.sample_mode
assert sample_mode in ['fps', 'kps']
return sample_mode
def forward(self, feat_dict: dict) -> dict:
"""Forward pass. """Forward pass.
Note: Note:
...@@ -308,12 +331,12 @@ class GroupFree3DHead(BaseModule): ...@@ -308,12 +331,12 @@ class GroupFree3DHead(BaseModule):
Args: Args:
feat_dict (dict): Feature dict from backbone. feat_dict (dict): Feature dict from backbone.
sample_mod (str): sample mode for initial candidates sampling.
Returns: Returns:
results (dict): Predictions of GroupFree3D head. results (dict): Predictions of GroupFree3D head.
""" """
assert sample_mod in ['fps', 'kps'] sample_mode = self.sample_mode
seed_xyz, seed_features, seed_indices = self._extract_input(feat_dict) seed_xyz, seed_features, seed_indices = self._extract_input(feat_dict)
...@@ -323,9 +346,9 @@ class GroupFree3DHead(BaseModule): ...@@ -323,9 +346,9 @@ class GroupFree3DHead(BaseModule):
seed_indices=seed_indices) seed_indices=seed_indices)
# 1. Initial object candidates sampling. # 1. Initial object candidates sampling.
if sample_mod == 'fps': if sample_mode == 'fps':
sample_inds = self.fps_module(seed_xyz, seed_features) sample_inds = self.fps_module(seed_xyz, seed_features)
elif sample_mod == 'kps': elif sample_mode == 'kps':
points_obj_cls_logits = self.points_obj_cls( points_obj_cls_logits = self.points_obj_cls(
seed_features) # (batch_size, 1, num_seed) seed_features) # (batch_size, 1, num_seed)
points_obj_cls_scores = points_obj_cls_logits.sigmoid().squeeze(1) points_obj_cls_scores = points_obj_cls_logits.sigmoid().squeeze(1)
...@@ -334,7 +357,7 @@ class GroupFree3DHead(BaseModule): ...@@ -334,7 +357,7 @@ class GroupFree3DHead(BaseModule):
results['seeds_obj_cls_logits'] = points_obj_cls_logits results['seeds_obj_cls_logits'] = points_obj_cls_logits
else: else:
raise NotImplementedError( raise NotImplementedError(
f'Sample mode {sample_mod} is not supported!') f'Sample mode {sample_mode} is not supported!')
candidate_xyz, candidate_features, sample_inds = self.gsample_module( candidate_xyz, candidate_features, sample_inds = self.gsample_module(
seed_xyz, seed_features, sample_inds) seed_xyz, seed_features, sample_inds)
...@@ -391,40 +414,74 @@ class GroupFree3DHead(BaseModule): ...@@ -391,40 +414,74 @@ class GroupFree3DHead(BaseModule):
return results return results
@force_fp32(apply_to=('bbox_preds', )) def loss(self, points: List[torch.Tensor], feats_dict: Dict[str,
def loss(self, torch.Tensor],
bbox_preds, batch_data_samples: SampleList, **kwargs) -> dict:
points, """
gt_bboxes_3d, Args:
gt_labels_3d, points (list[tensor]): Points cloud of multiple samples.
pts_semantic_mask=None, feats_dict (dict): Predictions from backbone or FPN.
pts_instance_mask=None, batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
img_metas=None, contains the meta information of each sample and
gt_bboxes_ignore=None, corresponding annotations.
ret_target=False):
Returns:
dict: A dictionary of loss components.
"""
preds_dict = self.forward(feats_dict)
batch_gt_instance_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
batch_pts_semantic_mask = []
batch_pts_instance_mask = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
batch_pts_semantic_mask.append(
data_sample.gt_pts_seg.get('pts_semantic_mask', None))
batch_pts_instance_mask.append(
data_sample.gt_pts_seg.get('pts_instance_mask', None))
loss_inputs = (points, preds_dict, batch_gt_instance_3d)
losses = self.loss_by_feat(
*loss_inputs,
batch_pts_semantic_mask=batch_pts_semantic_mask,
batch_pts_instance_mask=batch_pts_instance_mask,
batch_input_metas=batch_input_metas,
batch_gt_instances_ignore=batch_gt_instances_ignore)
return losses
def loss_by_feat(
self,
points: List[torch.Tensor],
feats_dict: dict,
batch_gt_instances_3d: List[InstanceData],
batch_pts_semantic_mask: Optional[List[torch.Tensor]] = None,
batch_pts_instance_mask: Optional[List[torch.Tensor]] = None,
ret_target: bool = False,
**kwargs) -> dict:
"""Compute loss. """Compute loss.
Args: Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points. points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth feats_dict (dict): Predictions from previous component.
bboxes of each sample. batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_labels_3d (list[torch.Tensor]): Labels of each sample. gt_instances. It usually includes ``bboxes_3d`` and
pts_semantic_mask (list[torch.Tensor]): Point-wise ``labels_3d`` attributes.
semantic mask. batch_pts_semantic_mask (list[tensor]): Semantic mask
pts_instance_mask (list[torch.Tensor]): Point-wise of points cloud. Defaults to None.
instance mask. batch_pts_semantic_mask (list[tensor]): Instance mask
img_metas (list[dict]): Contain pcd and img's meta info. of points cloud. Defaults to None.
gt_bboxes_ignore (list[torch.Tensor]): Specify ret_target (bool): Return targets or not. Defaults to False.
which bounding.
ret_target (Bool): Return targets or not.
Returns: Returns:
dict: Losses of GroupFree3D. dict: Losses of `GroupFree3D`.
""" """
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d, targets = self.get_targets(points, feats_dict, batch_gt_instances_3d,
pts_semantic_mask, pts_instance_mask, batch_pts_semantic_mask,
bbox_preds) batch_pts_instance_mask)
(sampling_targets, sampling_weights, assigned_size_targets, (sampling_targets, sampling_weights, assigned_size_targets,
size_class_targets, size_res_targets, dir_class_targets, size_class_targets, size_res_targets, dir_class_targets,
dir_res_targets, center_targets, assigned_center_targets, dir_res_targets, center_targets, assigned_center_targets,
...@@ -436,8 +493,8 @@ class GroupFree3DHead(BaseModule): ...@@ -436,8 +493,8 @@ class GroupFree3DHead(BaseModule):
losses = dict() losses = dict()
# calculate objectness classification loss # calculate objectness classification loss
sampling_obj_score = bbox_preds['seeds_obj_cls_logits'].reshape(-1, 1) sampling_obj_score = feats_dict['seeds_obj_cls_logits'].reshape(-1, 1)
sampling_objectness_loss = self.sampling_objectness_loss( sampling_objectness_loss = self.loss_sampling_objectness(
sampling_obj_score, sampling_obj_score,
1 - sampling_targets.reshape(-1), 1 - sampling_targets.reshape(-1),
sampling_weights.reshape(-1), sampling_weights.reshape(-1),
...@@ -445,14 +502,14 @@ class GroupFree3DHead(BaseModule): ...@@ -445,14 +502,14 @@ class GroupFree3DHead(BaseModule):
losses['sampling_objectness_loss'] = sampling_objectness_loss losses['sampling_objectness_loss'] = sampling_objectness_loss
prefixes = ['proposal.'] + [ prefixes = ['proposal.'] + [
f's{i}.' for i in range(bbox_preds['num_decoder_layers']) f's{i}.' for i in range(feats_dict['num_decoder_layers'])
] ]
num_stages = len(prefixes) num_stages = len(prefixes)
for prefix in prefixes: for prefix in prefixes:
# calculate objectness loss # calculate objectness loss
obj_score = bbox_preds[f'{prefix}obj_scores'].transpose(2, 1) obj_score = feats_dict[f'{prefix}obj_scores'].transpose(2, 1)
objectness_loss = self.objectness_loss( objectness_loss = self.loss_objectness(
obj_score.reshape(-1, 1), obj_score.reshape(-1, 1),
1 - objectness_targets.reshape(-1), 1 - objectness_targets.reshape(-1),
objectness_weights.reshape(-1), objectness_weights.reshape(-1),
...@@ -462,15 +519,15 @@ class GroupFree3DHead(BaseModule): ...@@ -462,15 +519,15 @@ class GroupFree3DHead(BaseModule):
            # calculate center loss
            box_loss_weights_expand = box_loss_weights.unsqueeze(-1).expand(
                -1, -1, 3)
            center_loss = self.loss_center(
                feats_dict[f'{prefix}center'],
                assigned_center_targets,
                weight=box_loss_weights_expand)
            losses[f'{prefix}center_loss'] = center_loss / num_stages

            # calculate direction class loss
            dir_class_loss = self.loss_dir_class(
                feats_dict[f'{prefix}dir_class'].transpose(2, 1),
                dir_class_targets,
                weight=box_loss_weights)
            losses[f'{prefix}dir_class_loss'] = dir_class_loss / num_stages
@@ -481,24 +538,24 @@ class GroupFree3DHead(BaseModule):
            heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1),
                                           1)
            dir_res_norm = torch.sum(
                feats_dict[f'{prefix}dir_res_norm'] * heading_label_one_hot,
                -1)
            dir_res_loss = self.loss_dir_res(
                dir_res_norm, dir_res_targets, weight=box_loss_weights)
            losses[f'{prefix}dir_res_loss'] = dir_res_loss / num_stages

            if self.size_cls_agnostic:
                # calculate class-agnostic size loss
                size_reg_loss = self.loss_size_reg(
                    feats_dict[f'{prefix}size'],
                    assigned_size_targets,
                    weight=box_loss_weights_expand)
                losses[f'{prefix}size_reg_loss'] = size_reg_loss / num_stages
            else:
                # calculate size class loss
                size_class_loss = self.loss_size_class(
                    feats_dict[f'{prefix}size_class'].transpose(2, 1),
                    size_class_targets,
                    weight=box_loss_weights)
                losses[
@@ -513,19 +570,19 @@ class GroupFree3DHead(BaseModule):
                one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(
                    -1).expand(-1, -1, -1, 3).contiguous()
                size_residual_norm = torch.sum(
                    feats_dict[f'{prefix}size_res_norm'] *
                    one_hot_size_targets_expand, 2)
                box_loss_weights_expand = box_loss_weights.unsqueeze(
                    -1).expand(-1, -1, 3)
                size_res_loss = self.loss_size_res(
                    size_residual_norm,
                    size_res_targets,
                    weight=box_loss_weights_expand)
                losses[f'{prefix}size_res_loss'] = size_res_loss / num_stages

            # calculate semantic loss
            semantic_loss = self.loss_semantic(
                feats_dict[f'{prefix}sem_scores'].transpose(2, 1),
                mask_targets,
                weight=box_loss_weights)
            losses[f'{prefix}semantic_loss'] = semantic_loss / num_stages
@@ -535,27 +592,29 @@ class GroupFree3DHead(BaseModule):
        return losses
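    # Sketch (not part of the commit): with the default ScanNet config
    # (num_decoder_layers=6, size_cls_agnostic=False), the dict returned
    # above carries 'sampling_objectness_loss' plus seven per-stage terms
    # for each prefix, which the unit test below checks a subset of:
    #
    #   prefixes = ['proposal.'] + [f's{i}.' for i in range(6)]
    #   per_stage = ['objectness_loss', 'center_loss', 'dir_class_loss',
    #                'dir_res_loss', 'size_class_loss', 'size_res_loss',
    #                'semantic_loss']
    #   loss_keys = ['sampling_objectness_loss'] + [
    #       f'{p}{k}' for p in prefixes for k in per_stage]
    #   # e.g. 'proposal.objectness_loss', 's0.objectness_loss', ...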
    def get_targets(
        self,
        points: List[Tensor],
        feats_dict: dict = None,
        batch_gt_instances_3d: List[InstanceData] = None,
        batch_pts_semantic_mask: List[torch.Tensor] = None,
        batch_pts_instance_mask: List[torch.Tensor] = None,
        max_gt_num: int = 64,
    ):
"""Generate targets of GroupFree3D head. """Generate targets of GroupFree3D head.
Args: Args:
points (list[torch.Tensor]): Points of each batch. points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth feats_dict (torch.Tensor): Predictions of previous component.
bboxes of each batch. batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_labels_3d (list[torch.Tensor]): Labels of each batch. gt_instances. It usually includes ``bboxes_3d`` and
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic ``labels_3d`` attributes.
label of each batch. batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
pts_instance_mask (list[torch.Tensor]): Point-wise instance point clouds. Defaults to None.
label of each batch. batch_pts_instance_mask (list[tensor]): Instance gt mask for
bbox_preds (torch.Tensor): Bounding box predictions of vote head. point clouds. Defaults to None.
max_gt_num (int): Max number of GTs for single batch. max_gt_num (int): Max number of GTs for single batch. Defaults
to 64.
Returns: Returns:
tuple[torch.Tensor]: Targets of GroupFree3D head. tuple[torch.Tensor]: Targets of GroupFree3D head.
@@ -563,51 +622,67 @@ class GroupFree3DHead(BaseModule):
        # find empty example
        valid_gt_masks = list()
        gt_num = list()
        batch_gt_labels_3d = [
            gt_instances_3d.labels_3d
            for gt_instances_3d in batch_gt_instances_3d
        ]
        batch_gt_bboxes_3d = [
            gt_instances_3d.bboxes_3d
            for gt_instances_3d in batch_gt_instances_3d
        ]
        for index in range(len(batch_gt_labels_3d)):
            if len(batch_gt_labels_3d[index]) == 0:
                fake_box = batch_gt_bboxes_3d[index].tensor.new_zeros(
                    1, batch_gt_bboxes_3d[index].tensor.shape[-1])
                batch_gt_bboxes_3d[index] = batch_gt_bboxes_3d[index].new_box(
                    fake_box)
                batch_gt_labels_3d[index] = batch_gt_labels_3d[
                    index].new_zeros(1)
                valid_gt_masks.append(batch_gt_labels_3d[index].new_zeros(1))
                gt_num.append(1)
            else:
                valid_gt_masks.append(batch_gt_labels_3d[index].new_ones(
                    batch_gt_labels_3d[index].shape))
                gt_num.append(batch_gt_labels_3d[index].shape[0])
        max_gt_nums = [max_gt_num for _ in range(len(batch_gt_labels_3d))]

        if batch_pts_semantic_mask is None:
            batch_pts_semantic_mask = [
                None for i in range(len(batch_gt_labels_3d))
            ]
            batch_pts_instance_mask = [
                None for i in range(len(batch_gt_labels_3d))
            ]

        seed_points = [
            feats_dict['seed_points'][i]
            for i in range(len(batch_gt_labels_3d))
        ]
        seed_indices = [
            feats_dict['seed_indices'][i]
            for i in range(len(batch_gt_labels_3d))
        ]
        candidate_indices = [
            feats_dict['query_points_sample_inds'][i]
            for i in range(len(batch_gt_labels_3d))
        ]

        (sampling_targets, assigned_size_targets, size_class_targets,
         size_res_targets, dir_class_targets, dir_res_targets, center_targets,
         assigned_center_targets, mask_targets,
         objectness_targets, objectness_masks) = multi_apply(
             self._get_targets_single, points, batch_gt_bboxes_3d,
             batch_gt_labels_3d, batch_pts_semantic_mask,
             batch_pts_instance_mask, max_gt_nums, seed_points, seed_indices,
             candidate_indices)

        # pad targets as original code of GroupFree3D.
        for index in range(len(batch_gt_labels_3d)):
            pad_num = max_gt_num - batch_gt_labels_3d[index].shape[0]
            valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))

        sampling_targets = torch.stack(sampling_targets)
@@ -644,17 +719,17 @@ class GroupFree3DHead(BaseModule):
                mask_targets, valid_gt_masks, objectness_targets,
                objectness_weights, box_loss_weights, valid_gt_weights)
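    # Reference sketch: the `multi_apply` helper from mmdet used above maps
    # `_get_targets_single` over per-sample arguments and transposes the
    # per-sample result tuples into a tuple of lists, roughly:
    #
    #   from functools import partial
    #
    #   def multi_apply(func, *args, **kwargs):
    #       pfunc = partial(func, **kwargs) if kwargs else func
    #       map_results = map(pfunc, *args)
    #       return tuple(map(list, zip(*map_results)))
    #
    #   # e.g. multi_apply(lambda x: (x * 2, x + 1), [1, 2]) -> ([2, 4], [2, 3])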
    def _get_targets_single(self,
                            points: Tensor,
                            gt_bboxes_3d: BaseInstance3DBoxes,
                            gt_labels_3d: Tensor,
                            pts_semantic_mask: Optional[Tensor] = None,
                            pts_instance_mask: Optional[Tensor] = None,
                            max_gt_nums: Optional[int] = None,
                            seed_points: Optional[Tensor] = None,
                            seed_indices: Optional[Tensor] = None,
                            candidate_indices: Optional[Tensor] = None,
                            seed_points_obj_topk: int = 4):
"""Generate targets of GroupFree3D head for single batch. """Generate targets of GroupFree3D head for single batch.
Args: Args:
@@ -662,15 +737,20 @@ class GroupFree3DHead(BaseModule):
            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                boxes of each batch.
            gt_labels_3d (torch.Tensor): Labels of each batch.
            pts_semantic_mask (torch.Tensor, optional): Point-wise semantic
                label of each batch. Defaults to None.
            pts_instance_mask (torch.Tensor, optional): Point-wise instance
                label of each batch. Defaults to None.
            max_gt_nums (int, optional): Max number of GTs for single batch.
                Defaults to None.
            seed_points (torch.Tensor, optional): Coordinates of seed points.
                Defaults to None.
            seed_indices (torch.Tensor, optional): Indices of seed points.
                Defaults to None.
            candidate_indices (torch.Tensor, optional): Indices of object
                candidates. Defaults to None.
            seed_points_obj_topk (int): k value of k-Closest Points Sampling.
                Defaults to 4.

        Returns:
            tuple[torch.Tensor]: Targets of GroupFree3D head.
@@ -755,7 +835,7 @@ class GroupFree3DHead(BaseModule):
            pts_instance_label = instance_lable.long()
            pts_instance_label[pts_obj_mask == 0] = -1
        elif pts_instance_mask is not None and pts_semantic_mask is not None:
            for i in torch.unique(pts_instance_mask):
                indices = torch.nonzero(
                    pts_instance_mask == i, as_tuple=False).squeeze(-1)
@@ -863,30 +943,58 @@ class GroupFree3DHead(BaseModule):
                center_targets, assigned_center_targets, mask_targets,
                objectness_targets, objectness_masks)
    def predict(self, points: List[torch.Tensor],
                feats_dict: Dict[str, torch.Tensor],
                batch_data_samples: List[Det3DDataSample],
                **kwargs) -> List[InstanceData]:
        """Generate 3D detection results from the head predictions.

        Args:
            points (list[tensor]): Point clouds of multiple samples.
            feats_dict (dict): Features from FPN or backbone.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes meta information of data.

        Returns:
            list[:obj:`InstanceData`]: List of processed predictions. Each
            InstanceData contains 3D bounding boxes and corresponding
            scores and labels.
        """
        preds_dict = self(feats_dict)
        batch_size = len(batch_data_samples)
        batch_input_metas = []
        for batch_index in range(batch_size):
            metainfo = batch_data_samples[batch_index].metainfo
            batch_input_metas.append(metainfo)

        results_list = self.predict_by_feat(points, preds_dict,
                                            batch_input_metas, **kwargs)
        return results_list
    def predict_by_feat(self,
                        points: List[torch.Tensor],
                        bbox_preds_dict: dict,
                        batch_input_metas: List[dict],
                        use_nms: bool = True,
                        **kwargs) -> List[InstanceData]:
        """Generate bboxes from GroupFree3D head predictions.

        Args:
            points (List[torch.Tensor]): Input points of multiple samples.
            bbox_preds_dict (dict): Predictions from GroupFree3D head.
            batch_input_metas (list[dict]): Each item
                contains the meta information of each sample.
            use_nms (bool): Whether to apply NMS; skip NMS postprocessing
                while using the head in an RPN stage.

        Returns:
            list[:obj:`InstanceData`]: List of processed predictions. Each
            InstanceData contains 3D bounding boxes and corresponding
            scores and labels.
        """
        # support multi-stage predictions
        assert self.test_cfg['prediction_stages'] in \
            ['last', 'all', 'last_three']

        prefixes = list()
        if self.test_cfg['prediction_stages'] == 'last':
            prefixes = [f's{self.num_decoder_layers - 1}.']
        elif self.test_cfg['prediction_stages'] == 'all':
@@ -905,9 +1013,10 @@ class GroupFree3DHead(BaseModule):
        bbox3d = list()
        for prefix in prefixes:
            # decode boxes
            obj_score = bbox_preds_dict[f'{prefix}obj_scores'][...,
                                                               -1].sigmoid()
            sem_score = bbox_preds_dict[f'{prefix}sem_scores'].softmax(-1)
            bbox = self.bbox_coder.decode(bbox_preds_dict, prefix)
            obj_scores.append(obj_score)
            sem_scores.append(sem_score)
            bbox3d.append(bbox)
@@ -915,22 +1024,27 @@ class GroupFree3DHead(BaseModule):
        obj_scores = torch.cat(obj_scores, dim=1)
        sem_scores = torch.cat(sem_scores, dim=1)
        bbox3d = torch.cat(bbox3d, dim=1)
        stack_points = torch.stack(points)
        results_list = list()

        if use_nms:
            batch_size = bbox3d.shape[0]
            for b in range(batch_size):
                # create a fresh InstanceData per sample so the collected
                # results do not alias one another
                temp_results = InstanceData()
                bbox_selected, score_selected, labels = \
                    self.multiclass_nms_single(obj_scores[b],
                                               sem_scores[b],
                                               bbox3d[b],
                                               stack_points[b, ..., :3],
                                               batch_input_metas[b])
                bbox = batch_input_metas[b]['box_type_3d'](
                    bbox_selected,
                    box_dim=bbox_selected.shape[-1],
                    with_yaw=self.bbox_coder.with_rot)

                temp_results.bboxes_3d = bbox
                temp_results.scores_3d = score_selected
                temp_results.labels_3d = labels
                results_list.append(temp_results)

            return results_list
        else:
            return bbox3d
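    # Sketch (illustrative values only): each InstanceData in `results_list`
    # keeps bboxes_3d / scores_3d / labels_3d aligned and supports boolean
    # indexing for post-hoc score filtering:
    #
    #   from mmengine import InstanceData
    #   result = InstanceData()
    #   result.bboxes_3d = torch.rand(2, 7)  # stand-in for real boxes
    #   result.scores_3d = torch.tensor([0.9, 0.4])
    #   result.labels_3d = torch.tensor([3, 7])
    #   filtered = result[result.scores_3d > 0.5]  # fields stay in sync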
@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
            batch_gt_instances_ignore.append(
                data_sample.get('ignored_instances', None))
            batch_pts_semantic_mask.append(
                data_sample.gt_pts_seg.get('pts_semantic_mask', None))
            batch_pts_instance_mask.append(
                data_sample.gt_pts_seg.get('pts_instance_mask', None))

        loss_inputs = (points, preds_dict, batch_gt_instance_3d)
        losses = self.loss_by_feat(
@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
                gt_instances. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
                point clouds. Defaults to None.
            batch_pts_instance_mask (list[tensor]): Instance gt mask for
                point clouds. Defaults to None.
        Returns:
            tuple[torch.Tensor]: Targets of vote head.
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.registry import MODELS
from ...core import SampleList
from .single_stage import SingleStage3DDetector
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 **kwargs):
        super(GroupFree3DNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg,
            **kwargs)
    def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
             **kwargs) -> dict:
        """Calculate losses from a batch of inputs dict and data samples.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                'points' and 'imgs' keys.

                - points (list[torch.Tensor]): Point cloud of each sample.
                - imgs (torch.Tensor, optional): Image of each sample.

            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance_3d`, `gt_pts_seg`.

        Returns:
            dict: A dictionary of loss components.
        """
        x = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        losses = self.bbox_head.loss(points, x, batch_data_samples, **kwargs)
        return losses
    def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
                **kwargs) -> SampleList:
        """Predict results from a batch of inputs and data samples with
        post-processing.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                'points' and 'imgs' keys.

                - points (list[torch.Tensor]): Point cloud of each sample.
                - imgs (torch.Tensor, optional): Image of each sample.

            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance_3d`, `gt_pts_seg`.
            rescale (bool): Whether to rescale the results.
                Defaults to True.

        Returns:
            list[:obj:`Det3DDataSample`]: Detection results of the
            input samples. Each Det3DDataSample usually contains
            'pred_instances_3d', and ``pred_instances_3d`` usually
            contains the following keys.

            - scores_3d (Tensor): Classification scores, has a shape
              (num_instances, )
            - labels_3d (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes_3d (Tensor): Contains a tensor with shape
              (num_instances, C) where C >= 7.
        """
        x = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        results_list = self.bbox_head.predict(points, x, batch_data_samples,
                                              **kwargs)
        predictions = self.convert_to_datasample(results_list)
        return predictions
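    # Sketch (not part of the commit): with a built model and a preprocessed
    # batch, the refactored entry points are exercised as below; compare the
    # unit test that follows.
    #
    #   batch_inputs_dict = dict(points=[torch.rand(1024, 3)])
    #   losses = model.loss(batch_inputs_dict, batch_data_samples)
    #   preds = model.predict(batch_inputs_dict, batch_data_samples)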
import unittest

import torch
from mmengine import DefaultScope

from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
                                     _get_detector_cfg, _setup_seed)
class TestGroupfree3d(unittest.TestCase):

    def test_groupfree3d(self):
        import mmdet3d.models

        assert hasattr(mmdet3d.models, 'GroupFree3DNet')
        DefaultScope.get_instance('test_groupfree3d', scope_name='mmdet3d')
        _setup_seed(0)
        groupfree3d_cfg = _get_detector_cfg(
            'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
        model = MODELS.build(groupfree3d_cfg)

        num_gt_instance = 5
        data = [
            _create_detector_inputs(
                num_gt_instance=num_gt_instance,
                points_feat_dim=3,
                with_pts_semantic_mask=True,
                with_pts_instance_mask=True)
        ]

        if torch.cuda.is_available():
            model = model.cuda()
            # test predict
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)

            # compute losses under no_grad to save memory
            with torch.no_grad():
                losses = model.forward(batch_inputs, data_samples, mode='loss')

            self.assertGreater(losses['sampling_objectness_loss'], 0)
            self.assertGreater(losses['proposal.objectness_loss'], 0)
            self.assertGreater(losses['s0.objectness_loss'], 0)
            self.assertGreater(losses['s1.size_res_loss'], 0)
            self.assertGreater(losses['s4.size_class_loss'], 0)
@@ -7,7 +7,7 @@ import numpy as np
import torch
from mmengine import InstanceData

from mmdet3d.core import Det3DDataSample, LiDARInstance3DBoxes, PointData


def _setup_seed(seed):
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
    return model
def _create_detector_inputs(
    seed=0,
    with_points=True,
    with_img=False,
    num_gt_instance=20,
    num_points=10,
    points_feat_dim=4,
    num_classes=3,
    gt_bboxes_dim=7,
    with_pts_semantic_mask=False,
    with_pts_instance_mask=False,
):
    _setup_seed(seed)
    if with_points:
        points = torch.rand([num_points, points_feat_dim])
    else:
        points = None
    if with_img:
        img = torch.rand(3, 10, 10)
    else:
        img = None
    inputs_dict = dict(img=img, points=points)
    gt_instance_3d = InstanceData()
    gt_instance_3d.bboxes_3d = LiDARInstance3DBoxes(
        torch.rand([num_gt_instance, gt_bboxes_dim]), box_dim=gt_bboxes_dim)
@@ -94,5 +100,12 @@ def _create_detector_inputs(
    data_sample = Det3DDataSample(
        metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
    data_sample.gt_instances_3d = gt_instance_3d
    data_sample.gt_pts_seg = PointData()
    if with_pts_instance_mask:
        pts_instance_mask = torch.randint(0, num_gt_instance, [num_points])
        data_sample.gt_pts_seg['pts_instance_mask'] = pts_instance_mask
    if with_pts_semantic_mask:
        pts_semantic_mask = torch.randint(0, num_classes, [num_points])
        data_sample.gt_pts_seg['pts_semantic_mask'] = pts_semantic_mask
    return dict(inputs=inputs_dict, data_sample=data_sample)
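# Sketch of the returned structure, using the defaults above plus the
# arguments the GroupFree3D test passes:
#
#   data = _create_detector_inputs(
#       num_gt_instance=5,
#       points_feat_dim=3,  # xyz only, matching the backbone's in_channels=3
#       with_pts_semantic_mask=True,
#       with_pts_instance_mask=True)
#   assert data['inputs']['points'].shape == (10, 3)  # num_points default
#   assert len(data['data_sample'].gt_instances_3d) == 5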