Commit 9ebb75da authored by jshilong, committed by ChaimZhu

[refactor]Groupfree3d

parent b496f579
model = dict(
type='GroupFree3DNet',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
backbone=dict(
type='PointNet2SASSG',
in_channels=3,
......@@ -38,33 +39,36 @@ model = dict(
pred_layer_cfg=dict(
in_channels=288, shared_conv_channels=(288, 288), bias=True),
sampling_objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss',
beta=1.0,
reduction='sum',
loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(sample_mod='kps'),
train_cfg=dict(sample_mode='kps'),
test_cfg=dict(
sample_mod='kps',
sample_mode='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
......
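Throughout these configs the loss types gain an `mmdet.` prefix: the refactor builds losses through MMEngine's scoped registries, and the prefix tells mmdet3d's registry to resolve the class from mmdet rather than its own scope. A minimal sketch of that lookup (assumes mmdet is installed; a full Runner run imports and registers its modules automatically):

from mmdet3d.registry import MODELS

# 'mmdet.' routes the lookup to mmdet's MODELS registry.
focal = MODELS.build(
    dict(type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0,
         alpha=0.25, loss_weight=8.0))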
......@@ -35,34 +35,37 @@ model = dict(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
sampling_objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss',
beta=0.04,
reduction='sum',
loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss',
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=10.0 / 9.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
test_cfg=dict(
sample_mod='kps',
sample_mode='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
......@@ -75,6 +78,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
metainfo = dict(CLASSES=class_names)
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -102,9 +108,8 @@ train_pipeline = [
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0]),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
type='Pack3DDetInputs',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
......@@ -134,52 +139,60 @@ test_pipeline = [
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=50000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
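Pack3DDetInputs replaces the old DefaultFormatBundle3D + Collect3D pair throughout these pipelines: it tensorizes the listed keys and packs annotations plus meta info into a Det3DDataSample, while class names now reach the dataset via `metainfo` instead of the transform. A hedged sketch of building it through the registry (the TRANSFORMS registry location is assumed from this refactor's layout):

from mmdet3d.registry import TRANSFORMS  # registry location assumed

pack = TRANSFORMS.build(dict(type='Pack3DDetInputs', keys=['points']))
# packed = pack(results)  # -> dict with 'inputs' and a Det3DDataSample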
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
ann_file='scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
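The monolithic `data` dict becomes one dataloader per split, consumed directly by MMEngine's Runner, with sampling and evaluation made explicit. A rough field-by-field mapping, as a sketch:

# old (mmcv)                          -> new (mmengine)
# data.samples_per_gpu                -> <split>_dataloader.batch_size
# data.workers_per_gpu                -> <split>_dataloader.num_workers
# data.train / data.val / data.test   -> train/val/test_dataloader.dataset
# implicit shuffling                  -> sampler=dict(type='DefaultSampler', shuffle=...)
# dataset.classes=class_names         -> dataset.metainfo=dict(CLASSES=class_names)
# dataset.evaluate()                  -> val_evaluator / test_evaluator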
# optimizer
lr = 0.006
optimizer = dict(
lr=lr,
weight_decay=0.0005,
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={
'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
......@@ -191,9 +204,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
}))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[56, 68])
# learning rate
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=80,
by_epoch=True,
milestones=[56, 68],
gamma=0.1)
]
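The retired `lr_config` step policy and the new MultiStepLR entry encode the same trajectory; a tiny sketch of the schedule over the 80 epochs:

lr = 0.006
for epoch in range(80):
    if epoch in (56, 68):  # milestones
        lr *= 0.1          # gamma
# epochs 0-55: 6e-3, epochs 56-67: 6e-4, epochs 68-79: 6e-5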
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=80)
checkpoint_config = dict(interval=1, max_keep_ckpts=10)
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
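`optim_wrapper` folds the old separate `optimizer` and `optimizer_config` (gradient clipping) into a single object. A minimal standalone sketch with a stand-in model:

import torch
from mmengine.optim import OptimWrapper

model = torch.nn.Linear(4, 2)  # stand-in model, illustration only
optim_wrapper = OptimWrapper(
    optimizer=torch.optim.AdamW(
        model.parameters(), lr=0.006, weight_decay=0.0005),
    clip_grad=dict(max_norm=0.1, norm_type=2))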
......@@ -34,34 +34,37 @@ model = dict(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
sampling_objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss',
beta=0.04,
reduction='sum',
loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss',
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=10.0 / 9.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
test_cfg=dict(
sample_mod='kps',
sample_mode='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
......@@ -74,6 +77,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
metainfo = dict(CLASSES=class_names)
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -101,9 +107,8 @@ train_pipeline = [
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0]),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
type='Pack3DDetInputs',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
......@@ -133,52 +138,60 @@ test_pipeline = [
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=50000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
ann_file='scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
# optimizer
lr = 0.006
optimizer = dict(
lr=lr,
weight_decay=0.0005,
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={
'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
......@@ -190,9 +203,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
}))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[56, 68])
# learning rate
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=80,
by_epoch=True,
milestones=[56, 68],
gamma=0.1)
]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=80)
checkpoint_config = dict(interval=1, max_keep_ckpts=10)
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
......@@ -50,34 +50,37 @@ model = dict(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
sampling_objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss',
beta=0.04,
reduction='sum',
loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss',
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=10.0 / 9.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
test_cfg=dict(
sample_mod='kps',
sample_mode='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
......@@ -90,6 +93,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
metainfo = dict(CLASSES=class_names)
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -117,9 +123,8 @@ train_pipeline = [
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0]),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
type='Pack3DDetInputs',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
......@@ -149,52 +154,60 @@ test_pipeline = [
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=50000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
ann_file='scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
# optimizer
lr = 0.006
optimizer = dict(
lr=lr,
weight_decay=0.0005,
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={
'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
......@@ -206,9 +219,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
}))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[56, 68])
# learning rate
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=80,
by_epoch=True,
milestones=[56, 68],
gamma=0.1)
]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=80)
checkpoint_config = dict(interval=1, max_keep_ckpts=10)
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
......@@ -51,34 +51,37 @@ model = dict(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]),
sampling_objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', beta=0.04, reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss',
beta=0.04,
reduction='sum',
loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss',
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=10.0 / 9.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
test_cfg=dict(
sample_mod='kps',
sample_mode='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
......@@ -91,6 +94,9 @@ class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
metainfo = dict(CLASSES=class_names)
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -118,9 +124,8 @@ train_pipeline = [
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0]),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
type='Pack3DDetInputs',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
......@@ -150,52 +155,60 @@ test_pipeline = [
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=50000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
ann_file='scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
# optimizer
lr = 0.006
optimizer = dict(
lr=lr,
weight_decay=0.0005,
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(
custom_keys={
'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
......@@ -207,9 +220,21 @@ optimizer = dict(
'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
}))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[56, 68])
# learning rate
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=80,
by_epoch=True,
milestones=[56, 68],
gamma=0.1)
]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=80)
checkpoint_config = dict(interval=1, max_keep_ckpts=10)
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=10))
_base_ = [
'../_base_/datasets/scannet-3d-18class.py', '../_base_/models/h3dnet.py',
'../_base_/schedules/schedule_3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
rpn_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]])),
roi_head=dict(
bbox_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=24,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]))))
train_dataloader = dict(
batch_size=3,
num_workers=2,
)
# yapf:disable
default_hooks = dict(
logger=dict(type='LoggerHook', interval=30)
)
# yapf:enable
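Only the overridden fields appear in this config; MMEngine merges them into the `_base_` files, so `train_dataloader` keeps the base dataset and sampler and changes just the batch size and worker count. A sketch of inspecting the merged result (file path hypothetical):

from mmengine.config import Config

cfg = Config.fromfile('configs/h3dnet/h3dnet_scannet-3d-18class.py')  # hypothetical path
print(cfg.train_dataloader.batch_size)    # 3, overriding the base value
print(cfg.train_dataloader.dataset.type)  # inherited from the _base_ dataset config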
......@@ -229,6 +229,8 @@ class Det3DDataset(BaseDataset):
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
info['lidar_path'] = info['lidar_points']['lidar_path']
if self.modality['use_camera']:
for cam_id, img_info in info['images'].items():
if 'img_path' in img_info:
......
......@@ -128,6 +128,7 @@ class KittiDataset(Det3DDataset):
"""
ann_info = super().parse_ann_info(info)
if ann_info is None:
ann_info = dict()
# empty instance
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
......
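With `parse_ann_info` returning an empty dict instead of None, annotation-free samples follow the same code path as everything else. A minimal sketch of the fallback this hunk builds (7-DoF KITTI boxes, as above):

import numpy as np

ann_info = dict(
    gt_bboxes_3d=np.zeros((0, 7), dtype=np.float32),  # no instances
    gt_labels_3d=np.zeros(0, dtype=np.int64))
assert len(ann_info['gt_labels_3d']) == 0  # downstream code needs no None check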
......@@ -31,14 +31,14 @@ class Pack3DDetInputs(BaseTransform):
def __init__(
self,
keys: dict,
meta_keys: dict = ('filename', 'ori_shape', 'img_shape', 'lidar2img',
meta_keys: dict = ('img_path', 'ori_shape', 'img_shape', 'lidar2img',
'depth2img', 'cam2img', 'pad_shape', 'scale_factor',
'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip',
'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'pcd_trans', 'sample_idx', 'pcd_scale_factor',
'pcd_rotation', 'pcd_rotation_angle',
'pts_filename', 'transformation_3d_flow',
'trans_mat', 'affine_aug')):
'pcd_rotation', 'pcd_rotation_angle', 'lidar_path',
'transformation_3d_flow', 'trans_mat',
'affine_aug')):
self.keys = keys
self.meta_keys = meta_keys
......
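`filename` and `pts_filename` become `img_path` and `lidar_path` to match the new info files. A sketch of reading them from packed meta info, with InstanceData (imported the same way as elsewhere in this commit) standing in for the packed sample:

from mmengine import InstanceData  # stand-in for the packed data sample

sample = InstanceData(
    metainfo=dict(img_path='demo/demo.jpg', lidar_path='demo/demo.bin'))
print(sample.metainfo['img_path'], sample.metainfo['lidar_path'])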
......@@ -138,6 +138,7 @@ class ScanNetDataset(Det3DDataset):
ann_info = super().parse_ann_info(info)
# empty gt
if ann_info is None:
ann_info = dict()
ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
# to target box structure
......
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from os import path as osp
from typing import Callable, List, Optional, Union
from mmdet3d.core import show_multi_modality_result, show_result
import numpy as np
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.registry import DATASETS
from mmdet.core import eval_map
from .det3d_dataset import Det3DDataset
from .pipelines import Compose
@DATASETS.register_module()
......@@ -86,128 +83,15 @@ class SUNRGBDDataset(Det3DDataset):
dict: Processed `ann_info`
"""
ann_info = super().parse_ann_info(info)
# empty gt
if ann_info is None:
ann_info = dict()
ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
# to target box structure
ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
ann_info['gt_bboxes_3d'],
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
return ann_info
def _build_default_pipeline(self):
"""Build the default pipeline for this dataset."""
pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=self.CLASSES,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
if self.modality['use_camera']:
pipeline.insert(0, dict(type='LoadImageFromFile'))
return Compose(pipeline)
# TODO fix this
def show(self, results, out_dir, show=True, pipeline=None):
"""Results visualization.
Args:
results (list[dict]): List of bounding boxes results.
out_dir (str): Output directory of visualization result.
show (bool): Visualize the results online.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
"""
assert out_dir is not None, 'Expect out_dir, got none.'
pipeline = self._get_pipeline(pipeline)
for i, result in enumerate(results):
data_info = self.data_infos[i]
pts_path = data_info['pts_path']
file_name = osp.split(pts_path)[-1].split('.')[0]
points, img_metas, img = self._extract_data(
i, pipeline, ['points', 'img_metas', 'img'])
# scale colors to [0, 255]
points = points.numpy()
points[:, 3:] *= 255
gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
pred_bboxes = result['boxes_3d'].tensor.numpy()
show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
file_name, show)
# multi-modality visualization
if self.modality['use_camera']:
img = img.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
pred_bboxes = DepthInstance3DBoxes(
pred_bboxes, origin=(0.5, 0.5, 0))
gt_bboxes = DepthInstance3DBoxes(
gt_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
gt_bboxes,
pred_bboxes,
None,
out_dir,
file_name,
box_mode='depth',
img_metas=img_metas,
show=show)
def evaluate(self,
results,
metric=None,
iou_thr=(0.25, 0.5),
iou_thr_2d=(0.5, ),
logger=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluate.
Evaluation in indoor protocol.
Args:
results (list[dict]): List of results.
metric (str | list[str], optional): Metrics to be evaluated.
Default: None.
iou_thr (list[float], optional): AP IoU thresholds for 3D
evaluation. Default: (0.25, 0.5).
iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
evaluation. Default: (0.5, ).
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict: Evaluation results.
"""
# evaluate 3D detection performance
if isinstance(results[0], dict):
return super().evaluate(results, metric, iou_thr, logger, show,
out_dir, pipeline)
# evaluate 2D detection performance
else:
eval_results = OrderedDict()
annotations = [self.get_ann_info(i) for i in range(len(self))]
iou_thr_2d = (iou_thr_2d) if isinstance(iou_thr_2d,
float) else iou_thr_2d
for iou_thr_2d_single in iou_thr_2d:
mean_ap, _ = eval_map(
results,
annotations,
scale_ranges=None,
iou_thr=iou_thr_2d_single,
dataset=self.CLASSES,
logger=logger)
eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
return eval_results
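Incidentally, the removed 2D branch wrapped a scalar threshold as `(iou_thr_2d)`, which is just the scalar itself; only a trailing comma makes a tuple. A one-line sketch of the intended normalization:

iou_thr_2d = 0.5
iou_thr_2d = (iou_thr_2d, ) if isinstance(iou_thr_2d, float) else iou_thr_2d
assert iou_thr_2d == (0.5, )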
......@@ -172,7 +172,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
# image tensor.
inputs_dict = [{
k: v.to(self._device)
for k, v in _data['inputs'].items()
for k, v in _data['inputs'].items() if v is not None
} for _data in data]
batch_data_samples: List[BaseDataElement] = []
......
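The added `if v is not None` guard keeps LiDAR-only batches, where the image entry is None, from hitting `None.to(device)`. The effect in isolation, as a sketch:

import torch

inputs = {'points': torch.zeros(100, 4), 'img': None}  # stand-in batch entry
moved = {k: v.to('cpu') for k, v in inputs.items() if v is not None}
assert 'img' not in moved  # a missing modality is simply dropped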
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
from mmcv import ConfigDict
from mmcv.cnn import ConvModule, xavier_init
from mmcv.cnn.bricks.transformer import (build_positional_encoding,
build_transformer_layer)
from mmcv.ops import PointsSampler as Points_Sampler
from mmcv.ops import gather_points
from mmcv.runner import BaseModule, force_fp32
from mmcv.runner import BaseModule
from mmengine import InstanceData
from torch import Tensor
from torch import nn as nn
from torch.nn import functional as F
from mmdet3d.core.post_processing import aligned_3d_nms
from mmdet3d.registry import MODELS
from mmdet.core import build_bbox_coder, multi_apply
from ..builder import build_loss
from ...core import BaseInstance3DBoxes, Det3DDataSample, SampleList
from .base_conv_bbox_head import BaseConvBboxHead
EPS = 1e-6
......@@ -38,12 +40,12 @@ class PointsObjClsModule(BaseModule):
"""
def __init__(self,
in_channel,
num_convs=3,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
in_channel: int,
num_convs: int = 3,
conv_cfg: dict = dict(type='Conv1d'),
norm_cfg: dict = dict(type='BN1d'),
act_cfg: dict = dict(type='ReLU'),
init_cfg: Optional[dict] = None):
super().__init__(init_cfg=init_cfg)
conv_channels = [in_channel for _ in range(num_convs - 1)]
conv_channels.append(1)
......@@ -85,11 +87,12 @@ class GeneralSamplingModule(nn.Module):
Sampling points with given index.
"""
def forward(self, xyz, features, sample_inds):
def forward(self, xyz: Tensor, features: Tensor,
sample_inds: Tensor) -> Tuple[Tensor]:
"""Forward pass.
Args:
xyz: (B, N, 3) the coordinates of the features.
xyz (Tensor): (B, N, 3) the coordinates of the features.
features (Tensor): (B, C, N) features to sample.
sample_inds (Tensor): (B, M) the given index,
where M is the number of points.
......@@ -118,56 +121,61 @@ class GroupFree3DHead(BaseModule):
decoding boxes.
num_decoder_layers (int): The number of transformer decoder layers.
transformerlayers (dict): Config for transformer decoder.
train_cfg (dict): Config for training.
test_cfg (dict): Config for testing.
train_cfg (dict, optional): Config for training.
test_cfg (dict, optional): Config for testing.
num_proposal (int): The number of initial sampling candidates.
pred_layer_cfg (dict): Config of classfication and regression
pred_layer_cfg (dict, optional): Config of classification and regression
prediction layers.
size_cls_agnostic (bool): Whether the predicted size is class-agnostic.
gt_per_seed (int): The number of candidate instances each point belongs
to.
sampling_objectness_loss (dict): Config of initial sampling
sampling_objectness_loss (dict, optional): Config of initial sampling
objectness loss.
objectness_loss (dict): Config of objectness loss.
center_loss (dict): Config of center loss.
dir_class_loss (dict): Config of direction classification loss.
dir_res_loss (dict): Config of direction residual regression loss.
size_class_loss (dict): Config of size classification loss.
size_res_loss (dict): Config of size residual regression loss.
size_reg_loss (dict): Config of class-agnostic size regression loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
objectness_loss (dict, optional): Config of objectness loss.
center_loss (dict, optional): Config of center loss.
dir_class_loss (dict, optional): Config of direction classification
loss.
dir_res_loss (dict, optional): Config of direction residual
regression loss.
size_class_loss (dict, optional): Config of size classification loss.
size_res_loss (dict, optional): Config of size residual
regression loss.
size_reg_loss (dict, optional): Config of class-agnostic size
regression loss.
semantic_loss (dict, optional): Config of point-wise semantic
segmentation loss.
"""
def __init__(self,
num_classes,
in_channels,
bbox_coder,
num_decoder_layers,
transformerlayers,
decoder_self_posembeds=dict(
num_classes: int,
in_channels: int,
bbox_coder: dict,
num_decoder_layers: int,
transformerlayers: dict,
decoder_self_posembeds: dict = dict(
type='ConvBNPositionalEncoding',
input_channel=6,
num_pos_feats=288),
decoder_cross_posembeds=dict(
decoder_cross_posembeds: dict = dict(
type='ConvBNPositionalEncoding',
input_channel=3,
num_pos_feats=288),
train_cfg=None,
test_cfg=None,
num_proposal=128,
pred_layer_cfg=None,
size_cls_agnostic=True,
gt_per_seed=3,
sampling_objectness_loss=None,
objectness_loss=None,
center_loss=None,
dir_class_loss=None,
dir_res_loss=None,
size_class_loss=None,
size_res_loss=None,
size_reg_loss=None,
semantic_loss=None,
init_cfg=None):
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
num_proposal: int = 128,
pred_layer_cfg: Optional[dict] = None,
size_cls_agnostic: bool = True,
gt_per_seed: int = 3,
sampling_objectness_loss: Optional[dict] = None,
objectness_loss: Optional[dict] = None,
center_loss: Optional[dict] = None,
dir_class_loss: Optional[dict] = None,
dir_res_loss: Optional[dict] = None,
size_class_loss: Optional[dict] = None,
size_res_loss: Optional[dict] = None,
size_reg_loss: Optional[dict] = None,
semantic_loss: Optional[dict] = None,
init_cfg: Optional[dict] = None):
super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.train_cfg = train_cfg
......@@ -179,7 +187,7 @@ class GroupFree3DHead(BaseModule):
self.gt_per_seed = gt_per_seed
# Transformer decoder layers
if isinstance(transformerlayers, ConfigDict):
if isinstance(transformerlayers, dict):
transformerlayers = [
copy.deepcopy(transformerlayers)
for _ in range(num_decoder_layers)
......@@ -239,17 +247,17 @@ class GroupFree3DHead(BaseModule):
num_cls_out_channels=self._get_cls_out_channels(),
num_reg_out_channels=self._get_reg_out_channels()))
self.sampling_objectness_loss = build_loss(sampling_objectness_loss)
self.objectness_loss = build_loss(objectness_loss)
self.center_loss = build_loss(center_loss)
self.dir_res_loss = build_loss(dir_res_loss)
self.dir_class_loss = build_loss(dir_class_loss)
self.semantic_loss = build_loss(semantic_loss)
self.loss_sampling_objectness = MODELS.build(sampling_objectness_loss)
self.loss_objectness = MODELS.build(objectness_loss)
self.loss_center = MODELS.build(center_loss)
self.loss_dir_res = MODELS.build(dir_res_loss)
self.loss_dir_class = MODELS.build(dir_class_loss)
self.loss_semantic = MODELS.build(semantic_loss)
if self.size_cls_agnostic:
self.size_reg_loss = build_loss(size_reg_loss)
self.loss_size_reg = MODELS.build(size_reg_loss)
else:
self.size_res_loss = build_loss(size_res_loss)
self.size_class_loss = build_loss(size_class_loss)
self.loss_size_res = MODELS.build(size_res_loss)
self.loss_size_class = MODELS.build(size_class_loss)
def init_weights(self):
"""Initialize weights of transformer decoder in GroupFree3DHead."""
......@@ -279,16 +287,18 @@ class GroupFree3DHead(BaseModule):
else:
return 3 + self.num_dir_bins * 2 + self.num_sizes * 4
def _extract_input(self, feat_dict):
def _extract_input(self, feat_dict: dict) -> Tuple[Tensor]:
"""Extract inputs from features dictionary.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
torch.Tensor: Coordinates of input points.
torch.Tensor: Features of input points.
torch.Tensor: Indices of input points.
Tuple[Tensor]:
- seed_points (Tensor): Coordinates of input points.
- seed_features (Tensor): Features of input points.
- seed_indices (Tensor): Indices of input points.
"""
seed_points = feat_dict['fp_xyz'][-1]
......@@ -297,7 +307,20 @@ class GroupFree3DHead(BaseModule):
return seed_points, seed_features, seed_indices
def forward(self, feat_dict, sample_mod):
@property
def sample_mode(self):
"""
Returns:
str: Sample mode for initial candidates sampling.
"""
if self.training:
sample_mode = self.train_cfg.sample_mode
else:
sample_mode = self.test_cfg.sample_mode
assert sample_mode in ['fps', 'kps']
return sample_mode
def forward(self, feat_dict: dict) -> dict:
"""Forward pass.
Note:
......@@ -308,12 +331,12 @@ class GroupFree3DHead(BaseModule):
Args:
feat_dict (dict): Feature dict from backbone.
sample_mod (str): sample mode for initial candidates sampling.
Returns:
results (dict): Predictions of GroupFree3D head.
"""
assert sample_mod in ['fps', 'kps']
sample_mode = self.sample_mode
seed_xyz, seed_features, seed_indices = self._extract_input(feat_dict)
......@@ -323,9 +346,9 @@ class GroupFree3DHead(BaseModule):
seed_indices=seed_indices)
# 1. Initial object candidates sampling.
if sample_mod == 'fps':
if sample_mode == 'fps':
sample_inds = self.fps_module(seed_xyz, seed_features)
elif sample_mod == 'kps':
elif sample_mode == 'kps':
points_obj_cls_logits = self.points_obj_cls(
seed_features) # (batch_size, 1, num_seed)
points_obj_cls_scores = points_obj_cls_logits.sigmoid().squeeze(1)
......@@ -334,7 +357,7 @@ class GroupFree3DHead(BaseModule):
results['seeds_obj_cls_logits'] = points_obj_cls_logits
else:
raise NotImplementedError(
f'Sample mode {sample_mod} is not supported!')
f'Sample mode {sample_mode} is not supported!')
candidate_xyz, candidate_features, sample_inds = self.gsample_module(
seed_xyz, seed_features, sample_inds)
......@@ -391,40 +414,74 @@ class GroupFree3DHead(BaseModule):
return results
@force_fp32(apply_to=('bbox_preds', ))
def loss(self,
bbox_preds,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
img_metas=None,
gt_bboxes_ignore=None,
ret_target=False):
def loss(self, points: List[torch.Tensor], feats_dict: Dict[str,
torch.Tensor],
batch_data_samples: SampleList, **kwargs) -> dict:
"""
Args:
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Predictions from backbone or FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each sample and
corresponding annotations.
Returns:
dict: A dictionary of loss components.
"""
preds_dict = self.forward(feats_dict)
batch_gt_instance_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
batch_pts_semantic_mask = []
batch_pts_instance_mask = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
batch_pts_semantic_mask.append(
data_sample.gt_pts_seg.get('pts_semantic_mask', None))
batch_pts_instance_mask.append(
data_sample.gt_pts_seg.get('pts_instance_mask', None))
loss_inputs = (points, preds_dict, batch_gt_instance_3d)
losses = self.loss_by_feat(
*loss_inputs,
batch_pts_semantic_mask=batch_pts_semantic_mask,
batch_pts_instance_mask=batch_pts_instance_mask,
batch_input_metas=batch_input_metas,
batch_gt_instances_ignore=batch_gt_instances_ignore)
return losses
def loss_by_feat(
self,
points: List[torch.Tensor],
feats_dict: dict,
batch_gt_instances_3d: List[InstanceData],
batch_pts_semantic_mask: Optional[List[torch.Tensor]] = None,
batch_pts_instance_mask: Optional[List[torch.Tensor]] = None,
ret_target: bool = False,
**kwargs) -> dict:
"""Compute loss.
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding.
ret_target (Bool): Return targets or not.
feats_dict (dict): Predictions from previous component.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
batch_pts_semantic_mask (list[tensor]): Semantic mask of
point clouds. Defaults to None.
batch_pts_instance_mask (list[tensor]): Instance mask of
point clouds. Defaults to None.
ret_target (bool): Return targets or not. Defaults to False.
Returns:
dict: Losses of GroupFree3D.
dict: Losses of `GroupFree3D`.
"""
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask,
bbox_preds)
targets = self.get_targets(points, feats_dict, batch_gt_instances_3d,
batch_pts_semantic_mask,
batch_pts_instance_mask)
(sampling_targets, sampling_weights, assigned_size_targets,
size_class_targets, size_res_targets, dir_class_targets,
dir_res_targets, center_targets, assigned_center_targets,
......@@ -436,8 +493,8 @@ class GroupFree3DHead(BaseModule):
losses = dict()
# calculate objectness classification loss
sampling_obj_score = bbox_preds['seeds_obj_cls_logits'].reshape(-1, 1)
sampling_objectness_loss = self.sampling_objectness_loss(
sampling_obj_score = feats_dict['seeds_obj_cls_logits'].reshape(-1, 1)
sampling_objectness_loss = self.loss_sampling_objectness(
sampling_obj_score,
1 - sampling_targets.reshape(-1),
sampling_weights.reshape(-1),
......@@ -445,14 +502,14 @@ class GroupFree3DHead(BaseModule):
losses['sampling_objectness_loss'] = sampling_objectness_loss
prefixes = ['proposal.'] + [
f's{i}.' for i in range(bbox_preds['num_decoder_layers'])
f's{i}.' for i in range(feats_dict['num_decoder_layers'])
]
num_stages = len(prefixes)
for prefix in prefixes:
# calculate objectness loss
obj_score = bbox_preds[f'{prefix}obj_scores'].transpose(2, 1)
objectness_loss = self.objectness_loss(
obj_score = feats_dict[f'{prefix}obj_scores'].transpose(2, 1)
objectness_loss = self.loss_objectness(
obj_score.reshape(-1, 1),
1 - objectness_targets.reshape(-1),
objectness_weights.reshape(-1),
......@@ -462,15 +519,15 @@ class GroupFree3DHead(BaseModule):
# calculate center loss
box_loss_weights_expand = box_loss_weights.unsqueeze(-1).expand(
-1, -1, 3)
center_loss = self.center_loss(
bbox_preds[f'{prefix}center'],
center_loss = self.loss_center(
feats_dict[f'{prefix}center'],
assigned_center_targets,
weight=box_loss_weights_expand)
losses[f'{prefix}center_loss'] = center_loss / num_stages
# calculate direction class loss
dir_class_loss = self.dir_class_loss(
bbox_preds[f'{prefix}dir_class'].transpose(2, 1),
dir_class_loss = self.loss_dir_class(
feats_dict[f'{prefix}dir_class'].transpose(2, 1),
dir_class_targets,
weight=box_loss_weights)
losses[f'{prefix}dir_class_loss'] = dir_class_loss / num_stages
......@@ -481,24 +538,24 @@ class GroupFree3DHead(BaseModule):
heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1),
1)
dir_res_norm = torch.sum(
bbox_preds[f'{prefix}dir_res_norm'] * heading_label_one_hot,
feats_dict[f'{prefix}dir_res_norm'] * heading_label_one_hot,
-1)
dir_res_loss = self.dir_res_loss(
dir_res_loss = self.loss_dir_res(
dir_res_norm, dir_res_targets, weight=box_loss_weights)
losses[f'{prefix}dir_res_loss'] = dir_res_loss / num_stages
if self.size_cls_agnostic:
# calculate class-agnostic size loss
size_reg_loss = self.size_reg_loss(
bbox_preds[f'{prefix}size'],
size_reg_loss = self.loss_size_reg(
feats_dict[f'{prefix}size'],
assigned_size_targets,
weight=box_loss_weights_expand)
losses[f'{prefix}size_reg_loss'] = size_reg_loss / num_stages
else:
# calculate size class loss
size_class_loss = self.size_class_loss(
bbox_preds[f'{prefix}size_class'].transpose(2, 1),
size_class_loss = self.loss_size_class(
feats_dict[f'{prefix}size_class'].transpose(2, 1),
size_class_targets,
weight=box_loss_weights)
losses[
......@@ -513,19 +570,19 @@ class GroupFree3DHead(BaseModule):
one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(
-1).expand(-1, -1, -1, 3).contiguous()
size_residual_norm = torch.sum(
bbox_preds[f'{prefix}size_res_norm'] *
feats_dict[f'{prefix}size_res_norm'] *
one_hot_size_targets_expand, 2)
box_loss_weights_expand = box_loss_weights.unsqueeze(
-1).expand(-1, -1, 3)
size_res_loss = self.size_res_loss(
size_res_loss = self.loss_size_res(
size_residual_norm,
size_res_targets,
weight=box_loss_weights_expand)
losses[f'{prefix}size_res_loss'] = size_res_loss / num_stages
# calculate semantic loss
semantic_loss = self.semantic_loss(
bbox_preds[f'{prefix}sem_scores'].transpose(2, 1),
semantic_loss = self.loss_semantic(
feats_dict[f'{prefix}sem_scores'].transpose(2, 1),
mask_targets,
weight=box_loss_weights)
losses[f'{prefix}semantic_loss'] = semantic_loss / num_stages
......@@ -535,27 +592,29 @@ class GroupFree3DHead(BaseModule):
return losses
def get_targets(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
bbox_preds=None,
max_gt_num=64):
def get_targets(
self,
points: List[Tensor],
feats_dict: dict = None,
batch_gt_instances_3d: List[InstanceData] = None,
batch_pts_semantic_mask: List[torch.Tensor] = None,
batch_pts_instance_mask: List[torch.Tensor] = None,
max_gt_num: int = 64,
):
"""Generate targets of GroupFree3D head.
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
max_gt_num (int): Max number of GTs for single batch.
feats_dict (dict): Predictions of previous component.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
point clouds. Defaults to None.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
point clouds. Defaults to None.
max_gt_num (int): Max number of GTs for single batch. Defaults
to 64.
Returns:
tuple[torch.Tensor]: Targets of GroupFree3D head.
......@@ -563,51 +622,67 @@ class GroupFree3DHead(BaseModule):
# find empty example
valid_gt_masks = list()
gt_num = list()
for index in range(len(gt_labels_3d)):
if len(gt_labels_3d[index]) == 0:
fake_box = gt_bboxes_3d[index].tensor.new_zeros(
1, gt_bboxes_3d[index].tensor.shape[-1])
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
batch_gt_labels_3d = [
gt_instances_3d.labels_3d
for gt_instances_3d in batch_gt_instances_3d
]
batch_gt_bboxes_3d = [
gt_instances_3d.bboxes_3d
for gt_instances_3d in batch_gt_instances_3d
]
for index in range(len(batch_gt_labels_3d)):
if len(batch_gt_labels_3d[index]) == 0:
fake_box = batch_gt_bboxes_3d[index].tensor.new_zeros(
1, batch_gt_bboxes_3d[index].tensor.shape[-1])
batch_gt_bboxes_3d[index] = batch_gt_bboxes_3d[index].new_box(
fake_box)
batch_gt_labels_3d[index] = batch_gt_labels_3d[
index].new_zeros(1)
valid_gt_masks.append(batch_gt_labels_3d[index].new_zeros(1))
gt_num.append(1)
else:
valid_gt_masks.append(gt_labels_3d[index].new_ones(
gt_labels_3d[index].shape))
gt_num.append(gt_labels_3d[index].shape[0])
# max_gt_num = max(gt_num)
valid_gt_masks.append(batch_gt_labels_3d[index].new_ones(
batch_gt_labels_3d[index].shape))
gt_num.append(batch_gt_labels_3d[index].shape[0])
max_gt_nums = [max_gt_num for _ in range(len(gt_labels_3d))]
max_gt_nums = [max_gt_num for _ in range(len(batch_gt_labels_3d))]
if pts_semantic_mask is None:
pts_semantic_mask = [None for i in range(len(gt_labels_3d))]
pts_instance_mask = [None for i in range(len(gt_labels_3d))]
if batch_pts_semantic_mask is None:
batch_pts_semantic_mask = [
None for i in range(len(batch_gt_labels_3d))
]
batch_pts_instance_mask = [
None for i in range(len(batch_gt_labels_3d))
]
seed_points = [
bbox_preds['seed_points'][i] for i in range(len(gt_labels_3d))
feats_dict['seed_points'][i]
for i in range(len(batch_gt_labels_3d))
]
seed_indices = [
bbox_preds['seed_indices'][i] for i in range(len(gt_labels_3d))
feats_dict['seed_indices'][i]
for i in range(len(batch_gt_labels_3d))
]
candidate_indices = [
bbox_preds['query_points_sample_inds'][i]
for i in range(len(gt_labels_3d))
feats_dict['query_points_sample_inds'][i]
for i in range(len(batch_gt_labels_3d))
]
(sampling_targets, assigned_size_targets, size_class_targets,
size_res_targets, dir_class_targets, dir_res_targets, center_targets,
assigned_center_targets, mask_targets, objectness_targets,
objectness_masks) = multi_apply(self.get_targets_single, points,
gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask,
max_gt_nums, seed_points,
seed_indices, candidate_indices)
assigned_center_targets, mask_targets,
objectness_targets, objectness_masks) = multi_apply(
self._get_targets_single, points, batch_gt_bboxes_3d,
batch_gt_labels_3d, batch_pts_semantic_mask,
batch_pts_instance_mask, max_gt_nums, seed_points, seed_indices,
candidate_indices)
# pad targets as original code of GroupFree3D.
for index in range(len(gt_labels_3d)):
pad_num = max_gt_num - gt_labels_3d[index].shape[0]
for index in range(len(batch_gt_labels_3d)):
pad_num = max_gt_num - batch_gt_labels_3d[index].shape[0]
valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))
sampling_targets = torch.stack(sampling_targets)
......@@ -644,17 +719,17 @@ class GroupFree3DHead(BaseModule):
mask_targets, valid_gt_masks, objectness_targets,
objectness_weights, box_loss_weights, valid_gt_weights)
def get_targets_single(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
max_gt_nums=None,
seed_points=None,
seed_indices=None,
candidate_indices=None,
seed_points_obj_topk=4):
def _get_targets_single(self,
points: Tensor,
gt_bboxes_3d: BaseInstance3DBoxes,
gt_labels_3d: Tensor,
pts_semantic_mask: Optional[Tensor] = None,
pts_instance_mask: Optional[Tensor] = None,
max_gt_nums: Optional[int] = None,
seed_points: Optional[Tensor] = None,
seed_indices: Optional[Tensor] = None,
candidate_indices: Optional[Tensor] = None,
seed_points_obj_topk: int = 4):
"""Generate targets of GroupFree3D head for single batch.
Args:
......@@ -662,15 +737,20 @@ class GroupFree3DHead(BaseModule):
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
pts_semantic_mask (torch.Tensor): Point-wise semantic
label of each batch.
pts_instance_mask (torch.Tensor): Point-wise instance
label of each batch.
max_gt_nums (int): Max number of GTs for single batch.
seed_points (torch.Tensor): Coordinates of seed points.
seed_indices (torch.Tensor): Indices of seed points.
candidate_indices (torch.Tensor): Indices of object candidates.
pts_semantic_mask (torch.Tensor, optional): Point-wise semantic
label of each batch. Defaults to None.
pts_instance_mask (torch.Tensor, optional): Point-wise instance
label of each batch. Defaults to None.
max_gt_nums (int, optional): Max number of GTs for single batch.
Defaults to None.
seed_points (torch.Tensor, optional): Coordinates of seed points.
Defaults to None.
seed_indices (torch.Tensor, optional): Indices of seed points.
Defaults to None.
candidate_indices (torch.Tensor, optional): Indices of object
candidates. Defaults to None.
seed_points_obj_topk (int): k value of k-Closest Points Sampling.
Defaults to 4.
Returns:
tuple[torch.Tensor]: Targets of GroupFree3D head.
......@@ -755,7 +835,7 @@ class GroupFree3DHead(BaseModule):
pts_instance_label = instance_lable.long()
pts_instance_label[pts_obj_mask == 0] = -1
elif pts_semantic_mask is not None:
elif pts_instance_mask is not None and pts_semantic_mask is not None:
for i in torch.unique(pts_instance_mask):
indices = torch.nonzero(
pts_instance_mask == i, as_tuple=False).squeeze(-1)
......@@ -863,30 +943,58 @@ class GroupFree3DHead(BaseModule):
center_targets, assigned_center_targets, mask_targets,
objectness_targets, objectness_masks)
def get_bboxes(self,
points,
bbox_preds,
input_metas,
rescale=False,
use_nms=True):
"""Generate bboxes from GroupFree3D head predictions.
def predict(self, points: List[torch.Tensor],
feats_dict: Dict[str, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> List[InstanceData]:
"""
Args:
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Features from FPN or backbone.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes meta information of data.
Returns:
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3D bounding boxes and corresponding
scores and labels.
"""
preds_dict = self(feats_dict)
batch_size = len(batch_data_samples)
batch_input_metas = []
for batch_index in range(batch_size):
metainfo = batch_data_samples[batch_index].metainfo
batch_input_metas.append(metainfo)
results_list = self.predict_by_feat(points, preds_dict,
batch_input_metas, **kwargs)
return results_list
def predict_by_feat(self,
points: List[torch.Tensor],
bbox_preds_dict: dict,
batch_input_metas: List[dict],
use_nms: bool = True,
**kwargs) -> List[InstanceData]:
"""Generate bboxes from vote head predictions.
Args:
points (torch.Tensor): Input points.
bbox_preds (dict): Predictions from GroupFree3D head.
input_metas (list[dict]): Point cloud and image's meta info.
rescale (bool): Whether to rescale bboxes.
points (List[torch.Tensor]): Input points of multiple samples.
bbox_preds_dict (dict): Predictions from GroupFree3D head.
batch_input_metas (list[dict]): Each item
contains the meta information of each sample.
use_nms (bool): Whether to apply NMS, skip nms postprocessing
while using GroupFree3D head in rpn stage.
while using GroupFree3D head in the RPN stage.
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3D bounding boxes and corresponding
scores and labels.
"""
# support multi-stage predictions
assert self.test_cfg['prediction_stages'] in \
['last', 'all', 'last_three']
prefixes = list()
if self.test_cfg['prediction_stages'] == 'last':
prefixes = [f's{self.num_decoder_layers - 1}.']
elif self.test_cfg['prediction_stages'] == 'all':
......@@ -905,9 +1013,10 @@ class GroupFree3DHead(BaseModule):
bbox3d = list()
for prefix in prefixes:
# decode boxes
obj_score = bbox_preds[f'{prefix}obj_scores'][..., -1].sigmoid()
sem_score = bbox_preds[f'{prefix}sem_scores'].softmax(-1)
bbox = self.bbox_coder.decode(bbox_preds, prefix)
obj_score = bbox_preds_dict[f'{prefix}obj_scores'][...,
-1].sigmoid()
sem_score = bbox_preds_dict[f'{prefix}sem_scores'].softmax(-1)
bbox = self.bbox_coder.decode(bbox_preds_dict, prefix)
obj_scores.append(obj_score)
sem_scores.append(sem_score)
bbox3d.append(bbox)
......@@ -915,22 +1024,27 @@ class GroupFree3DHead(BaseModule):
obj_scores = torch.cat(obj_scores, dim=1)
sem_scores = torch.cat(sem_scores, dim=1)
bbox3d = torch.cat(bbox3d, dim=1)
stack_points = torch.stack(points)
results_list = list()
if use_nms:
batch_size = bbox3d.shape[0]
results = list()
for b in range(batch_size):
temp_results = InstanceData()  # fresh container per sample
bbox_selected, score_selected, labels = \
self.multiclass_nms_single(obj_scores[b], sem_scores[b],
bbox3d[b], points[b, ..., :3],
input_metas[b])
bbox = input_metas[b]['box_type_3d'](
self.multiclass_nms_single(obj_scores[b],
sem_scores[b],
bbox3d[b],
stack_points[b, ..., :3],
batch_input_metas[b])
bbox = batch_input_metas[b]['box_type_3d'](
bbox_selected,
box_dim=bbox_selected.shape[-1],
with_yaw=self.bbox_coder.with_rot)
results.append((bbox, score_selected, labels))
return results
temp_results.bboxes_3d = bbox
temp_results.scores_3d = score_selected
temp_results.labels_3d = labels
results_list.append(temp_results)
return results_list
else:
return bbox3d
......
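`predict_by_feat` now returns InstanceData objects rather than `(bbox, score, label)` tuples. A stand-in sketch of one result, with the field names used above:

import torch
from mmengine import InstanceData

results = InstanceData()  # stand-in for one entry of results_list
results.bboxes_3d = torch.zeros(1, 7)  # a box_type_3d instance in practice
results.scores_3d = torch.ones(1)
results.labels_3d = torch.zeros(1, dtype=torch.long)
print(results.scores_3d, results.labels_3d)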
......@@ -214,9 +214,9 @@ class VoteHead(BaseModule):
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
batch_pts_semantic_mask.append(
data_sample.seg_data.get('pts_semantic_mask', None))
data_sample.gt_pts_seg.get('pts_semantic_mask', None))
batch_pts_instance_mask.append(
data_sample.seg_data.get('pts_instance_mask', None))
data_sample.gt_pts_seg.get('pts_instance_mask', None))
loss_inputs = (points, preds_dict, batch_gt_instance_3d)
losses = self.loss_by_feat(
......@@ -452,9 +452,9 @@ class VoteHead(BaseModule):
                gt_instances. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
                point clouds. Defaults to None.
            batch_pts_instance_mask (list[tensor]): Instance gt mask for
                point clouds. Defaults to None.

        Returns:
            tuple[torch.Tensor]: Targets of vote head.
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet3d.registry import MODELS
from ...core import SampleList
from .single_stage import SingleStage3DDetector
@@ -15,91 +14,73 @@ class GroupFree3DNet(SingleStage3DDetector):
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 **kwargs):
        super(GroupFree3DNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            init_cfg=init_cfg,
            **kwargs)
    def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
             **kwargs) -> dict:
        """Calculate losses from a batch of inputs dict and data samples.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                'points' and 'imgs' keys.

                - points (list[torch.Tensor]): Point cloud of each sample.
                - imgs (torch.Tensor, optional): Image of each sample.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance_3d` and `gt_pts_seg`.

        Returns:
            dict: A dictionary of loss components.
        """
        # TODO: refactor votenet series to reduce redundant codes.
        x = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        losses = self.bbox_head.loss(points, x, batch_data_samples, **kwargs)
        return losses
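# NOTE (illustrative): a minimal sketch of the inputs the refactored loss()
# expects. In the real pipeline Det3DDataPreprocessor assembles both
# arguments; the names and shapes below are assumptions for illustration.
import torch

batch_inputs_dict = dict(
    points=[torch.rand(1024, 3) for _ in range(2)])  # one tensor per sample
# batch_data_samples would be a list of Det3DDataSample objects, each
# carrying gt_instances_3d (bboxes_3d, labels_3d) and gt_pts_seg masks.
# losses = model.loss(batch_inputs_dict, batch_data_samples)
# -> a flat dict such as {'sampling_objectness_loss': ...,
#    'proposal.objectness_loss': ..., 's0.center_loss': ...}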
    def predict(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
                **kwargs) -> SampleList:
        """Predict results from a batch of inputs and data samples with
        post-processing.

        Args:
            batch_inputs_dict (dict): The model input dict which includes
                'points' and 'imgs' keys.

                - points (list[torch.Tensor]): Point cloud of each sample.
                - imgs (torch.Tensor, optional): Image of each sample.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance_3d` and `gt_pts_seg`.
            rescale (bool): Whether to rescale the results.
                Defaults to True.

        Returns:
            list[:obj:`Det3DDataSample`]: Detection results of the
            input samples. Each Det3DDataSample usually contains
            'pred_instances_3d', and ``pred_instances_3d`` usually
            contains the following keys.

            - scores_3d (Tensor): Classification scores, has a shape
              (num_instances, ).
            - labels_3d (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes_3d (Tensor): Contains a tensor with shape
              (num_instances, C) where C >= 7.
        """
        x = self.extract_feat(batch_inputs_dict)
        points = batch_inputs_dict['points']
        results_list = self.bbox_head.predict(points, x, batch_data_samples,
                                              **kwargs)
        predictions = self.convert_to_datasample(results_list)
        return predictions
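# NOTE (illustrative): a short, hedged sketch of consuming the returned data
# samples. The attribute names come from the Returns section above; the
# InstanceData stand-in and the 0.5 threshold are assumptions, not this
# commit's code.
import torch
from mmengine import InstanceData

pred = InstanceData()  # stand-in for results[0].pred_instances_3d
pred.bboxes_3d = torch.rand(4, 7)  # (N, C) with C >= 7; the real head wraps
                                   # this in a box_type_3d structure
pred.scores_3d = torch.rand(4)     # (N,)
pred.labels_3d = torch.randint(0, 18, (4, ))  # (N,), 18 ScanNet classes

keep = pred.scores_3d > 0.5  # optional filtering on top of the head's NMS
filtered = pred[keep]        # InstanceData supports boolean indexing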
import unittest

import torch
from mmengine import DefaultScope

from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
                                     _get_detector_cfg, _setup_seed)


class TestGroupfree3d(unittest.TestCase):

    def test_groupfree3d(self):
        import mmdet3d.models
        assert hasattr(mmdet3d.models, 'GroupFree3DNet')
        DefaultScope.get_instance('test_groupfree3d', scope_name='mmdet3d')
        _setup_seed(0)
        voxel_net_cfg = _get_detector_cfg(
            'groupfree3d/groupfree3d_8x4_scannet-3d-18class-L6-O256.py')
        model = MODELS.build(voxel_net_cfg)
        num_gt_instance = 5
        data = [
            _create_detector_inputs(
                num_gt_instance=num_gt_instance,
                points_feat_dim=3,
                with_pts_semantic_mask=True,
                with_pts_instance_mask=True)
        ]
        if torch.cuda.is_available():
            model = model.cuda()
            # test predict
            with torch.no_grad():
                batch_inputs, data_samples = model.data_preprocessor(
                    data, True)
                results = model.forward(
                    batch_inputs, data_samples, mode='predict')
            self.assertEqual(len(results), len(data))
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)
            # save memory
            with torch.no_grad():
                losses = model.forward(batch_inputs, data_samples,
                                       mode='loss')
            self.assertGreater(losses['sampling_objectness_loss'], 0)
            self.assertGreater(losses['proposal.objectness_loss'], 0)
            self.assertGreater(losses['s0.objectness_loss'], 0)
            self.assertGreater(losses['s1.size_res_loss'], 0)
            self.assertGreater(losses['s4.size_class_loss'], 0)
@@ -7,7 +7,7 @@ import numpy as np
import torch
from mmengine import InstanceData
from mmdet3d.core import Det3DDataSample, LiDARInstance3DBoxes, PointData
def _setup_seed(seed):
@@ -71,22 +71,28 @@ def _get_detector_cfg(fname):
    return model
def _create_detector_inputs(
    seed=0,
    with_points=True,
    with_img=False,
    num_gt_instance=20,
    num_points=10,
    points_feat_dim=4,
    num_classes=3,
    gt_bboxes_dim=7,
    with_pts_semantic_mask=False,
    with_pts_instance_mask=False,
):
    _setup_seed(seed)
    if with_points:
        points = torch.rand([num_points, points_feat_dim])
    else:
        points = None
    if with_img:
        img = torch.rand(3, 10, 10)
    else:
        img = None
    inputs_dict = dict(img=img, points=points)
    gt_instance_3d = InstanceData()
    gt_instance_3d.bboxes_3d = LiDARInstance3DBoxes(
        torch.rand([num_gt_instance, gt_bboxes_dim]), box_dim=gt_bboxes_dim)
@@ -94,5 +100,12 @@ def _create_detector_inputs(seed=0,
    data_sample = Det3DDataSample(
        metainfo=dict(box_type_3d=LiDARInstance3DBoxes))
    data_sample.gt_instances_3d = gt_instance_3d
    data_sample.gt_pts_seg = PointData()
    if with_pts_instance_mask:
        pts_instance_mask = torch.randint(0, num_gt_instance, [num_points])
        data_sample.gt_pts_seg['pts_instance_mask'] = pts_instance_mask
    if with_pts_semantic_mask:
        pts_semantic_mask = torch.randint(0, num_classes, [num_points])
        data_sample.gt_pts_seg['pts_semantic_mask'] = pts_semantic_mask
    return dict(inputs=inputs_dict, data_sample=data_sample)
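# NOTE (illustrative): example use of the helper above, mirroring the
# GroupFree3D test settings; shape comments follow the defaults
# (num_points=10, gt_bboxes_dim=7).
data = _create_detector_inputs(
    num_gt_instance=5,
    points_feat_dim=3,  # xyz only, matching the backbone's in_channels=3
    with_pts_semantic_mask=True,
    with_pts_instance_mask=True)
# data['inputs']['points']                      -> Tensor of shape (10, 3)
# data['data_sample'].gt_instances_3d.bboxes_3d -> LiDARInstance3DBoxes (5, 7)
# data['data_sample'].gt_pts_seg                -> PointData with both masks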