Commit f3b13cad authored by yeshenglong1's avatar yeshenglong1
Browse files

Update README.md

parent 0797920d
# CenterPoint with a sparse 3D voxel encoder (SECOND backbone + FPN neck)
# for nuScenes. NOTE(review): the original text had every line duplicated by
# a side-by-side diff rendering; this is the reconstructed single copy.
voxel_size = [0.1, 0.1, 0.2]
model = dict(
    type='CenterPoint',
    pts_voxel_layer=dict(
        max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
    pts_middle_encoder=dict(
        type='SparseEncoder',
        in_channels=5,
        sparse_shape=[41, 1024, 1024],
        output_channels=128,
        order=('conv', 'norm', 'act'),
        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
                                                                      128)),
        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
        block_type='basicblock'),
    pts_backbone=dict(
        type='SECOND',
        in_channels=256,
        out_channels=[128, 256],
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        out_channels=[256, 256],
        upsample_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        # neck outputs are concatenated, hence the sum of channel widths
        in_channels=sum([256, 256]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[1024, 1024, 40],
            voxel_size=voxel_size,
            out_size_factor=8,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
# CenterPoint with a PointPillars-style pillar encoder for nuScenes.
# NOTE(review): the original text had every line duplicated by a side-by-side
# diff rendering; this is the reconstructed single copy.
voxel_size = [0.2, 0.2, 8]
model = dict(
    type='CenterPoint',
    pts_voxel_layer=dict(
        max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
    pts_voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        voxel_size=(0.2, 0.2, 8),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        legacy=False),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        out_channels=[64, 128, 256],
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        out_channels=[128, 128, 128],
        upsample_strides=[0.5, 1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        # neck outputs are concatenated, hence the sum of channel widths
        in_channels=sum([128, 128, 128]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            pc_range=[-51.2, -51.2],
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
# FCOS3D-style monocular 3D detector (FCOSMono3D) with a caffe-style
# ResNet-101 backbone. NOTE(review): the original text had every line
# duplicated by a side-by-side diff rendering; this is the reconstructed copy.
model = dict(
    type='FCOSMono3D',
    pretrained='open-mmlab://detectron2/resnet101_caffe',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSMono3DHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=True,
        pred_velo=True,
        dir_offset=0.7854,  # pi/4
        strides=[8, 16, 32, 64, 128],
        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
        cls_branch=(256, ),
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            ()  # velo
        ),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_attr=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True),
    train_cfg=dict(
        allowed_border=0,
        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200))
# Group-Free 3D detector: PointNet++ single-scale-grouping backbone plus a
# transformer decoder head. NOTE(review): the original text had every line
# duplicated by a side-by-side diff rendering; this is the reconstructed copy.
model = dict(
    type='GroupFree3DNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=3,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 288)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='GroupFree3DHead',
        in_channels=288,
        num_decoder_layers=6,
        num_proposal=256,
        transformerlayers=dict(
            type='BaseTransformerLayer',
            attn_cfgs=dict(
                type='GroupFree3DMHA',
                embed_dims=288,
                num_heads=8,
                attn_drop=0.1,
                dropout_layer=dict(type='Dropout', drop_prob=0.1)),
            ffn_cfgs=dict(
                embed_dims=288,
                feedforward_channels=2048,
                ffn_drop=0.1,
                act_cfg=dict(type='ReLU', inplace=True)),
            operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
                             'norm')),
        pred_layer_cfg=dict(
            in_channels=288, shared_conv_channels=(288, 288), bias=True),
        sampling_objectness_loss=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        center_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(sample_mod='kps'),
    test_cfg=dict(
        sample_mod='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last'))
# H3DNet primitive head for horizontal (z-normal) surface primitives.
# NOTE(review): the original text had every line duplicated by a side-by-side
# diff rendering; this is the reconstructed single copy.
primitive_z_cfg = dict(
    type='PrimitiveHead',
    num_dims=2,
    num_classes=18,
    primitive_mode='z',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
# H3DNet primitive head for vertical (xy-normal) surface primitives;
# mirrors primitive_z_cfg except num_dims/primitive_mode.
# NOTE(review): the original text had every line duplicated by a side-by-side
# diff rendering; this is the reconstructed single copy.
primitive_xy_cfg = dict(
    type='PrimitiveHead',
    num_dims=1,
    num_classes=18,
    primitive_mode='xy',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
# H3DNet primitive head for line primitives; same skeleton as the surface
# heads but with heavier center/semantic loss weights.
# NOTE(review): the original text had every line duplicated by a side-by-side
# diff rendering; this is the reconstructed single copy.
primitive_line_cfg = dict(
    type='PrimitiveHead',
    num_dims=0,
    num_classes=18,
    primitive_mode='line',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
# H3DNet: four parallel PointNet++ streams, a VoteNet-style RPN, and an RoI
# head that refines proposals with the primitive heads defined above
# (primitive_z_cfg / primitive_xy_cfg / primitive_line_cfg must be in scope).
# NOTE(review): the original text had every line duplicated by a side-by-side
# diff rendering; this is the reconstructed single copy. 'suface_matching_cfg'
# is kept as-is — it is the (misspelled) key name the upstream H3DBboxHead
# expects; renaming it would break config parsing.
model = dict(
    type='H3DNet',
    backbone=dict(
        type='MultiBackbone',
        num_streams=4,
        suffixes=['net0', 'net1', 'net2', 'net3'],
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        backbones=dict(
            type='PointNet2SASSG',
            in_channels=4,
            num_points=(2048, 1024, 512, 256),
            radius=(0.2, 0.4, 0.8, 1.2),
            num_samples=(64, 32, 16, 16),
            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                         (128, 128, 256)),
            fp_channels=((256, 256), (256, 256)),
            norm_cfg=dict(type='BN2d'),
            sa_cfg=dict(
                type='PointSAModule',
                pool_mod='max',
                use_xyz=True,
                normalize_xyz=True))),
    rpn_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    roi_head=dict(
        type='H3DRoIHead',
        primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],
        bbox_head=dict(
            type='H3DBboxHead',
            gt_per_seed=3,
            num_proposal=256,
            suface_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 6,  # 6 surface centers per proposal
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 6, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            line_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 12,  # 12 line centers per proposal
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 12, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            feat_channels=(128, 128),
            primitive_refine_channels=[128, 128, 128],
            upper_thresh=100.0,
            surface_thresh=0.5,
            line_thresh=0.5,
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='sum',
                loss_weight=5.0),
            center_loss=dict(
                type='ChamferDistance',
                mode='l2',
                reduction='sum',
                loss_src_weight=10.0,
                loss_dst_weight=10.0),
            dir_class_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            dir_res_loss=dict(
                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
            size_class_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            size_res_loss=dict(
                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
            semantic_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            cues_objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            cues_semantic_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            proposal_objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='none',
                loss_weight=5.0),
            primitive_center_loss=dict(
                type='MSELoss', reduction='none', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
        rpn_proposal=dict(use_nms=False),
        rcnn=dict(
            pos_distance_thr=0.3,
            neg_distance_thr=0.6,
            sample_mod='vote',
            far_threshold=0.6,
            near_threshold=0.3,
            mask_surface_threshold=0.3,
            label_surface_threshold=0.3,
            mask_line_threshold=0.3,
            label_line_threshold=0.3)),
    test_cfg=dict(
        rpn=dict(
            sample_mod='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True,
            use_nms=False),
        rcnn=dict(
            sample_mod='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True)))
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
# Shared point cloud range, defined once so the voxel layer and the voxel
# encoder cannot drift apart when the range is tuned.
point_cloud_range = [-80, -80, -5, 80, 80, 3]
model = dict(
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=point_cloud_range,
        max_voxels=(60000, 60000)),
    pts_voxel_encoder=dict(
        feat_channels=[64], point_cloud_range=point_cloud_range),
    # 640 = 160 m extent / 0.25 m pillar (voxel size inherited from _base_);
    # keep in sync with point_cloud_range above.
    pts_middle_encoder=dict(output_shape=[640, 640]),
    pts_bbox_head=dict(
        num_classes=9,
        anchor_generator=dict(
            ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
        # code_size=7 (no velocity dims), hence custom_values=[] above and the
        # 7-element code_weight below.
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
    # model training settings (based on nuScenes model settings)
    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.25, 0.25, 8]
# Shared point cloud range, defined once so the voxel layer and the voxel
# encoder cannot drift apart when the range is tuned.
point_cloud_range = [-50, -50, -5, 50, 50, 3]
model = dict(
    type='MVXFasterRCNN',
    pts_voxel_layer=dict(
        max_num_points=64,
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(30000, 40000)),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    # 400 = 100 m extent / 0.25 m pillar; keep in sync with
    # point_cloud_range / voxel_size above.
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    pts_neck=dict(
        type='FPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        in_channels=[64, 128, 256],
        out_channels=256,
        start_level=0,
        num_outs=3),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=256,
        feat_channels=256,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
            scales=[1, 2, 4],
            sizes=[
                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
                [0.5774, 1.7321, 1.],  # 1/sqrt(3)
                [1., 1., 1.],
                [0.4, 0.4, 1],
            ],
            # Two extra anchor values beyond (x, y, z, w, l, h, rot), matching
            # code_size=9 below — presumably velocity dims; TODO confirm.
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            allowed_border=0,
            # 9 weights = 7 box params + 2 custom values; the last two are
            # down-weighted.
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=1000,
            nms_thr=0.2,
            score_thr=0.05,
            min_bbox_size=0,
            max_num=500)))
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
# Shared point cloud range, defined once so the voxel layer and the voxel
# encoder cannot drift apart when the range is tuned.
point_cloud_range = [-100, -100, -5, 100, 100, 3]
model = dict(
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=point_cloud_range,
        max_voxels=(60000, 60000)),
    pts_voxel_encoder=dict(
        feat_channels=[64], point_cloud_range=point_cloud_range),
    # 800 = 200 m extent / 0.25 m pillar (voxel size inherited from _base_);
    # keep in sync with point_cloud_range above.
    pts_middle_encoder=dict(output_shape=[800, 800]),
    pts_bbox_head=dict(
        num_classes=9,
        anchor_generator=dict(
            ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
        # code_size=7 (no velocity dims), hence custom_values=[] above and the
        # 7-element code_weight below.
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
    # model training settings (based on nuScenes model settings)
    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
voxel_size = [0.16, 0.16, 4]
# Shared point cloud range, defined once so the voxel layer and the voxel
# encoder cannot drift apart when the range is tuned.
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=32,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=4,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
        point_cloud_range=point_cloud_range),
    # 496 x 432 = (y, x) extents of point_cloud_range / 0.16 m pillar;
    # keep in sync with the range and voxel_size above.
    middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
    backbone=dict(
        type='SECOND',
        in_channels=64,
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        # 384 = concatenated SECONDFPN outputs (128 * 3).
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            # Per-class anchor z-centers; presumably index-aligned with
            # `sizes` below — confirm against Anchor3DRangeGenerator.
            # NOTE(review): anchor x extends to 70.4 while point_cloud_range
            # ends at 69.12 — looks intentional, confirm.
            ranges=[
                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
                [0, -39.68, -1.78, 70.4, 39.68, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # for Pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.35,
                min_pos_iou=0.35,
                ignore_iof_thr=-1),
            dict(  # for Cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.35,
                min_pos_iou=0.35,
                ignore_iof_thr=-1),
            dict(  # for Car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.45,
                min_pos_iou=0.45,
                ignore_iof_thr=-1),
        ],
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.32, 0.32, 6]
# Shared point cloud range, defined once so the voxel layer and the voxel
# encoder cannot drift apart when the range is tuned.
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
model = dict(
    type='MVXFasterRCNN',
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(32000, 32000)),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    # 468 = 149.76 m extent / 0.32 m pillar; keep in sync with
    # point_cloud_range / voxel_size above.
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[1, 2, 2],
        out_channels=[64, 128, 256]),
    pts_neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        # 384 = concatenated SECONDFPN outputs (128 * 3).
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            # Per-class anchor z-centers; presumably index-aligned with
            # `sizes` below — confirm against AlignedAnchor3DRangeGenerator.
            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
                    [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
                    [-74.88, -74.88, 0, 74.88, 74.88, 0]],
            sizes=[
                [2.08, 4.73, 1.77],  # car
                [0.84, 1.81, 1.77],  # cyclist
                [0.84, 0.91, 1.74]  # pedestrian
            ],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=[
                dict(  # car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    ignore_iof_thr=-1),
                dict(  # pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    ignore_iof_thr=-1),
            ],
            allowed_border=0,
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=4096,
            nms_thr=0.25,
            score_thr=0.1,
            min_bbox_size=0,
            max_num=500)))
# VoxelNet with a sparse 3D middle encoder (SECOND-style), configured for
# three classes — Pedestrian / Cyclist / Car, per the per-class assigners
# in train_cfg below.
voxel_size = [0.05, 0.05, 0.1]
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=5,
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        # (z, y, x) grid: range extents / voxel_size (80/0.05=1600,
        # 70.4/0.05=1408), z padded to 41 — keep in sync with
        # point_cloud_range and voxel_size above.
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        # 512 = concatenated SECONDFPN outputs (256 + 256).
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            # Per-class anchor z-centers; presumably index-aligned with
            # `sizes` below — confirm against Anchor3DRangeGenerator.
            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # for Pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.35,
                neg_iou_thr=0.2,
                min_pos_iou=0.2,
                ignore_iof_thr=-1),
            dict(  # for Cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.35,
                neg_iou_thr=0.2,
                min_pos_iou=0.2,
                ignore_iof_thr=-1),
            dict(  # for Car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.45,
                min_pos_iou=0.45,
                ignore_iof_thr=-1),
        ],
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
# VoxelNet with a sparse 3D middle encoder, 3-class (car / pedestrian /
# cyclist, per the size and assigner comments below).
voxel_size = [0.08, 0.08, 0.1]
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=10,
        point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
        voxel_size=voxel_size,
        max_voxels=(80000, 90000)),
    voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=5,
        # (z, y, x) grid: range extents / voxel_size (102.4/0.08=1280,
        # 153.6/0.08=1920), z padded to 61 — keep in sync with
        # point_cloud_range and voxel_size above.
        sparse_shape=[61, 1280, 1920],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=384,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        # 512 = concatenated SECONDFPN outputs (256 + 256).
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            # Per-class anchor z-centers; presumably index-aligned with
            # `sizes` below — confirm against AlignedAnchor3DRangeGenerator.
            ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
                    [-76.8, -51.2, 0, 76.8, 51.2, 0],
                    [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
            sizes=[
                [2.08, 4.73, 1.77],  # car
                [0.84, 0.91, 1.74],  # pedestrian
                [0.84, 1.81, 1.77]  # cyclist
            ],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.55,
                neg_iou_thr=0.4,
                min_pos_iou=0.4,
                ignore_iof_thr=-1),
            dict(  # pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            dict(  # cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1)
        ],
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=4096,
        nms_thr=0.25,
        score_thr=0.1,
        min_bbox_size=0,
        max_num=500))
# ImVoteNet — only the 2D image branch is configured in this block
# (ResNet-50 backbone, FPN, RPN, Shared2FC RoI head); no pts_* components
# appear here.
model = dict(
    type='ImVoteNet',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        # First stage frozen; BN stats frozen (norm_eval) with
        # non-trainable affine params (requires_grad=False).
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    img_neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    img_rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        # Binary objectness via sigmoid CE at the RPN stage.
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    img_roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            # Multi-class softmax CE at the RoI stage (use_sigmoid=False).
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        img_rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        img_rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        img_rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        img_rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        img_rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
# model settings
# Mask R-CNN with ResNet-50 + FPN (standard 2D detection/instance-seg config).
# NOTE(review): every line of the scraped source was duplicated; this block is
# the de-duplicated, syntactically valid reconstruction.
model = dict(
    type='MaskRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,  # freeze stem + stage 1
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,  # COCO class count
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
# Inherit the PAConv SSG config and swap the SA module for its CUDA
# implementation with a smaller ScoreNet MLP.
# NOTE(review): de-duplicated from a garbled scrape (each line appeared twice).
_base_ = './paconv_ssg.py'
model = dict(
    backbone=dict(
        sa_cfg=dict(
            type='PAConvCUDASAModule',
            scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
# model settings
# PAConv (PointNet++-style SSG backbone) encoder-decoder for 3D semantic
# segmentation. NOTE(review): de-duplicated from a garbled scrape.
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=9,  # [xyz, rgb, normalized_xyz]
        num_points=(1024, 256, 64, 16),
        radius=(None, None, None, None),  # use kNN instead of ball query
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
                                                                    512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d', momentum=0.1),
        sa_cfg=dict(
            type='PAConvSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False,
            paconv_num_kernels=[16, 16, 16],
            paconv_kernel_input='w_neighbor',
            scorenet_input='w_neighbor_dist',
            scorenet_cfg=dict(
                mlp_channels=[16, 16, 16],
                score_norm='softmax',
                temp_factor=1.0,
                last_bn=False))),
    decode_head=dict(
        type='PAConvHead',
        # PAConv model's decoder takes skip connections from backbone;
        # different from PointNet++, it also concats input features in the last
        # level of decoder, leading to `128 + 6` as the channel number
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128 + 6, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # correlation loss to regularize PAConv's kernel weights
    loss_regularization=dict(
        type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
# model settings
# Part-A2 two-stage LiDAR detector on KITTI (3 classes: Pedestrian, Cyclist,
# Car). NOTE(review): de-duplicated from a garbled scrape (each line twice).
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # [x_min, y_min, z_min, x_max, y_max, z_max]
model = dict(
    type='PartA2',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseUNet',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    rpn_head=dict(
        type='PartA2RPNHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            # one anchor range per class (Pedestrian, Cyclist, Car)
            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    roi_head=dict(
        type='PartAggregationROIHead',
        num_classes=3,
        semantic_head=dict(
            type='PointwiseSemanticHead',
            in_channels=16,
            extra_width=0.2,
            seg_score_thr=0.3,
            num_classes=3,
            loss_seg=dict(
                type='FocalLoss',
                use_sigmoid=True,
                reduction='sum',
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_part=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
        seg_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='max')),
        part_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='avg')),
        bbox_head=dict(
            type='PartA2BboxHead',
            num_classes=3,
            seg_in_channels=16,
            part_in_channels=4,
            seg_conv_channels=[64, 64],
            part_conv_channels=[64, 64],
            merge_conv_channels=[128, 128],
            down_conv_channels=[128, 256],
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            shared_fc_channels=[256, 512, 512, 512],
            cls_channels=[256, 256],
            reg_channels=[256, 256],
            dropout_ratio=0.1,
            roi_feat_size=14,
            with_corner_loss=True,
            loss_bbox=dict(
                type='SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1)
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=9000,
            nms_post=512,
            max_num=512,
            nms_thr=0.8,
            score_thr=0,
            use_rotate_nms=False),
        rcnn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1)
            ],
            sampler=dict(
                type='IoUNegPiecewiseSampler',
                num=128,
                pos_fraction=0.55,
                neg_piece_fractions=[0.8, 0.2],
                neg_iou_piece_thrs=[0.55, 0.1],
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
                return_iou=True),
            cls_pos_thr=0.75,
            cls_neg_thr=0.25)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1024,
            nms_post=100,
            max_num=100,
            nms_thr=0.7,
            score_thr=0,
            use_rotate_nms=True),
        rcnn=dict(
            use_rotate_nms=True,
            use_raw_score=True,
            nms_thr=0.01,
            score_thr=0.1)))
_base_ = './pointnet2_ssg.py'
# model settings
# Replace the inherited SSG backbone with a multi-scale-grouping (MSG)
# PointNet++ backbone; `_delete_=True` discards the base backbone dict.
# NOTE(review): de-duplicated from a garbled scrape (each line twice).
model = dict(
    backbone=dict(
        _delete_=True,
        type='PointNet2SAMSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
        sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
                                                                    128)),
                     ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
                                                          (256, 384, 512))),
        aggregation_channels=(None, None, None, None),
        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
        fps_sample_range_lists=((-1), (-1), (-1), (-1)),
        dilated_group=(False, False, False, False),
        out_indices=(0, 1, 2, 3),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        # two radii per level double the concatenated skip channels vs SSG
        fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
                     (128, 128, 128, 128))))
# model settings
# PointNet++ single-scale-grouping (SSG) encoder-decoder for 3D semantic
# segmentation. NOTE(review): de-duplicated from a garbled scrape.
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radius=(0.1, 0.2, 0.4, 0.8),
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
                                                                    512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        type='PointNet2Head',
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
# VoteNet 3D detector: PointNet++ SSG backbone + deep Hough voting head.
# NOTE(review): de-duplicated from a garbled scrape (each line appeared twice).
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
    test_cfg=dict(
        sample_mod='seed',
        nms_thr=0.25,
        score_thr=0.05,
        per_class_proposal=True))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment