Unverified Commit 6a31be8f authored by YeShenglong1's avatar YeShenglong1 Committed by GitHub
Browse files

Add files via upload

parent 4fb17721
voxel_size = [0.1, 0.1, 0.2]
model = dict(
type='CenterPoint',
pts_voxel_layer=dict(
max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=5,
sparse_shape=[41, 1024, 1024],
output_channels=128,
order=('conv', 'norm', 'act'),
encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
128)),
encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
block_type='basicblock'),
pts_backbone=dict(
type='SECOND',
in_channels=256,
out_channels=[128, 256],
layer_nums=[5, 5],
layer_strides=[1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
conv_cfg=dict(type='Conv2d', bias=False)),
pts_neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
out_channels=[256, 256],
upsample_strides=[1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
upsample_cfg=dict(type='deconv', bias=False),
use_conv_for_no_stride=True),
pts_bbox_head=dict(
type='CenterHead',
in_channels=sum([256, 256]),
tasks=[
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
],
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
out_size_factor=8,
voxel_size=voxel_size[:2],
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
pts=dict(
grid_size=[1024, 1024, 40],
voxel_size=voxel_size,
out_size_factor=8,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
test_cfg=dict(
pts=dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
score_threshold=0.1,
out_size_factor=8,
voxel_size=voxel_size[:2],
nms_type='rotate',
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2)))
voxel_size = [0.2, 0.2, 8]
model = dict(
type='CenterPoint',
pts_voxel_layer=dict(
max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='PillarFeatureNet',
in_channels=5,
feat_channels=[64],
with_distance=False,
voxel_size=(0.2, 0.2, 8),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
legacy=False),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
pts_backbone=dict(
type='SECOND',
in_channels=64,
out_channels=[64, 128, 256],
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
conv_cfg=dict(type='Conv2d', bias=False)),
pts_neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
out_channels=[128, 128, 128],
upsample_strides=[0.5, 1, 2],
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
upsample_cfg=dict(type='deconv', bias=False),
use_conv_for_no_stride=True),
pts_bbox_head=dict(
type='CenterHead',
in_channels=sum([128, 128, 128]),
tasks=[
dict(num_class=1, class_names=['car']),
dict(num_class=2, class_names=['truck', 'construction_vehicle']),
dict(num_class=2, class_names=['bus', 'trailer']),
dict(num_class=1, class_names=['barrier']),
dict(num_class=2, class_names=['motorcycle', 'bicycle']),
dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
],
common_heads=dict(
reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
share_conv_channel=64,
bbox_coder=dict(
type='CenterPointBBoxCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_num=500,
score_threshold=0.1,
out_size_factor=4,
voxel_size=voxel_size[:2],
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
pts=dict(
grid_size=[512, 512, 1],
voxel_size=voxel_size,
out_size_factor=4,
dense_reg=1,
gaussian_overlap=0.1,
max_objs=500,
min_radius=2,
code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
test_cfg=dict(
pts=dict(
post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
max_per_img=500,
max_pool_nms=False,
min_radius=[4, 12, 10, 1, 0.85, 0.175],
score_threshold=0.1,
pc_range=[-51.2, -51.2],
out_size_factor=4,
voxel_size=voxel_size[:2],
nms_type='rotate',
pre_max_size=1000,
post_max_size=83,
nms_thr=0.2)))
model = dict(
type='FCOSMono3D',
pretrained='open-mmlab://detectron2/resnet101_caffe',
backbone=dict(
type='ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='FCOSMono3DHead',
num_classes=10,
in_channels=256,
stacked_convs=2,
feat_channels=256,
use_direction_classifier=True,
diff_rad_by_sin=True,
pred_attrs=True,
pred_velo=True,
dir_offset=0.7854, # pi/4
strides=[8, 16, 32, 64, 128],
group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo
cls_branch=(256, ),
reg_branch=(
(256, ), # offset
(256, ), # depth
(256, ), # size
(256, ), # rot
() # velo
),
dir_branch=(256, ),
attr_branch=(256, ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_attr=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
norm_on_bbox=True,
centerness_on_reg=True,
center_sampling=True,
conv_bias=True,
dcn_on_last_conv=True),
train_cfg=dict(
allowed_border=0,
code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.8,
score_thr=0.05,
min_bbox_size=0,
max_per_img=200))
model = dict(
type='GroupFree3DNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=3,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 288)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True)),
bbox_head=dict(
type='GroupFree3DHead',
in_channels=288,
num_decoder_layers=6,
num_proposal=256,
transformerlayers=dict(
type='BaseTransformerLayer',
attn_cfgs=dict(
type='GroupFree3DMHA',
embed_dims=288,
num_heads=8,
attn_drop=0.1,
dropout_layer=dict(type='Dropout', drop_prob=0.1)),
ffn_cfgs=dict(
embed_dims=288,
feedforward_channels=2048,
ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True)),
operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
'norm')),
pred_layer_cfg=dict(
in_channels=288, shared_conv_channels=(288, 288), bias=True),
sampling_objectness_loss=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=8.0),
objectness_loss=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
center_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(sample_mod='kps'),
test_cfg=dict(
sample_mod='kps',
nms_thr=0.25,
score_thr=0.0,
per_class_proposal=True,
prediction_stages='last'))
primitive_z_cfg = dict(
type='PrimitiveHead',
num_dims=2,
num_classes=18,
primitive_mode='z',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
primitive_xy_cfg = dict(
type='PrimitiveHead',
num_dims=1,
num_classes=18,
primitive_mode='xy',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=0.5,
loss_dst_weight=0.5),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
primitive_line_cfg = dict(
type='PrimitiveHead',
num_dims=0,
num_classes=18,
primitive_mode='line',
upper_thresh=100.0,
surface_thresh=0.5,
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=1,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=1024,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.4, 0.6],
reduction='mean',
loss_weight=30.0),
center_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_reg_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='sum',
loss_src_weight=1.0,
loss_dst_weight=1.0),
semantic_cls_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),
train_cfg=dict(
dist_thresh=0.2,
var_thresh=1e-2,
lower_thresh=1e-6,
num_point=100,
num_point_line=10,
line_thresh=0.2))
model = dict(
type='H3DNet',
backbone=dict(
type='MultiBackbone',
num_streams=4,
suffixes=['net0', 'net1', 'net2', 'net3'],
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
act_cfg=dict(type='ReLU'),
backbones=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True))),
rpn_head=dict(
type='VoteHead',
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
roi_head=dict(
type='H3DRoIHead',
primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],
bbox_head=dict(
type='H3DBboxHead',
gt_per_seed=3,
num_proposal=256,
suface_matching_cfg=dict(
type='PointSAModule',
num_point=256 * 6,
radius=0.5,
num_sample=32,
mlp_channels=[128 + 6, 128, 64, 32],
use_xyz=True,
normalize_xyz=True),
line_matching_cfg=dict(
type='PointSAModule',
num_point=256 * 12,
radius=0.5,
num_sample=32,
mlp_channels=[128 + 12, 128, 64, 32],
use_xyz=True,
normalize_xyz=True),
feat_channels=(128, 128),
primitive_refine_channels=[128, 128, 128],
upper_thresh=100.0,
surface_thresh=0.5,
line_thresh=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
cues_objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.3, 0.7],
reduction='mean',
loss_weight=5.0),
cues_semantic_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.3, 0.7],
reduction='mean',
loss_weight=5.0),
proposal_objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='none',
loss_weight=5.0),
primitive_center_loss=dict(
type='MSELoss', reduction='none', loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
rpn_proposal=dict(use_nms=False),
rcnn=dict(
pos_distance_thr=0.3,
neg_distance_thr=0.6,
sample_mod='vote',
far_threshold=0.6,
near_threshold=0.3,
mask_surface_threshold=0.3,
label_surface_threshold=0.3,
mask_line_threshold=0.3,
label_line_threshold=0.3)),
test_cfg=dict(
rpn=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True,
use_nms=False),
rcnn=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True)))
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-80, -80, -5, 80, 80, 3],
max_voxels=(60000, 60000)),
pts_voxel_encoder=dict(
feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),
pts_middle_encoder=dict(output_shape=[640, 640]),
pts_bbox_head=dict(
num_classes=9,
anchor_generator=dict(
ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
# model training settings (based on nuScenes model settings)
train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.25, 0.25, 8]
model = dict(
type='MVXFasterRCNN',
pts_voxel_layer=dict(
max_num_points=64,
point_cloud_range=[-50, -50, -5, 50, 50, 3],
voxel_size=voxel_size,
max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=[-50, -50, -5, 50, 50, 3],
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='FPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
act_cfg=dict(type='ReLU'),
in_channels=[64, 128, 256],
out_channels=256,
start_level=0,
num_outs=3),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
scales=[1, 2, 4],
sizes=[
[0.8660, 2.5981, 1.], # 1.5/sqrt(3)
[0.5774, 1.7321, 1.], # 1/sqrt(3)
[1., 1., 1.],
[0.4, 0.4, 1],
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500)))
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-100, -100, -5, 100, 100, 3],
max_voxels=(60000, 60000)),
pts_voxel_encoder=dict(
feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),
pts_middle_encoder=dict(output_shape=[800, 800]),
pts_bbox_head=dict(
num_classes=9,
anchor_generator=dict(
ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
# model training settings (based on nuScenes model settings)
train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
voxel_size = [0.16, 0.16, 4]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=32, # max_points_per_voxel
point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(
type='PillarFeatureNet',
in_channels=4,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -39.68, -0.6, 70.4, 39.68, -0.6],
[0, -39.68, -0.6, 70.4, 39.68, -0.6],
[0, -39.68, -1.78, 70.4, 39.68, -1.78],
],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.32, 0.32, 6]
model = dict(
type='MVXFasterRCNN',
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
voxel_size=voxel_size,
max_voxels=(32000, 32000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=5,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[1, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
[-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
[-74.88, -74.88, 0, 74.88, 74.88, 0]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 1.81, 1.77], # cyclist
[0.84, 0.91, 1.74] # pedestrian
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500)))
voxel_size = [0.05, 0.05, 0.1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5,
point_cloud_range=[0, -40, -3, 70.4, 40, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000)),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50))
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.08, 0.08, 0.1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=10,
point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
voxel_size=voxel_size,
max_voxels=(80000, 90000)),
voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
middle_encoder=dict(
type='SparseEncoder',
in_channels=5,
sparse_shape=[61, 1280, 1920],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=384,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
[-76.8, -51.2, 0, 76.8, 51.2, 0],
[-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 0.91, 1.74], # pedestrian
[0.84, 1.81, 1.77] # cyclist
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1)
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500))
model = dict(
type='ImVoteNet',
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
img_rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
img_roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
img_rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
img_rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
img_rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
img_rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
img_rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
# model settings
model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=80,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5)))
_base_ = './paconv_ssg.py'
model = dict(
backbone=dict(
sa_cfg=dict(
type='PAConvCUDASAModule',
scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
# model settings
model = dict(
type='EncoderDecoder3D',
backbone=dict(
type='PointNet2SASSG',
in_channels=9, # [xyz, rgb, normalized_xyz]
num_points=(1024, 256, 64, 16),
radius=(None, None, None, None), # use kNN instead of ball query
num_samples=(32, 32, 32, 32),
sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
512)),
fp_channels=(),
norm_cfg=dict(type='BN2d', momentum=0.1),
sa_cfg=dict(
type='PAConvSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=False,
paconv_num_kernels=[16, 16, 16],
paconv_kernel_input='w_neighbor',
scorenet_input='w_neighbor_dist',
scorenet_cfg=dict(
mlp_channels=[16, 16, 16],
score_norm='softmax',
temp_factor=1.0,
last_bn=False))),
decode_head=dict(
type='PAConvHead',
# PAConv model's decoder takes skip connections from beckbone
# different from PointNet++, it also concats input features in the last
# level of decoder, leading to `128 + 6` as the channel number
fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
(128 + 6, 128, 128, 128)),
channels=128,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None, # should be modified with dataset
loss_weight=1.0)),
# correlation loss to regularize PAConv's kernel weights
loss_regularization=dict(
type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide'))
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=3,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=3,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1)
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
max_num=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
rcnn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25)),
test_cfg=dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
max_num=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True,
use_raw_score=True,
nms_thr=0.01,
score_thr=0.1)))
_base_ = './pointnet2_ssg.py'
# model settings
model = dict(
backbone=dict(
_delete_=True,
type='PointNet2SAMSG',
in_channels=6, # [xyz, rgb], should be modified with dataset
num_points=(1024, 256, 64, 16),
radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
128)),
((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
(256, 384, 512))),
aggregation_channels=(None, None, None, None),
fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
fps_sample_range_lists=((-1), (-1), (-1), (-1)),
dilated_group=(False, False, False, False),
out_indices=(0, 1, 2, 3),
sa_cfg=dict(
type='PointSAModuleMSG',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
decode_head=dict(
fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
(128, 128, 128, 128))))
# model settings
model = dict(
type='EncoderDecoder3D',
backbone=dict(
type='PointNet2SASSG',
in_channels=6, # [xyz, rgb], should be modified with dataset
num_points=(1024, 256, 64, 16),
radius=(0.1, 0.2, 0.4, 0.8),
num_samples=(32, 32, 32, 32),
sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
512)),
fp_channels=(),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
decode_head=dict(
type='PointNet2Head',
fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
(128, 128, 128, 128)),
channels=128,
dropout_ratio=0.5,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
act_cfg=dict(type='ReLU'),
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
class_weight=None, # should be modified with dataset
loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide'))
model = dict(
type='VoteNet',
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
num_points=(2048, 1024, 512, 256),
radius=(0.2, 0.4, 0.8, 1.2),
num_samples=(64, 32, 16, 16),
sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
(128, 128, 256)),
fp_channels=((256, 256), (256, 256)),
norm_cfg=dict(type='BN2d'),
sa_cfg=dict(
type='PointSAModule',
pool_mod='max',
use_xyz=True,
normalize_xyz=True)),
bbox_head=dict(
type='VoteHead',
vote_module_cfg=dict(
in_channels=256,
vote_per_seed=1,
gt_per_seed=3,
conv_channels=(256, 256),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
norm_feats=True,
vote_loss=dict(
type='ChamferDistance',
mode='l1',
reduction='none',
loss_dst_weight=10.0)),
vote_aggregation_cfg=dict(
type='PointSAModule',
num_point=256,
radius=0.3,
num_sample=16,
mlp_channels=[256, 128, 128, 128],
use_xyz=True,
normalize_xyz=True),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
center_loss=dict(
type='ChamferDistance',
mode='l2',
reduction='sum',
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
test_cfg=dict(
sample_mod='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment