"cacheflow/git@developer.sourcefind.cn:norm/vllm.git" did not exist on "87e0bcd426c746a17229e8e292cc2997d0719e9b"
Commit ce79da2e authored by zhangwenwei

Merge branch 'add-tta' into 'master'

Support test time augmentation

See merge request open-mmlab/mmdet.3d!70
parents f6e95edd 3c5ff9fa
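To orient readers of the diffs below: test-time augmentation (TTA) here wraps the deterministic test transforms in a `MultiScaleFlipAug3D` block and merges the detections from each augmented pass (see `merge_aug_bboxes_3d` near the end of this diff). A rough, illustrative-only sketch of the enumerate-then-merge control flow; `tta_inference` and the lambda model/merger are stand-in names, not APIs from this repository:

```python
import torch

# Illustrative-only sketch of TTA control flow: run the model once per
# augmentation setting, record how each cloud was transformed, then merge.
def tta_inference(points, model, merge_detections):
    results, metas = [], []
    for flip in (False, True):
        aug = points.clone()
        if flip:
            aug[:, 1] = -aug[:, 1]  # mirror the cloud along the y axis
        results.append(model(aug))
        metas.append(dict(pcd_flip=flip, pcd_scale_factor=1.0))
    return merge_detections(results, metas)

# Toy demo: the "model" counts points, the "merger" averages the counts.
pts = torch.randn(128, 4)
print(tta_inference(pts, lambda p: p.shape[0], lambda r, m: sum(r) / len(r)))
```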
@@ -180,14 +180,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
+        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
@@ -197,12 +197,26 @@ train_pipeline = [
]
test_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
]
data = dict(
......
_base_ = [
+    '../_base_/models/pointpillars_second_fpn.py',
    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
    '../_base_/default_runtime.py'
]
-# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
-model = dict(
-    type='MVXFasterRCNNV2',
-    pts_voxel_layer=dict(
-        max_num_points=64,  # max_points_per_voxel
-        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000),  # (training, testing) max_voxels
-    ),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter',
-        in_channels=64,
-        output_shape=[400, 400],  # checked from PointCloud3D
-    ),
-    pts_backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256],
-    ),
-    pts_neck=dict(
-        type='FPN',
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        act_cfg=dict(type='ReLU'),
-        in_channels=[64, 128, 256],
-        out_channels=256,
-        start_level=0,
-        num_outs=3,
-    ),
-    pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
-        in_channels=256,
-        feat_channels=256,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='AlignedAnchor3DRangeGenerator',
-            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
-            scales=[1, 2, 4],
-            sizes=[
-                [0.8660, 2.5981, 1.],  # 1.5 / sqrt(3)
-                [0.5774, 1.7321, 1.],  # 1 / sqrt(3)
-                [1., 1., 1.],
-                [0.4, 0.4, 1],
-            ],
-            custom_values=[0, 0],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.3,
-            min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500
-        # soft-nms is also supported for rcnn testing
-        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
-    ))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_camera=False,
-    use_radar=False,
-    use_map=False,
-    use_external=False)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
-# model settings
-point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
-voxel_size = [0.16, 0.16, 4]
-model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=32,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000)),
-    voxel_encoder=dict(
-        type='PillarFeatureNet',
-        in_channels=4,
-        feat_channels=[64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-    backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[64, 128, 256],
-        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128]),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=3,
-        in_channels=384,
-        feat_channels=384,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[
-                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -39.68, -1.78, 70.4, 39.68, -1.78],
-            ],
-            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=False),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    assigner=[
-        dict(  # for Pedestrian
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.5,
-            neg_iou_thr=0.35,
-            min_pos_iou=0.35,
-            ignore_iof_thr=-1),
-        dict(  # for Cyclist
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.5,
-            neg_iou_thr=0.35,
-            min_pos_iou=0.35,
-            ignore_iof_thr=-1),
-        dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.45,
-            min_pos_iou=0.45,
-            ignore_iof_thr=-1),
-    ],
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
+_base_ = [
+    '../_base_/models/hv_pointpillars_secfpn.py',
+    '../_base_/datasets/kitti-3d-3class.py',
+    '../_base_/schedules/cyclic_40e.py', '../_base_/default_runtime.py'
+]
+point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
# dataset settings
-dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
-input_modality = dict(use_lidar=True, use_camera=False)
+# PointPillars adopted different sampling strategies among classes
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
@@ -108,6 +20,7 @@ db_sampler = dict(
    classes=class_names,
    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10))
+# PointPillars uses different augmentation hyperparameters
train_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
@@ -115,14 +28,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[0.25, 0.25, 0.25],
+        translation_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
+        rot_range=[-0.15707963267, 0.15707963267]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
@@ -131,86 +44,44 @@ train_pipeline = [
]
test_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
]
data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
+    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),
+    val=dict(pipeline=test_pipeline, classes=class_names),
+    test=dict(pipeline=test_pipeline, classes=class_names))
+# In practice PointPillars also uses a different schedule
# optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is changed during training
-    weight_decay=0.01)
+lr = 0.001
+optimizer = dict(lr=lr)
+# max_norm=35 is slightly better than 10 for PointPillars in the earlier
+# development of the codebase; we keep the setting but did not specifically
+# tune this parameter.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-checkpoint_config = dict(interval=1)
+# Use evaluation interval=2 to reduce the number of evaluations
evaluation = dict(interval=2)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
+# PointPillars usually needs a longer schedule than SECOND, so we simply
+# double the training schedule. Note that since we use RepeatDataset with a
+# repeat factor of 2, we actually train for 160 epochs.
total_epochs = 80
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
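A quick check of the schedule arithmetic stated in the comments above (values taken from this config):

```python
# Effective passes over the raw KITTI training split: RepeatDataset with
# times=2 combined with total_epochs=80 gives 160 passes, as the comment says.
total_epochs = 80
repeat_times = 2
print(total_epochs * repeat_times)  # 160
```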
# model settings
-voxel_size = [0.16, 0.16, 4]
+_base_ = './hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py'
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(12000, 20000)),
-    voxel_encoder=dict(
-        type='PillarFeatureNet',
-        in_channels=4,
-        feat_channels=[64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-    backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[64, 128, 256],
-        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=1,
-        in_channels=384,
-        feat_channels=384,
-        use_direction_classifier=True,
        anchor_generator=dict(
+            _delete_=True,
            type='Anchor3DRangeGenerator',
            ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
            sizes=[[1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
+            reshape_out=True)))
# model training and testing settings
train_cfg = dict(
+    _delete_=True,
    assigner=dict(
        type='MaxIoUAssigner',
        iou_calculator=dict(type='BboxOverlapsNearest3D'),
@@ -63,20 +26,11 @@ train_cfg = dict(
    allowed_border=0,
    pos_weight=-1,
    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
-input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
@@ -93,14 +47,14 @@ train_pipeline = [
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[0.25, 0.25, 0.25],
+        translation_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
+        rot_range=[-0.15707963267, 0.15707963267]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
@@ -109,84 +63,32 @@ train_pipeline = [
]
test_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
]
data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
+        dataset=dict(pipeline=train_pipeline, classes=class_names)),
+    val=dict(pipeline=test_pipeline, classes=class_names),
+    test=dict(pipeline=test_pipeline, classes=class_names))
-# optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is changed during training
-    weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 80
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
_base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
+    '../_base_/models/pointpillars_second_fpn.py',
+    '../_base_/datasets/nus-3d.py',
+    '../_base_/schedules/schedule_2x.py',
+    '../_base_/default_runtime.py',
]
# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
model = dict(
-    type='MVXFasterRCNNV2',
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
-    pts_backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
    pts_neck=dict(
+        _delete_=True,
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
        in_channels=384,
        feat_channels=384,
-        use_direction_classifier=True,
        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
+            _delete_=True,
+            type='AlignedAnchor3DRangeGenerator',
            ranges=[
                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
@@ -69,56 +39,4 @@ model = dict(
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
+            reshape_out=True)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.3,
-            min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_camera=False,
-    use_radar=False,
-    use_map=False,
-    use_external=False)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
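Several overrides in these refactored configs rely on `_delete_=True`. As a reminder of its semantics, here is a minimal re-implementation of the mmcv-style merge rule for illustration only (the real behaviour lives in mmcv's Config): a child dict carrying `_delete_=True` replaces the base dict wholesale instead of being merged into it.

```python
# Minimal sketch of the mmcv-style merge rule behind `_delete_=True`
# (illustration only, not mmcv's actual implementation):
def merge(base, child):
    if child.pop('_delete_', False):
        return child  # replace the base dict wholesale
    out = dict(base)
    for key, value in child.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            out[key] = merge(base[key], value)
        else:
            out[key] = value
    return out

base = dict(pts_neck=dict(type='FPN', out_channels=256, num_outs=3))
child = dict(pts_neck=dict(_delete_=True, type='SECONDFPN',
                           out_channels=[128, 128, 128]))
print(merge(base, child)['pts_neck'])
# {'type': 'SECONDFPN', 'out_channels': [128, 128, 128]} – no FPN keys left
```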
_base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
+    '../_base_/models/pointpillars_second_fpn.py',
+    '../_base_/datasets/nus-3d.py',
+    '../_base_/schedules/schedule_2x.py',
+    '../_base_/default_runtime.py',
]
# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
model = dict(
    type='MVXFasterRCNNV2',
    pretrained=dict(pts='open-mmlab://regnetx_400mf'),
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
    pts_backbone=dict(
+        _delete_=True,
        type='NoStemRegNet',
        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
        out_indices=(1, 2, 3),
        frozen_stages=-1,
        strides=(1, 2, 2, 2),
        base_channels=64,
-        stem_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        norm_eval=False,
        style='pytorch'),
-    pts_neck=dict(
-        type='FPN',
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        act_cfg=dict(type='ReLU'),
-        in_channels=[64, 160, 384],
-        out_channels=256,
-        start_level=0,
-        num_outs=3),
+    pts_neck=dict(in_channels=[64, 160, 384]))
-    pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
-        in_channels=256,
-        feat_channels=256,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='AlignedAnchor3DRangeGenerator',
-            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
-            scales=[1, 2, 4],
-            sizes=[
-                [0.8660, 2.5981, 1.],  # 1.5 / sqrt(3)
-                [0.5774, 1.7321, 1.],  # 1 / sqrt(3)
-                [1., 1., 1.],
-                [0.4, 0.4, 1],
-            ],
-            custom_values=[0, 0],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.3,
-            min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_depth=False,
-    use_lidar_intensity=True,
-    use_camera=False)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
-_base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
-]
+_base_ = './hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py'
# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
model = dict(
-    type='MVXFasterRCNNV2',
-    pretrained=dict(pts='open-mmlab://regnetx_400mf'),
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
-    pts_backbone=dict(
-        type='NoStemRegNet',
-        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
-        out_indices=(1, 2, 3),
-        frozen_stages=-1,
-        strides=(1, 2, 2, 2),
-        base_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        norm_eval=False,
-        style='pytorch'),
    pts_neck=dict(
        type='SECONDFPN',
+        _delete_=True,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 160, 384],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    pts_bbox_head=dict(
        type='Anchor3DHead',
-        num_classes=10,
        in_channels=384,
        feat_channels=384,
-        use_direction_classifier=True,
        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
+            _delete_=True,
+            type='AlignedAnchor3DRangeGenerator',
            ranges=[
                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
@@ -73,56 +35,5 @@ model = dict(
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
+            reshape_out=True)))
# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.3,
-            min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_depth=False,
-    use_lidar_intensity=True,
-    use_camera=False,
-)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+_base_ = [
+    '../_base_/models/hv_second_secfpn.py',
+    '../_base_/datasets/kitti-3d-3class.py',
+    '../_base_/schedules/cyclic_40e.py', '../_base_/default_runtime.py'
+]
-model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=5,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000),  # (training, testing) max_voxels
-    ),
-    voxel_encoder=dict(type='HardSimpleVFE'),
-    middle_encoder=dict(
-        type='SparseEncoder',
-        in_channels=4,
-        sparse_shape=[41, 1600, 1408],
-        order=('conv', 'norm', 'act')),
-    backbone=dict(
-        type='SECOND',
-        in_channels=256,
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        out_channels=[128, 256],
-    ),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[128, 256],
-        upsample_strides=[1, 2],
-        out_channels=[256, 256],
-    ),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=3,
-        in_channels=512,
-        feat_channels=512,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
-            ],
-            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=False),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ),
-)
-# model training and testing settings
-train_cfg = dict(
-    assigner=[
-        dict(  # for Pedestrian
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.35,
-            neg_iou_thr=0.2,
-            min_pos_iou=0.2,
-            ignore_iof_thr=-1),
-        dict(  # for Cyclist
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.35,
-            neg_iou_thr=0.2,
-            min_pos_iou=0.2,
-            ignore_iof_thr=-1),
-        dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6,
-            neg_iou_thr=0.45,
-            min_pos_iou=0.45,
-            ignore_iof_thr=-1),
-    ],
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Pedestrian', 'Cyclist', 'Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(
-            Car=5,
-            Pedestrian=10,
-            Cyclist=10,
-        )),
-    classes=class_names,
-    sample_groups=dict(
-        Car=12,
-        Pedestrian=6,
-        Cyclist=6,
-    ))
-file_client_args = dict(backend='disk')
-# file_client_args = dict(
-#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
-train_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        load_dim=4,
-        use_dim=4,
-        file_client_args=file_client_args),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=True,
-        with_label_3d=True,
-        file_client_args=file_client_args),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
-]
-test_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        load_dim=4,
-        use_dim=4,
-        file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points']),
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
-# optimizer
-lr = 0.0018  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z
+_base_ = [
+    '../_base_/models/hv_second_secfpn.py',
+    '../_base_/datasets/kitti-3d-car.py', '../_base_/schedules/cyclic_40e.py',
+    '../_base_/default_runtime.py'
+]
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=5,  # max_points_per_voxel
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000),  # (training, testing) max_voxels
-    ),
-    voxel_encoder=dict(type='HardSimpleVFE'),
-    middle_encoder=dict(
-        type='SparseEncoder',
-        in_channels=4,
-        sparse_shape=[41, 1600, 1408],
-        order=('conv', 'norm', 'act')),
-    backbone=dict(
-        type='SECOND',
-        in_channels=256,
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        out_channels=[128, 256],
-    ),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[128, 256],
-        upsample_strides=[1, 2],
-        out_channels=[256, 256],
-    ),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=1,
-        in_channels=512,
-        feat_channels=512,
-        use_direction_classifier=True,
        anchor_generator=dict(
+            _delete_=True,
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ),
-)
+            reshape_out=True)))
# model training and testing settings
train_cfg = dict(
+    _delete_=True,
    assigner=dict(
        type='MaxIoUAssigner',
        iou_calculator=dict(type='BboxOverlapsNearest3D'),
@@ -66,146 +28,3 @@ train_cfg = dict(
    allowed_border=0,
    pos_weight=-1,
    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5),
-    ),
-    classes=class_names,
-    sample_groups=dict(Car=15),
-)
-file_client_args = dict(backend='disk')
-# file_client_args = dict(
-#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
-train_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        load_dim=4,
-        use_dim=4,
-        file_client_args=file_client_args),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=True,
-        with_label_3d=True,
-        file_client_args=file_client_args),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
-]
-test_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        load_dim=4,
-        use_dim=4,
-        file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points']),
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True))
-# optimizer
-lr = 0.0018  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(
-    policy='cyclic',
-    target_ratio=(10, 1e-4),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-momentum_config = dict(
-    policy='cyclic',
-    target_ratio=(0.85 / 0.95, 1),
-    cyclic_times=1,
-    step_ratio_up=0.4,
-)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
@@ -19,13 +19,3 @@ model = dict(
            [0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728]
        ]),
    ))
-# optimizer
-# yapf:disable
-log_config = dict(
-    interval=30,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
@@ -34,7 +34,7 @@ model = dict(
# optimizer
# yapf:disable
log_config = dict(
-    interval=50,
+    interval=30,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
......
@@ -117,7 +117,7 @@ For each operation, we list the related dict fields that are added/updated/removed
- update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg

`Collect`

-- add: img_meta (the keys of img_meta is specified by `meta_keys`)
+- add: img_metas (the keys of img_metas are specified by `meta_keys`)
- remove: all other keys except for those specified by `keys`
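To make the `Collect` contract above concrete, here is a minimal stand-in (a hypothetical helper for illustration, not the library class) that keeps only `keys` and packs the chosen meta fields into `img_metas`:

```python
# Stand-in sketch of Collect-style behaviour (not the library implementation).
def collect(results, keys, meta_keys=('filename', 'flip', 'pcd_scale_factor')):
    data = {key: results[key] for key in keys}
    data['img_metas'] = {k: results[k] for k in meta_keys if k in results}
    return data

sample = dict(points=[1, 2, 3], flip=False, pcd_scale_factor=1.0, tmp='drop')
print(collect(sample, keys=['points']))
# {'points': [1, 2, 3], 'img_metas': {'flip': False, 'pcd_scale_factor': 1.0}}
```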
### Test time augmentation
......
@@ -28,8 +28,8 @@ def batch_processor(model, data, train_mode):
    losses = model(**data)
    loss, log_vars = parse_losses(losses)

-    if 'img_meta' in data:
-        num_samples = len(data['img_meta'].data)
+    if 'img_metas' in data:
+        num_samples = len(data['img_metas'].data)
    else:
        num_samples = len(data['img'].data)
    outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
......
@@ -8,8 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       PseudoSampler, RandomSampler, SamplingResult)
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
-                         DepthInstance3DBoxes, LiDARInstance3DBoxes)
-from .transforms import (bbox3d2result, bbox3d2roi,
+                         DepthInstance3DBoxes, LiDARInstance3DBoxes,
+                         xywhr2xyxyr)
+from .transforms import (bbox3d2result, bbox3d2roi, bbox3d_mapping_back,
                         box3d_to_corner3d_upright_depth,
                         boxes3d_to_bev_torch_lidar)
@@ -27,5 +28,5 @@ __all__ = [
    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
    'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes',
-    'BaseInstance3DBoxes'
+    'BaseInstance3DBoxes', 'bbox3d_mapping_back', 'xywhr2xyxyr'
]
@@ -3,8 +3,9 @@ from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
+from .utils import xywhr2xyxyr

__all__ = [
    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'DepthInstance3DBoxes'
+    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr'
]
@@ -334,7 +334,10 @@ class BaseInstance3DBoxes(object):
        # use torch.cat (v.s. layers.cat)
        # so the returned boxes never share storage with input
-        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
+        cat_boxes = cls(
+            torch.cat([b.tensor for b in boxes_list], dim=0),
+            box_dim=boxes_list[0].tensor.shape[1],
+            with_yaw=boxes_list[0].with_yaw)
        return cat_boxes

    def to(self, device):
......
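Why the `cat()` change above matters: rebuilding via `cls(tensor)` alone would silently reset `box_dim`/`with_yaw` to their defaults, which breaks for example 9-dim nuScenes boxes that carry two extra velocity components. A sketch with a stand-in class (not the real `BaseInstance3DBoxes`):

```python
import torch

# Stand-in box class: default box_dim=7 means cls(tensor) alone would fail
# (or mislabel) 9-dim boxes; forwarding the metadata preserves it.
class Boxes:
    def __init__(self, tensor, box_dim=7, with_yaw=True):
        assert tensor.shape[-1] == box_dim
        self.tensor, self.box_dim, self.with_yaw = tensor, box_dim, with_yaw

a = Boxes(torch.zeros(2, 9), box_dim=9)
b = Boxes(torch.zeros(3, 9), box_dim=9)
merged = Boxes(
    torch.cat([a.tensor, b.tensor], dim=0),
    box_dim=a.box_dim,   # preserved, as in the fixed cat()
    with_yaw=a.with_yaw)
print(merged.tensor.shape, merged.box_dim)  # torch.Size([5, 9]) 9
```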
import torch

+def bbox3d_mapping_back(bboxes, scale_factor, flip):
+    """Map bboxes from the testing scale back to the original scale."""
+    new_bboxes = bboxes.clone()
+    if flip:
+        new_bboxes.flip()
+    new_bboxes.scale(1 / scale_factor)
+    return new_bboxes

def transform_lidar_to_cam(boxes_lidar):
    """
    Only transform format, not exactly in camera coords
......
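A plain-tensor illustration of the inverse mapping added above, assuming 7-DoF boxes (x, y, z, w, l, h, yaw) and the flip/scale conventions of the box classes (mirror along y, multiplicative scale); these conventions are assumptions for the sketch, not taken from the class implementations:

```python
import torch

box = torch.tensor([[10.0, 2.0, -1.0, 1.6, 3.9, 1.56, 0.3]])
scale = 1.05

# Forward augmentation applied to the cloud before inference:
aug = box.clone()
aug[:, 1] = -aug[:, 1]   # flip: mirror along y ...
aug[:, 6] = -aug[:, 6]   # ... which also negates the yaw angle
aug[:, :6] *= scale      # scale centers and sizes

# Inverse mapping, mirroring bbox3d_mapping_back: flip back, then 1/scale.
rec = aug.clone()
rec[:, 1] = -rec[:, 1]
rec[:, 6] = -rec[:, 6]
rec[:, :6] /= scale
print(torch.allclose(rec, box))  # True – we recover the original box
```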
@@ -2,9 +2,10 @@ from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                        merge_aug_proposals, merge_aug_scores,
                                        multiclass_nms)
from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms
+from .merge_augs import merge_aug_bboxes_3d

__all__ = [
    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
-    'aligned_3d_nms'
+    'aligned_3d_nms', 'merge_aug_bboxes_3d'
]
@@ -61,7 +61,7 @@ def box3d_multiclass_nms(mlvl_bboxes,
    else:
        bboxes = mlvl_scores.new_zeros((0, mlvl_bboxes.size(-1)))
        scores = mlvl_scores.new_zeros((0, ))
-        labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
+        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)
        dir_scores = mlvl_scores.new_zeros((0, ))
    return bboxes, scores, labels, dir_scores
......
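The `dtype=torch.long` fix above matters because label tensors are used as indices downstream, and PyTorch rejects float tensors as indices even when they are empty:

```python
import torch

scores = torch.tensor([0.9, 0.8])
good = torch.zeros((0, ), dtype=torch.long)
print(scores[good])        # tensor([]) – an empty long index works fine
bad = torch.zeros((0, ))   # float32, the shape/dtype of the old return value
try:
    scores[bad]
except IndexError as e:
    print('float index rejected:', e)
```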
import torch
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr
def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
"""Merge augmented detection 3D bboxes and scores.

    Args:
        aug_results (list[dict]): The dict of detection results.
            The dict contains the following keys:

            - boxes_3d (:obj:`BaseInstance3DBoxes`): Detection bbox.
            - scores_3d (torch.Tensor): Detection scores.
            - labels_3d (torch.Tensor): Predicted box labels.
        img_metas (list[dict]): Meta information of each sample.
        test_cfg (dict): Test config.

    Returns:
        dict: Bbox results in cpu mode, containing the merged results.

            - boxes_3d (:obj:`BaseInstance3DBoxes`): Merged detection bbox.
            - scores_3d (torch.Tensor): Merged detection scores.
            - labels_3d (torch.Tensor): Merged predicted box labels.
    """
assert len(aug_results) == len(img_metas), \
'"aug_results" should have the same length as "img_metas", got len(' \
f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'
recovered_bboxes = []
recovered_scores = []
recovered_labels = []
for bboxes, img_info in zip(aug_results, img_metas):
scale_factor = img_info[0]['pcd_scale_factor']
flip = img_info[0]['pcd_flip']
recovered_scores.append(bboxes['scores_3d'])
recovered_labels.append(bboxes['labels_3d'])
bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor, flip)
recovered_bboxes.append(bboxes)
aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)
aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
aug_scores = torch.cat(recovered_scores, dim=0)
aug_labels = torch.cat(recovered_labels, dim=0)
    # TODO: use a more elegant way to deal with nms
if test_cfg.use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
merged_bboxes = []
merged_scores = []
merged_labels = []
    # Apply multi-class nms when merging bboxes
if len(aug_labels) == 0:
return bbox3d2result(aug_bboxes, aug_scores, aug_labels)
for class_id in range(torch.max(aug_labels).item() + 1):
class_inds = (aug_labels == class_id)
bboxes_i = aug_bboxes[class_inds]
bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]
scores_i = aug_scores[class_inds]
labels_i = aug_labels[class_inds]
if len(bboxes_nms_i) == 0:
continue
selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)
merged_bboxes.append(bboxes_i[selected, :])
merged_scores.append(scores_i[selected])
merged_labels.append(labels_i[selected])
merged_bboxes = merged_bboxes[0].cat(merged_bboxes)
merged_scores = torch.cat(merged_scores, dim=0)
merged_labels = torch.cat(merged_labels, dim=0)
_, order = merged_scores.sort(0, descending=True)
num = min(test_cfg.max_num, len(aug_bboxes))
order = order[:num]
merged_bboxes = merged_bboxes[order]
merged_scores = merged_scores[order]
merged_labels = merged_labels[order]
return bbox3d2result(merged_bboxes, merged_scores, merged_labels)
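For readers following the class-wise merge loop above, here is the same control flow reduced to plain tensors, with a toy "NMS" that keeps only the highest-scoring box per class (the real code runs rotated NMS on BEV boxes instead); everything here is an illustrative sketch, not the repository's API:

```python
import torch

def merge_scores(scores_list, labels_list, max_num=10):
    # Concatenate detections from all augmentations, then merge per class.
    scores, labels = torch.cat(scores_list), torch.cat(labels_list)
    merged_s, merged_l = [], []
    for cls in range(int(labels.max().item()) + 1):
        m = labels == cls
        if not m.any():
            continue
        best = scores[m].argmax()  # toy NMS: keep the single best box
        merged_s.append(scores[m][best:best + 1])
        merged_l.append(labels[m][best:best + 1])
    scores, labels = torch.cat(merged_s), torch.cat(merged_l)
    order = scores.argsort(descending=True)[:max_num]  # sort and cap
    return scores[order], labels[order]

s = [torch.tensor([0.9, 0.3]), torch.tensor([0.7])]  # two augmentations
l = [torch.tensor([0, 1]), torch.tensor([0])]
print(merge_scores(s, l))  # (tensor([0.9000, 0.3000]), tensor([0, 1]))
```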