Commit f2b01720 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 08c8adb6 47850641
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
max_voxels=(30000, 40000), # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
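# Note (assumption): each z value above is the per-class mean bottom-center
# height of ground-truth boxes on nuScenes, pairing one anchor range with
# one anchor size below.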
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(),
classes=class_names,
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
))
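# Note (assumption on sampler semantics): sample_groups asks the GT-database
# sampler to top up each training scene to roughly N instances per class.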
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
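# Usage sketch (illustrative, not part of this commit): a config like the one
# above is typically consumed via mmcv's Config and the mmdet3d builders;
# exact entry points may differ across versions.
#   from mmcv import Config
#   from mmdet3d.models import build_detector
#   cfg = Config.fromfile('configs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py')
#   model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)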
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000), # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256],
),
bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
classes=class_names,
sample_groups=dict(Car=15),
)
file_client_args = dict(
backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
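# Worked example (derived from the values above): with the cyclic policy,
# target_ratio=(10, 1e-4) means the LR ramps from 0.0018 up to
# 0.0018 * 10 = 0.018 over the first 40% of iterations (step_ratio_up=0.4),
# then anneals down to 0.0018 * 1e-4 = 1.8e-7 over the remaining 60%.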
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
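# Launch sketch (illustrative, config path assumed): distributed training via
# the launcher shipped with mmdetection-style repos, e.g.
#   ./tools/dist_train.sh configs/sec_secfpn_80e.py 8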
......@@ -28,8 +28,8 @@ model = dict(
),
pts_voxel_encoder=dict(
type='DynamicVFE',
num_input_features=4,
num_filters=[64, 64],
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
......
......@@ -12,8 +12,8 @@ model = dict(
),
voxel_encoder=dict(
type='DynamicPillarFeatureNet',
num_input_features=4,
num_filters=[64],
in_channels=4,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
......@@ -189,6 +189,7 @@ momentum_config = dict(
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -11,8 +11,7 @@ model = dict(
max_voxels=(-1, -1), # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
type='DynamicSimpleVFE',
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
......@@ -214,6 +213,7 @@ lr_config = dict(
min_lr_ratio=1e-5)
momentum_config = None
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -11,8 +11,7 @@ model = dict(
max_voxels=(-1, -1), # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
type='DynamicSimpleVFE',
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
......@@ -184,6 +183,7 @@ momentum_config = dict(
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -2,7 +2,7 @@
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('open-mmlab://resnet50_caffe_bgr'),
pretrained=('open-mmlab://detectron2/resnet50_caffe'),
backbone=dict(
type='ResNet',
depth=50,
......
......@@ -10,11 +10,7 @@ model = dict(
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
......@@ -306,6 +302,7 @@ momentum_config = dict(
cyclic_times=1,
step_ratio_up=0.4)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -10,11 +10,7 @@ model = dict(
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
......@@ -265,6 +261,7 @@ momentum_config = dict(
cyclic_times=1,
step_ratio_up=0.4)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -10,8 +10,8 @@ model = dict(
),
voxel_encoder=dict(
type='PillarFeatureNet',
num_input_features=4,
num_filters=[64],
in_channels=4,
feat_channels=[64],
with_distance=False,
# these two arguments should be consistent with the voxel_generator
voxel_size=[0.16, 0.16, 4],
......@@ -192,6 +192,7 @@ momentum_config = dict(
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=50,
......
......@@ -10,11 +10,7 @@ model = dict(
voxel_size=voxel_size,
max_voxels=(16000, 40000), # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
......@@ -104,9 +100,21 @@ db_sampler = dict(
classes=class_names,
sample_groups=dict(Car=15),
)
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
......@@ -126,7 +134,11 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
......@@ -139,15 +151,18 @@ data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False)),
val=dict(
type=dataset_type,
data_root=data_root,
......@@ -185,6 +200,7 @@ momentum_config = dict(
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
evaluation = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
......@@ -194,7 +210,7 @@ log_config = dict(
])
# yapf:enable
# runtime settings
total_epochs = 80
total_epochs = 40
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
......
......@@ -2,7 +2,7 @@
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('open-mmlab://resnet50_caffe_bgr'),
pretrained=('open-mmlab://detectron2/resnet50_caffe'),
backbone=dict(
type='ResNet',
depth=50,
......@@ -120,13 +120,25 @@ classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
# file_client_args = dict(backend='disk')
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=[(1200, 720), (1920, 1080)],
multiscale_mode='range',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
......@@ -135,10 +147,10 @@ train_pipeline = [
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
......@@ -192,6 +204,6 @@ total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth' # noqa
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://regnetx_3.2gf',
backbone=dict(
type='RegNet',
arch='regnetx_3.2gf',
out_indices=(0, 1, 2, 3),
frozen_stages=1,
base_channels=32,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[96, 192, 432, 1008],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
# The mean and std are those used in PyCls when training RegNets
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
to_rgb=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth' # noqa
resume_from = None
workflow = [('train', 1)]
......@@ -15,8 +15,8 @@ model = dict(
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
......@@ -85,9 +85,7 @@ model = dict(
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
......@@ -138,10 +136,23 @@ db_sampler = dict(
trailer=4,
truck=4,
))
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
......@@ -156,8 +167,15 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
......
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, Runner
from mmcv.runner import DistSamplerSeedHook, Runner, build_optimizer
from mmdet3d.utils import get_root_logger
from mmdet.apis.train import parse_losses
from mmdet.core import (DistEvalHook, DistOptimizerHook, EvalHook,
Fp16OptimizerHook, build_optimizer)
Fp16OptimizerHook)
from mmdet.datasets import build_dataloader, build_dataset
......
......@@ -7,7 +7,8 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from .transforms import (bbox3d2result, bbox3d2roi,
box3d_to_corner3d_upright_depth,
boxes3d_to_bev_torch_lidar)
......@@ -25,5 +26,6 @@ __all__ = [
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
'box3d_to_corner3d_upright_depth'
'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes',
'BaseInstance3DBoxes'
]
......@@ -466,8 +466,8 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
def surface_equ_3d(polygon_surfaces):
# return [a, b, c], d in ax+by+cz+d=0
# polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
1:3, :]
surface_vec = polygon_surfaces[:, :, :2, :] - \
polygon_surfaces[:, :, 1:3, :]
# normal_vec: [..., 3]
normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
# print(normal_vec.shape, points[..., 0, :].shape)
......
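# Sketch (assumption, for illustration only; the elided body is not shown):
# the plane offset d follows from any point lying on each surface, e.g.
#   d = -np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
# so that normal_vec . p + d = 0 holds for points p on each plane.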
from .base_box3d import BaseInstance3DBoxes
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
__all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes'
]
......@@ -10,13 +10,24 @@ from .utils import limit_period, xywhr2xyxyr
class BaseInstance3DBoxes(object):
"""Base class for 3D Boxes
Note:
The box is bottom centered, i.e. the relative position of origin in
the box is [0.5, 0.5, 0].
Args:
tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
box_dim (int): number of the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw).
Default to 7.
with_yaw (bool): Whether the box is with yaw rotation.
If False, the value of yaw will be set to 0 as minmax boxes.
Default to True.
origin (tuple): The relative position of origin in the box.
Default to [0.5, 0.5, 0]. Boxes given in another origin convention
are converted to the [0.5, 0.5, 0] mode on construction.
"""
def __init__(self, tensor, box_dim=7):
def __init__(self, tensor, box_dim=7, with_yaw=True, origin=[0.5, 0.5, 0]):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
......@@ -28,9 +39,22 @@ class BaseInstance3DBoxes(object):
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
self.box_dim = box_dim
if not with_yaw and tensor.shape[-1] == 6:
assert box_dim == 6
fake_rot = tensor.new_zeros(tensor.shape[0], 1)
tensor = torch.cat((tensor, fake_rot), dim=-1)
self.box_dim = box_dim + 1
else:
self.box_dim = box_dim
self.with_yaw = with_yaw
self.tensor = tensor
if origin != [0.5, 0.5, 0]:
dst = self.tensor.new_tensor([0.5, 0.5, 0])
src = self.tensor.new_tensor(origin)
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
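# Worked example (illustrative): with origin=[0.5, 0.5, 0.5] (gravity-center
# input) and a box of z_size 2 centered at z=1, dst - src = [0, 0, -0.5],
# so z shifts by 2 * -0.5 = -1: the stored bottom-center z becomes 0.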
@property
def volume(self):
"""Computes the volume of all the boxes.
......@@ -51,12 +75,21 @@ class BaseInstance3DBoxes(object):
"""
return self.tensor[:, 3:6]
@property
def yaw(self):
"""Obtain the rotation of all the boxes.
Returns:
torch.Tensor: a vector with yaw of each box.
"""
return self.tensor[:, 6]
@property
def height(self):
"""Obtain the height of all the boxes.
Returns:
torch.Tensor: a vector with volume of each box.
torch.Tensor: a vector with height of each box.
"""
return self.tensor[:, 5]
......@@ -135,8 +168,8 @@ class BaseInstance3DBoxes(object):
pass
@abstractmethod
def flip(self):
"""Flip the boxes in horizontal direction
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
"""
pass
......@@ -184,8 +217,26 @@ class BaseInstance3DBoxes(object):
(x_min, y_min, x_max, y_max)
Returns:
a binary vector, indicating whether each box is inside
the reference range.
torch.Tensor: Indicating whether each box is inside
the reference range.
"""
pass
@abstractmethod
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
pass
......@@ -193,8 +244,7 @@ class BaseInstance3DBoxes(object):
"""Scale the box with horizontal and vertical scaling factors
Args:
scale_factors (float):
scale factors to scale the boxes.
scale_factors (float): scale factors to scale the boxes.
"""
self.tensor[:, :6] *= scale_factor
self.tensor[:, 7:] *= scale_factor
......@@ -218,9 +268,8 @@ class BaseInstance3DBoxes(object):
threshold (float): the threshold of minimal sizes
Returns:
Tensor:
a binary vector which represents whether each box is empty
(False) or non-empty (True).
torch.Tensor: a binary vector which represents whether each
box is empty (False) or non-empty (True).
"""
box = self.tensor
size_x = box[..., 3]
......@@ -245,15 +294,19 @@ class BaseInstance3DBoxes(object):
subject to Pytorch's indexing semantics.
Returns:
Boxes: Create a new :class:`Boxes` by indexing.
BaseInstance3DBoxes: Create a new :class:`BaseInstance3DBoxes`
by indexing.
"""
original_type = type(self)
if isinstance(item, int):
return original_type(self.tensor[item].view(1, -1))
return original_type(
self.tensor[item].view(1, -1),
box_dim=self.box_dim,
with_yaw=self.with_yaw)
b = self.tensor[item]
assert b.dim() == 2, \
f'Indexing on Boxes with {item} failed to return a matrix!'
return original_type(b)
return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
def __len__(self):
return self.tensor.shape[0]
......@@ -283,24 +336,30 @@ class BaseInstance3DBoxes(object):
def to(self, device):
original_type = type(self)
return original_type(self.tensor.to(device))
return original_type(
self.tensor.to(device),
box_dim=self.box_dim,
with_yaw=self.with_yaw)
def clone(self):
"""Clone the Boxes.
Returns:
Boxes
BaseInstance3DBoxes: Box object with the same properties as self.
"""
original_type = type(self)
return original_type(self.tensor.clone())
return original_type(
self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)
@property
def device(self):
return self.tensor.device
def __iter__(self):
"""
Yield a box as a Tensor of shape (4,) at a time.
"""Yield a box as a Tensor of shape (4,) at a time.
Returns:
torch.Tensor: a box of shape (4,).
"""
yield from self.tensor
......@@ -387,3 +446,23 @@ class BaseInstance3DBoxes(object):
iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)
return iou3d
def new_box(self, data):
"""Create a new box object with data.
The new box and its tensor have the same properties
as self and self.tensor, respectively.
Args:
data (torch.Tensor | numpy.array | list): Data which the
returned Tensor copies.
Returns:
BaseInstance3DBoxes: A new bbox containing the given data,
with its other properties matching self.
"""
new_tensor = self.tensor.new_tensor(data) \
if not isinstance(data, torch.Tensor) else data.to(self.device)
original_type = type(self)
return original_type(
new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
......@@ -5,6 +5,7 @@ import torch
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
......@@ -61,7 +62,8 @@ class Box3DMode(IntEnum):
"""Convert boxes from `src` mode to `dst` mode.
Args:
box (tuple | list | np.ndarray | torch.Tensor):
box (tuple | list | np.ndarray |
torch.Tensor | BaseInstance3DBoxes):
can be a k-tuple, k-list or an Nxk array/tensor, where k = 7
src (BoxMode): the src Box mode
dst (BoxMode): the target Box mode
......@@ -72,7 +74,7 @@ class Box3DMode(IntEnum):
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor):
(tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes):
The converted box of the same type.
"""
if src == dst:
......@@ -113,6 +115,14 @@ class Box3DMode(IntEnum):
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
else:
raise NotImplementedError(
f'Conversion from Box3DMode {src} to {dst} '
......@@ -141,10 +151,13 @@ class Box3DMode(IntEnum):
target_type = CameraInstance3DBoxes
elif dst == Box3DMode.LIDAR:
target_type = LiDARInstance3DBoxes
elif dst == Box3DMode.DEPTH:
target_type = DepthInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(arr, box_dim=arr.size(-1))
return target_type(
arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)
else:
return arr
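# Usage sketch (assumption): exercising the conversions added above, e.g.
#   lidar_boxes = LiDARInstance3DBoxes(tensor)  # rows: x, y, z, x_size, y_size, z_size, yaw
#   depth_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR,
#                                   Box3DMode.DEPTH)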