"vscode:/vscode.git/clone" did not exist on "ecde075d49286f039deb712439c694b01e38bb6a"
Unverified commit f98bc952, authored by twang and committed by GitHub

[Enhance] SECOND benchmark on Waymo (#166)

* Add SECOND config for Waymo

* Update README.md

* Update README.md

* Fix a minor typo in the KITTI log link
# model settings
# Voxel size for the voxel encoder.
# The voxel size is usually chosen consistently with the point cloud range:
# if the point cloud range is modified, remember to update all related
# keys in the config.
voxel_size = [0.08, 0.08, 0.1]
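# The sparse_shape of the middle encoder below follows from the point cloud
# range and the voxel size (axis order [z, y, x]):
#   z: (4 - (-2)) / 0.1  = 60 grids (+1 extra slice, following the convention
#      used in the other SECOND configs) -> 61
#   y: (51.2 - (-51.2)) / 0.08 = 1280 grids
#   x: (76.8 - (-76.8)) / 0.08 = 1920 grids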
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=10,
point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
voxel_size=voxel_size,
max_voxels=(60000, 60000)),
voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
middle_encoder=dict(
type='SparseEncoder',
in_channels=5,
sparse_shape=[61, 1280, 1920],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=384,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
[-76.8, -51.2, 0, 76.8, 51.2, 0],
[-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 0.91, 1.74], # pedestrian
[0.84, 1.81, 1.77] # cyclist
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
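# One MaxIoUAssigner is configured per class; the list order matches the
# anchor order above (car, pedestrian, cyclist).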
train_cfg = dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1)
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)
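# Test-time settings: rotated NMS in bird's-eye view (nms_thr), keeping at
# most `max_num` boxes with scores above `score_thr`.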
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500)
We implement SECOND and provide the results and checkpoints on the KITTI dataset.
| Backbone |Class| Lr schd | Mem (GB) | Inf time (fps) | mAP |Download |
| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |
| [SECFPN](./hv_second_secfpn_6x8_80e_kitti-3d-car.py)| Car |cyclic 80e|5.4||79.07|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-car/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-car/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238.log.json)|
| [SECFPN](./hv_second_secfpn_6x8_80e_kitti-3d-3class.py)| 3 Class |cyclic 80e|5.4||64.41|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-3class/hv_second_secfpn_6x8_80e_kitti-3d-3class_20200620_230238-9208083a.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_6x8_80e_kitti-3d-3class/hv_second_secfpn_6x8_80e_kitti-3d-3class_20200620_230238.log.json)|
### Waymo
| Backbone | Load Interval | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download |
| :-------: | :-----------: |:-----:| :------:| :------: | :------------: | :----: | :-----: | :-----: | :-----: | :------: |
| [SECFPN](./hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py)|5|3 Class|2x|8.12||58.0|53.5|51.5|48.3|[model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20201003_105141-4cc0bb58.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20201003_105141.log.json)|
| above @ Car|||2x|8.12||58.5|57.9|51.6|51.1| |
| above @ Pedestrian|||2x|8.12||63.9|54.9|56.0|48.0| |
| above @ Cyclist|||2x|8.12||48.6|47.6|46.8|45.8| |
Note: See more details about metrics and the data split on Waymo [HERE](https://github.com/open-mmlab/mmdetection3d/tree/master/configs/pointpillars). For implementation details, we basically follow the original settings. All of these results are achieved without bells and whistles, e.g., ensembling, multi-scale training, or test-time augmentation.
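As a quick sanity check after preparing the Waymo data in `kitti_format`, the config below can be loaded and its dataset and model built directly from Python. This is only a minimal sketch assuming the v0.x-style mmdetection3d APIs (`mmcv.Config`, `mmdet3d.datasets.build_dataset`, `mmdet3d.models.build_detector`) and the default config path; adjust paths and imports to your installation.

```python
from mmcv import Config
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_detector

# Load the Waymo 3-class SECOND config (shown below).
cfg = Config.fromfile(
    'configs/second/hv_second_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py')

# Build the training dataset; this needs the converted Waymo infos and
# point clouds under data/waymo/kitti_format/.
dataset = build_dataset(cfg.data.train)
print(type(dataset).__name__, len(dataset))

# Build the detector with the train/test settings from the base model config.
model = build_detector(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
print(type(model).__name__)  # VoxelNet
```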
_base_ = [
'../_base_/models/hv_second_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-3class.py',
'../_base_/schedules/schedule_2x.py',
'../_base_/default_runtime.py',
]
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]
input_modality = dict(use_lidar=True, use_camera=False)
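# Ground-truth database sampler: pastes sampled GT objects from
# waymo_dbinfos_train.pkl into each training scene (consumed by the
# ObjectSample transform in the train pipeline below).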
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=6, Pedestrian=8, Cyclist=10),
points_loader=dict(
type='LoadPointsFromFile', load_dim=5, use_dim=[0, 1, 2, 3, 4]))
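# Raw Waymo points (kitti_format) are stored with 6 dims per point; only the
# first 5 (x, y, z, intensity, elongation) are used, matching num_features=5
# in the voxel encoder. The GT-database points above are saved with 5 dims.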
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=6, use_dim=5),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=6, use_dim=5),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
# and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))