Commit bdb3c14d authored by zhangwenwei's avatar zhangwenwei
Browse files

Refactor pipeline

parent db986fa4
...@@ -77,7 +77,6 @@ model = dict( ...@@ -77,7 +77,6 @@ model = dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78], [0, -40.0, -1.78, 70.4, 40.0, -1.78],
], ],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=False), reshape_out=False),
...@@ -148,7 +147,7 @@ input_modality = dict( ...@@ -148,7 +147,7 @@ input_modality = dict(
) )
db_sampler = dict( db_sampler = dict(
type='MMDataBaseSampler', type='MMDataBaseSampler',
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_mm_dbinfos_train.pkl', info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
rate=1.0, rate=1.0,
object_rot_range=[0.0, 0.0], object_rot_range=[0.0, 0.0],
...@@ -169,7 +168,7 @@ db_sampler = dict( ...@@ -169,7 +168,7 @@ db_sampler = dict(
Pedestrian=6, Pedestrian=6,
Cyclist=6, Cyclist=6,
), ),
) classes=class_names)
train_pipeline = [ train_pipeline = [
dict( dict(
type='Resize', type='Resize',
...@@ -223,33 +222,33 @@ data = dict( ...@@ -223,33 +222,33 @@ data = dict(
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True,
test_mode=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True,
test_mode=True)) test_mode=True))
# Training settings # Training settings
optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -45,7 +45,6 @@ model = dict( ...@@ -45,7 +45,6 @@ model = dict(
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=True), reshape_out=True),
...@@ -106,10 +105,12 @@ db_sampler = dict( ...@@ -106,10 +105,12 @@ db_sampler = dict(
filter_by_difficulty=[-1], filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5), filter_by_min_points=dict(Car=5),
), ),
sample_groups=dict(Car=15), classes=class_names,
) sample_groups=dict(Car=15))
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -129,12 +130,13 @@ train_pipeline = [ ...@@ -129,12 +130,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']), dict(type='Collect3D', keys=['points']),
] ]
data = dict( data = dict(
...@@ -142,32 +144,34 @@ data = dict( ...@@ -142,32 +144,34 @@ data = dict(
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.001 # max learning rate lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -46,7 +46,6 @@ model = dict( ...@@ -46,7 +46,6 @@ model = dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78], [0, -40.0, -1.78, 70.4, 40.0, -1.78],
], ],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=False), reshape_out=False),
...@@ -116,7 +115,7 @@ input_modality = dict( ...@@ -116,7 +115,7 @@ input_modality = dict(
use_camera=True, use_camera=True,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False, use_road_plane=False,
...@@ -134,8 +133,10 @@ db_sampler = dict( ...@@ -134,8 +133,10 @@ db_sampler = dict(
Pedestrian=6, Pedestrian=6,
Cyclist=6, Cyclist=6,
), ),
) classes=class_names)
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -156,12 +157,13 @@ train_pipeline = [ ...@@ -156,12 +157,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']), dict(type='Collect3D', keys=['points']),
] ]
data = dict( data = dict(
...@@ -169,32 +171,34 @@ data = dict( ...@@ -169,32 +171,34 @@ data = dict(
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.003 # max learning rate lr = 0.003 # max learning rate
optimizer = dict( optimizer = dict(
......
...@@ -42,7 +42,6 @@ model = dict( ...@@ -42,7 +42,6 @@ model = dict(
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=True), reshape_out=True),
...@@ -94,7 +93,7 @@ input_modality = dict( ...@@ -94,7 +93,7 @@ input_modality = dict(
use_camera=True, use_camera=True,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False, use_road_plane=False,
...@@ -104,8 +103,10 @@ db_sampler = dict( ...@@ -104,8 +103,10 @@ db_sampler = dict(
filter_by_min_points=dict(Car=5), filter_by_min_points=dict(Car=5),
), ),
sample_groups=dict(Car=15), sample_groups=dict(Car=15),
) classes=class_names)
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -125,12 +126,13 @@ train_pipeline = [ ...@@ -125,12 +126,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']), dict(type='Collect3D', keys=['points']),
] ]
data = dict( data = dict(
...@@ -138,32 +140,34 @@ data = dict( ...@@ -138,32 +140,34 @@ data = dict(
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.0018 # max learning rate lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -42,7 +42,6 @@ model = dict( ...@@ -42,7 +42,6 @@ model = dict(
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6], ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6], [0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]], [0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=False), reshape_out=False),
...@@ -149,6 +148,7 @@ train_cfg = dict( ...@@ -149,6 +148,7 @@ train_cfg = dict(
rpn_proposal=dict( rpn_proposal=dict(
nms_pre=9000, nms_pre=9000,
nms_post=512, nms_post=512,
max_num=512,
nms_thr=0.8, nms_thr=0.8,
score_thr=0, score_thr=0,
use_rotate_nms=False), use_rotate_nms=False),
...@@ -191,6 +191,7 @@ test_cfg = dict( ...@@ -191,6 +191,7 @@ test_cfg = dict(
rpn=dict( rpn=dict(
nms_pre=1024, nms_pre=1024,
nms_post=100, nms_post=100,
max_num=100,
nms_thr=0.7, nms_thr=0.7,
score_thr=0, score_thr=0,
use_rotate_nms=True), use_rotate_nms=True),
...@@ -208,10 +209,9 @@ input_modality = dict( ...@@ -208,10 +209,9 @@ input_modality = dict(
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False) use_camera=False)
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0], object_rot_range=[0.0, 0.0],
prepare=dict( prepare=dict(
filter_by_difficulty=[-1], filter_by_difficulty=[-1],
...@@ -220,12 +220,15 @@ db_sampler = dict( ...@@ -220,12 +220,15 @@ db_sampler = dict(
Pedestrian=10, Pedestrian=10,
Cyclist=10, Cyclist=10,
)), )),
classes=class_names,
sample_groups=dict( sample_groups=dict(
Car=12, Car=12,
Pedestrian=6, Pedestrian=6,
Cyclist=6, Cyclist=6,
)) ))
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -240,17 +243,19 @@ train_pipeline = [ ...@@ -240,17 +243,19 @@ train_pipeline = [
scaling_uniform_noise=[0.95, 1.05]), scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']) dict(type='Collect3D', keys=['points'])
] ]
data = dict( data = dict(
...@@ -258,32 +263,34 @@ data = dict( ...@@ -258,32 +263,34 @@ data = dict(
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.001 # max learning rate lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -40,7 +40,6 @@ model = dict( ...@@ -40,7 +40,6 @@ model = dict(
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=False), reshape_out=False),
...@@ -176,17 +175,19 @@ input_modality = dict( ...@@ -176,17 +175,19 @@ input_modality = dict(
use_lidar_intensity=True, use_lidar_intensity=True,
use_camera=False) use_camera=False)
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0], object_rot_range=[0.0, 0.0],
prepare=dict( prepare=dict(
filter_by_difficulty=[-1], filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5), filter_by_min_points=dict(Car=5),
), ),
classes=class_names,
sample_groups=dict(Car=15)) sample_groups=dict(Car=15))
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -201,17 +202,19 @@ train_pipeline = [ ...@@ -201,17 +202,19 @@ train_pipeline = [
scaling_uniform_noise=[0.95, 1.05]), scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'), dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names), dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']) dict(type='Collect3D', keys=['points'])
] ]
data = dict( data = dict(
...@@ -219,32 +222,34 @@ data = dict( ...@@ -219,32 +222,34 @@ data = dict(
workers_per_gpu=2, workers_per_gpu=2,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.001 # max learning rate lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -44,7 +44,6 @@ model = dict( ...@@ -44,7 +44,6 @@ model = dict(
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=True), reshape_out=True),
...@@ -96,7 +95,7 @@ input_modality = dict( ...@@ -96,7 +95,7 @@ input_modality = dict(
use_camera=False, use_camera=False,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False, use_road_plane=False,
...@@ -106,9 +105,11 @@ db_sampler = dict( ...@@ -106,9 +105,11 @@ db_sampler = dict(
filter_by_min_points=dict(Car=5), filter_by_min_points=dict(Car=5),
), ),
sample_groups=dict(Car=15), sample_groups=dict(Car=15),
) classes=class_names)
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -128,12 +129,13 @@ train_pipeline = [ ...@@ -128,12 +129,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']), dict(type='Collect3D', keys=['points']),
] ]
data = dict( data = dict(
...@@ -141,32 +143,34 @@ data = dict( ...@@ -141,32 +143,34 @@ data = dict(
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.001 # max learning rate lr = 0.001 # max learning rate
optimizer = dict( optimizer = dict(
......
...@@ -42,7 +42,6 @@ model = dict( ...@@ -42,7 +42,6 @@ model = dict(
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=True), reshape_out=True),
...@@ -94,7 +93,7 @@ input_modality = dict( ...@@ -94,7 +93,7 @@ input_modality = dict(
use_camera=False, use_camera=False,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl', info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False, use_road_plane=False,
...@@ -103,9 +102,12 @@ db_sampler = dict( ...@@ -103,9 +102,12 @@ db_sampler = dict(
filter_by_difficulty=[-1], filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5), filter_by_min_points=dict(Car=5),
), ),
classes=class_names,
sample_groups=dict(Car=15), sample_groups=dict(Car=15),
) )
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler), dict(type='ObjectSample', db_sampler=db_sampler),
dict( dict(
type='ObjectNoise', type='ObjectNoise',
...@@ -125,12 +127,13 @@ train_pipeline = [ ...@@ -125,12 +127,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict( dict(
type='DefaultFormatBundle3D', type='DefaultFormatBundle3D',
class_names=class_names, class_names=class_names,
with_label=False), with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']), dict(type='Collect3D', keys=['points']),
] ]
data = dict( data = dict(
...@@ -138,32 +141,34 @@ data = dict( ...@@ -138,32 +141,34 @@ data = dict(
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl', ann_file=data_root + 'kitti_infos_train.pkl',
split='training', split='training',
training=True, pts_prefix='velodyne_reduced',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='training', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl', ann_file=data_root + 'kitti_infos_val.pkl',
split='testing', split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True)) test_mode=True))
# optimizer # optimizer
lr = 0.0018 # max learning rate lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
...@@ -60,7 +60,6 @@ model = dict( ...@@ -60,7 +60,6 @@ model = dict(
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986], [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
], ],
strides=[2],
sizes=[ sizes=[
[1.95017717, 4.60718145, 1.72270761], # car [1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck [2.4560939, 6.73778078, 2.73004906], # truck
...@@ -128,20 +127,23 @@ input_modality = dict( ...@@ -128,20 +127,23 @@ input_modality = dict(
use_camera=False, use_camera=False,
) )
db_sampler = dict( db_sampler = dict(
root_path=data_root, data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl', info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0, rate=1.0,
use_road_plane=False, use_road_plane=False,
object_rot_range=[0.0, 0.0], object_rot_range=[0.0, 0.0],
prepare=dict(), prepare=dict(),
classes=class_names,
sample_groups=dict( sample_groups=dict(
bus=4, bus=4,
trailer=4, trailer=4,
truck=4, truck=4,
), ))
)
train_pipeline = [ train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict( dict(
type='GlobalRotScale', type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925], rot_uniform_noise=[-0.3925, 0.3925],
...@@ -155,6 +157,8 @@ train_pipeline = [ ...@@ -155,6 +157,8 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']), dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
] ]
test_pipeline = [ test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0), dict(type='RandomFlip3D', flip_ratio=0),
dict( dict(
...@@ -169,28 +173,28 @@ data = dict( ...@@ -169,28 +173,28 @@ data = dict(
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl', ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline, pipeline=train_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=False),
val=dict( val=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl', ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=True), test_mode=True),
test=dict( test=dict(
type=dataset_type, type=dataset_type,
root_path=data_root, data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl', ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline, pipeline=test_pipeline,
modality=input_modality, modality=input_modality,
class_names=class_names, classes=class_names,
with_label=False)) test_mode=True))
# optimizer # optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND # max_norm=10 is better for SECOND
......
...@@ -19,7 +19,7 @@ class Anchor3DRangeGenerator(object): ...@@ -19,7 +19,7 @@ class Anchor3DRangeGenerator(object):
The ranges are the same across different feature levels. But may The ranges are the same across different feature levels. But may
vary for different anchor sizes if size_per_range is True. vary for different anchor sizes if size_per_range is True.
sizes (list[list[float]]): 3D sizes of anchors. sizes (list[list[float]]): 3D sizes of anchors.
strides (list[int]): Strides of anchors in different feature levels. scales (list[int]): Scales of anchors in different feature levels.
rotations (list(float)): Rotations of anchors in a feature grid. rotations (list(float)): Rotations of anchors in a feature grid.
custom_values (tuple(float)): Customized values of that anchor. For custom_values (tuple(float)): Customized values of that anchor. For
example, in nuScenes the anchors have velocities. example, in nuScenes the anchors have velocities.
...@@ -32,7 +32,7 @@ class Anchor3DRangeGenerator(object): ...@@ -32,7 +32,7 @@ class Anchor3DRangeGenerator(object):
def __init__(self, def __init__(self,
ranges, ranges,
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
strides=[2], scales=[1],
rotations=[0, 1.5707963], rotations=[0, 1.5707963],
custom_values=(), custom_values=(),
reshape_out=True, reshape_out=True,
...@@ -46,10 +46,10 @@ class Anchor3DRangeGenerator(object): ...@@ -46,10 +46,10 @@ class Anchor3DRangeGenerator(object):
else: else:
assert len(ranges) == 1 assert len(ranges) == 1
assert mmcv.is_list_of(sizes, list) assert mmcv.is_list_of(sizes, list)
assert isinstance(strides, list) assert isinstance(scales, list)
self.sizes = sizes self.sizes = sizes
self.strides = strides self.scales = scales
self.ranges = ranges self.ranges = ranges
self.rotations = rotations self.rotations = rotations
self.custom_values = custom_values self.custom_values = custom_values
...@@ -60,7 +60,7 @@ class Anchor3DRangeGenerator(object): ...@@ -60,7 +60,7 @@ class Anchor3DRangeGenerator(object):
def __repr__(self): def __repr__(self):
s = self.__class__.__name__ + '(' s = self.__class__.__name__ + '('
s += f'anchor_range={self.ranges},\n' s += f'anchor_range={self.ranges},\n'
s += f'strides={self.strides},\n' s += f'scales={self.scales},\n'
s += f'sizes={self.sizes},\n' s += f'sizes={self.sizes},\n'
s += f'rotations={self.rotations},\n' s += f'rotations={self.rotations},\n'
s += f'reshape_out={self.reshape_out},\n' s += f'reshape_out={self.reshape_out},\n'
...@@ -75,7 +75,7 @@ class Anchor3DRangeGenerator(object): ...@@ -75,7 +75,7 @@ class Anchor3DRangeGenerator(object):
@property @property
def num_levels(self): def num_levels(self):
return len(self.strides) return len(self.scales)
def grid_anchors(self, featmap_sizes, device='cuda'): def grid_anchors(self, featmap_sizes, device='cuda'):
"""Generate grid anchors in multiple feature levels """Generate grid anchors in multiple feature levels
...@@ -96,13 +96,13 @@ class Anchor3DRangeGenerator(object): ...@@ -96,13 +96,13 @@ class Anchor3DRangeGenerator(object):
multi_level_anchors = [] multi_level_anchors = []
for i in range(self.num_levels): for i in range(self.num_levels):
anchors = self.single_level_grid_anchors( anchors = self.single_level_grid_anchors(
featmap_sizes[i], self.strides[i], device=device) featmap_sizes[i], self.scales[i], device=device)
if self.reshape_out: if self.reshape_out:
anchors = anchors.reshape(-1, anchors.size(-1)) anchors = anchors.reshape(-1, anchors.size(-1))
multi_level_anchors.append(anchors) multi_level_anchors.append(anchors)
return multi_level_anchors return multi_level_anchors
def single_level_grid_anchors(self, featmap_size, stride, device='cuda'): def single_level_grid_anchors(self, featmap_size, scale, device='cuda'):
# We reimplement the anchor generator using torch in cuda # We reimplement the anchor generator using torch in cuda
# torch: 0.6975 s for 1000 times # torch: 0.6975 s for 1000 times
# numpy: 4.3345 s for 1000 times # numpy: 4.3345 s for 1000 times
...@@ -111,7 +111,7 @@ class Anchor3DRangeGenerator(object): ...@@ -111,7 +111,7 @@ class Anchor3DRangeGenerator(object):
return self.anchors_single_range( return self.anchors_single_range(
featmap_size, featmap_size,
self.ranges[0], self.ranges[0],
stride, scale,
self.sizes, self.sizes,
self.rotations, self.rotations,
device=device) device=device)
...@@ -122,7 +122,7 @@ class Anchor3DRangeGenerator(object): ...@@ -122,7 +122,7 @@ class Anchor3DRangeGenerator(object):
self.anchors_single_range( self.anchors_single_range(
featmap_size, featmap_size,
anchor_range, anchor_range,
stride, scale,
anchor_size, anchor_size,
self.rotations, self.rotations,
device=device)) device=device))
...@@ -132,7 +132,7 @@ class Anchor3DRangeGenerator(object): ...@@ -132,7 +132,7 @@ class Anchor3DRangeGenerator(object):
def anchors_single_range(self, def anchors_single_range(self,
feature_size, feature_size,
anchor_range, anchor_range,
stride=1, scale=1,
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963], rotations=[0, 1.5707963],
device='cuda'): device='cuda'):
...@@ -154,7 +154,7 @@ class Anchor3DRangeGenerator(object): ...@@ -154,7 +154,7 @@ class Anchor3DRangeGenerator(object):
anchor_range[1], anchor_range[4], feature_size[1], device=device) anchor_range[1], anchor_range[4], feature_size[1], device=device)
x_centers = torch.linspace( x_centers = torch.linspace(
anchor_range[0], anchor_range[3], feature_size[2], device=device) anchor_range[0], anchor_range[3], feature_size[2], device=device)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale
rotations = torch.tensor(rotations, device=device) rotations = torch.tensor(rotations, device=device)
# torch.meshgrid default behavior is 'id', np's default is 'xy' # torch.meshgrid default behavior is 'id', np's default is 'xy'
...@@ -217,7 +217,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator): ...@@ -217,7 +217,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
def anchors_single_range(self, def anchors_single_range(self,
feature_size, feature_size,
anchor_range, anchor_range,
stride, scale,
sizes=[[1.6, 3.9, 1.56]], sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963], rotations=[0, 1.5707963],
device='cuda'): device='cuda'):
...@@ -248,7 +248,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator): ...@@ -248,7 +248,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
anchor_range[3], anchor_range[3],
feature_size[2] + 1, feature_size[2] + 1,
device=device) device=device)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * stride sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale
rotations = torch.tensor(rotations, device=device) rotations = torch.tensor(rotations, device=device)
# shift the anchor center # shift the anchor center
......
from mmdet.datasets.builder import DATASETS from mmdet.datasets.builder import DATASETS
from .builder import build_dataset from .builder import build_dataset
from .custom_3d import Custom3DDataset
from .dataset_wrappers import RepeatFactorDataset from .dataset_wrappers import RepeatFactorDataset
from .indoor_base_dataset import IndoorBaseDataset
from .kitti2d_dataset import Kitti2DDataset from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .nuscenes_dataset import NuScenesDataset from .nuscenes_dataset import NuScenesDataset
from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale, from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
IndoorPointSample, IndoorPointsColorJitter, IndoorPointSample, IndoorPointsColorJitter,
IndoorPointsColorNormalize, ObjectNoise, IndoorPointsColorNormalize, LoadAnnotations3D,
ObjectRangeFilter, ObjectSample, PointShuffle, LoadPointsFromFile, ObjectNoise, ObjectRangeFilter,
PointsRangeFilter, RandomFlip3D) ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
from .scannet_dataset import ScanNetDataset from .scannet_dataset import ScanNetDataset
from .sunrgbd_dataset import SUNRGBDDataset from .sunrgbd_dataset import SUNRGBDDataset
...@@ -21,8 +21,7 @@ __all__ = [ ...@@ -21,8 +21,7 @@ __all__ = [
'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample', 'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize', 'LoadPointsFromFile', 'IndoorPointsColorNormalize', 'IndoorPointSample',
'IndoorPointSample', 'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter', 'LoadAnnotations3D', 'IndoorPointsColorJitter', 'IndoorGlobalRotScale',
'IndoorGlobalRotScale', 'IndoorFlipData', 'SUNRGBDDataset', 'IndoorFlipData', 'SUNRGBDDataset', 'ScanNetDataset', 'Custom3DDataset'
'ScanNetDataset', 'IndoorBaseDataset'
] ]
import mmcv import mmcv
import numpy as np import numpy as np
import torch.utils.data as torch_data from torch.utils.data import Dataset
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from .pipelines import Compose from .pipelines import Compose
@DATASETS.register_module() @DATASETS.register_module()
class IndoorBaseDataset(torch_data.Dataset): class Custom3DDataset(Dataset):
def __init__(self, def __init__(self,
root_path, data_root,
ann_file, ann_file,
pipeline=None, pipeline=None,
classes=None, classes=None,
test_mode=False, modality=None,
with_label=True): test_mode=False):
super().__init__() super().__init__()
self.root_path = root_path self.data_root = data_root
self.CLASSES = self.get_classes(classes) self.ann_file = ann_file
self.test_mode = test_mode self.test_mode = test_mode
self.label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)} self.modality = modality
mmcv.check_file_exist(ann_file)
self.data_infos = mmcv.load(ann_file) self.CLASSES = self.get_classes(classes)
self.data_infos = self.load_annotations(self.ann_file)
if pipeline is not None: if pipeline is not None:
self.pipeline = Compose(pipeline) self.pipeline = Compose(pipeline)
self.with_label = with_label
def __len__(self): # set group flag for the sampler
return len(self.data_infos) if not self.test_mode:
self._set_group_flag()
def load_annotations(self, ann_file):
return mmcv.load(ann_file)
def get_data_info(self, index): def get_data_info(self, index):
info = self.data_infos[index] info = self.data_infos[index]
...@@ -38,38 +42,31 @@ class IndoorBaseDataset(torch_data.Dataset): ...@@ -38,38 +42,31 @@ class IndoorBaseDataset(torch_data.Dataset):
input_dict = dict(pts_filename=pts_filename) input_dict = dict(pts_filename=pts_filename)
if self.with_label: if not self.test_mode:
annos = self._get_ann_info(index, sample_idx) annos = self.get_ann_info(index, sample_idx)
input_dict.update(annos) input_dict['ann_info'] = annos
if len(input_dict['gt_bboxes_3d']) == 0: if len(annos['gt_bboxes_3d']) == 0:
return None return None
return input_dict return input_dict
def _rand_another(self, idx): def pre_pipeline(self, results):
pool = np.where(self.flag == self.flag[idx])[0] results['bbox3d_fields'] = []
return np.random.choice(pool) results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index): def prepare_train_data(self, index):
input_dict = self.get_data_info(index) input_dict = self.get_data_info(index)
if input_dict is None: if input_dict is None:
return None return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict) example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0: if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None return None
return example return example
def prepare_test_data(self, index): def prepare_test_data(self, index):
input_dict = self.get_data_info(index) input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict) example = self.pipeline(input_dict)
return example return example
...@@ -83,6 +80,9 @@ class IndoorBaseDataset(torch_data.Dataset): ...@@ -83,6 +80,9 @@ class IndoorBaseDataset(torch_data.Dataset):
string, take it as a file name. The file contains the name of string, take it as a file name. The file contains the name of
classes where each line contains one class name. If classes is classes where each line contains one class name. If classes is
a tuple or list, override the CLASSES defined by the dataset. a tuple or list, override the CLASSES defined by the dataset.
Return:
list[str]: return the list of class names
""" """
if classes is None: if classes is None:
return cls.CLASSES return cls.CLASSES
...@@ -115,8 +115,7 @@ class IndoorBaseDataset(torch_data.Dataset): ...@@ -115,8 +115,7 @@ class IndoorBaseDataset(torch_data.Dataset):
label_preds = pred_boxes['label_preds'] label_preds = pred_boxes['label_preds']
scores = pred_boxes['scores'] scores = pred_boxes['scores']
label_preds = label_preds.detach().cpu().numpy() label_preds = label_preds.detach().cpu().numpy()
num_proposal = box3d_depth.shape[0] for j in range(box3d_depth.shape[0]):
for j in range(num_proposal):
bbox_lidar = box3d_depth[j] # [7] in lidar bbox_lidar = box3d_depth[j] # [7] in lidar
bbox_lidar_bottom = bbox_lidar.copy() bbox_lidar_bottom = bbox_lidar.copy()
pred_list_i.append( pred_list_i.append(
...@@ -147,5 +146,33 @@ class IndoorBaseDataset(torch_data.Dataset): ...@@ -147,5 +146,33 @@ class IndoorBaseDataset(torch_data.Dataset):
from mmdet3d.core.evaluation import indoor_eval from mmdet3d.core.evaluation import indoor_eval
assert len(metric) > 0 assert len(metric) > 0
gt_annos = [info['annos'] for info in self.data_infos] gt_annos = [info['annos'] for info in self.data_infos]
ret_dict = indoor_eval(gt_annos, results, metric, self.label2cat) label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
ret_dict = indoor_eval(gt_annos, results, metric, label2cat)
return ret_dict return ret_dict
def __len__(self):
return len(self.data_infos)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In 3D datasets, they are all the same, thus are all zeros
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
...@@ -6,227 +6,76 @@ import tempfile ...@@ -6,227 +6,76 @@ import tempfile
import mmcv import mmcv
import numpy as np import numpy as np
import torch import torch
import torch.utils.data as torch_data
from mmcv.utils import print_log from mmcv.utils import print_log
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops from ..core.bbox import box_np_ops
from .pipelines import Compose from .custom_3d import Custom3DDataset
from .utils import remove_dontcare from .utils import remove_dontcare
@DATASETS.register_module() @DATASETS.register_module()
class KittiDataset(torch_data.Dataset): class KittiDataset(Custom3DDataset):
CLASSES = ('car', 'pedestrian', 'cyclist') CLASSES = ('car', 'pedestrian', 'cyclist')
def __init__(self, def __init__(self,
root_path, data_root,
ann_file, ann_file,
split, split,
pts_prefix='velodyne',
pipeline=None, pipeline=None,
training=False, classes=None,
class_names=None,
modality=None, modality=None,
with_label=True,
test_mode=False): test_mode=False):
super().__init__() super().__init__(
self.root_path = root_path data_root=data_root,
self.root_split_path = os.path.join( ann_file=ann_file,
self.root_path, 'training' if split != 'test' else 'testing') pipeline=pipeline,
self.class_names = class_names if class_names else self.CLASSES classes=classes,
self.modality = modality modality=modality,
self.with_label = with_label test_mode=test_mode)
self.root_split = os.path.join(self.data_root, split)
assert self.modality is not None assert self.modality is not None
self.modality = modality
self.test_mode = test_mode
# TODO: rm the key training if it is not needed
self.training = training
self.pcd_limit_range = [0, -40, -3, 70.4, 40, 0.0] self.pcd_limit_range = [0, -40, -3, 70.4, 40, 0.0]
self.pts_prefix = pts_prefix
self.ann_file = ann_file def _get_pts_filename(self, idx):
self.kitti_infos = mmcv.load(ann_file) pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:06d}.bin')
return pts_filename
# set group flag for the sampler def get_data_info(self, index):
if not self.test_mode: info = self.data_infos[index]
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
difficulty = input_dict['difficulty']
input_dict['bbox_fields'] = []
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
# selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_bboxes_mask = np.array([n in self.class_names for n in gt_names],
dtype=np.bool_)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes'] = gt_bboxes.astype('float32')
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
input_dict['difficulty'] = difficulty
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['gt_bboxes_3d_mask'] = copy.deepcopy(gt_bboxes_mask)
input_dict['bbox_fields'].append('gt_bboxes')
if len(gt_bboxes) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
if gt_bboxes_3d is not None:
selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_bboxes'] = gt_bboxes
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In kitti's pcd, they are all the same, thus are all zeros
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def get_lidar(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path,
'velodyne_depth_reduced', '%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_pure_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'depth_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_depth(self, idx):
depth_file = os.path.join(self.root_split_path, 'depth_completion',
'%06d.png' % idx)
assert os.path.exists(depth_file)
depth_img = mmcv.imread(depth_file, -1) / 256.0
return depth_img
def __len__(self):
return len(self.kitti_infos)
def get_sensor_data(self, index):
info = self.kitti_infos[index]
sample_idx = info['image']['image_idx'] sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.root_split,
info['image']['image_path'])
# TODO: consider use torch.Tensor only # TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32) rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32) Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32) P2 = info['calib']['P2'].astype(np.float32)
lidar2img = P2 @ rect @ Trv2c lidar2img = P2 @ rect @ Trv2c
if self.modality['use_depth'] and self.modality['use_lidar']: pts_filename = self._get_pts_filename(sample_idx)
points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']:
points = self.get_lidar(sample_idx)
elif self.modality['use_lidar_reduced']:
points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx)
else:
assert (self.modality['use_depth'] or self.modality['use_lidar'])
if not self.modality['use_lidar_intensity']:
points = points[:, :3]
input_dict = dict( input_dict = dict(
sample_idx=sample_idx, sample_idx=sample_idx,
points=points, pts_filename=pts_filename,
lidar2img=lidar2img, img_filename=img_filename,
) lidar2img=lidar2img)
# TODO: support image input if not self.test_mode:
if self.modality['use_camera']:
image_info = info['image']
image_path = image_info['image_path']
image_path = os.path.join(self.root_path, image_path)
img = mmcv.imread(image_path)
input_dict.update(
dict(
img=img,
img_shape=img.shape,
ori_shape=img.shape,
filename=image_path))
else:
input_dict.update(dict(img_shape=info['image']['image_shape']))
if self.with_label:
annos = self.get_ann_info(index) annos = self.get_ann_info(index)
input_dict.update(annos) input_dict['ann_info'] = annos
return input_dict return input_dict
def get_ann_info(self, index): def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api # Use index to get the annos, thus the evalhook could also use this api
info = self.kitti_infos[index] info = self.data_infos[index]
rect = info['calib']['R0_rect'].astype(np.float32) rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32) Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
# P2 = info['calib']['P2'].astype(np.float32)
annos = info['annos'] annos = info['annos']
# we need other objects to avoid collision when sample # we need other objects to avoid collision when sample
...@@ -238,21 +87,30 @@ class KittiDataset(torch_data.Dataset): ...@@ -238,21 +87,30 @@ class KittiDataset(torch_data.Dataset):
# print(gt_names, len(loc)) # print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]], gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32) axis=1).astype(np.float32)
difficulty = annos['difficulty']
# this change gt_bboxes_3d to velodyne coordinates # this change gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect, gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c) Trv2c)
# only center format is allowed. so we need to convert gt_bboxes = annos['bbox']
# kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
# box_np_ops.change_box3d_center_(gt_bboxes, [0.5, 0.5, 0], selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
# [0.5, 0.5, 0.5]) gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
gt_bboxes = gt_bboxes[selected].astype('float32')
gt_names = gt_names[selected]
gt_labels = []
for cat in gt_names:
if cat in self.CLASSES:
gt_labels.append(self.CLASSES.index(cat))
else:
gt_labels.append(-1)
gt_labels = np.array(gt_labels)
gt_labels_3d = copy.deepcopy(gt_labels)
# For simplicity gt_bboxes means 2D gt bboxes
anns_results = dict( anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d, gt_bboxes_3d=gt_bboxes_3d,
gt_bboxes=annos['bbox'], gt_labels_3d=gt_labels_3d,
gt_names=gt_names, gt_bboxes=gt_bboxes,
difficulty=difficulty) gt_labels=gt_labels)
return anns_results return anns_results
def drop_arrays_by_name(self, gt_names, used_classes): def drop_arrays_by_name(self, gt_names, used_classes):
...@@ -276,11 +134,11 @@ class KittiDataset(torch_data.Dataset): ...@@ -276,11 +134,11 @@ class KittiDataset(torch_data.Dataset):
tmp_dir = None tmp_dir = None
if not isinstance(outputs[0], dict): if not isinstance(outputs[0], dict):
result_files = self.bbox2result_kitti2d(outputs, self.class_names, result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
pklfile_prefix, pklfile_prefix,
submission_prefix) submission_prefix)
else: else:
result_files = self.bbox2result_kitti(outputs, self.class_names, result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix, pklfile_prefix,
submission_prefix) submission_prefix)
return result_files, tmp_dir return result_files, tmp_dir
...@@ -310,13 +168,13 @@ class KittiDataset(torch_data.Dataset): ...@@ -310,13 +168,13 @@ class KittiDataset(torch_data.Dataset):
""" """
result_files, tmp_dir = self.format_results(results, pklfile_prefix) result_files, tmp_dir = self.format_results(results, pklfile_prefix)
from mmdet3d.core.evaluation import kitti_eval from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.kitti_infos] gt_annos = [info['annos'] for info in self.data_infos]
if metric == 'img_bbox': if metric == 'img_bbox':
ap_result_str, ap_dict = kitti_eval( ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.class_names, eval_types=['bbox']) gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
else: else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files, ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.class_names) self.CLASSES)
print_log('\n' + ap_result_str, logger=logger) print_log('\n' + ap_result_str, logger=logger)
if tmp_dir is not None: if tmp_dir is not None:
tmp_dir.cleanup() tmp_dir.cleanup()
...@@ -327,7 +185,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -327,7 +185,7 @@ class KittiDataset(torch_data.Dataset):
class_names, class_names,
pklfile_prefix=None, pklfile_prefix=None,
submission_prefix=None): submission_prefix=None):
assert len(net_outputs) == len(self.kitti_infos) assert len(net_outputs) == len(self.data_infos)
if submission_prefix is not None: if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix) mmcv.mkdir_or_exist(submission_prefix)
...@@ -336,7 +194,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -336,7 +194,7 @@ class KittiDataset(torch_data.Dataset):
for idx, pred_dicts in enumerate( for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)): mmcv.track_iter_progress(net_outputs)):
annos = [] annos = []
info = self.kitti_infos[idx] info = self.data_infos[idx]
sample_idx = info['image']['image_idx'] sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2] image_shape = info['image']['image_shape'][:2]
...@@ -440,7 +298,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -440,7 +298,7 @@ class KittiDataset(torch_data.Dataset):
Return: Return:
List([dict]): A list of dict have the kitti format List([dict]): A list of dict have the kitti format
""" """
assert len(net_outputs) == len(self.kitti_infos) assert len(net_outputs) == len(self.data_infos)
det_annos = [] det_annos = []
print('\nConverting prediction to KITTI format') print('\nConverting prediction to KITTI format')
...@@ -457,7 +315,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -457,7 +315,7 @@ class KittiDataset(torch_data.Dataset):
location=[], location=[],
rotation_y=[], rotation_y=[],
score=[]) score=[])
sample_idx = self.kitti_infos[i]['image']['image_idx'] sample_idx = self.data_infos[i]['image']['image_idx']
num_example = 0 num_example = 0
for label in range(len(bboxes_per_sample)): for label in range(len(bboxes_per_sample)):
...@@ -511,7 +369,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -511,7 +369,7 @@ class KittiDataset(torch_data.Dataset):
mmcv.mkdir_or_exist(submission_prefix) mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}') print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos): for i, anno in enumerate(det_annos):
sample_idx = self.kitti_infos[i]['image']['image_idx'] sample_idx = self.data_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt' cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f: with open(cur_det_file, 'w') as f:
bbox = anno['bbox'] bbox = anno['bbox']
......
import copy
import os.path as osp import os.path as osp
import tempfile import tempfile
import mmcv import mmcv
import numpy as np import numpy as np
import pyquaternion import pyquaternion
import torch.utils.data as torch_data
from nuscenes.utils.data_classes import Box as NuScenesBox from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops from ..core.bbox import box_np_ops
from .pipelines import Compose from .custom_3d import Custom3DDataset
@DATASETS.register_module() @DATASETS.register_module()
class NuScenesDataset(torch_data.Dataset): class NuScenesDataset(Custom3DDataset):
NumPointFeatures = 4 # xyz, timestamp. set 4 to use kitti pretrain
NameMapping = { NameMapping = {
'movable_object.barrier': 'barrier', 'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle', 'vehicle.bicycle': 'bicycle',
...@@ -71,153 +68,60 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -71,153 +68,60 @@ class NuScenesDataset(torch_data.Dataset):
def __init__(self, def __init__(self,
ann_file, ann_file,
pipeline=None, pipeline=None,
root_path=None, data_root=None,
class_names=None, classes=None,
load_interval=1, load_interval=1,
with_velocity=True, with_velocity=True,
test_mode=False, test_mode=False,
modality=None, modality=None,
eval_version='detection_cvpr_2019', eval_version='detection_cvpr_2019'):
with_label=True,
max_sweeps=10,
filter_empty_gt=True):
super().__init__()
self.data_root = root_path
self.class_names = class_names if class_names else self.CLASSES
self.test_mode = test_mode
self.load_interval = load_interval self.load_interval = load_interval
self.with_label = with_label super().__init__(
self.max_sweeps = max_sweeps data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
test_mode=test_mode)
self.ann_file = ann_file
data = mmcv.load(ann_file)
self.data_infos = list(
sorted(data['infos'], key=lambda e: e['timestamp']))
self.data_infos = self.data_infos[::load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
self.with_velocity = with_velocity self.with_velocity = with_velocity
self.eval_version = eval_version self.eval_version = eval_version
from nuscenes.eval.detection.config import config_factory from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version) self.eval_detection_configs = config_factory(self.eval_version)
if modality is None: if self.modality is None:
modality = dict( self.modality = dict(
use_camera=False, use_camera=False,
use_lidar=True, use_lidar=True,
use_radar=False, use_radar=False,
use_map=False, use_map=False,
use_external=False, use_external=False,
) )
self.modality = modality
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
# kitti map: nusc det name -> kitti eval name
self._kitti_name_mapping = {
'car': 'car',
'pedestrian': 'pedestrian',
} # we only eval these classes in kitti
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In kitti's pcd, they are all the same, thus are all zeros
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __len__(self):
return len(self.data_infos)
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
if len(input_dict['gt_bboxes_3d']) == 0:
return None
return input_dict
def prepare_test_data(self, index): def load_annotations(self, ann_file):
input_dict = self.get_sensor_data(index) data = mmcv.load(ann_file)
# input_dict = self.test_pre_pipeline(input_dict) data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
example = self.pipeline(input_dict) data_infos = data_infos[::self.load_interval]
return example self.metadata = data['metadata']
self.version = self.metadata['version']
def test_pre_pipeline(self, input_dict): return data_infos
gt_names = input_dict['gt_names']
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def get_sensor_data(self, index): def get_data_info(self, index):
info = self.data_infos[index] info = self.data_infos[index]
points = np.fromfile(
info['lidar_path'], dtype=np.float32, count=-1).reshape([-1, 5])
# standard protocal modified from SECOND.Pytorch # standard protocal modified from SECOND.Pytorch
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = info['timestamp'] / 1e6
for idx, sweep in enumerate(info['sweeps']):
if idx >= self.max_sweeps:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
input_dict = dict( input_dict = dict(
points=points,
sample_idx=info['token'], sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
timestamp=info['timestamp'] / 1e6,
) )
if self.modality['use_camera']: if self.modality['use_camera']:
# TODO support image
imgs = []
ori_shapes = []
image_paths = [] image_paths = []
lidar2img_rts = [] lidar2img_rts = []
for cam_type, cam_info in info['cams'].items(): for cam_type, cam_info in info['cams'].items():
image_path = cam_info['data_path'] image_paths.append(cam_info['data_path'])
# image_path = osp.join(self.data_root, image_path)
img = mmcv.imread(image_path)
imgs.append(img)
ori_shapes.append(img.shape)
image_paths.append(image_path)
# obtain lidar to image transformation matrix # obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation']) lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[ lidar2cam_t = cam_info[
...@@ -233,16 +137,13 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -233,16 +137,13 @@ class NuScenesDataset(torch_data.Dataset):
input_dict.update( input_dict.update(
dict( dict(
img=imgs, img_filename=image_paths,
img_shape=ori_shapes,
ori_shape=ori_shapes,
filename=image_paths,
lidar2img=lidar2img_rts, lidar2img=lidar2img_rts,
)) ))
if self.with_label: if not self.test_mode:
annos = self.get_ann_info(index) annos = self.get_ann_info(index)
input_dict.update(annos) input_dict['ann_info'] = annos
return input_dict return input_dict
...@@ -256,6 +157,13 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -256,6 +157,13 @@ class NuScenesDataset(torch_data.Dataset):
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5], box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0]) [0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask] gt_names_3d = info['gt_names'][mask]
gt_labels_3d = []
for cat in gt_names_3d:
if cat in self.CLASSES:
gt_labels_3d.append(self.CLASSES.index(cat))
else:
gt_labels_3d.append(-1)
gt_labels_3d = np.array(gt_labels_3d)
if self.with_velocity: if self.with_velocity:
gt_velocity = info['gt_velocity'][mask] gt_velocity = info['gt_velocity'][mask]
...@@ -263,18 +171,15 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -263,18 +171,15 @@ class NuScenesDataset(torch_data.Dataset):
gt_velocity[nan_mask] = [0.0, 0.0] gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1) gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
gt_bboxes_3d_mask = np.array(
[n in self.class_names for n in gt_names_3d], dtype=np.bool_)
anns_results = dict( anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d, gt_bboxes_3d=gt_bboxes_3d,
gt_names_3d=gt_names_3d, gt_labels_3d=gt_labels_3d,
gt_bboxes_3d_mask=gt_bboxes_3d_mask,
) )
return anns_results return anns_results
def _format_bbox(self, results, jsonfile_prefix=None): def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {} nusc_annos = {}
mapped_class_names = self.class_names mapped_class_names = self.CLASSES
print('Start to convert detection format...') print('Start to convert detection format...')
for sample_id, det in enumerate(mmcv.track_iter_progress(results)): for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
...@@ -358,7 +263,7 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -358,7 +263,7 @@ class NuScenesDataset(torch_data.Dataset):
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json')) metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict() detail = dict()
metric_prefix = '{}_NuScenes'.format(result_name) metric_prefix = '{}_NuScenes'.format(result_name)
for name in self.class_names: for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items(): for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v)) val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
......
...@@ -3,10 +3,10 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler ...@@ -3,10 +3,10 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
from .formating import DefaultFormatBundle, DefaultFormatBundle3D from .formating import DefaultFormatBundle, DefaultFormatBundle3D
from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale, from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
IndoorPointsColorJitter) IndoorPointsColorJitter)
from .indoor_loading import (IndoorLoadAnnotations3D, IndoorLoadPointsFromFile, from .indoor_loading import (IndoorPointsColorNormalize, LoadAnnotations3D,
IndoorPointsColorNormalize) LoadPointsFromFile)
from .indoor_sample import IndoorPointSample from .indoor_sample import IndoorPointSample
from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile from .loading import LoadMultiViewImageFromFiles
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter, from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter, ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D) RandomFlip3D)
...@@ -17,7 +17,6 @@ __all__ = [ ...@@ -17,7 +17,6 @@ __all__ = [
'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile', 'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler', 'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData', 'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
'MMDataBaseSampler', 'IndoorLoadPointsFromFile', 'MMDataBaseSampler', 'IndoorPointsColorNormalize', 'LoadAnnotations3D',
'IndoorPointsColorNormalize', 'IndoorLoadAnnotations3D',
'IndoorPointSample' 'IndoorPointSample'
] ]
...@@ -55,14 +55,23 @@ class BatchSampler: ...@@ -55,14 +55,23 @@ class BatchSampler:
@OBJECTSAMPLERS.register_module() @OBJECTSAMPLERS.register_module()
class DataBaseSampler(object): class DataBaseSampler(object):
def __init__(self, info_path, root_path, rate, prepare, object_rot_range, def __init__(self,
sample_groups, use_road_plane): info_path,
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None):
super().__init__() super().__init__()
self.root_path = root_path self.data_root = data_root
self.info_path = info_path self.info_path = info_path
self.rate = rate self.rate = rate
self.prepare = prepare self.prepare = prepare
self.object_rot_range = object_rot_range self.object_rot_range = object_rot_range
self.classes = classes
self.cat2label = {name: i for i, name in enumerate(classes)}
self.label2cat = {i: name for i, name in enumerate(classes)}
with open(info_path, 'rb') as f: with open(info_path, 'rb') as f:
db_infos = pickle.load(f) db_infos = pickle.load(f)
...@@ -125,13 +134,16 @@ class DataBaseSampler(object): ...@@ -125,13 +134,16 @@ class DataBaseSampler(object):
db_infos[name] = filtered_infos db_infos[name] = filtered_infos
return db_infos return db_infos
def sample_all(self, gt_bboxes, gt_names, img=None): def sample_all(self, gt_bboxes, gt_labels, img=None):
sampled_num_dict = {} sampled_num_dict = {}
sample_num_per_class = [] sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes, for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums): self.sample_max_nums):
class_label = self.cat2label[class_name]
# sampled_num = int(max_sample_num -
# np.sum([n == class_name for n in gt_names]))
sampled_num = int(max_sample_num - sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names])) np.sum([n == class_label for n in gt_labels]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64) sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num) sample_num_per_class.append(sampled_num)
...@@ -164,13 +176,13 @@ class DataBaseSampler(object): ...@@ -164,13 +176,13 @@ class DataBaseSampler(object):
sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0) sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)
# center = sampled_gt_bboxes[:, 0:3] # center = sampled_gt_bboxes[:, 0:3]
num_sampled = len(sampled) # num_sampled = len(sampled)
s_points_list = [] s_points_list = []
count = 0 count = 0
for info in sampled: for info in sampled:
file_path = os.path.join( file_path = os.path.join(
self.root_path, self.data_root,
info['path']) if self.root_path else info['path'] info['path']) if self.data_root else info['path']
s_points = np.fromfile( s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4]) file_path, dtype=np.float32).reshape([-1, 4])
...@@ -183,18 +195,16 @@ class DataBaseSampler(object): ...@@ -183,18 +195,16 @@ class DataBaseSampler(object):
count += 1 count += 1
s_points_list.append(s_points) s_points_list.append(s_points)
# gt_names = np.array([s['name'] for s in sampled]),
# gt_labels = np.array([self.cat2label(s) for s in gt_names])
gt_labels = np.array([self.cat2label[s['name']] for s in sampled])
ret = { ret = {
'gt_names': 'gt_labels_3d':
np.array([s['name'] for s in sampled]), gt_labels,
'difficulty':
np.array([s['difficulty'] for s in sampled]),
'gt_bboxes_3d': 'gt_bboxes_3d':
sampled_gt_bboxes, sampled_gt_bboxes,
'points': 'points':
np.concatenate(s_points_list, axis=0), np.concatenate(s_points_list, axis=0),
'gt_masks':
np.ones((num_sampled, ), dtype=np.bool_),
'group_ids': 'group_ids':
np.arange(gt_bboxes.shape[0], np.arange(gt_bboxes.shape[0],
gt_bboxes.shape[0] + len(sampled)) gt_bboxes.shape[0] + len(sampled))
...@@ -260,11 +270,12 @@ class MMDataBaseSampler(DataBaseSampler): ...@@ -260,11 +270,12 @@ class MMDataBaseSampler(DataBaseSampler):
def __init__(self, def __init__(self,
info_path, info_path,
root_path, data_root,
rate, rate,
prepare, prepare,
object_rot_range, object_rot_range,
sample_groups, sample_groups,
classes=None,
check_2D_collision=False, check_2D_collision=False,
collision_thr=0, collision_thr=0,
collision_in_classes=False, collision_in_classes=False,
...@@ -272,13 +283,12 @@ class MMDataBaseSampler(DataBaseSampler): ...@@ -272,13 +283,12 @@ class MMDataBaseSampler(DataBaseSampler):
blending_type=None): blending_type=None):
super(MMDataBaseSampler, self).__init__( super(MMDataBaseSampler, self).__init__(
info_path=info_path, info_path=info_path,
root_path=root_path, data_root=data_root,
rate=rate, rate=rate,
prepare=prepare, prepare=prepare,
object_rot_range=object_rot_range, object_rot_range=object_rot_range,
sample_groups=sample_groups, sample_groups=sample_groups,
use_road_plane=False, classes=classes)
)
self.blending_type = blending_type self.blending_type = blending_type
self.depth_consistent = depth_consistent self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision self.check_2D_collision = check_2D_collision
...@@ -337,7 +347,6 @@ class MMDataBaseSampler(DataBaseSampler): ...@@ -337,7 +347,6 @@ class MMDataBaseSampler(DataBaseSampler):
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0) sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0) sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
num_sampled = len(sampled)
s_points_list = [] s_points_list = []
count = 0 count = 0
...@@ -355,8 +364,8 @@ class MMDataBaseSampler(DataBaseSampler): ...@@ -355,8 +364,8 @@ class MMDataBaseSampler(DataBaseSampler):
else: else:
info = sampled[idx] info = sampled[idx]
pcd_file_path = os.path.join( pcd_file_path = os.path.join(
self.root_path, self.data_root,
info['path']) if self.root_path else info['path'] info['path']) if self.data_root else info['path']
img_file_path = pcd_file_path + '.png' img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png' mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile( s_points = np.fromfile(
...@@ -389,7 +398,6 @@ class MMDataBaseSampler(DataBaseSampler): ...@@ -389,7 +398,6 @@ class MMDataBaseSampler(DataBaseSampler):
gt_bboxes_3d=sampled_gt_bboxes_3d, gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d, gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0), points=np.concatenate(s_points_list, axis=0),
gt_masks=np.ones((num_sampled, ), dtype=np.bool_),
group_ids=np.arange(gt_bboxes_3d.shape[0], group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled))) gt_bboxes_3d.shape[0] + len(sampled)))
......
...@@ -123,7 +123,7 @@ class IndoorGlobalRotScale(object): ...@@ -123,7 +123,7 @@ class IndoorGlobalRotScale(object):
Augment sunrgbd and scannet data with global rotating and scaling. Augment sunrgbd and scannet data with global rotating and scaling.
Args: Args:
use_height (bool): Whether to use height. shift_height (bool): Whether to use height.
Default: True. Default: True.
rot_range (list[float]): Range of rotation. rot_range (list[float]): Range of rotation.
Default: None. Default: None.
...@@ -131,8 +131,8 @@ class IndoorGlobalRotScale(object): ...@@ -131,8 +131,8 @@ class IndoorGlobalRotScale(object):
Default: None. Default: None.
""" """
def __init__(self, use_height=True, rot_range=None, scale_range=None): def __init__(self, shift_height=True, rot_range=None, scale_range=None):
self.use_height = use_height self.shift_height = shift_height
self.rot_range = np.pi * np.array(rot_range) self.rot_range = np.pi * np.array(rot_range)
self.scale_range = scale_range self.scale_range = scale_range
...@@ -215,7 +215,7 @@ class IndoorGlobalRotScale(object): ...@@ -215,7 +215,7 @@ class IndoorGlobalRotScale(object):
points[:, :3] *= scale_ratio points[:, :3] *= scale_ratio
gt_bboxes_3d[:, :3] *= scale_ratio gt_bboxes_3d[:, :3] *= scale_ratio
gt_bboxes_3d[:, 3:6] *= scale_ratio gt_bboxes_3d[:, 3:6] *= scale_ratio
if self.use_height: if self.shift_height:
points[:, -1] *= scale_ratio points[:, -1] *= scale_ratio
results['points'] = points results['points'] = points
...@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object): ...@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += '(use_height={})'.format(self.use_height) repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(rot_range={})'.format(self.rot_range) repr_str += '(rot_range={})'.format(self.rot_range)
repr_str += '(scale_range={})'.format(self.scale_range) repr_str += '(scale_range={})'.format(self.scale_range)
return repr_str return repr_str
...@@ -2,6 +2,7 @@ import mmcv ...@@ -2,6 +2,7 @@ import mmcv
import numpy as np import numpy as np
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module() @PIPELINES.register_module()
...@@ -32,35 +33,45 @@ class IndoorPointsColorNormalize(object): ...@@ -32,35 +33,45 @@ class IndoorPointsColorNormalize(object):
@PIPELINES.register_module() @PIPELINES.register_module()
class IndoorLoadPointsFromFile(object): class LoadPointsFromFile(object):
"""Indoor load points from file. """Load Points From File.
Load sunrgbd and scannet points from file. Load sunrgbd and scannet points from file.
Args: Args:
use_height (bool): Whether to use height. shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points. load_dim (int): The dimension of the loaded points.
Default: 6. Default: 6.
use_dim (list[int]): Which dimensions of the points to be used. use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension
""" """
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]): def __init__(self, load_dim=6, use_dim=[0, 1, 2], shift_height=False):
self.use_height = use_height self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \ assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}' f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim self.load_dim = load_dim
self.use_dim = use_dim self.use_dim = use_dim
def __call__(self, results): def _load_points(self, pts_filename):
pts_filename = results['pts_filename']
mmcv.check_file_exist(pts_filename) mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename) points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim) points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim] points = points[:, self.use_dim]
if self.use_height: if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99) floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1) points = np.concatenate([points, np.expand_dims(height, 1)], 1)
...@@ -69,7 +80,7 @@ class IndoorLoadPointsFromFile(object): ...@@ -69,7 +80,7 @@ class IndoorLoadPointsFromFile(object):
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += '(use_height={})'.format(self.use_height) repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(mean_color={})'.format(self.color_mean) repr_str += '(mean_color={})'.format(self.color_mean)
repr_str += '(load_dim={})'.format(self.load_dim) repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim) repr_str += '(use_dim={})'.format(self.use_dim)
...@@ -77,28 +88,99 @@ class IndoorLoadPointsFromFile(object): ...@@ -77,28 +88,99 @@ class IndoorLoadPointsFromFile(object):
@PIPELINES.register_module() @PIPELINES.register_module()
class IndoorLoadAnnotations3D(object): class LoadAnnotations3D(LoadAnnotations):
"""Indoor load annotations3D. """Load Annotations3D.
Load instance mask and semantic mask of points. Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
with_mask_3d (bool, optional): Whether to load 3D instance masks.
for points. Defaults to False.
with_seg_3d (bool, optional): Whether to load 3D semantic masks.
for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
""" """
def __init__(self): def __init__(self,
pass with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True):
super().__init__(with_bbox, with_label, with_mask, with_seg, poly2mask)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append(results['gt_bboxes_3d'])
return results
def __call__(self, results): def _load_labels_3d(self, results):
pts_instance_mask_path = results['pts_instance_mask_path'] results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
pts_semantic_mask_path = results['pts_semantic_mask_path'] return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
mmcv.check_file_exist(pts_instance_mask_path) mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int) pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int)
results['pts_instance_mask'] = pts_instance_mask results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append(results['pts_instance_mask'])
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int)
results['pts_semantic_mask'] = pts_semantic_mask results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append(results['pts_semantic_mask'])
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results return results
def __repr__(self): def __repr__(self):
repr_str = self.__class__.__name__ indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str return repr_str
import os.path as osp
import mmcv import mmcv
import numpy as np import numpy as np
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class LoadPointsFromFile(object):
def __init__(self, points_dim=4, with_reflectivity=True):
self.points_dim = points_dim
self.with_reflectivity = with_reflectivity
def __call__(self, results):
if results['pts_prefix'] is not None:
filename = osp.join(results['pts_prefix'],
results['img_info']['filename'])
else:
filename = results['img_info']['filename']
points = np.fromfile(
filename, dtype=np.float32).reshape(-1, self.points_dim)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(points_dim={})'.format(self.points_dim)
repr_str += '(points_dim={})'.format(self.with_reflectivity)
return repr_str
@PIPELINES.register_module() @PIPELINES.register_module()
class LoadMultiViewImageFromFiles(object): class LoadMultiViewImageFromFiles(object):
""" Load multi channel images from a list of separate channel files. """ Load multi channel images from a list of separate channel files.
Expects results['filename'] to be a list of filenames
Expects results['img_filename'] to be a list of filenames
""" """
def __init__(self, to_float32=False, color_type='unchanged'): def __init__(self, to_float32=False, color_type='unchanged'):
...@@ -42,13 +16,7 @@ class LoadMultiViewImageFromFiles(object): ...@@ -42,13 +16,7 @@ class LoadMultiViewImageFromFiles(object):
self.color_type = color_type self.color_type = color_type
def __call__(self, results): def __call__(self, results):
if results['img_prefix'] is not None: filename = results['img_filename']
filename = [
osp.join(results['img_prefix'], fname)
for fname in results['img_info']['filename']
]
else:
filename = results['img_info']['filename']
img = np.stack( img = np.stack(
[mmcv.imread(name, self.color_type) for name in filename], axis=-1) [mmcv.imread(name, self.color_type) for name in filename], axis=-1)
if self.to_float32: if self.to_float32:
...@@ -70,3 +38,38 @@ class LoadMultiViewImageFromFiles(object): ...@@ -70,3 +38,38 @@ class LoadMultiViewImageFromFiles(object):
def __repr__(self): def __repr__(self):
return "{} (to_float32={}, color_type='{}')".format( return "{} (to_float32={}, color_type='{}')".format(
self.__class__.__name__, self.to_float32, self.color_type) self.__class__.__name__, self.to_float32, self.color_type)
@PIPELINES.register_module()
class LoadPointsFromMultiSweeps(object):
def __init__(self, sweeps_num=10):
self.sweeps_num = sweeps_num
def __call__(self, results):
points = results['points']
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = results['timestamp']
for idx, sweep in enumerate(results['sweeps']):
if idx >= self.sweeps_num:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
results['points'] = points
return results
def __repr__(self):
return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
...@@ -109,8 +109,8 @@ class ObjectSample(object): ...@@ -109,8 +109,8 @@ class ObjectSample(object):
def __call__(self, input_dict): def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d'] gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d'] gt_labels_3d = input_dict['gt_labels_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# change to float for blending operation # change to float for blending operation
points = input_dict['points'] points = input_dict['points']
# rect = input_dict['rect'] # rect = input_dict['rect']
...@@ -119,27 +119,23 @@ class ObjectSample(object): ...@@ -119,27 +119,23 @@ class ObjectSample(object):
if self.sample_2d: if self.sample_2d:
img = input_dict['img'] # .astype(np.float32) img = input_dict['img'] # .astype(np.float32)
gt_bboxes_2d = input_dict['gt_bboxes'] gt_bboxes_2d = input_dict['gt_bboxes']
gt_bboxes_mask = input_dict['gt_bboxes_mask']
gt_names = input_dict['gt_names']
# Assume for now 3D & 2D bboxes are the same # Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all( sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, gt_bboxes_2d=gt_bboxes_2d, img=img) gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
else: else:
sampled_dict = self.db_sampler.sample_all( sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, img=None) gt_bboxes_3d, gt_labels_3d, img=None)
if sampled_dict is not None: if sampled_dict is not None:
sampled_gt_names = sampled_dict['gt_names']
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d'] sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
sampled_points = sampled_dict['points'] sampled_points = sampled_dict['points']
sampled_gt_masks = sampled_dict['gt_masks'] sampled_gt_labels = sampled_dict['gt_labels_3d']
gt_names_3d = np.concatenate([gt_names_3d, sampled_gt_names], gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
axis=0) axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
]).astype(np.float32) ]).astype(np.float32)
gt_bboxes_3d_mask = np.concatenate(
[gt_bboxes_3d_mask, sampled_gt_masks], axis=0)
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d) points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension # check the points dimension
dim_inds = points.shape[-1] dim_inds = points.shape[-1]
...@@ -150,18 +146,14 @@ class ObjectSample(object): ...@@ -150,18 +146,14 @@ class ObjectSample(object):
sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d'] sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
gt_bboxes_2d = np.concatenate( gt_bboxes_2d = np.concatenate(
[gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32) [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
gt_bboxes_mask = np.concatenate(
[gt_bboxes_mask, sampled_gt_masks], axis=0)
gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0)
input_dict['gt_names'] = gt_names
input_dict['gt_bboxes'] = gt_bboxes_2d input_dict['gt_bboxes'] = gt_bboxes_2d
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['img'] = sampled_dict['img'] # .astype(np.uint8) input_dict['img'] = sampled_dict['img'] # .astype(np.uint8)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_names_3d'] = gt_names_3d input_dict['gt_labels_3d'] = gt_labels_3d
input_dict['points'] = points input_dict['points'] = points
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict return input_dict
def __repr__(self): def __repr__(self):
...@@ -184,12 +176,11 @@ class ObjectNoise(object): ...@@ -184,12 +176,11 @@ class ObjectNoise(object):
def __call__(self, input_dict): def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d'] gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points'] points = input_dict['points']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# TODO: check this inplace function # TODO: check this inplace function
noise_per_object_v3_( noise_per_object_v3_(
gt_bboxes_3d, gt_bboxes_3d,
points, points,
gt_bboxes_3d_mask,
rotation_perturb=self.rot_uniform_noise, rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std, center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range, global_random_rot_range=self.global_rot_range,
...@@ -322,20 +313,17 @@ class ObjectRangeFilter(object): ...@@ -322,20 +313,17 @@ class ObjectRangeFilter(object):
def __call__(self, input_dict): def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d'] gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d'] gt_labels_3d = input_dict['gt_labels_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range) mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask] gt_bboxes_3d = gt_bboxes_3d[mask]
gt_names_3d = gt_names_3d[mask] gt_labels_3d = gt_labels_3d[mask]
# the mask should also be updated
gt_bboxes_3d_mask = gt_bboxes_3d_mask[mask]
# limit rad to [-pi, pi] # limit rad to [-pi, pi]
gt_bboxes_3d[:, 6] = self.limit_period( gt_bboxes_3d[:, 6] = self.limit_period(
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi) gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32') input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_names_3d'] = gt_names_3d input_dict['gt_labels_3d'] = gt_labels_3d
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict return input_dict
def __repr__(self): def __repr__(self):
...@@ -364,3 +352,30 @@ class PointsRangeFilter(object): ...@@ -364,3 +352,30 @@ class PointsRangeFilter(object):
repr_str = self.__class__.__name__ repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist()) repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str return repr_str
@PIPELINES.register_module()
class ObjectNameFilter(object):
    """Keep only ground-truth objects belonging to the training classes.

    Although filtering is configured by class *names*, the check is done on
    ``gt_labels_3d``: a label is kept iff it is a valid index into
    ``classes`` (i.e. in ``range(len(classes))``), which drops entries such
    as ``-1`` used for ignored objects.

    Args:
        classes (list[str]): List of class names to be kept for training.
    """

    def __init__(self, classes):
        self.classes = classes
        # Valid label indices are the positions of ``classes``; anything
        # outside this range is filtered out in ``__call__``.
        self.labels = list(range(len(self.classes)))

    def __call__(self, input_dict):
        labels = input_dict['gt_labels_3d']
        keep = np.array([lbl in self.labels for lbl in labels],
                        dtype=np.bool_)
        input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][keep]
        input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][keep]
        return input_dict

    def __repr__(self):
        return f'{self.__class__.__name__}(classes={self.classes})'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment