Commit 16344362 authored by zhangwenwei

Merge branch 'refactor_pipeline' into 'master'

Refactor pipeline

See merge request open-mmlab/mmdet.3d!35
parents db986fa4 bdb3c14d
@@ -77,7 +77,6 @@ model = dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
@@ -148,7 +147,7 @@ input_modality = dict(
)
db_sampler = dict(
type='MMDataBaseSampler',
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
@@ -169,7 +168,7 @@ db_sampler = dict(
Pedestrian=6,
Cyclist=6,
),
)
classes=class_names)
train_pipeline = [
dict(
type='Resize',
@@ -223,33 +222,33 @@ data = dict(
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
classes=class_names,
test_mode=True))
# Training settings
optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
......
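Taken together, the config hunks in this commit follow one migration pattern: `root_path` becomes `data_root`, the dataset keys `class_names`/`with_label` become `classes`/`test_mode`, samplers gain an explicit `classes` list, and the `strides` entry is removed from the anchor generator configs (the generator now takes `scales`). A minimal before/after sketch (the values are placeholders, not from this commit):

# before the refactor (hypothetical minimal dataset config)
train = dict(
    type='KittiDataset',
    root_path='data/kitti/',
    class_names=['Car'],
    with_label=True)

# after the refactor
train = dict(
    type='KittiDataset',
    data_root='data/kitti/',
    classes=['Car'],
    test_mode=False)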
@@ -45,7 +45,6 @@ model = dict(
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
@@ -106,10 +105,12 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
classes=class_names,
sample_groups=dict(Car=15))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -129,12 +130,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points']),
]
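# Note the recurring change above: reading points and annotations is no longer
# the dataset's job but the pipeline's, via the new LoadPointsFromFile and
# LoadAnnotations3D steps, while the test-time Collect3D gathers only 'points'.
# Each pipeline entry names a registered callable that maps a results dict to a
# results dict; a toy, framework-free sketch of that contract (hypothetical
# step names, not mmdet's registry):
def load_points(results):
    results['points'] = [[0.0, 0.0, 0.0, 1.0]]  # stand-in for np.fromfile(...)
    return results

def points_range_filter(results):
    results['points'] = [p for p in results['points'] if p[0] >= 0.0]
    return results

results = dict(pts_filename='000000.bin')  # hypothetical input from the dataset
for step in (load_points, points_range_filter):
    results = step(results)
print(sorted(results))  # ['points', 'pts_filename']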
data = dict(
@@ -142,32 +144,34 @@ data = dict(
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
@@ -46,7 +46,6 @@ model = dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
@@ -116,7 +115,7 @@ input_modality = dict(
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
@@ -134,8 +133,10 @@ db_sampler = dict(
Pedestrian=6,
Cyclist=6,
),
)
classes=class_names)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -156,12 +157,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points']),
]
data = dict(
@@ -169,32 +171,34 @@ data = dict(
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
......
@@ -42,7 +42,6 @@ model = dict(
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
@@ -94,7 +93,7 @@ input_modality = dict(
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
@@ -104,8 +103,10 @@ db_sampler = dict(
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
classes=class_names)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -125,12 +126,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points']),
]
data = dict(
@@ -138,32 +140,34 @@ data = dict(
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
@@ -42,7 +42,6 @@ model = dict(
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
@@ -149,6 +148,7 @@ train_cfg = dict(
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
max_num=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
@@ -191,6 +191,7 @@ test_cfg = dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
max_num=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
@@ -208,10 +209,9 @@ input_modality = dict(
use_lidar_intensity=True,
use_camera=False)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
@@ -220,12 +220,15 @@ db_sampler = dict(
Pedestrian=10,
Cyclist=10,
)),
classes=class_names,
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -240,17 +243,19 @@ train_pipeline = [
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
dict(type='Collect3D', keys=['points'])
]
data = dict(
@@ -258,32 +263,34 @@ data = dict(
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
@@ -40,7 +40,6 @@ model = dict(
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
@@ -176,17 +175,19 @@ input_modality = dict(
use_lidar_intensity=True,
use_camera=False)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
classes=class_names,
sample_groups=dict(Car=15))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -201,17 +202,19 @@ train_pipeline = [
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
dict(type='Collect3D', keys=['points'])
]
data = dict(
@@ -219,32 +222,34 @@ data = dict(
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
@@ -44,7 +44,6 @@ model = dict(
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
@@ -96,7 +95,7 @@ input_modality = dict(
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
@@ -106,9 +105,11 @@ db_sampler = dict(
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
classes=class_names)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -128,12 +129,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points']),
]
data = dict(
@@ -141,32 +143,34 @@ data = dict(
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(
......
@@ -42,7 +42,6 @@ model = dict(
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
@@ -94,7 +93,7 @@ input_modality = dict(
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
@@ -103,9 +102,12 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
classes=class_names,
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
@@ -125,12 +127,13 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points']),
]
data = dict(
@@ -138,32 +141,34 @@ data = dict(
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
......
@@ -60,7 +60,6 @@ model = dict(
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
strides=[2],
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
@@ -128,20 +127,23 @@ input_modality = dict(
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(),
classes=class_names,
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
),
)
))
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
@@ -155,6 +157,8 @@ train_pipeline = [
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
@@ -169,28 +173,28 @@ data = dict(
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=False))
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
......
@@ -19,7 +19,7 @@ class Anchor3DRangeGenerator(object):
The ranges are the same across different feature levels but may
vary for different anchor sizes if size_per_range is True.
sizes (list[list[float]]): 3D sizes of anchors.
strides (list[int]): Strides of anchors in different feature levels.
scales (list[int]): Scales of anchors in different feature levels.
rotations (list(float)): Rotations of anchors in a feature grid.
custom_values (tuple(float)): Customized values of that anchor. For
example, in nuScenes the anchors have velocities.
@@ -32,7 +32,7 @@ class Anchor3DRangeGenerator(object):
def __init__(self,
ranges,
sizes=[[1.6, 3.9, 1.56]],
strides=[2],
scales=[1],
rotations=[0, 1.5707963],
custom_values=(),
reshape_out=True,
@@ -46,10 +46,10 @@ class Anchor3DRangeGenerator(object):
else:
assert len(ranges) == 1
assert mmcv.is_list_of(sizes, list)
assert isinstance(strides, list)
assert isinstance(scales, list)
self.sizes = sizes
self.strides = strides
self.scales = scales
self.ranges = ranges
self.rotations = rotations
self.custom_values = custom_values
@@ -60,7 +60,7 @@ class Anchor3DRangeGenerator(object):
def __repr__(self):
s = self.__class__.__name__ + '('
s += f'anchor_range={self.ranges},\n'
s += f'strides={self.strides},\n'
s += f'scales={self.scales},\n'
s += f'sizes={self.sizes},\n'
s += f'rotations={self.rotations},\n'
s += f'reshape_out={self.reshape_out},\n'
@@ -75,7 +75,7 @@ class Anchor3DRangeGenerator(object):
@property
def num_levels(self):
return len(self.strides)
return len(self.scales)
def grid_anchors(self, featmap_sizes, device='cuda'):
"""Generate grid anchors in multiple feature levels
@@ -96,13 +96,13 @@ class Anchor3DRangeGenerator(object):
multi_level_anchors = []
for i in range(self.num_levels):
anchors = self.single_level_grid_anchors(
featmap_sizes[i], self.strides[i], device=device)
featmap_sizes[i], self.scales[i], device=device)
if self.reshape_out:
anchors = anchors.reshape(-1, anchors.size(-1))
multi_level_anchors.append(anchors)
return multi_level_anchors
def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
def single_level_grid_anchors(self, featmap_size, scale, device='cuda'):
# We reimplement the anchor generator using torch in cuda
# torch: 0.6975 s for 1000 times
# numpy: 4.3345 s for 1000 times
@@ -111,7 +111,7 @@ class Anchor3DRangeGenerator(object):
return self.anchors_single_range(
featmap_size,
self.ranges[0],
stride,
scale,
self.sizes,
self.rotations,
device=device)
@@ -122,7 +122,7 @@ class Anchor3DRangeGenerator(object):
self.anchors_single_range(
featmap_size,
anchor_range,
stride,
scale,
anchor_size,
self.rotations,
device=device))
@@ -132,7 +132,7 @@ class Anchor3DRangeGenerator(object):
def anchors_single_range(self,
feature_size,
anchor_range,
stride=1,
scale=1,
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963],
device='cuda'):
@@ -154,7 +154,7 @@ class Anchor3DRangeGenerator(object):
anchor_range[1], anchor_range[4], feature_size[1], device=device)
x_centers = torch.linspace(
anchor_range[0], anchor_range[3], feature_size[2], device=device)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale
rotations = torch.tensor(rotations, device=device)
# torch.meshgrid's default indexing is 'ij', np's default is 'xy'
@@ -217,7 +217,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
def anchors_single_range(self,
feature_size,
anchor_range,
stride,
scale,
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.5707963],
device='cuda'):
@@ -248,7 +248,7 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator):
anchor_range[3],
feature_size[2] + 1,
device=device)
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * stride
sizes = torch.tensor(sizes, device=device).reshape(-1, 3) * scale
rotations = torch.tensor(rotations, device=device)
# shift the anchor center
......
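The `strides`→`scales` rename in this file matches how the value is now used: rather than striding the anchor grid, each per-level scale multiplies the base anchor sizes (the `sizes * scale` lines above). A standalone NumPy sketch of that relationship (the base size is the KITTI car anchor from the configs; the scale value is an assumed example):

import numpy as np

sizes = np.array([[1.6, 3.9, 1.56]])  # base (w, l, h) anchor size
scale = 2                             # one entry of `scales`, for illustration
print(sizes * scale)                  # [[3.2  7.8  3.12]]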
from mmdet.datasets.builder import DATASETS
from .builder import build_dataset
from .custom_3d import Custom3DDataset
from .dataset_wrappers import RepeatFactorDataset
from .indoor_base_dataset import IndoorBaseDataset
from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .nuscenes_dataset import NuScenesDataset
from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
IndoorPointSample, IndoorPointsColorJitter,
IndoorPointsColorNormalize, ObjectNoise,
ObjectRangeFilter, ObjectSample, PointShuffle,
PointsRangeFilter, RandomFlip3D)
IndoorPointsColorNormalize, LoadAnnotations3D,
LoadPointsFromFile, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
from .scannet_dataset import ScanNetDataset
from .sunrgbd_dataset import SUNRGBDDataset
@@ -21,8 +21,7 @@ __all__ = [
'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize',
'IndoorPointSample', 'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter',
'IndoorGlobalRotScale', 'IndoorFlipData', 'SUNRGBDDataset',
'ScanNetDataset', 'IndoorBaseDataset'
'LoadPointsFromFile', 'IndoorPointsColorNormalize', 'IndoorPointSample',
'LoadAnnotations3D', 'IndoorPointsColorJitter', 'IndoorGlobalRotScale',
'IndoorFlipData', 'SUNRGBDDataset', 'ScanNetDataset', 'Custom3DDataset'
]
import mmcv
import numpy as np
import torch.utils.data as torch_data
from torch.utils.data import Dataset
from mmdet.datasets import DATASETS
from .pipelines import Compose
@DATASETS.register_module()
class IndoorBaseDataset(torch_data.Dataset):
class Custom3DDataset(Dataset):
def __init__(self,
root_path,
data_root,
ann_file,
pipeline=None,
classes=None,
test_mode=False,
with_label=True):
modality=None,
test_mode=False):
super().__init__()
self.root_path = root_path
self.CLASSES = self.get_classes(classes)
self.data_root = data_root
self.ann_file = ann_file
self.test_mode = test_mode
self.label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
mmcv.check_file_exist(ann_file)
self.data_infos = mmcv.load(ann_file)
self.modality = modality
self.CLASSES = self.get_classes(classes)
self.data_infos = self.load_annotations(self.ann_file)
if pipeline is not None:
self.pipeline = Compose(pipeline)
self.with_label = with_label
def __len__(self):
return len(self.data_infos)
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
def load_annotations(self, ann_file):
return mmcv.load(ann_file)
def get_data_info(self, index):
info = self.data_infos[index]
@@ -38,38 +42,31 @@ class IndoorBaseDataset(torch_data.Dataset):
input_dict = dict(pts_filename=pts_filename)
if self.with_label:
annos = self._get_ann_info(index, sample_idx)
input_dict.update(annos)
if len(input_dict['gt_bboxes_3d']) == 0:
if not self.test_mode:
annos = self.get_ann_info(index, sample_idx)
input_dict['ann_info'] = annos
if len(annos['gt_bboxes_3d']) == 0:
return None
return input_dict
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def pre_pipeline(self, results):
results['bbox3d_fields'] = []
results['pts_mask_fields'] = []
results['pts_seg_fields'] = []
def prepare_train_data(self, index):
input_dict = self.get_data_info(index)
if input_dict is None:
return None
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def prepare_test_data(self, index):
input_dict = self.get_data_info(index)
self.pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
@@ -83,6 +80,9 @@ class IndoorBaseDataset(torch_data.Dataset):
string, take it as a file name. The file contains the name of
classes where each line contains one class name. If classes is
a tuple or list, override the CLASSES defined by the dataset.
Returns:
    list[str]: The list of class names.
"""
if classes is None:
return cls.CLASSES
@@ -115,8 +115,7 @@ class IndoorBaseDataset(torch_data.Dataset):
label_preds = pred_boxes['label_preds']
scores = pred_boxes['scores']
label_preds = label_preds.detach().cpu().numpy()
num_proposal = box3d_depth.shape[0]
for j in range(num_proposal):
for j in range(box3d_depth.shape[0]):
bbox_lidar = box3d_depth[j] # [7] in lidar
bbox_lidar_bottom = bbox_lidar.copy()
pred_list_i.append(
@@ -147,5 +146,33 @@ class IndoorBaseDataset(torch_data.Dataset):
from mmdet3d.core.evaluation import indoor_eval
assert len(metric) > 0
gt_annos = [info['annos'] for info in self.data_infos]
ret_dict = indoor_eval(gt_annos, results, metric, self.label2cat)
label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
ret_dict = indoor_eval(gt_annos, results, metric, label2cat)
return ret_dict
def __len__(self):
return len(self.data_infos)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In 3D datasets they are all the same, thus the flags are all zeros.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
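# Custom3DDataset centralizes the __getitem__/resampling/flag logic above, so a
# concrete dataset only supplies data infos and per-sample dicts. A minimal
# hypothetical subclass sketching that contract (field names are illustrative,
# not part of this commit):
class ToyDataset(Custom3DDataset):
    CLASSES = ('car', )

    def get_data_info(self, index):
        info = self.data_infos[index]
        input_dict = dict(pts_filename=info['pts_path'])
        if not self.test_mode:
            # annotations travel under 'ann_info', as in get_data_info above
            input_dict['ann_info'] = dict(
                gt_bboxes_3d=info['gt_bboxes_3d'],
                gt_labels_3d=info['gt_labels_3d'])
        return input_dict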
@@ -6,227 +6,76 @@ import tempfile
import mmcv
import numpy as np
import torch
import torch.utils.data as torch_data
from mmcv.utils import print_log
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
from .custom_3d import Custom3DDataset
from .utils import remove_dontcare
@DATASETS.register_module()
class KittiDataset(torch_data.Dataset):
class KittiDataset(Custom3DDataset):
CLASSES = ('car', 'pedestrian', 'cyclist')
def __init__(self,
root_path,
data_root,
ann_file,
split,
pts_prefix='velodyne',
pipeline=None,
training=False,
class_names=None,
classes=None,
modality=None,
with_label=True,
test_mode=False):
super().__init__()
self.root_path = root_path
self.root_split_path = os.path.join(
self.root_path, 'training' if split != 'test' else 'testing')
self.class_names = class_names if class_names else self.CLASSES
self.modality = modality
self.with_label = with_label
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
test_mode=test_mode)
self.root_split = os.path.join(self.data_root, split)
assert self.modality is not None
self.modality = modality
self.test_mode = test_mode
# TODO: remove the key `training` if it is not needed
self.training = training
self.pcd_limit_range = [0, -40, -3, 70.4, 40, 0.0]
self.pts_prefix = pts_prefix
self.ann_file = ann_file
self.kitti_infos = mmcv.load(ann_file)
def _get_pts_filename(self, idx):
pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:06d}.bin')
return pts_filename
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
difficulty = input_dict['difficulty']
input_dict['bbox_fields'] = []
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
# selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_bboxes_mask = np.array([n in self.class_names for n in gt_names],
dtype=np.bool_)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes'] = gt_bboxes.astype('float32')
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
input_dict['difficulty'] = difficulty
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['gt_bboxes_3d_mask'] = copy.deepcopy(gt_bboxes_mask)
input_dict['bbox_fields'].append('gt_bboxes')
if len(gt_bboxes) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
if gt_bboxes_3d is not None:
selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_bboxes'] = gt_bboxes
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In kitti's point clouds they are all the same, thus the flags are all zeros.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def get_lidar(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path,
'velodyne_depth_reduced', '%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_pure_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'depth_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_depth(self, idx):
depth_file = os.path.join(self.root_split_path, 'depth_completion',
'%06d.png' % idx)
assert os.path.exists(depth_file)
depth_img = mmcv.imread(depth_file, -1) / 256.0
return depth_img
def __len__(self):
return len(self.kitti_infos)
def get_sensor_data(self, index):
info = self.kitti_infos[index]
def get_data_info(self, index):
info = self.data_infos[index]
sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.root_split,
info['image']['image_path'])
# TODO: consider using torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
lidar2img = P2 @ rect @ Trv2c
if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']:
points = self.get_lidar(sample_idx)
elif self.modality['use_lidar_reduced']:
points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx)
else:
assert (self.modality['use_depth'] or self.modality['use_lidar'])
if not self.modality['use_lidar_intensity']:
points = points[:, :3]
pts_filename = self._get_pts_filename(sample_idx)
input_dict = dict(
sample_idx=sample_idx,
points=points,
lidar2img=lidar2img,
)
pts_filename=pts_filename,
img_filename=img_filename,
lidar2img=lidar2img)
# TODO: support image input
if self.modality['use_camera']:
image_info = info['image']
image_path = image_info['image_path']
image_path = os.path.join(self.root_path, image_path)
img = mmcv.imread(image_path)
input_dict.update(
dict(
img=img,
img_shape=img.shape,
ori_shape=img.shape,
filename=image_path))
else:
input_dict.update(dict(img_shape=info['image']['image_shape']))
if self.with_label:
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict.update(annos)
input_dict['ann_info'] = annos
return input_dict
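# The lidar2img matrix above chains the standard KITTI calibration: velodyne ->
# camera (Trv2c), rectification (rect), then projection (P2). A tiny NumPy
# sketch with identity matrices standing in for real calibration (assumed values):
import numpy as np
P2 = np.eye(4, dtype=np.float32)     # camera projection matrix
rect = np.eye(4, dtype=np.float32)   # R0_rect
Trv2c = np.eye(4, dtype=np.float32)  # Tr_velo_to_cam
lidar2img = P2 @ rect @ Trv2c
pt = np.array([10.0, 1.0, -0.5, 1.0], dtype=np.float32)  # homogeneous lidar point
print(lidar2img @ pt)                # image-plane coordinates (trivial here)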
def get_ann_info(self, index):
# Use index to get the annos, so that the eval hook can also use this API
info = self.kitti_infos[index]
info = self.data_infos[index]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
# P2 = info['calib']['P2'].astype(np.float32)
annos = info['annos']
# we need other objects to avoid collisions when sampling
@@ -238,21 +87,30 @@ class KittiDataset(torch_data.Dataset):
# print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
difficulty = annos['difficulty']
# this change gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c)
# only center format is allowed, so we need to convert the
# kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
# box_np_ops.change_box3d_center_(gt_bboxes, [0.5, 0.5, 0],
# [0.5, 0.5, 0.5])
gt_bboxes = annos['bbox']
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
gt_bboxes = gt_bboxes[selected].astype('float32')
gt_names = gt_names[selected]
gt_labels = []
for cat in gt_names:
if cat in self.CLASSES:
gt_labels.append(self.CLASSES.index(cat))
else:
gt_labels.append(-1)
gt_labels = np.array(gt_labels)
gt_labels_3d = copy.deepcopy(gt_labels)
# For simplicity, gt_bboxes means 2D gt bboxes
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_bboxes=annos['bbox'],
gt_names=gt_names,
difficulty=difficulty)
gt_labels_3d=gt_labels_3d,
gt_bboxes=gt_bboxes,
gt_labels=gt_labels)
return anns_results
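# get_ann_info now resolves names to integer labels itself (unknown names map
# to -1), which is what lets the refactored sampler below compare labels
# directly. The mapping in isolation (runnable sketch with made-up names):
CLASSES = ('car', 'pedestrian', 'cyclist')
gt_names = ['car', 'van', 'cyclist']  # 'van' is not a trained class here
gt_labels = [CLASSES.index(n) if n in CLASSES else -1 for n in gt_names]
print(gt_labels)  # [0, -1, 2]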
def drop_arrays_by_name(self, gt_names, used_classes):
@@ -276,11 +134,11 @@ class KittiDataset(torch_data.Dataset):
tmp_dir = None
if not isinstance(outputs[0], dict):
result_files = self.bbox2result_kitti2d(outputs, self.class_names,
result_files = self.bbox2result_kitti2d(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
else:
result_files = self.bbox2result_kitti(outputs, self.class_names,
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
return result_files, tmp_dir
@@ -310,13 +168,13 @@ class KittiDataset(torch_data.Dataset):
"""
result_files, tmp_dir = self.format_results(results, pklfile_prefix)
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.kitti_infos]
gt_annos = [info['annos'] for info in self.data_infos]
if metric == 'img_bbox':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.class_names, eval_types=['bbox'])
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.class_names)
self.CLASSES)
print_log('\n' + ap_result_str, logger=logger)
if tmp_dir is not None:
tmp_dir.cleanup()
@@ -327,7 +185,7 @@ class KittiDataset(torch_data.Dataset):
class_names,
pklfile_prefix=None,
submission_prefix=None):
assert len(net_outputs) == len(self.kitti_infos)
assert len(net_outputs) == len(self.data_infos)
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
@@ -336,7 +194,7 @@ class KittiDataset(torch_data.Dataset):
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.kitti_infos[idx]
info = self.data_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
@@ -440,7 +298,7 @@ class KittiDataset(torch_data.Dataset):
Returns:
    list[dict]: A list of dicts in the KITTI format.
"""
assert len(net_outputs) == len(self.kitti_infos)
assert len(net_outputs) == len(self.data_infos)
det_annos = []
print('\nConverting prediction to KITTI format')
......@@ -457,7 +315,7 @@ class KittiDataset(torch_data.Dataset):
location=[],
rotation_y=[],
score=[])
sample_idx = self.kitti_infos[i]['image']['image_idx']
sample_idx = self.data_infos[i]['image']['image_idx']
num_example = 0
for label in range(len(bboxes_per_sample)):
@@ -511,7 +369,7 @@ class KittiDataset(torch_data.Dataset):
mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos):
sample_idx = self.kitti_infos[i]['image']['image_idx']
sample_idx = self.data_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
......
import copy
import os.path as osp
import tempfile
import mmcv
import numpy as np
import pyquaternion
import torch.utils.data as torch_data
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
from .custom_3d import Custom3DDataset
@DATASETS.register_module()
class NuScenesDataset(torch_data.Dataset):
NumPointFeatures = 4 # xyz, timestamp. set 4 to use kitti pretrain
class NuScenesDataset(Custom3DDataset):
NameMapping = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
@@ -71,153 +68,60 @@ class NuScenesDataset(torch_data.Dataset):
def __init__(self,
ann_file,
pipeline=None,
root_path=None,
class_names=None,
data_root=None,
classes=None,
load_interval=1,
with_velocity=True,
test_mode=False,
modality=None,
eval_version='detection_cvpr_2019',
with_label=True,
max_sweeps=10,
filter_empty_gt=True):
super().__init__()
self.data_root = root_path
self.class_names = class_names if class_names else self.CLASSES
self.test_mode = test_mode
eval_version='detection_cvpr_2019'):
self.load_interval = load_interval
self.with_label = with_label
self.max_sweeps = max_sweeps
super().__init__(
data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
test_mode=test_mode)
self.ann_file = ann_file
data = mmcv.load(ann_file)
self.data_infos = list(
sorted(data['infos'], key=lambda e: e['timestamp']))
self.data_infos = self.data_infos[::load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
self.with_velocity = with_velocity
self.eval_version = eval_version
from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version)
if modality is None:
modality = dict(
if self.modality is None:
self.modality = dict(
use_camera=False,
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
)
self.modality = modality
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
# kitti map: nusc det name -> kitti eval name
self._kitti_name_mapping = {
'car': 'car',
'pedestrian': 'pedestrian',
} # we only eval these classes in kitti
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
In kitti's point clouds they are all the same, thus the flags are all zeros.
"""
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __len__(self):
return len(self.data_infos)
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
if len(input_dict['gt_bboxes_3d']) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_names = input_dict['gt_names']
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def load_annotations(self, ann_file):
data = mmcv.load(ann_file)
data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
data_infos = data_infos[::self.load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
return data_infos
def get_sensor_data(self, index):
def get_data_info(self, index):
info = self.data_infos[index]
points = np.fromfile(
info['lidar_path'], dtype=np.float32, count=-1).reshape([-1, 5])
# standard protocol modified from SECOND.Pytorch
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = info['timestamp'] / 1e6
for idx, sweep in enumerate(info['sweeps']):
if idx >= self.max_sweeps:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
input_dict = dict(
points=points,
sample_idx=info['token'],
pts_filename=info['lidar_path'],
sweeps=info['sweeps'],
timestamp=info['timestamp'] / 1e6,
)
if self.modality['use_camera']:
# TODO: support image
imgs = []
ori_shapes = []
image_paths = []
lidar2img_rts = []
for cam_type, cam_info in info['cams'].items():
image_path = cam_info['data_path']
# image_path = osp.join(self.data_root, image_path)
img = mmcv.imread(image_path)
imgs.append(img)
ori_shapes.append(img.shape)
image_paths.append(image_path)
image_paths.append(cam_info['data_path'])
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
@@ -233,16 +137,13 @@ class NuScenesDataset(torch_data.Dataset):
input_dict.update(
dict(
img=imgs,
img_shape=ori_shapes,
ori_shape=ori_shapes,
filename=image_paths,
img_filename=image_paths,
lidar2img=lidar2img_rts,
))
if self.with_label:
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict.update(annos)
input_dict['ann_info'] = annos
return input_dict
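# The deleted inline sweep loading (now the LoadPointsFromMultiSweeps step in
# the nuScenes config above) aligns each sweep with the key lidar frame by a
# rigid transform and stores the time lag in column 4. The core math, sketched
# with assumed values:
import numpy as np
R = np.eye(3, dtype=np.float32)    # sweep['sensor2lidar_rotation'] (identity here)
t = np.zeros(3, dtype=np.float32)  # sweep['sensor2lidar_translation']
pts = np.ones((4, 5), dtype=np.float32)
pts[:, :3] = pts[:, :3] @ R.T + t  # rotate into the key frame, then translate
pts[:, 4] = 0.05                   # ts - sweep_ts, an assumed time lag in seconds
print(pts[0])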
@@ -256,6 +157,13 @@ class NuScenesDataset(torch_data.Dataset):
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask]
gt_labels_3d = []
for cat in gt_names_3d:
if cat in self.CLASSES:
gt_labels_3d.append(self.CLASSES.index(cat))
else:
gt_labels_3d.append(-1)
gt_labels_3d = np.array(gt_labels_3d)
if self.with_velocity:
gt_velocity = info['gt_velocity'][mask]
@@ -263,18 +171,15 @@ class NuScenesDataset(torch_data.Dataset):
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
gt_bboxes_3d_mask = np.array(
[n in self.class_names for n in gt_names_3d], dtype=np.bool_)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_names_3d=gt_names_3d,
gt_bboxes_3d_mask=gt_bboxes_3d_mask,
gt_labels_3d=gt_labels_3d,
)
return anns_results
def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {}
mapped_class_names = self.class_names
mapped_class_names = self.CLASSES
print('Start to convert detection format...')
for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
@@ -358,7 +263,7 @@ class NuScenesDataset(torch_data.Dataset):
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = '{}_NuScenes'.format(result_name)
for name in self.class_names:
for name in self.CLASSES:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
......
@@ -3,10 +3,10 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
from .formating import DefaultFormatBundle, DefaultFormatBundle3D
from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
IndoorPointsColorJitter)
from .indoor_loading import (IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
IndoorPointsColorNormalize)
from .indoor_loading import (IndoorPointsColorNormalize, LoadAnnotations3D,
LoadPointsFromFile)
from .indoor_sample import IndoorPointSample
from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
from .loading import LoadMultiViewImageFromFiles
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
@@ -17,7 +17,6 @@ __all__ = [
'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
'MMDataBaseSampler', 'IndoorLoadPointsFromFile',
'IndoorPointsColorNormalize', 'IndoorLoadAnnotations3D',
'MMDataBaseSampler', 'IndoorPointsColorNormalize', 'LoadAnnotations3D',
'IndoorPointSample'
]
@@ -55,14 +55,23 @@ class BatchSampler:
@OBJECTSAMPLERS.register_module()
class DataBaseSampler(object):
def __init__(self, info_path, root_path, rate, prepare, object_rot_range,
sample_groups, use_road_plane):
def __init__(self,
info_path,
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None):
super().__init__()
self.root_path = root_path
self.data_root = data_root
self.info_path = info_path
self.rate = rate
self.prepare = prepare
self.object_rot_range = object_rot_range
self.classes = classes
self.cat2label = {name: i for i, name in enumerate(classes)}
self.label2cat = {i: name for i, name in enumerate(classes)}
with open(info_path, 'rb') as f:
db_infos = pickle.load(f)
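# With the new `classes` argument the sampler builds both directions of the
# name<->label mapping once at construction time; downstream code then compares
# integer labels. The two comprehensions in isolation (example class list):
classes = ['Pedestrian', 'Cyclist', 'Car']
cat2label = {name: i for i, name in enumerate(classes)}
label2cat = {i: name for i, name in enumerate(classes)}
print(cat2label['Car'], label2cat[2])  # 2 Car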
@@ -125,13 +134,16 @@ class DataBaseSampler(object):
db_infos[name] = filtered_infos
return db_infos
def sample_all(self, gt_bboxes, gt_names, img=None):
def sample_all(self, gt_bboxes, gt_labels, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
class_label = self.cat2label[class_name]
# sampled_num = int(max_sample_num -
# np.sum([n == class_name for n in gt_names]))
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
np.sum([n == class_label for n in gt_labels]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
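# The per-class quota now counts existing ground truths by label: paste enough
# database objects to top the class up to its sample_groups target, scaled by
# rate. Worked with example numbers (Car=15, label 2, rate=1.0 are assumptions):
import numpy as np
max_sample_num, class_label, rate = 15, 2, 1.0
gt_labels = np.array([2, 2, 0])  # two cars already in the scene
sampled_num = int(max_sample_num - np.sum([n == class_label for n in gt_labels]))
sampled_num = np.round(rate * sampled_num).astype(np.int64)
print(sampled_num)  # 13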
@@ -164,13 +176,13 @@ class DataBaseSampler(object):
sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)
# center = sampled_gt_bboxes[:, 0:3]
num_sampled = len(sampled)
# num_sampled = len(sampled)
s_points_list = []
count = 0
for info in sampled:
file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
self.data_root,
info['path']) if self.data_root else info['path']
s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4])
@@ -183,18 +195,16 @@ class DataBaseSampler(object):
count += 1
s_points_list.append(s_points)
# gt_names = np.array([s['name'] for s in sampled]),
# gt_labels = np.array([self.cat2label(s) for s in gt_names])
gt_labels = np.array([self.cat2label[s['name']] for s in sampled])
ret = {
'gt_names':
np.array([s['name'] for s in sampled]),
'difficulty':
np.array([s['difficulty'] for s in sampled]),
'gt_labels_3d':
gt_labels,
'gt_bboxes_3d':
sampled_gt_bboxes,
'points':
np.concatenate(s_points_list, axis=0),
'gt_masks':
np.ones((num_sampled, ), dtype=np.bool_),
'group_ids':
np.arange(gt_bboxes.shape[0],
gt_bboxes.shape[0] + len(sampled))
@@ -260,11 +270,12 @@ class MMDataBaseSampler(DataBaseSampler):
def __init__(self,
info_path,
root_path,
data_root,
rate,
prepare,
object_rot_range,
sample_groups,
classes=None,
check_2D_collision=False,
collision_thr=0,
collision_in_classes=False,
@@ -272,13 +283,12 @@ class MMDataBaseSampler(DataBaseSampler):
blending_type=None):
super(MMDataBaseSampler, self).__init__(
info_path=info_path,
root_path=root_path,
data_root=data_root,
rate=rate,
prepare=prepare,
object_rot_range=object_rot_range,
sample_groups=sample_groups,
use_road_plane=False,
)
classes=classes)
self.blending_type = blending_type
self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision
@@ -337,7 +347,6 @@ class MMDataBaseSampler(DataBaseSampler):
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
num_sampled = len(sampled)
s_points_list = []
count = 0
@@ -355,8 +364,8 @@ class MMDataBaseSampler(DataBaseSampler):
else:
info = sampled[idx]
pcd_file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
self.data_root,
info['path']) if self.data_root else info['path']
img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile(
@@ -389,7 +398,6 @@ class MMDataBaseSampler(DataBaseSampler):
gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0),
gt_masks=np.ones((num_sampled, ), dtype=np.bool_),
group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled)))
......
@@ -123,7 +123,7 @@ class IndoorGlobalRotScale(object):
Augment sunrgbd and scannet data with global rotation and scaling.
Args:
use_height (bool): Whether to use height.
shift_height (bool): Whether to use shifted height.
Default: True.
rot_range (list[float]): Range of rotation.
Default: None.
@@ -131,8 +131,8 @@
Default: None.
"""
def __init__(self, use_height=True, rot_range=None, scale_range=None):
self.use_height = use_height
def __init__(self, shift_height=True, rot_range=None, scale_range=None):
self.shift_height = shift_height
self.rot_range = np.pi * np.array(rot_range)
self.scale_range = scale_range
@@ -215,7 +215,7 @@ class IndoorGlobalRotScale(object):
points[:, :3] *= scale_ratio
gt_bboxes_3d[:, :3] *= scale_ratio
gt_bboxes_3d[:, 3:6] *= scale_ratio
if self.use_height:
if self.shift_height:
points[:, -1] *= scale_ratio
results['points'] = points
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={})'.format(self.use_height)
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(rot_range={})'.format(self.rot_range)
repr_str += '(scale_range={})'.format(self.scale_range)
return repr_str
@@ -2,6 +2,7 @@ import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module()
......@@ -32,35 +33,45 @@ class IndoorPointsColorNormalize(object):
@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
"""Indoor load points from file.
class LoadPointsFromFile(object):
"""Load Points From File.
Load points (e.g. SUN RGB-D, ScanNet, KITTI) from file.
Args:
use_height (bool): Whether to use height.
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2].
Default: [0, 1, 2]. For the KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to include the intensity dimension.
"""
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
self.use_height = use_height
def __init__(self, load_dim=6, use_dim=[0, 1, 2], shift_height=False):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
def __call__(self, results):
pts_filename = results['pts_filename']
def _load_points(self, pts_filename):
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.use_height:
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
......@@ -69,7 +80,7 @@ class IndoorLoadPointsFromFile(object):
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={})'.format(self.use_height)
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
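Two configuration sketches matching the docstring above, one per dataset family (the variable names are mine):
# Outdoor, KITTI-style binaries: 4 float32 values per point, keep intensity.
kitti_load = dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3])
# Indoor .npy points: 6 values per point, xyz only, append shifted height.
indoor_load = dict(
    type='LoadPointsFromFile', load_dim=6, use_dim=[0, 1, 2],
    shift_height=True)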
......@@ -77,28 +88,99 @@ class IndoorLoadPointsFromFile(object):
@PIPELINES.register_module()
class IndoorLoadAnnotations3D(object):
"""Indoor load annotations3D.
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
with_mask_3d (bool, optional): Whether to load 3D instance masks
for points. Defaults to False.
with_seg_3d (bool, optional): Whether to load 3D semantic masks
for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
"""
def __init__(self):
pass
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True):
super().__init__(with_bbox, with_label, with_mask, with_seg, poly2mask)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append(results['gt_bboxes_3d'])
return results
def __call__(self, results):
pts_instance_mask_path = results['pts_instance_mask_path']
pts_semantic_mask_path = results['pts_semantic_mask_path']
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append(results['pts_instance_mask'])
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append(results['pts_semantic_mask'])
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
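A pipeline-entry sketch for the new class, enabling the indoor mask loading on top of the 3D box/label defaults (flag names per the __init__ above):
load_ann = dict(
    type='LoadAnnotations3D',
    with_bbox_3d=True,
    with_label_3d=True,
    with_mask_3d=True,   # fills results['pts_instance_mask']
    with_seg_3d=True)    # fills results['pts_semantic_mask']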
import os.path as osp
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class LoadPointsFromFile(object):
def __init__(self, points_dim=4, with_reflectivity=True):
self.points_dim = points_dim
self.with_reflectivity = with_reflectivity
def __call__(self, results):
if results['pts_prefix'] is not None:
filename = osp.join(results['pts_prefix'],
results['img_info']['filename'])
else:
filename = results['img_info']['filename']
points = np.fromfile(
filename, dtype=np.float32).reshape(-1, self.points_dim)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(points_dim={})'.format(self.points_dim)
repr_str += '(with_reflectivity={})'.format(self.with_reflectivity)
return repr_str
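For comparison, a hedged before/after of an equivalent pipeline entry; the old keywords follow the removed __init__ above, the new ones the unified loader:
old_load = dict(type='LoadPointsFromFile', points_dim=4, with_reflectivity=True)
new_load = dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0, 1, 2, 3])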
@PIPELINES.register_module()
class LoadMultiViewImageFromFiles(object):
""" Load multi channel images from a list of separate channel files.
Expects results['filename'] to be a list of filenames
Expects results['img_filename'] to be a list of filenames
"""
def __init__(self, to_float32=False, color_type='unchanged'):
......@@ -42,13 +16,7 @@ class LoadMultiViewImageFromFiles(object):
self.color_type = color_type
def __call__(self, results):
if results['img_prefix'] is not None:
filename = [
osp.join(results['img_prefix'], fname)
for fname in results['img_info']['filename']
]
else:
filename = results['img_info']['filename']
filename = results['img_filename']
img = np.stack(
[mmcv.imread(name, self.color_type) for name in filename], axis=-1)
if self.to_float32:
......@@ -70,3 +38,38 @@ class LoadMultiViewImageFromFiles(object):
def __repr__(self):
return "{} (to_float32={}, color_type='{}')".format(
self.__class__.__name__, self.to_float32, self.color_type)
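A minimal sketch of the input this transform now expects after the switch to results['img_filename'] (the paths are placeholders):
results = dict(img_filename=['CAM_FRONT.png', 'CAM_BACK.png'])  # placeholders
results = LoadMultiViewImageFromFiles(to_float32=True)(results)
# results['img'] stacks one image per filename along the last axis.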
@PIPELINES.register_module()
class LoadPointsFromMultiSweeps(object):
def __init__(self, sweeps_num=10):
self.sweeps_num = sweeps_num
def __call__(self, results):
points = results['points']
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = results['timestamp']
for idx, sweep in enumerate(results['sweeps']):
if idx >= self.sweeps_num:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
results['points'] = points
return results
def __repr__(self):
return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
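The per-sweep alignment above, written out for a single sweep as a standalone sketch (the rotation and translation stand in for sweep['sensor2lidar_rotation'] and sweep['sensor2lidar_translation']):
import numpy as np
points_sweep = np.random.rand(100, 5).astype(np.float32)  # placeholder sweep
R = np.eye(3, dtype=np.float32)    # sensor2lidar_rotation (assumed identity)
t = np.zeros(3, dtype=np.float32)  # sensor2lidar_translation (assumed zero)
points_sweep[:, :3] = points_sweep[:, :3] @ R.T + t  # move into the key frame
points_sweep[:, 4] = 0.05  # ts - sweep_ts: time lag in seconds (illustrative)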
......@@ -109,8 +109,8 @@ class ObjectSample(object):
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
gt_labels_3d = input_dict['gt_labels_3d']
# change to float for blending operation
points = input_dict['points']
# rect = input_dict['rect']
......@@ -119,27 +119,23 @@ class ObjectSample(object):
if self.sample_2d:
img = input_dict['img'] # .astype(np.float32)
gt_bboxes_2d = input_dict['gt_bboxes']
gt_bboxes_mask = input_dict['gt_bboxes_mask']
gt_names = input_dict['gt_names']
# Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
else:
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, img=None)
gt_bboxes_3d, gt_labels_3d, img=None)
if sampled_dict is not None:
sampled_gt_names = sampled_dict['gt_names']
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
sampled_points = sampled_dict['points']
sampled_gt_masks = sampled_dict['gt_masks']
sampled_gt_labels = sampled_dict['gt_labels_3d']
gt_names_3d = np.concatenate([gt_names_3d, sampled_gt_names],
gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
]).astype(np.float32)
gt_bboxes_3d_mask = np.concatenate(
[gt_bboxes_3d_mask, sampled_gt_masks], axis=0)
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension
dim_inds = points.shape[-1]
......@@ -150,18 +146,14 @@ class ObjectSample(object):
sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
gt_bboxes_2d = np.concatenate(
[gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
gt_bboxes_mask = np.concatenate(
[gt_bboxes_mask, sampled_gt_masks], axis=0)
gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0)
input_dict['gt_names'] = gt_names
input_dict['gt_bboxes'] = gt_bboxes_2d
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['img'] = sampled_dict['img'] # .astype(np.uint8)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_names_3d'] = gt_names_3d
input_dict['gt_labels_3d'] = gt_labels_3d
input_dict['points'] = points
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict
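In short, the refactored __call__ now threads integer gt_labels_3d through sampling and drops the gt_bboxes_3d_mask bookkeeping; a hedged sketch of the keys it writes back:
# Assumed construction: ObjectSample is configured with a db_sampler dict.
out = ObjectSample(db_sampler)(input_dict)
assert {'gt_bboxes_3d', 'gt_labels_3d', 'points'} <= set(out.keys())
# With sample_2d enabled, 'gt_bboxes' and 'img' are updated as well.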
def __repr__(self):
......@@ -184,12 +176,11 @@ class ObjectNoise(object):
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# TODO: check this inplace function
noise_per_object_v3_(
gt_bboxes_3d,
points,
gt_bboxes_3d_mask,
rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range,
......@@ -322,20 +313,17 @@ class ObjectRangeFilter(object):
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
gt_labels_3d = input_dict['gt_labels_3d']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask]
gt_names_3d = gt_names_3d[mask]
# the mask should also be updated
gt_bboxes_3d_mask = gt_bboxes_3d_mask[mask]
gt_labels_3d = gt_labels_3d[mask]
# limit rad to [-pi, pi]
gt_bboxes_3d[:, 6] = self.limit_period(
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_names_3d'] = gt_names_3d
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
input_dict['gt_labels_3d'] = gt_labels_3d
return input_dict
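A quick numeric check of the yaw wrapping used above, assuming limit_period implements the standard SECOND-style formula:
import numpy as np
def limit_period(val, offset=0.5, period=2 * np.pi):
    # Assumed implementation of the helper used by ObjectRangeFilter.
    return val - np.floor(val / period + offset) * period
print(limit_period(np.array([3.5 * np.pi])))  # [-1.5708], i.e. -pi/2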
def __repr__(self):
......@@ -364,3 +352,30 @@ class PointsRangeFilter(object):
repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str
@PIPELINES.register_module()
class ObjectNameFilter(object):
"""Filter GT objects by their names
Args:
classes (list[str]): list of class names to be kept for training
"""
def __init__(self, classes):
self.classes = classes
self.labels = list(range(len(self.classes)))
def __call__(self, input_dict):
gt_labels_3d = input_dict['gt_labels_3d']
gt_bboxes_mask = np.array([n in self.labels for n in gt_labels_3d],
dtype=np.bool_)
input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask]
input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask]
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += f'(classes={self.classes})'
return repr_str
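A pipeline-entry sketch for the new filter (the class list is illustrative):
name_filter = dict(
    type='ObjectNameFilter', classes=['Pedestrian', 'Cyclist', 'Car'])
# Boxes whose gt_labels_3d entry is not in range(len(classes)) are dropped.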