Unverified Commit 583c4acc authored by Qing Lian, committed by GitHub

Update waymo dataset, evaluation metrics and related configs for 2.0 (#1663)



* update waymo dataset

* [Fix] Fix all unittests and refactor tests directory and add circle ci in `test-1.x` (#1654)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>

* add waymo dataset and evaluation metrics

* convert second configs for v2.0

* [Refactor] Unify ceph config (#1677)

* refactor ceph in config

* support metric load ann file from ceph

* add doc string and remove useless code

* [Fix]Fix create data (#1659)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut

* fix create data
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>

* [Fix] Fix seg mapping (#1681)

* [Doc]: fix markdown version (#1653)

* [CI] Add circle ci (#1647)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix seg mapping for scannet

* fix requirement

* fix all seg dataset

* resolve comments
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>

* [Fix] Fix SSN configs (#1686)

* modify doc string and evaluation file location

* add doc string

* remove path mapping in file_client_args
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>
Co-authored-by: Shilong Zhang <61961338+jshilong@users.noreply.github.com>
parent a8f3ec5f
@@ -82,7 +82,7 @@ file_client_args = dict(
     }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -101,7 +101,7 @@ train_pipeline = [
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(type='mmdet.Resize', scale_factor=1.0),
     dict(type='Pack3DDetInputs', keys=['img'])
 ]
......
@@ -18,7 +18,9 @@ db_sampler = dict(
         filter_by_difficulty=[-1],
         filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
     sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
-    classes=class_names)
+    classes=class_names,
+    points_loader=dict(
+        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
 train_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......
@@ -28,11 +28,7 @@ train_pipeline = [
         with_label_3d=False,
         with_mask_3d=False,
         with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
+    dict(type='PointSegClassMapping'),
     dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
@@ -77,35 +73,6 @@ test_pipeline = [
             dict(type='Collect3D', keys=['points'])
         ])
 ]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-# we need to load gt seg_mask!
-eval_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        use_color=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=False,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
-    dict(
-        type='DefaultFormatBundle3D',
-        with_label=False,
-        class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
-]
 data = dict(
     samples_per_gpu=16,
     workers_per_gpu=4,
@@ -135,7 +102,7 @@ data = dict(
         test_mode=True,
         ignore_index=len(class_names)))
-evaluation = dict(pipeline=eval_pipeline, interval=5)
+evaluation = dict(interval=5)
 # model settings
 model = dict(
......
@@ -28,11 +28,7 @@ train_pipeline = [
         with_label_3d=False,
         with_mask_3d=False,
         with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
+    dict(type='PointSegClassMapping'),
    dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
@@ -77,35 +73,6 @@ test_pipeline = [
             dict(type='Collect3D', keys=['points'])
         ])
 ]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-# we need to load gt seg_mask!
-eval_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        use_color=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=False,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
-    dict(
-        type='DefaultFormatBundle3D',
-        with_label=False,
-        class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
-]
 data = dict(
     samples_per_gpu=16,
     workers_per_gpu=4,
@@ -135,7 +102,7 @@ data = dict(
         test_mode=True,
         ignore_index=len(class_names)))
-evaluation = dict(pipeline=eval_pipeline, interval=5)
+evaluation = dict(interval=5)
 # model settings
 model = dict(
......
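Editor's note: the two segmentation-config hunks above can drop both the `valid_cat_ids`/`max_cat_id` arguments and the whole `eval_pipeline` because, in the 2.0 design, the dataset publishes its label mapping through `metainfo` and the evaluator loads ground truth itself. A minimal sketch of the evaluator side, assuming the 2.0-style `SegMetric` name (it does not appear in this diff):

```python
# Assumed 2.0-style evaluator config replacing
# `evaluation = dict(pipeline=eval_pipeline, interval=5)`; the metric
# reads gt semantic masks through the dataset, so no eval_pipeline is
# needed any more.
val_evaluator = dict(type='SegMetric')
test_evaluator = val_evaluator
```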
@@ -9,6 +9,7 @@ point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
 metainfo = dict(CLASSES=class_names)
+
 # PointPillars adopted a different sampling strategies among classes
 db_sampler = dict(
     data_root=data_root,
@@ -18,7 +19,9 @@ db_sampler = dict(
         filter_by_difficulty=[-1],
         filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
     classes=class_names,
-    sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15))
+    sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15),
+    points_loader=dict(
+        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
 # PointPillars uses different augmentation hyper parameters
 train_pipeline = [
......
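Editor's note: both KITTI hunks above give the GT-database sampler an explicit `points_loader`, telling `ObjectSample` how to read the cropped object point clouds (4-dim KITTI points: x, y, z, intensity). A sketch of the resulting sampler config after this change; `info_path`/`rate` follow the usual mmdet3d conventions and are placeholders here:

```python
# Sketch of the assembled sampler config; the explicit points_loader is
# what this commit adds.
db_sampler = dict(
    data_root='data/kitti/',
    info_path='data/kitti/kitti_dbinfos_train.pkl',  # placeholder path
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
    classes=['Pedestrian', 'Cyclist', 'Car'],
    sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15),
    points_loader=dict(
        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4,
        use_dim=4))
```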
@@ -8,8 +8,18 @@ _base_ = [
 dataset_type = 'WaymoDataset'
 data_root = 'data/waymo/kitti_format/'
 class_names = ['Car', 'Pedestrian', 'Cyclist']
+metainfo = dict(CLASSES=class_names)
 point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]
 input_modality = dict(use_lidar=True, use_camera=False)
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/waymo/':
+        's3://openmmlab/datasets/detection3d/waymo/',
+        'data/waymo/':
+        's3://openmmlab/datasets/detection3d/waymo/'
+    }))
+
 db_sampler = dict(
     data_root=data_root,
@@ -27,9 +37,14 @@ db_sampler = dict(
         use_dim=[0, 1, 2, 3, 4]))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=6,
+        use_dim=5,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
-    dict(type='ObjectSample', db_sampler=db_sampler),
+    # dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='RandomFlip3D',
         sync_2d=False,
@@ -42,12 +57,18 @@ train_pipeline = [
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=6,
+        use_dim=5,
+        file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -62,51 +83,60 @@ test_pipeline = [
             dict(type='RandomFlip3D'),
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
+            dict(type='Pack3DDetInputs', keys=['points']),
         ])
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'waymo_infos_train.pkl',
-            split='training',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False,
-            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
-            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='LiDAR',
-            # load one frame every five frames
-            load_interval=5)),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'waymo_infos_val.pkl',
-        split='training',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True,
-        box_type_3d='LiDAR'),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'waymo_infos_val.pkl',
-        split='training',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True,
-        box_type_3d='LiDAR'))
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            ann_file='waymo_infos_train.pkl',
+            data_prefix=dict(pts='training/velodyne'),
+            pipeline=train_pipeline,
+            modality=input_modality,
+            test_mode=False,
+            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+            box_type_3d='LiDAR',
+            # load one frame every five frames
+            load_interval=5)))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(pts='training/velodyne'),
+        ann_file='waymo_infos_val.pkl',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        test_mode=True,
+        metainfo=metainfo,
+        box_type_3d='LiDAR'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(pts='training/velodyne'),
+        ann_file='waymo_infos_val.pkl',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        test_mode=True,
+        metainfo=metainfo,
+        box_type_3d='LiDAR'))
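Editor's note: the hunk above is the core 1.x → 2.0 dataset-config migration: the single `data = dict(samples_per_gpu=..., workers_per_gpu=..., train/val/test=...)` becomes three standalone MMEngine dataloaders with explicit samplers, `ann_file` turns relative to `data_root`, and the point-cloud folder moves into `data_prefix`. A condensed sketch of the pattern with the values used above:

```python
# 1.x style (removed above):
# data = dict(samples_per_gpu=4, workers_per_gpu=4, train=dict(...), ...)
#
# 2.0 style (added above): one dataloader dict per split, in standard
# PyTorch DataLoader vocabulary plus an MMEngine sampler.
train_dataloader = dict(
    batch_size=4,                # was samples_per_gpu
    num_workers=4,               # was workers_per_gpu
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='WaymoDataset',
        data_root='data/waymo/kitti_format/',
        ann_file='waymo_infos_train.pkl',           # now relative to data_root
        data_prefix=dict(pts='training/velodyne')))  # replaces split/pts_prefix
```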
@@ -17,7 +17,7 @@ file_client_args = dict(
     }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -37,7 +37,7 @@ train_pipeline = [
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
     dict(type='Pack3DDetInputs', keys=['img'])
 ]
......
@@ -18,4 +18,4 @@ model = dict(
         style='pytorch'),
     pts_neck=dict(in_channels=[64, 160, 384]))
 # dataset settings
-data = dict(samples_per_gpu=1, workers_per_gpu=2)
+train_dataloader = dict(batch_size=1, num_workers=2)
@@ -2,6 +2,7 @@ _base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py'
 # model settings
 model = dict(
     type='MVXFasterRCNN',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     pts_backbone=dict(
         _delete_=True,
         type='NoStemRegNet',
......
@@ -29,8 +29,9 @@ train_pipeline = [
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
@@ -48,20 +49,14 @@ test_pipeline = [
             translation_std=[0, 0, 0]),
         dict(type='RandomFlip3D'),
         dict(
-            type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+            type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=4,
-    train=dict(pipeline=train_pipeline, classes=class_names),
-    val=dict(pipeline=test_pipeline, classes=class_names),
-    test=dict(pipeline=test_pipeline, classes=class_names))
+train_dataloader = dict(
+    batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # model settings
 model = dict(
@@ -141,35 +136,37 @@ model = dict(
             dir_limit_offset=0,
             bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
             loss_cls=dict(
-                type='FocalLoss',
+                type='mmdet.FocalLoss',
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
-            loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+            loss_bbox=dict(
+                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
             loss_dir=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+                type='mmdet.CrossEntropyLoss', use_sigmoid=False,
+                loss_weight=0.2)),
     # model training and testing settings
     train_cfg=dict(
         _delete_=True,
         pts=dict(
             assigner=[
                 dict(  # bicycle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # motorcycle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # pedestrian
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
@@ -183,35 +180,35 @@ model = dict(
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # car
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
                     min_pos_iou=0.45,
                     ignore_iof_thr=-1),
                 dict(  # emergency vehicle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # bus
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
                     min_pos_iou=0.45,
                     ignore_iof_thr=-1),
                 dict(  # other vehicle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # truck
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
......
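Editor's note: the `mmdet.FocalLoss` / `Max3DIoUAssigner` renames above reflect the 2.0 registry design: modules implemented in MMDetection are addressed through a `mmdet.` scope prefix, while 3D-specific replacements such as `Max3DIoUAssigner` live in mmdet3d itself. A minimal sketch of scope-prefixed building, assuming the MMEngine-style registry exposed by `mmdet3d.registry`:

```python
# The `mmdet.` prefix tells the registry to resolve FocalLoss in the
# MMDetection scope rather than in mmdet3d's own MODELS registry.
from mmdet3d.registry import MODELS

loss_cls = MODELS.build(
    dict(
        type='mmdet.FocalLoss',
        use_sigmoid=True,
        gamma=2.0,
        alpha=0.25,
        loss_weight=1.0))
```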
@@ -174,9 +174,7 @@ train_pipeline = [
         with_mask_3d=False,
         with_seg_3d=True),
     dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=tuple(range(len(class_names))),
-        max_cat_id=13),
+        type='PointSegClassMapping'),
     dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
......
@@ -2,7 +2,6 @@
 import mmcv
 import mmdet
-import mmseg

 from .version import __version__, short_version
@@ -18,8 +17,8 @@ def digit_version(version_str):
     return digit_version

-mmcv_minimum_version = '1.4.8'
-mmcv_maximum_version = '1.6.0'
+mmcv_minimum_version = '2.0.0rc0'
+mmcv_maximum_version = '2.0.0rc0'
 mmcv_version = digit_version(mmcv.__version__)
@@ -37,13 +36,4 @@ assert (mmdet_version >= digit_version(mmdet_minimum_version)
     f'Please install mmdet>={mmdet_minimum_version}, ' \
     f'<={mmdet_maximum_version}.'

-mmseg_minimum_version = '0.20.0'
-mmseg_maximum_version = '1.0.0'
-mmseg_version = digit_version(mmseg.__version__)
-assert (mmseg_version >= digit_version(mmseg_minimum_version)
-        and mmseg_version <= digit_version(mmseg_maximum_version)), \
-    f'MMSEG=={mmseg.__version__} is used but incompatible. ' \
-    f'Please install mmseg>={mmseg_minimum_version}, ' \
-    f'<={mmseg_maximum_version}.'
 __all__ = ['__version__', 'short_version']
 # Copyright (c) OpenMMLab. All rights reserved.
+import copy
 from collections import OrderedDict
 from typing import List, Tuple, Union
@@ -7,7 +8,8 @@ from nuscenes.utils.geometry_utils import view_points
 from pyquaternion import Quaternion
 from shapely.geometry import MultiPoint, box

-from mmdet3d.structures import points_cam2img
+from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
+from mmdet3d.structures.ops import box_np_ops

 nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                   'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
@@ -165,6 +167,149 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
     return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
"""Get the 2D annotation records for a given info.
This function is used to get 2D annotations when loading annotations from
a dataset class. The original version in the data converter will be
deprecated in the future.
Args:
info: Information of the given sample data.
occluded: Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare
mono3d (bool): Whether to get boxes with mono3d annotation.
Return:
list[dict]: List of 2D annotation record that belongs to the input
`sample_data_token`.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
repro_recs = []
# if no annotations in info (test dataset), then return
if annos is None:
return repro_recs
# Get all the annotation with the specified visibilties.
# filter the annotation bboxes by occluded attributes
ann_dicts = annos
mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
for k in ann_dicts.keys():
ann_dicts[k] = ann_dicts[k][mask]
# convert dict of list to list of dict
ann_recs = []
for i in range(len(ann_dicts['occluded'])):
ann_rec = {}
for k in ann_dicts.keys():
ann_rec[k] = ann_dicts[k][i]
ann_recs.append(ann_rec)
for ann_idx, ann_rec in enumerate(ann_recs):
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
# transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
loc = loc + dim * (dst - src)
loc_3d = np.copy(loc)
gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box_np_ops.center_to_corner_box3d(
gt_bbox_3d[:, :3],
gt_bbox_3d[:, 3:6],
gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
axis=1)
corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8)
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(
corner_coords,
imsize=(info['image']['image_shape'][1],
info['image']['image_shape'][0]))
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
max_y, sample_data_token,
info['image']['image_path'])
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
repro_rec['bbox_3d'] = np.concatenate(
[loc_3d, dim, rot],
axis=1).astype(np.float32).squeeze().tolist()
repro_rec['velocity'] = -1 # no velocity in KITTI
center_3d = np.array(loc).reshape([1, 3])
center_2d_with_depth = box_np_ops.points_cam2img(
center_3d, camera_intrinsic, with_depth=True)
center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
repro_rec['center_2d'] = center_2d_with_depth[:2]
repro_rec['depth'] = center_2d_with_depth[2]
# normalized center2D + depth
# samples with depth < 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
return repro_recs
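Editor's note: the `loc = loc + dim * (dst - src)` step above moves the KITTI-style bottom-center origin `[0.5, 1.0, 0.5]` to the geometric center `[0.5, 0.5, 0.5]`, i.e. it shifts the location by half the box height along y (which points down in camera coordinates). A small numeric check of that identity, with toy values:

```python
import numpy as np

loc = np.array([[2.0, 1.5, 10.0]])  # bottom center (camera coords, y down)
dim = np.array([[1.6, 1.5, 4.0]])   # box dimensions; the middle entry
                                    # (height) is all that matters here
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
center = loc + dim * (dst - src)
print(center)  # [[2.0, 0.75, 10.0]]: y moved up by half the height
```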
def convert_annos(info: dict, cam_idx: int) -> dict:
"""Convert front-cam anns to i-th camera (KITTI-style info)."""
rect = info['calib']['R0_rect'].astype(np.float32)
lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
annos = info['annos']
converted_annos = copy.deepcopy(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
# convert gt_bboxes_3d to cam coordinates
gt_bboxes_3d = gt_bboxes_3d.convert_to(
Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).tensor.numpy()
converted_annos['location'] = gt_bboxes_3d[:, :3]
converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
return converted_annos
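Editor's note: `convert_annos` re-expresses front-camera (cam0) annotations in the frame of camera `cam_idx` by going camera-0 → LiDAR → camera-i. A hedged usage sketch, assuming the 1.x-style list layout of the KITTI-format Waymo info file consumed elsewhere in this commit:

```python
import pickle

# Each info entry carries the per-camera calibration matrices
# (Tr_velo_to_cam .. Tr_velo_to_cam4) and an `annos` block, as read above.
with open('data/waymo/kitti_format/waymo_infos_train.pkl', 'rb') as f:
    infos = pickle.load(f)

info = infos[0]
annos_cam2 = convert_annos(info, cam_idx=2)  # CAM_FRONT_LEFT is image_2
# location/dimensions/rotation_y now live in camera-2 coordinates and can
# feed get_waymo_2d_boxes(info, cam_idx=2, occluded=[0], annos=annos_cam2).
```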
def post_process_coords(
    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
@@ -254,3 +399,67 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
     coco_rec['bbox_3d_isvalid'] = True

     return coco_rec
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str): The corresponding image file where the annotation
is present.
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
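Editor's note: a toy invocation of the record builder above (all values invented for illustration), showing the COCO-style fields it derives from the corner coordinates:

```python
# ann_rec fields mirror the KITTI-style annos dict used in this file.
ann_rec = {
    'name': 'Car',
    'num_points_in_gt': 120,
    'sample_annotation_token': '1000000.0',
    'sample_data_token': 1000000,
}
rec = generate_waymo_mono3d_record(
    ann_rec, x1=100.0, y1=120.0, x2=260.0, y2=200.0,
    sample_data_token=1000000, filename='training/image_0/0000000.png')
assert rec['bbox'] == [100.0, 120.0, 160.0, 80.0]  # x, y, width, height
assert rec['area'] == 80.0 * 160.0
assert rec['category_id'] == 0  # 'Car' in ('Car', 'Pedestrian', 'Cyclist')
```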
@@ -22,6 +22,7 @@ class KittiDataset(Det3DDataset):
             Defaults to None.
         modality (dict, optional): Modality to specify the sensor data used
             as input. Defaults to `dict(use_lidar=True)`.
+
         box_type_3d (str, optional): Type of 3D box of this dataset.
             Based on the `box_type_3d`, the dataset will encapsulate the box
             to its original format then converted them to `box_type_3d`.
@@ -49,7 +50,7 @@ class KittiDataset(Det3DDataset):
                  ann_file: str,
                  pipeline: List[Union[dict, Callable]] = [],
                  modality: Optional[dict] = dict(use_lidar=True),
-                 default_cam_key='CAM2',
+                 default_cam_key: str = 'CAM2',
                  box_type_3d: str = 'LiDAR',
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
......
@@ -193,9 +193,9 @@ class _S3DISSegDataset(Seg3DDataset):
                     [255, 0, 255], [100, 100, 255], [200, 200, 100],
                     [170, 120, 200], [255, 0, 0], [200, 100, 100],
                     [10, 200, 100], [200, 200, 200], [50, 50, 50]],
-        'valid_class_ids':
+        'seg_valid_class_ids':
         tuple(range(13)),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(14))  # possibly with 'stair' class
     }
......
@@ -50,7 +50,12 @@ class ScanNetDataset(Det3DDataset):
         'CLASSES':
         ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
          'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
-         'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin')
+         'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin'),
+        # the valid ids of segmentation annotations
+        'seg_valid_class_ids':
+        (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
+        'seg_all_class_ids':
+        tuple(range(1, 41))
     }

     def __init__(self,
@@ -67,6 +72,17 @@ class ScanNetDataset(Det3DDataset):
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
                  **kwargs):
+        # construct seg_label_mapping for semantic mask
+        seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
+        seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
+        neg_label = len(seg_valid_cat_ids)
+        seg_label_mapping = np.ones(
+            seg_max_cat_id + 1, dtype=np.int) * neg_label
+        for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
+            seg_label_mapping[cat_id] = cls_idx
+        self.seg_label_mapping = seg_label_mapping
+
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -78,6 +94,8 @@ class ScanNetDataset(Det3DDataset):
             filter_empty_gt=filter_empty_gt,
             test_mode=test_mode,
             **kwargs)
+        self.metainfo['seg_label_mapping'] = self.seg_label_mapping
+
         assert 'use_camera' in self.modality and \
             'use_lidar' in self.modality
         assert self.modality['use_camera'] or self.modality['use_lidar']
@@ -122,6 +140,9 @@ class ScanNetDataset(Det3DDataset):
                 info['pts_semantic_mask_path'])

         info = super().parse_data_info(info)
+        # only be used in `PointSegClassMapping` in pipeline
+        # to map original semantic class to valid category ids.
+        info['seg_label_mapping'] = self.seg_label_mapping
         return info

     def parse_ann_info(self, info: dict) -> dict:
@@ -207,9 +228,9 @@ class ScanNetSegDataset(Seg3DDataset):
             [227, 119, 194],
             [82, 84, 163],
         ],
-        'valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
-                            28, 33, 34, 36, 39),
-        'all_class_ids':
+        'seg_valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
+                                24, 28, 33, 34, 36, 39),
+        'seg_all_class_ids':
         tuple(range(41)),
     }
@@ -280,9 +301,9 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
             [227, 119, 194],
             [82, 84, 163],
         ],
-        'valid_class_ids':
+        'seg_valid_class_ids':
         (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(41))
     }
......
@@ -50,8 +50,8 @@ class Seg3DDataset(BaseDataset):
     METAINFO = {
         'CLASSES': None,  # names of all classes data used for the task
         'PALETTE': None,  # official color for visualization
-        'valid_class_ids': None,  # class_ids used for training
-        'all_class_ids': None,  # all possible class_ids in loaded seg mask
+        'seg_valid_class_ids': None,  # class_ids used for training
+        'seg_all_class_ids': None,  # all possible class_ids in loaded seg mask
     }

     def __init__(self,
@@ -84,12 +84,12 @@ class Seg3DDataset(BaseDataset):

         # Get label mapping for custom classes
         new_classes = metainfo.get('CLASSES', None)
-        self.label_mapping, self.label2cat, valid_class_ids = \
+        self.label_mapping, self.label2cat, seg_valid_class_ids = \
             self.get_label_mapping(new_classes)

         metainfo['label_mapping'] = self.label_mapping
         metainfo['label2cat'] = self.label2cat
-        metainfo['valid_class_ids'] = valid_class_ids
+        metainfo['seg_valid_class_ids'] = seg_valid_class_ids

         # generate palette if it is not defined based on
         # label mapping, otherwise directly use palette
@@ -99,6 +99,16 @@ class Seg3DDataset(BaseDataset):
             metainfo['PALETTE'] = updated_palette

+        # construct seg_label_mapping for semantic mask
+        seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
+        seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
+        neg_label = len(seg_valid_cat_ids)
+        seg_label_mapping = np.ones(
+            seg_max_cat_id + 1, dtype=np.int) * neg_label
+        for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
+            seg_label_mapping[cat_id] = cls_idx
+        self.seg_label_mapping = seg_label_mapping
+
         super().__init__(
             ann_file=ann_file,
             metainfo=metainfo,
@@ -108,6 +118,7 @@ class Seg3DDataset(BaseDataset):
             test_mode=test_mode,
             **kwargs)
+        self.metainfo['seg_label_mapping'] = self.seg_label_mapping
         self.scene_idxs = self.get_scene_idxs(scene_idxs)

         # set group flag for the sampler
@@ -137,7 +148,6 @@ class Seg3DDataset(BaseDataset):
         old_classes = self.METAINFO.get('CLASSSES', None)
         if (new_classes is not None and old_classes is not None
                 and list(new_classes) != list(old_classes)):
-            label_mapping = {}
             if not set(new_classes).issubset(old_classes):
                 raise ValueError(
                     f'new classes {new_classes} is not a '
@@ -145,12 +155,12 @@ class Seg3DDataset(BaseDataset):
             # obtain true id from valid_class_ids
             valid_class_ids = [
-                self.METAINFO['valid_class_ids'][old_classes.index(cls_name)]
-                for cls_name in new_classes
+                self.METAINFO['seg_valid_class_ids'][old_classes.index(
+                    cls_name)] for cls_name in new_classes
             ]
             label_mapping = {
                 cls_id: self.ignore_index
-                for cls_id in self.METAINFO['all_class_ids']
+                for cls_id in self.METAINFO['seg_all_class_ids']
             }
             label_mapping.update(
                 {cls_id: i
@@ -159,18 +169,19 @@ class Seg3DDataset(BaseDataset):
         else:
             label_mapping = {
                 cls_id: self.ignore_index
-                for cls_id in self.METAINFO['all_class_ids']
+                for cls_id in self.METAINFO['seg_all_class_ids']
             }
             label_mapping.update({
                 cls_id: i
-                for i, cls_id in enumerate(self.METAINFO['valid_class_ids'])
+                for i, cls_id in enumerate(
+                    self.METAINFO['seg_valid_class_ids'])
             })
             # map label to category name
             label2cat = {
                 i: cat_name
                 for i, cat_name in enumerate(self.METAINFO['CLASSES'])
             }
-            valid_class_ids = self.METAINFO['valid_class_ids']
+            valid_class_ids = self.METAINFO['seg_valid_class_ids']

         return label_mapping, label2cat, valid_class_ids
......
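Editor's note: the `seg_label_mapping` arrays built in the `__init__` hunks above are plain lookup tables: index = raw category id, value = train id, with everything outside `seg_valid_class_ids` collapsed into one ignore bucket. A minimal numpy sketch of the construction and lookup, with toy ids (ScanNet keeps 20 of 40 raw categories):

```python
import numpy as np

seg_valid_class_ids = (1, 2, 3)       # toy valid raw ids
seg_max_cat_id = 5                    # toy upper bound on raw ids
neg_label = len(seg_valid_class_ids)  # ignore bucket: 3

seg_label_mapping = np.ones(seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_class_ids):
    seg_label_mapping[cat_id] = cls_idx
# seg_label_mapping == [3, 0, 1, 2, 3, 3]

raw_mask = np.array([0, 1, 4, 2, 5, 3])
print(seg_label_mapping[raw_mask])  # [3 0 3 1 3 2]
```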
@@ -41,9 +41,9 @@ class SemanticKITTIDataset(Seg3DDataset):
                     'bus', 'person', 'bicyclist', 'motorcyclist', 'road',
                     'parking', 'sidewalk', 'other-ground', 'building', 'fence',
                     'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign'),
-        'valid_class_ids':
+        'seg_valid_class_ids':
         tuple(range(20)),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(20))
     }
......
@@ -277,9 +277,8 @@ class PointSegClassMapping(BaseTransform):

     Required Keys:

-    - lidar_points (dict)
-    - pts_semantic_mask (np.ndarray)
-    - lidar_path (str)
+    - seg_label_mapping (np.ndarray)
+    - pts_semantic_mask (np.ndarray)

     Added Keys:
@@ -287,11 +286,6 @@ class PointSegClassMapping(BaseTransform):
     Map valid classes as 0~len(valid_cat_ids)-1 and
     others as len(valid_cat_ids).
-
-    Args:
-        valid_cat_ids (tuple[int]): A tuple of valid category.
-        max_cat_id (int, optional): The max possible cat_id in input
-            segmentation mask. Defaults to 40.
     """

     def transform(self, results: dict) -> None:
@@ -309,10 +303,9 @@ class PointSegClassMapping(BaseTransform):
         assert 'pts_semantic_mask' in results
         pts_semantic_mask = results['pts_semantic_mask']

-        assert 'label_mapping' in results
-        label_mapping = results['label_mapping']
-        converted_pts_sem_mask = \
-            np.array([label_mapping[mask] for mask in pts_semantic_mask])
+        assert 'seg_label_mapping' in results
+        label_mapping = results['seg_label_mapping']
+        converted_pts_sem_mask = label_mapping[pts_semantic_mask]

         results['pts_semantic_mask'] = converted_pts_sem_mask
......
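Editor's note: the last hunk above also swaps a per-point Python loop for a single fancy-indexing lookup; on a ~100k-point ScanNet scan that is the difference between a Python-level loop and one vectorized numpy gather. A before/after sketch:

```python
import numpy as np

label_mapping = np.array([3, 0, 1, 2, 3, 3])
pts_semantic_mask = np.random.randint(0, 6, size=100_000)

# old: one Python-level lookup per point
converted_slow = np.array([label_mapping[m] for m in pts_semantic_mask])

# new: a single vectorized gather, as in the updated transform
converted_fast = label_mapping[pts_semantic_mask]

assert (converted_slow == converted_fast).all()
```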
 # Copyright (c) OpenMMLab. All rights reserved.
-import os
-import tempfile
-from os import path as osp
+import os.path as osp
+from typing import Callable, List, Optional, Union

-import mmcv
 import numpy as np
-import torch
-from mmcv.utils import print_log

 from mmdet3d.registry import DATASETS
-from mmdet3d.structures import Box3DMode, points_cam2img
+from mmdet3d.structures import CameraInstance3DBoxes
+from .det3d_dataset import Det3DDataset
 from .kitti_dataset import KittiDataset
@@ -26,524 +23,207 @@ class WaymoDataset(KittiDataset):

     Args:
         data_root (str): Path of dataset root.
         ann_file (str): Path of annotation file.
-        split (str): Split of input data.
-        pts_prefix (str, optional): Prefix of points files.
-            Defaults to 'velodyne'.
+        data_prefix (dict): Data prefix for the point cloud and camera
+            data. Defaults to dict(
+                pts='velodyne',
+                CAM_FRONT='image_0',
+                CAM_FRONT_RIGHT='image_1',
+                CAM_FRONT_LEFT='image_2',
+                CAM_SIDE_RIGHT='image_3',
+                CAM_SIDE_LEFT='image_4').
         pipeline (list[dict], optional): Pipeline used for data processing.
             Defaults to None.
-        classes (tuple[str], optional): Classes used in the dataset.
-            Defaults to None.
         modality (dict, optional): Modality to specify the sensor data used
-            as input. Defaults to None.
+            as input. Defaults to `dict(use_lidar=True)`.
+        default_cam_key (str, optional): Default camera key for lidar2img
+            association.
         box_type_3d (str, optional): Type of 3D box of this dataset.
             Based on the `box_type_3d`, the dataset will encapsulate the box
             to its original format then converted them to `box_type_3d`.
             Defaults to 'LiDAR' in this dataset. Available options includes
-            - 'LiDAR': box in LiDAR coordinates
-            - 'Depth': box in depth coordinates, usually for indoor dataset
-            - 'Camera': box in camera coordinates
+            - 'LiDAR': Box in LiDAR coordinates.
+            - 'Depth': Box in depth coordinates, usually for indoor dataset.
+            - 'Camera': Box in camera coordinates.
         filter_empty_gt (bool, optional): Whether to filter empty GT.
             Defaults to True.
         test_mode (bool, optional): Whether the dataset is in test mode.
             Defaults to False.
-        pcd_limit_range (list(float), optional): The range of point cloud used
-            to filter invalid predicted boxes.
-            Default: [-85, -85, -5, 85, 85, 5].
+        pcd_limit_range (list, optional): The range of point cloud used to
+            filter invalid predicted boxes.
+            Default: [-85, -85, -5, 85, 85, 5].
+        cam_sync_instances (bool, optional): Whether to use the
+            camera-synchronized labels available since Waymo v1.3.1.
+        load_interval (int, optional): Interval for loading frames.
+        task (str, optional): Task for 3D detection ('lidar' or 'mono3d').
+            lidar: take all the ground truth in the frame.
+            mono3d: take only the ground truth visible in the camera.
+        max_sweeps (int, optional): Maximum number of sweeps for each frame.
     """

-    CLASSES = ('Car', 'Cyclist', 'Pedestrian')
+    CLASSES = ('Car', 'Pedestrian', 'Cyclist')
     def __init__(self,
-                 data_root,
-                 ann_file,
-                 split,
-                 pts_prefix='velodyne',
-                 pipeline=None,
-                 classes=None,
-                 modality=None,
-                 box_type_3d='LiDAR',
-                 filter_empty_gt=True,
-                 test_mode=False,
+                 data_root: str,
+                 ann_file: str,
+                 data_prefix: dict = dict(
+                     pts='velodyne',
+                     CAM_FRONT='image_0',
+                     CAM_FRONT_RIGHT='image_1',
+                     CAM_FRONT_LEFT='image_2',
+                     CAM_SIDE_RIGHT='image_3',
+                     CAM_SIDE_LEFT='image_4'),
+                 pipeline: List[Union[dict, Callable]] = [],
+                 modality: Optional[dict] = dict(use_lidar=True),
+                 default_cam_key: str = 'CAM_FRONT',
+                 box_type_3d: str = 'LiDAR',
+                 filter_empty_gt: bool = True,
+                 test_mode: bool = False,
+                 pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
+                 cam_sync_instances=False,
                  load_interval=1,
-                 pcd_limit_range=[-85, -85, -5, 85, 85, 5],
+                 task='lidar',
+                 max_sweeps=0,
                  **kwargs):
+        self.load_interval = load_interval
+        # set loading mode for different task settings
+        self.cam_sync_instances = cam_sync_instances
+        # construct self.cat_ids for vision-only anns parsing
+        self.cat_ids = range(len(self.CLASSES))
+        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
+        self.max_sweeps = max_sweeps
+        self.task = task
+        # we do not provide file_client_args to custom_3d init
+        # because we want disk loading for info
+        # while ceph loading for KITTI2Waymo
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
-            split=split,
-            pts_prefix=pts_prefix,
             pipeline=pipeline,
-            classes=classes,
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode,
             pcd_limit_range=pcd_limit_range,
+            default_cam_key=default_cam_key,
+            data_prefix=data_prefix,
+            test_mode=test_mode,
             **kwargs)
# to load a subset, just set the load_interval in the dataset config def parse_ann_info(self, info: dict) -> dict:
self.data_infos = self.data_infos[::load_interval] """Get annotation info according to the given index.
if hasattr(self, 'flag'):
self.flag = self.flag[::load_interval]
def _get_pts_filename(self, idx):
pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:07d}.bin')
return pts_filename
def get_data_info(self, index):
"""Get data info according to the given index.
Args: Args:
index (int): Index of the sample data to get. info (dict): Data information of single data sample.
Returns: Returns:
dict: Standard input_dict consists of the dict: annotation information consists of the following keys:
data information.
- bboxes_3d (:obj:`LiDARInstance3DBoxes`):
- sample_idx (str): sample index 3D ground truth bboxes.
- pts_filename (str): filename of point clouds - bbox_labels_3d (np.ndarray): Labels of ground truths.
- img_prefix (str): prefix of image files - gt_bboxes (np.ndarray): 2D ground truth bboxes.
- img_info (dict): image info - gt_labels (np.ndarray): Labels of ground truths.
- lidar2img (list[np.ndarray], optional): transformations from - difficulty (int): Difficulty defined by KITTI.
lidar to different cameras 0, 1, 2 represent xxxxx respectively.
- ann_info (dict): annotation info
""" """
info = self.data_infos[index] ann_info = Det3DDataset.parse_ann_info(self, info)
sample_idx = info['image']['image_idx'] if ann_info is None:
img_filename = os.path.join(self.data_root, # empty instance
info['image']['image_path']) anns_results = {}
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
# TODO: consider use torch.Tensor only anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
rect = info['calib']['R0_rect'].astype(np.float32) return anns_results
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32) ann_info = self._remove_dontcare(ann_info)
lidar2img = P0 @ rect @ Trv2c # in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
# convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
pts_filename = self._get_pts_filename(sample_idx) if 'gt_bboxes' in ann_info:
input_dict = dict( gt_bboxes = ann_info['gt_bboxes']
sample_idx=sample_idx, gt_labels = ann_info['gt_labels']
pts_filename=pts_filename,
img_prefix=None,
img_info=dict(filename=img_filename),
lidar2img=lidar2img)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None,
data_format='waymo'):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
data_format (str, optional): Output data format.
Default: 'waymo'. Another supported choice is 'kitti'.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
for saving json files when jsonfile_prefix is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
else: else:
tmp_dir = None gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
assert ('waymo' in data_format or 'kitti' in data_format), \ if 'centers_2d' in ann_info:
f'invalid data_format {data_format}' centers_2d = ann_info['centers_2d']
depths = ann_info['depths']
if (not isinstance(outputs[0], dict)) or 'img_bbox' in outputs[0]:
raise TypeError('Not supported type for reformat results.')
elif 'pts_bbox' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = f'{submission_prefix}_{name}'
else:
submission_prefix_ = None
result_files_ = self.bbox2result_kitti(results_, self.CLASSES,
pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
else: else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES, centers_2d = np.zeros((0, 2), dtype=np.float32)
pklfile_prefix, depths = np.zeros((0), dtype=np.float32)
submission_prefix)
if 'waymo' in data_format:
from mmdet3d.evaluation.functional.waymo_utils import \
KITTI2Waymo # noqa
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if self.split == 'training':
waymo_tfrecords_dir = osp.join(waymo_root, 'validation')
prefix = '1'
elif self.split == 'testing':
waymo_tfrecords_dir = osp.join(waymo_root, 'testing')
prefix = '2'
else:
raise ValueError('Not supported split value.')
save_tmp_dir = tempfile.TemporaryDirectory()
waymo_results_save_dir = save_tmp_dir.name
waymo_results_final_path = f'{pklfile_prefix}.bin'
if 'pts_bbox' in result_files:
converter = KITTI2Waymo(result_files['pts_bbox'],
waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
else:
converter = KITTI2Waymo(result_files, waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
converter.convert()
save_tmp_dir.cleanup()
return result_files, tmp_dir
def evaluate(self,
results,
metric='waymo',
logger=None,
pklfile_prefix=None,
submission_prefix=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in KITTI protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'waymo'. Another supported metric is 'kitti'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str, optional): The prefix of pkl files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str: float]: results of each evaluation metric
"""
assert ('waymo' in metric or 'kitti' in metric), \
f'invalid metric {metric}'
if 'kitti' in metric:
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='kitti')
from mmdet3d.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.data_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bev', '3d']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float(
'{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
ap_result_str, ap_dict = kitti_eval(
gt_annos,
result_files,
self.CLASSES,
eval_types=['bev', '3d'])
print_log('\n' + ap_result_str, logger=logger)
if 'waymo' in metric:
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if pklfile_prefix is None:
eval_tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')
else:
eval_tmp_dir = None
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='waymo')
import subprocess
ret_bytes = subprocess.check_output(
'mmdet3d/core/evaluation/waymo_utils/' +
f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
f'{waymo_root}/gt.bin',
shell=True)
ret_texts = ret_bytes.decode('utf-8')
print_log(ret_texts)
# parse the text to get ap_dict
ap_dict = {
'Vehicle/L1 mAP': 0,
'Vehicle/L1 mAPH': 0,
'Vehicle/L2 mAP': 0,
'Vehicle/L2 mAPH': 0,
'Pedestrian/L1 mAP': 0,
'Pedestrian/L1 mAPH': 0,
'Pedestrian/L2 mAP': 0,
'Pedestrian/L2 mAPH': 0,
'Sign/L1 mAP': 0,
'Sign/L1 mAPH': 0,
'Sign/L2 mAP': 0,
'Sign/L2 mAPH': 0,
'Cyclist/L1 mAP': 0,
'Cyclist/L1 mAPH': 0,
'Cyclist/L2 mAP': 0,
'Cyclist/L2 mAPH': 0,
'Overall/L1 mAP': 0,
'Overall/L1 mAPH': 0,
'Overall/L2 mAP': 0,
'Overall/L2 mAPH': 0
}
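            # The parsing below assumes the evaluator prints one 'mAP' and
            # one 'mAPH' value per metric, in the key order above, e.g.
            # (illustrative line): 'OBJECT_TYPE_TYPE_VEHICLE_LEVEL_1:
            # [mAP 0.7] [mAPH 0.69]', so splitting on 'mAP ' / 'mAPH ' and
            # reading up to the closing ']' recovers one value per key.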
mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ')
for idx, key in enumerate(ap_dict.keys()):
split_idx = int(idx / 2) + 1
if idx % 2 == 0: # mAP
ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
else: # mAPH
ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
ap_dict['Overall/L1 mAP'] = \
(ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
ap_dict['Cyclist/L1 mAP']) / 3
ap_dict['Overall/L1 mAPH'] = \
(ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
ap_dict['Cyclist/L1 mAPH']) / 3
ap_dict['Overall/L2 mAP'] = \
(ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
ap_dict['Cyclist/L2 mAP']) / 3
ap_dict['Overall/L2 mAPH'] = \
(ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
ap_dict['Cyclist/L2 mAPH']) / 3
if eval_tmp_dir is not None:
eval_tmp_dir.cleanup()
if tmp_dir is not None:
tmp_dir.cleanup()
        if show or out_dir:
            self.show(results, out_dir, show=show, pipeline=pipeline)
        return ap_dict
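
    # A hedged end-to-end sketch (dataset construction arguments are
    # illustrative assumptions):
    #
    #   dataset = WaymoDataset(data_root='data/waymo/kitti_format', ...)
    #   ap_dict = dataset.evaluate(results, metric='waymo')
    #   print(ap_dict['Overall/L2 mAPH'])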

    def bbox2result_kitti(self,
                          net_outputs,
                          class_names,
                          pklfile_prefix=None,
                          submission_prefix=None):
        """Convert results to KITTI format for evaluation and test submission.

        Args:
            net_outputs (List[np.ndarray]): List of arrays storing the
                bboxes and scores.
            class_names (List[str]): A list of class names.
            pklfile_prefix (str): The prefix of the pkl file.
            submission_prefix (str): The prefix of the submission file.

        Returns:
            List[dict]: A list of dicts in the KITTI 3D format.
        """
assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
det_annos = []
print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.data_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
box_dict = self.convert_valid_bboxes(pred_dicts, info)
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds):
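                    # clip the projected 2D box to the image: image_shape is
                    # (h, w), so the reversed order gives (w, h) bounds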
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
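                    # observation angle: alpha = -arctan2(-y, x) + ry, with
                    # (x, y) the box center in the LiDAR frame and ry the
                    # camera-frame rotation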
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:07d}.txt'
with open(curr_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
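                    # each written line follows the KITTI label format:
                    # name trunc occ alpha x1 y1 x2 y2 h w l x y z ry score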
                    for i in range(len(bbox)):
                        print(
                            '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f} {:.4f} '
                            '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
                            format(anno['name'][i], anno['alpha'][i],
                                   bbox[i][0], bbox[i][1],
                                   bbox[i][2], bbox[i][3],
                                   dims[i][1], dims[i][2],
                                   dims[i][0], loc[i][0], loc[i][1],
                                   loc[i][2], anno['rotation_y'][i],
                                   anno['score'][i]),
                            file=f)
else:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
        if pklfile_prefix is not None:
            if not pklfile_prefix.endswith(('.pkl', '.pickle')):
                out = f'{pklfile_prefix}.pkl'
            else:
                out = pklfile_prefix
            mmcv.dump(det_annos, out)
            print(f'Result is saved to {out}.')
return det_annos

    def convert_valid_bboxes(self, box_dict, info):
        """Convert the predicted boxes into a valid format.

        Args:
            box_dict (dict): Bounding boxes to be converted.

                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
                - scores_3d (np.ndarray): Scores of predicted boxes.
                - labels_3d (np.ndarray): Class labels of predicted boxes.
            info (dict): Dataset information dictionary.

        Returns:
            dict: Valid boxes after conversion.

                - bbox (np.ndarray): 2D bounding boxes (in camera 0).
                - box3d_camera (np.ndarray): 3D boxes in camera coordinates.
                - box3d_lidar (np.ndarray): 3D boxes in LiDAR coordinates.
                - scores (np.ndarray): Scores of predicted boxes.
                - label_preds (np.ndarray): Class labels of predicted boxes.
                - sample_idx (np.ndarray): Sample index.
        """
# TODO: refactor this function
box_preds = box_dict['boxes_3d']
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
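        # wrap each predicted yaw angle into [-pi, pi)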
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if len(box_preds) == 0:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
                label_preds=np.zeros([0]),
sample_idx=sample_idx)
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32)
P0 = box_preds.tensor.new_tensor(P0)
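        # convert LiDAR boxes to the camera frame via rect @ Trv2c, then
        # project the eight corners of each box onto the image plane of
        # camera 0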
box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
box_corners = box_preds_camera.corners
box_corners_in_image = points_cam2img(box_corners, P0)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
        # Post-processing: keep only predictions whose box center lies
        # within the valid point-cloud range
limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
(box_preds.center < limit_range[3:]))
valid_inds = valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=box_preds[valid_inds].tensor.numpy(),
scores=scores[valid_inds].numpy(),
label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx,
)
        else:
            return dict(
                bbox=np.zeros([0, 4]),
                box3d_camera=np.zeros([0, 7]),
                box3d_lidar=np.zeros([0, 7]),
                scores=np.zeros([0]),
                label_preds=np.zeros([0]),
                sample_idx=sample_idx,
            )

        if self.task == 'mono3d':
            gt_bboxes_3d = CameraInstance3DBoxes(
                ann_info['gt_bboxes_3d'],
                box_dim=ann_info['gt_bboxes_3d'].shape[-1],
                origin=(0.5, 0.5, 0.5))
        else:
            lidar2cam = np.array(
                info['images'][self.default_cam_key]['lidar2cam'])
            gt_bboxes_3d = CameraInstance3DBoxes(
                ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d,
                                                     np.linalg.inv(lidar2cam))

        anns_results = dict(
            gt_bboxes_3d=gt_bboxes_3d,
            gt_labels_3d=ann_info['gt_labels_3d'],
            gt_bboxes=gt_bboxes,
            gt_labels=gt_labels,
            centers_2d=centers_2d,
            depths=depths)
        return anns_results

    def load_data_list(self) -> List[dict]:
        """Load the data list, sub-sampled by ``self.load_interval``."""
        data_list = super().load_data_list()
        data_list = data_list[::self.load_interval]
        return data_list
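
    # For example (an assumed setting, not a default): load_interval=5 keeps
    # every 5th frame, a common sub-sampling choice for faster Waymo
    # experiments.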

    def parse_data_info(self, info: dict) -> dict:
        """Parse raw data info.

        If the task is LiDAR or multi-view detection, use the super() method;
        if the task is mono3d, split the frame-wise info into per-image
        infos, one for each camera.
        """
if self.task != 'mono3d':
if self.cam_sync_instances:
# use the cam sync labels
info['instances'] = info['cam_sync_instances']
return super().parse_data_info(info)
else:
# in the mono3d, the instances is from cam sync.
data_list = []
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
osp.join(
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
if self.modality['use_camera']:
for cam_key, img_info in info['images'].items():
if 'img_path' in img_info:
cam_prefix = self.data_prefix.get(cam_key, '')
img_info['img_path'] = osp.join(
cam_prefix, img_info['img_path'])
for (cam_key, img_info) in info['images'].items():
camera_info = dict()
camera_info['images'] = dict()
camera_info['images'][cam_key] = img_info
if 'cam_instances' in info \
and cam_key in info['cam_instances']:
camera_info['instances'] = info['cam_instances'][cam_key]
else:
camera_info['instances'] = []
camera_info['ego2global'] = info['ego2global']
if 'image_sweeps' in info:
camera_info['image_sweeps'] = info['image_sweeps']
            # TODO: check whether the sample id needs to be modified.
            # TODO: check where this is used other than in evaluation.
camera_info['sample_id'] = info['sample_id']
if not self.test_mode:
# used in training
camera_info['ann_info'] = self.parse_ann_info(camera_info)
                if self.test_mode and self.load_eval_anns:
                    camera_info['eval_ann_info'] = \
                        self.parse_ann_info(camera_info)
data_list.append(camera_info)
return data_list
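
    # A small illustration of the mono3d split above (hypothetical frame):
    # a frame-wise info dict with five entries in `info['images']` yields
    # five img-wise dicts, each carrying a single `images[cam_key]` entry
    # and the matching `cam_instances[cam_key]` as its `instances`.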