Unverified commit 583c4acc, authored by Qing Lian and committed by GitHub

Update waymo dataset, evaluation metrics and related configs for 2.0 (#1663)



* update waymo dataset

* [Fix] Fix all unittests and refactor tests directory and add circle ci in `test-1.x` (#1654)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>

* add waymo dataset and evaluation metrics

* convert second configs for v2.0

* [Refactor] Unify ceph config (#1677)

* refactor ceph in config

* support metric load ann file from ceph

* add doc string and remove useless code

* [Fix]Fix create data (#1659)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut

* fix create data
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>

* [Fix] Fix seg mapping (#1681)

* [Doc]: fix markdown version (#1653)

* [CI] Add circle ci (#1647)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix seg mapping for scannet

* fix requirement

* fix all seg dataset

* resolve comments
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>

* [Fix] Fix SSN configs (#1686)

* modify doc string and evaluation file location

* add doc string

* remove path mapping in fileclient args
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>
Co-authored-by: Shilong Zhang <61961338+jshilong@users.noreply.github.com>
parent a8f3ec5f
@@ -82,7 +82,7 @@ file_client_args = dict(
     }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -101,7 +101,7 @@ train_pipeline = [
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(type='mmdet.Resize', scale_factor=1.0),
     dict(type='Pack3DDetInputs', keys=['img'])
 ]
...
@@ -18,7 +18,9 @@ db_sampler = dict(
         filter_by_difficulty=[-1],
         filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
     sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
-    classes=class_names)
+    classes=class_names,
+    points_loader=dict(
+        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
 train_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
...
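The explicit `points_loader` added above tells the GT-sampling augmentation how to decode the per-object point chunks stored in the KITTI GT database. A minimal sketch of what such a loader does for 4-dim KITTI points (x, y, z, intensity), assuming the usual flat float32 `.bin` layout; the path below is hypothetical:

import numpy as np

def load_kitti_points(bin_path, load_dim=4, use_dim=4):
    # KITTI-style .bin files are flat float32 records,
    # `load_dim` values per point.
    points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, load_dim)
    return points[:, :use_dim]  # keep x, y, z, intensity

# pts = load_kitti_points('kitti_gt_database/0_Car_0.bin')  # hypothetical path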
@@ -28,11 +28,7 @@ train_pipeline = [
         with_label_3d=False,
         with_mask_3d=False,
         with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
+    dict(type='PointSegClassMapping'),
     dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
@@ -77,35 +73,6 @@ test_pipeline = [
             dict(type='Collect3D', keys=['points'])
         ])
 ]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-# we need to load gt seg_mask!
-eval_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        use_color=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=False,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
-    dict(
-        type='DefaultFormatBundle3D',
-        with_label=False,
-        class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
-]
 data = dict(
     samples_per_gpu=16,
     workers_per_gpu=4,
@@ -135,7 +102,7 @@ data = dict(
         test_mode=True,
         ignore_index=len(class_names)))
-evaluation = dict(pipeline=eval_pipeline, interval=5)
+evaluation = dict(interval=5)
 # model settings
 model = dict(
...
@@ -28,11 +28,7 @@ train_pipeline = [
         with_label_3d=False,
         with_mask_3d=False,
         with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
+    dict(type='PointSegClassMapping'),
     dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
@@ -77,35 +73,6 @@ test_pipeline = [
             dict(type='Collect3D', keys=['points'])
         ])
 ]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-# we need to load gt seg_mask!
-eval_pipeline = [
-    dict(
-        type='LoadPointsFromFile',
-        coord_type='DEPTH',
-        shift_height=False,
-        use_color=False,
-        load_dim=6,
-        use_dim=[0, 1, 2]),
-    dict(
-        type='LoadAnnotations3D',
-        with_bbox_3d=False,
-        with_label_3d=False,
-        with_mask_3d=False,
-        with_seg_3d=True),
-    dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
-                       33, 34, 36, 39),
-        max_cat_id=40),
-    dict(
-        type='DefaultFormatBundle3D',
-        with_label=False,
-        class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
-]
 data = dict(
     samples_per_gpu=16,
     workers_per_gpu=4,
@@ -135,7 +102,7 @@ data = dict(
         test_mode=True,
         ignore_index=len(class_names)))
-evaluation = dict(pipeline=eval_pipeline, interval=5)
+evaluation = dict(interval=5)
 # model settings
 model = dict(
...
@@ -9,6 +9,7 @@ point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
 metainfo = dict(CLASSES=class_names)
+
 # PointPillars adopted a different sampling strategies among classes
 db_sampler = dict(
     data_root=data_root,
@@ -18,7 +19,9 @@ db_sampler = dict(
         filter_by_difficulty=[-1],
         filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
     classes=class_names,
-    sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15))
+    sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15),
+    points_loader=dict(
+        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
 # PointPillars uses different augmentation hyper parameters
 train_pipeline = [
...
@@ -8,8 +8,18 @@ _base_ = [
 dataset_type = 'WaymoDataset'
 data_root = 'data/waymo/kitti_format/'
 class_names = ['Car', 'Pedestrian', 'Cyclist']
+metainfo = dict(CLASSES=class_names)
 point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]
 input_modality = dict(use_lidar=True, use_camera=False)
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/waymo/':
+        's3://openmmlab/datasets/detection3d/waymo/',
+        'data/waymo/':
+        's3://openmmlab/datasets/detection3d/waymo/'
+    }))
 db_sampler = dict(
     data_root=data_root,
@@ -27,9 +37,14 @@ db_sampler = dict(
         use_dim=[0, 1, 2, 3, 4]))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=6,
+        use_dim=5,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
-    dict(type='ObjectSample', db_sampler=db_sampler),
+    # dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='RandomFlip3D',
         sync_2d=False,
@@ -42,12 +57,18 @@ train_pipeline = [
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='LIDAR',
+        load_dim=6,
+        use_dim=5,
+        file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug3D',
         img_scale=(1333, 800),
@@ -62,51 +83,60 @@ test_pipeline = [
             dict(type='RandomFlip3D'),
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
+            dict(type='Pack3DDetInputs', keys=['points']),
         ])
 ]
-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    train=dict(
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='RepeatDataset',
         times=2,
         dataset=dict(
            type=dataset_type,
            data_root=data_root,
-           ann_file=data_root + 'waymo_infos_train.pkl',
-           split='training',
+           ann_file='waymo_infos_train.pkl',
+           data_prefix=dict(pts='training/velodyne'),
            pipeline=train_pipeline,
            modality=input_modality,
-           classes=class_names,
           test_mode=False,
           # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
          # and box_type_3d='Depth' in sunrgbd and scannet dataset.
           box_type_3d='LiDAR',
           # load one frame every five frames
-           load_interval=5)),
-    val=dict(
+           load_interval=5)))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-       ann_file=data_root + 'waymo_infos_val.pkl',
-       split='training',
+       data_prefix=dict(pts='training/velodyne'),
+       ann_file='waymo_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
-       classes=class_names,
        test_mode=True,
-       box_type_3d='LiDAR'),
-    test=dict(
+       metainfo=metainfo,
+       box_type_3d='LiDAR'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
        type=dataset_type,
        data_root=data_root,
-       ann_file=data_root + 'waymo_infos_val.pkl',
-       split='training',
+       data_prefix=dict(pts='training/velodyne'),
+       ann_file='waymo_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
-       classes=class_names,
        test_mode=True,
+       metainfo=metainfo,
        box_type_3d='LiDAR'))
...
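For readers migrating their own configs, the correspondence between the removed 1.x keys and the new MMEngine-style keys in the hunk above is roughly (read straight off this diff):

# 1.x dataset settings                          2.0 equivalents (per this diff)
# data = dict(samples_per_gpu=4, ...)        -> train_dataloader = dict(batch_size=4, ...)
# workers_per_gpu=4                          -> num_workers=4
# data['train'] / data['val'] / data['test'] -> train/val/test_dataloader['dataset']
# ann_file=data_root + 'waymo_infos_val.pkl' -> ann_file='waymo_infos_val.pkl' (relative to data_root)
# split='training'                           -> data_prefix=dict(pts='training/velodyne')
# classes=class_names                        -> metainfo=dict(CLASSES=class_names)
# DefaultFormatBundle3D + Collect3D          -> Pack3DDetInputs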
@@ -17,7 +17,7 @@ file_client_args = dict(
     }))
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -37,7 +37,7 @@ train_pipeline = [
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='LoadImageFromFileMono3D'),
     dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
     dict(type='Pack3DDetInputs', keys=['img'])
 ]
...
@@ -18,4 +18,4 @@ model = dict(
         style='pytorch'),
     pts_neck=dict(in_channels=[64, 160, 384]))
 # dataset settings
-data = dict(samples_per_gpu=1, workers_per_gpu=2)
+train_dataloader = dict(batch_size=1, num_workers=2)
...
@@ -2,6 +2,7 @@ _base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py'
 # model settings
 model = dict(
     type='MVXFasterRCNN',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     pts_backbone=dict(
         _delete_=True,
         type='NoStemRegNet',
...
@@ -29,8 +29,9 @@ train_pipeline = [
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
@@ -48,20 +49,14 @@ test_pipeline = [
                 translation_std=[0, 0, 0]),
             dict(type='RandomFlip3D'),
             dict(
-                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points'])
-        ])
+                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=4,
-    train=dict(pipeline=train_pipeline, classes=class_names),
-    val=dict(pipeline=test_pipeline, classes=class_names),
-    test=dict(pipeline=test_pipeline, classes=class_names))
+train_dataloader = dict(
+    batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # model settings
 model = dict(
@@ -141,35 +136,37 @@ model = dict(
             dir_limit_offset=0,
             bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
             loss_cls=dict(
-                type='FocalLoss',
+                type='mmdet.FocalLoss',
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
-            loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+            loss_bbox=dict(
+                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
             loss_dir=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+                type='mmdet.CrossEntropyLoss', use_sigmoid=False,
+                loss_weight=0.2)),
     # model training and testing settings
     train_cfg=dict(
         _delete_=True,
         pts=dict(
             assigner=[
                 dict(  # bicycle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # motorcycle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # pedestrian
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
@@ -183,35 +180,35 @@ model = dict(
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # car
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
                     min_pos_iou=0.45,
                     ignore_iof_thr=-1),
                 dict(  # emergency vehicle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # bus
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
                     min_pos_iou=0.45,
                     ignore_iof_thr=-1),
                 dict(  # other vehicle
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.55,
                     neg_iou_thr=0.4,
                     min_pos_iou=0.4,
                     ignore_iof_thr=-1),
                 dict(  # truck
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
...
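The new `mmdet.` prefixes reflect MMEngine's scoped registries: components that live in mmdet must now be addressed across repositories by scope instead of being re-registered locally. A minimal sketch of how such a scoped type is resolved, assuming the `mmdet3d.registry` module of the 2.0 series:

from mmdet3d.registry import MODELS

# 'mmdet.FocalLoss' asks mmdet3d's MODELS registry to look the class up
# in the parent 'mmdet' scope rather than in mmdet3d's own scope.
loss_cls = MODELS.build(
    dict(type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25,
         loss_weight=1.0))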
@@ -174,9 +174,7 @@ train_pipeline = [
         with_mask_3d=False,
         with_seg_3d=True),
     dict(
-        type='PointSegClassMapping',
-        valid_cat_ids=tuple(range(len(class_names))),
-        max_cat_id=13),
+        type='PointSegClassMapping'),
     dict(
         type='IndoorPatchPointSample',
         num_points=num_points,
...
@@ -2,7 +2,6 @@
 import mmcv
 import mmdet
-import mmseg
 from .version import __version__, short_version
@@ -18,8 +17,8 @@ def digit_version(version_str):
     return digit_version
-mmcv_minimum_version = '1.4.8'
-mmcv_maximum_version = '1.6.0'
+mmcv_minimum_version = '2.0.0rc0'
+mmcv_maximum_version = '2.0.0rc0'
 mmcv_version = digit_version(mmcv.__version__)
@@ -37,13 +36,4 @@ assert (mmdet_version >= digit_version(mmdet_minimum_version)
     f'Please install mmdet>={mmdet_minimum_version}, ' \
     f'<={mmdet_maximum_version}.'
-mmseg_minimum_version = '0.20.0'
-mmseg_maximum_version = '1.0.0'
-mmseg_version = digit_version(mmseg.__version__)
-assert (mmseg_version >= digit_version(mmseg_minimum_version)
-        and mmseg_version <= digit_version(mmseg_maximum_version)), \
-    f'MMSEG=={mmseg.__version__} is used but incompatible. ' \
-    f'Please install mmseg>={mmseg_minimum_version}, ' \
-    f'<={mmseg_maximum_version}.'
 __all__ = ['__version__', 'short_version']
...
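Only the tail of the version helper (`return digit_version`) is visible in the hunk above. Assuming the common OpenMMLab implementation of `digit_version`, version strings compare as plain lists, with release candidates sorting below the final release, which is what makes the `2.0.0rc0` pin work:

def digit_version(version_str):
    # Assumed implementation, matching the usual OpenMMLab helper:
    # '0rc0' becomes the pair (-1, 0) so that rc releases sort first.
    digit_ver = []
    for x in version_str.split('.'):
        if x.isdigit():
            digit_ver.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            digit_ver.append(int(patch_version[0]) - 1)
            digit_ver.append(int(patch_version[1]))
    return digit_ver

assert digit_version('2.0.0rc0') == [2, 0, -1, 0]
assert digit_version('2.0.0rc0') < digit_version('2.0.0')  # rc below release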
 # Copyright (c) OpenMMLab. All rights reserved.
+import copy
 from collections import OrderedDict
 from typing import List, Tuple, Union
@@ -7,7 +8,8 @@ from nuscenes.utils.geometry_utils import view_points
 from pyquaternion import Quaternion
 from shapely.geometry import MultiPoint, box
-from mmdet3d.structures import points_cam2img
+from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
+from mmdet3d.structures.ops import box_np_ops
 nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                   'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
@@ -165,6 +167,149 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
     return repro_recs
+def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
+    """Get the 2D annotation records for a given info.
+
+    This function is used to get 2D annotations when loading annotations from
+    a dataset class. The original version in the data converter will be
+    deprecated in the future.
+
+    Args:
+        info: Information of the given sample data.
+        occluded: Integer (0, 1, 2, 3) indicating occlusion state:
+            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
+            3 = unknown, -1 = DontCare
+        mono3d (bool): Whether to get boxes with mono3d annotation.
+
+    Return:
+        list[dict]: List of 2D annotation record that belongs to the input
+        `sample_data_token`.
+    """
+    # Get calibration information
+    camera_intrinsic = info['calib'][f'P{cam_idx}']
+
+    repro_recs = []
+    # if no annotations in info (test dataset), then return
+    if annos is None:
+        return repro_recs
+
+    # Get all the annotation with the specified visibilties.
+    # filter the annotation bboxes by occluded attributes
+    ann_dicts = annos
+    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
+    for k in ann_dicts.keys():
+        ann_dicts[k] = ann_dicts[k][mask]
+
+    # convert dict of list to list of dict
+    ann_recs = []
+    for i in range(len(ann_dicts['occluded'])):
+        ann_rec = {}
+        for k in ann_dicts.keys():
+            ann_rec[k] = ann_dicts[k][i]
+        ann_recs.append(ann_rec)
+
+    for ann_idx, ann_rec in enumerate(ann_recs):
+        # Augment sample_annotation with token information.
+        ann_rec['sample_annotation_token'] = \
+            f"{info['image']['image_idx']}.{ann_idx}"
+        ann_rec['sample_data_token'] = info['image']['image_idx']
+        sample_data_token = info['image']['image_idx']
+
+        loc = ann_rec['location'][np.newaxis, :]
+        dim = ann_rec['dimensions'][np.newaxis, :]
+        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
+        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
+        dst = np.array([0.5, 0.5, 0.5])
+        src = np.array([0.5, 1.0, 0.5])
+        loc = loc + dim * (dst - src)
+        loc_3d = np.copy(loc)
+        gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
+
+        # Filter out the corners that are not in front of the calibrated
+        # sensor.
+        corners_3d = box_np_ops.center_to_corner_box3d(
+            gt_bbox_3d[:, :3],
+            gt_bbox_3d[:, 3:6],
+            gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
+            axis=1)
+        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
+        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
+        corners_3d = corners_3d[:, in_front]
+
+        # Project 3d box to 2d.
+        corner_coords = view_points(corners_3d, camera_intrinsic,
+                                    True).T[:, :2].tolist()
+
+        # Keep only corners that fall within the image.
+        final_coords = post_process_coords(
+            corner_coords,
+            imsize=(info['image']['image_shape'][1],
+                    info['image']['image_shape'][0]))
+
+        # Skip if the convex hull of the re-projected corners
+        # does not intersect the image canvas.
+        if final_coords is None:
+            continue
+        else:
+            min_x, min_y, max_x, max_y = final_coords
+
+        # Generate dictionary record to be included in the .json file.
+        repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
+                                                 max_y, sample_data_token,
+                                                 info['image']['image_path'])
+
+        # If mono3d=True, add 3D annotations in camera coordinates
+        if mono3d and (repro_rec is not None):
+            repro_rec['bbox_3d'] = np.concatenate(
+                [loc_3d, dim, rot],
+                axis=1).astype(np.float32).squeeze().tolist()
+            repro_rec['velocity'] = -1  # no velocity in KITTI
+
+            center_3d = np.array(loc).reshape([1, 3])
+            center_2d_with_depth = box_np_ops.points_cam2img(
+                center_3d, camera_intrinsic, with_depth=True)
+            center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
+
+            repro_rec['center_2d'] = center_2d_with_depth[:2]
+            repro_rec['depth'] = center_2d_with_depth[2]
+            # normalized center2D + depth
+            # samples with depth < 0 will be removed
+            if repro_rec['depth'] <= 0:
+                continue
+            repro_rec['attribute_name'] = -1  # no attribute in KITTI
+            repro_rec['attribute_id'] = -1
+
+        repro_recs.append(repro_rec)
+
+    return repro_recs
+
+
+def convert_annos(info: dict, cam_idx: int) -> dict:
+    """Convert front-cam anns to i-th camera (KITTI-style info)."""
+    rect = info['calib']['R0_rect'].astype(np.float32)
+    lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
+    lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
+    annos = info['annos']
+    converted_annos = copy.deepcopy(annos)
+    loc = annos['location']
+    dims = annos['dimensions']
+    rots = annos['rotation_y']
+    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
+                                  axis=1).astype(np.float32)
+    # convert gt_bboxes_3d to velodyne coordinates
+    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
+        Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
+    # convert gt_bboxes_3d to cam coordinates
+    gt_bboxes_3d = gt_bboxes_3d.convert_to(
+        Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).tensor.numpy()
+    converted_annos['location'] = gt_bboxes_3d[:, :3]
+    converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
+    converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
+    return converted_annos
 def post_process_coords(
         corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
 ) -> Union[Tuple[float, float, float, float], None]:
@@ -254,3 +399,67 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
     coco_rec['bbox_3d_isvalid'] = True
     return coco_rec
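`post_process_coords` (context above) clips the reprojected corners to the image canvas. A sketch of what it does, written as a self-contained function using the `MultiPoint`/`box` imports at the top of this file; this assumes, without confirming, that it matches mmdet3d's actual implementation:

import numpy as np
from shapely.geometry import MultiPoint, box

def clip_corners_to_canvas(corner_coords, imsize=(1600, 900)):
    # Intersect the convex hull of the projected corners with the image
    # canvas; return the axis-aligned bounds, or None if they do not overlap.
    hull = MultiPoint(corner_coords).convex_hull
    canvas = box(0, 0, imsize[0], imsize[1])
    if not hull.intersects(canvas):
        return None
    coords = np.array(hull.intersection(canvas).exterior.coords)
    return (coords[:, 0].min(), coords[:, 1].min(),
            coords[:, 0].max(), coords[:, 1].max())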
+def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
+                                 filename):
+    """Generate one 2D annotation record given various information on top of
+    the 2D bounding box coordinates.
+
+    The original version in the data converter will be deprecated in the
+    future.
+
+    Args:
+        ann_rec (dict): Original 3d annotation record.
+        x1 (float): Minimum value of the x coordinate.
+        y1 (float): Minimum value of the y coordinate.
+        x2 (float): Maximum value of the x coordinate.
+        y2 (float): Maximum value of the y coordinate.
+        sample_data_token (str): Sample data token.
+        filename (str): The corresponding image file where the annotation
+            is present.
+
+    Returns:
+        dict: A sample 2D annotation record.
+            - file_name (str): file name
+            - image_id (str): sample data token
+            - area (float): 2d box area
+            - category_name (str): category name
+            - category_id (int): category id
+            - bbox (list[float]): left x, top y, x_size, y_size of 2d box
+            - iscrowd (int): whether the area is crowd
+    """
+    kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
+    repro_rec = OrderedDict()
+    repro_rec['sample_data_token'] = sample_data_token
+    coco_rec = dict()
+
+    key_mapping = {
+        'name': 'category_name',
+        'num_points_in_gt': 'num_lidar_pts',
+        'sample_annotation_token': 'sample_annotation_token',
+        'sample_data_token': 'sample_data_token',
+    }
+    for key, value in ann_rec.items():
+        if key in key_mapping.keys():
+            repro_rec[key_mapping[key]] = value
+
+    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
+    repro_rec['filename'] = filename
+
+    coco_rec['file_name'] = filename
+    coco_rec['image_id'] = sample_data_token
+    coco_rec['area'] = (y2 - y1) * (x2 - x1)
+
+    if repro_rec['category_name'] not in kitti_categories:
+        return None
+    cat_name = repro_rec['category_name']
+    coco_rec['category_name'] = cat_name
+    coco_rec['category_id'] = kitti_categories.index(cat_name)
+    coco_rec['bbox_label'] = coco_rec['category_id']
+    coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
+    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
+    coco_rec['iscrowd'] = 0
+
+    return coco_rec
...
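A quick numeric check of the center-convention shift used in `get_waymo_2d_boxes` above: KITTI stores the box origin at the bottom center ([0.5, 1.0, 0.5] in relative coordinates), so moving to the geometric center only offsets the height axis by half the box extent. The values below are made up for illustration:

import numpy as np

loc = np.array([[10.0, 2.0, 30.0]])  # hypothetical bottom-center location
dim = np.array([[4.0, 1.5, 1.8]])    # hypothetical box extents
dst = np.array([0.5, 0.5, 0.5])      # target: geometric center
src = np.array([0.5, 1.0, 0.5])      # source: bottom center (KITTI)

center = loc + dim * (dst - src)
print(center)  # [[10.    1.25 30.  ]] -- only the height axis moved, by dim/2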
@@ -22,6 +22,7 @@ class KittiDataset(Det3DDataset):
             Defaults to None.
         modality (dict, optional): Modality to specify the sensor data used
             as input. Defaults to `dict(use_lidar=True)`.
+
         box_type_3d (str, optional): Type of 3D box of this dataset.
             Based on the `box_type_3d`, the dataset will encapsulate the box
             to its original format then converted them to `box_type_3d`.
@@ -49,7 +50,7 @@ class KittiDataset(Det3DDataset):
                  ann_file: str,
                  pipeline: List[Union[dict, Callable]] = [],
                  modality: Optional[dict] = dict(use_lidar=True),
-                 default_cam_key='CAM2',
+                 default_cam_key: str = 'CAM2',
                  box_type_3d: str = 'LiDAR',
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
...
@@ -193,9 +193,9 @@ class _S3DISSegDataset(Seg3DDataset):
                     [255, 0, 255], [100, 100, 255], [200, 200, 100],
                     [170, 120, 200], [255, 0, 0], [200, 100, 100],
                     [10, 200, 100], [200, 200, 200], [50, 50, 50]],
-        'valid_class_ids':
+        'seg_valid_class_ids':
         tuple(range(13)),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(14))  # possibly with 'stair' class
     }
...
@@ -50,7 +50,12 @@ class ScanNetDataset(Det3DDataset):
         'CLASSES':
         ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
          'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
-         'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin')
+         'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin'),
+        # the valid ids of segmentation annotations
+        'seg_valid_class_ids':
+        (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
+        'seg_all_class_ids':
+        tuple(range(1, 41))
     }
 
     def __init__(self,
@@ -67,6 +72,17 @@ class ScanNetDataset(Det3DDataset):
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
                  **kwargs):
+        # construct seg_label_mapping for semantic mask
+        seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
+        seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
+        neg_label = len(seg_valid_cat_ids)
+        seg_label_mapping = np.ones(
+            seg_max_cat_id + 1, dtype=np.int) * neg_label
+        for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
+            seg_label_mapping[cat_id] = cls_idx
+        self.seg_label_mapping = seg_label_mapping
+
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -78,6 +94,8 @@ class ScanNetDataset(Det3DDataset):
             filter_empty_gt=filter_empty_gt,
             test_mode=test_mode,
             **kwargs)
+        self.metainfo['seg_label_mapping'] = self.seg_label_mapping
+
         assert 'use_camera' in self.modality and \
             'use_lidar' in self.modality
         assert self.modality['use_camera'] or self.modality['use_lidar']
@@ -122,6 +140,9 @@ class ScanNetDataset(Det3DDataset):
                 info['pts_semantic_mask_path'])
 
         info = super().parse_data_info(info)
+        # only be used in `PointSegClassMapping` in pipeline
+        # to map original semantic class to valid category ids.
+        info['seg_label_mapping'] = self.seg_label_mapping
         return info
 
     def parse_ann_info(self, info: dict) -> dict:
@@ -207,9 +228,9 @@ class ScanNetSegDataset(Seg3DDataset):
             [227, 119, 194],
             [82, 84, 163],
         ],
-        'valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
-                            28, 33, 34, 36, 39),
-        'all_class_ids':
+        'seg_valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
+                                24, 28, 33, 34, 36, 39),
+        'seg_all_class_ids':
         tuple(range(41)),
     }
@@ -280,9 +301,9 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
             [227, 119, 194],
             [82, 84, 163],
         ],
-        'valid_class_ids':
+        'seg_valid_class_ids':
         (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(41))
     }
...
@@ -50,8 +50,8 @@ class Seg3DDataset(BaseDataset):
     METAINFO = {
         'CLASSES': None,  # names of all classes data used for the task
         'PALETTE': None,  # official color for visualization
-        'valid_class_ids': None,  # class_ids used for training
-        'all_class_ids': None,  # all possible class_ids in loaded seg mask
+        'seg_valid_class_ids': None,  # class_ids used for training
+        'seg_all_class_ids': None,  # all possible class_ids in loaded seg mask
     }
 
     def __init__(self,
@@ -84,12 +84,12 @@ class Seg3DDataset(BaseDataset):
         # Get label mapping for custom classes
         new_classes = metainfo.get('CLASSES', None)
-        self.label_mapping, self.label2cat, valid_class_ids = \
+        self.label_mapping, self.label2cat, seg_valid_class_ids = \
             self.get_label_mapping(new_classes)
 
         metainfo['label_mapping'] = self.label_mapping
         metainfo['label2cat'] = self.label2cat
-        metainfo['valid_class_ids'] = valid_class_ids
+        metainfo['seg_valid_class_ids'] = seg_valid_class_ids
 
         # generate palette if it is not defined based on
         # label mapping, otherwise directly use palette
@@ -99,6 +99,16 @@ class Seg3DDataset(BaseDataset):
             metainfo['PALETTE'] = updated_palette
 
+        # construct seg_label_mapping for semantic mask
+        seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
+        seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
+        neg_label = len(seg_valid_cat_ids)
+        seg_label_mapping = np.ones(
+            seg_max_cat_id + 1, dtype=np.int) * neg_label
+        for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
+            seg_label_mapping[cat_id] = cls_idx
+        self.seg_label_mapping = seg_label_mapping
+
         super().__init__(
             ann_file=ann_file,
             metainfo=metainfo,
@@ -108,6 +118,7 @@ class Seg3DDataset(BaseDataset):
             test_mode=test_mode,
             **kwargs)
+        self.metainfo['seg_label_mapping'] = self.seg_label_mapping
 
         self.scene_idxs = self.get_scene_idxs(scene_idxs)
         # set group flag for the sampler
@@ -137,7 +148,6 @@ class Seg3DDataset(BaseDataset):
         old_classes = self.METAINFO.get('CLASSSES', None)
         if (new_classes is not None and old_classes is not None
                 and list(new_classes) != list(old_classes)):
-            label_mapping = {}
             if not set(new_classes).issubset(old_classes):
                 raise ValueError(
                     f'new classes {new_classes} is not a '
@@ -145,12 +155,12 @@ class Seg3DDataset(BaseDataset):
             # obtain true id from valid_class_ids
             valid_class_ids = [
-                self.METAINFO['valid_class_ids'][old_classes.index(cls_name)]
-                for cls_name in new_classes
+                self.METAINFO['seg_valid_class_ids'][old_classes.index(
+                    cls_name)] for cls_name in new_classes
             ]
             label_mapping = {
                 cls_id: self.ignore_index
-                for cls_id in self.METAINFO['all_class_ids']
+                for cls_id in self.METAINFO['seg_all_class_ids']
             }
             label_mapping.update(
                 {cls_id: i
@@ -159,18 +169,19 @@ class Seg3DDataset(BaseDataset):
         else:
             label_mapping = {
                 cls_id: self.ignore_index
-                for cls_id in self.METAINFO['all_class_ids']
+                for cls_id in self.METAINFO['seg_all_class_ids']
             }
             label_mapping.update({
                 cls_id: i
-                for i, cls_id in enumerate(self.METAINFO['valid_class_ids'])
+                for i, cls_id in enumerate(
+                    self.METAINFO['seg_valid_class_ids'])
             })
             # map label to category name
             label2cat = {
                 i: cat_name
                 for i, cat_name in enumerate(self.METAINFO['CLASSES'])
             }
-            valid_class_ids = self.METAINFO['valid_class_ids']
+            valid_class_ids = self.METAINFO['seg_valid_class_ids']
 
         return label_mapping, label2cat, valid_class_ids
...
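A toy walk-through of the `get_label_mapping` logic above, with made-up class ids and `ignore_index=255`: every id in `seg_all_class_ids` first maps to the ignore index, then the ids backing the requested classes are remapped to contiguous training labels.

ignore_index = 255
seg_all_class_ids = (0, 1, 2, 3, 4)   # toy values
seg_valid_class_ids = (1, 3)          # ids backing two hypothetical classes

label_mapping = {cls_id: ignore_index for cls_id in seg_all_class_ids}
label_mapping.update(
    {cls_id: i for i, cls_id in enumerate(seg_valid_class_ids)})
print(label_mapping)  # {0: 255, 1: 0, 2: 255, 3: 1, 4: 255}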
@@ -41,9 +41,9 @@ class SemanticKITTIDataset(Seg3DDataset):
                     'bus', 'person', 'bicyclist', 'motorcyclist', 'road',
                     'parking', 'sidewalk', 'other-ground', 'building', 'fence',
                     'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign'),
-        'valid_class_ids':
+        'seg_valid_class_ids':
         tuple(range(20)),
-        'all_class_ids':
+        'seg_all_class_ids':
         tuple(range(20))
     }
...
@@ -277,9 +277,8 @@ class PointSegClassMapping(BaseTransform):
     Required Keys:
 
-    - lidar_points (dict)
+    - seg_label_mapping (np.ndarray)
     - pts_semantic_mask (np.ndarray)
-    - lidar_path (str)
 
     Added Keys:
 
@@ -287,11 +286,6 @@ class PointSegClassMapping(BaseTransform):
     Map valid classes as 0~len(valid_cat_ids)-1 and
     others as len(valid_cat_ids).
-
-    Args:
-        valid_cat_ids (tuple[int]): A tuple of valid category.
-        max_cat_id (int, optional): The max possible cat_id in input
-            segmentation mask. Defaults to 40.
     """
 
     def transform(self, results: dict) -> None:
@@ -309,10 +303,9 @@ class PointSegClassMapping(BaseTransform):
         assert 'pts_semantic_mask' in results
         pts_semantic_mask = results['pts_semantic_mask']
 
-        assert 'label_mapping' in results
-        label_mapping = results['label_mapping']
-        converted_pts_sem_mask = \
-            np.array([label_mapping[mask] for mask in pts_semantic_mask])
+        assert 'seg_label_mapping' in results
+        label_mapping = results['seg_label_mapping']
+        converted_pts_sem_mask = label_mapping[pts_semantic_mask]
 
         results['pts_semantic_mask'] = converted_pts_sem_mask
...
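The rewritten transform above is just a vectorized table lookup: the dataset pre-builds `seg_label_mapping` (see the ScanNet and Seg3DDataset hunks earlier) and the transform indexes it with the raw mask. A minimal numpy sketch using the ScanNet values from this PR, with a made-up per-point mask (np.int64 is used here instead of the PR's deprecated np.int alias):

import numpy as np

seg_valid_class_ids = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
                       28, 33, 34, 36, 39)   # ScanNet, from this PR
seg_all_class_ids = tuple(range(41))

neg_label = len(seg_valid_class_ids)          # 20 = "not a valid class"
seg_label_mapping = np.ones(
    len(seg_all_class_ids) + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_class_ids):
    seg_label_mapping[cat_id] = cls_idx

raw_mask = np.array([1, 13, 39, 0])           # made-up per-point labels
print(seg_label_mapping[raw_mask])            # [ 0 20 19 20]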
This diff is collapsed.