Unverified Commit 583c4acc authored by Qing Lian, committed by GitHub

Update waymo dataset, evaluation metrics and related configs for 2.0 (#1663)



* update waymo dataset

* [Fix] Fix all unittests and refactor tests directory and add circle ci in `test-1.x` (#1654)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>

* add waymo dataset and evaluation metrics

* convert second configs for v2.0

* [Refactor] Unify ceph config (#1677)

* refactor ceph in config

* support metric load ann file from ceph

* add doc string and remove useless code

* [Fix]Fix create data (#1659)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut

* fix create data
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>

* [Fix] Fix seg mapping (#1681)

* [Doc]: fix markdown version (#1653)

* [CI] Add circle ci (#1647)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix seg mapping for scannet

* fix requirement

* fix all seg datasets

* resolve comments
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>

* [Fix] Fix SSN configs (#1686)

* modify doc string and evaluation file location

* add doc string

* remove path mapping in file client args
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>
Co-authored-by: Shilong Zhang <61961338+jshilong@users.noreply.github.com>
parent a8f3ec5f
......@@ -82,7 +82,7 @@ file_client_args = dict(
}))
train_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
......@@ -101,7 +101,7 @@ train_pipeline = [
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(type='mmdet.Resize', scale_factor=1.0),
dict(type='Pack3DDetInputs', keys=['img'])
]
......
......@@ -18,7 +18,9 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
classes=class_names)
classes=class_names,
points_loader=dict(
type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......
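For context, a sketch of the complete sampler dict this hunk edits: `ObjectSample` builds the ground-truth-database sampler from it, and the new `points_loader` entry tells the sampler how to read the stored per-object point patches. A minimal sketch mirroring the values in the hunk above; `info_path` and `rate` are not shown in the hunk and are assumptions here:

    data_root = 'data/kitti/'
    class_names = ['Car', 'Pedestrian', 'Cyclist']
    db_sampler = dict(
        data_root=data_root,
        info_path=data_root + 'kitti_dbinfos_train.pkl',  # assumed
        rate=1.0,                                         # assumed
        prepare=dict(
            filter_by_difficulty=[-1],
            filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
        sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
        classes=class_names,
        # new in this diff: explicit loader for the 4-dim KITTI points
        points_loader=dict(
            type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4,
            use_dim=4))

    # consumed from the training pipeline via:
    # dict(type='ObjectSample', db_sampler=db_sampler)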
......@@ -28,11 +28,7 @@ train_pipeline = [
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......@@ -77,35 +73,6 @@ test_pipeline = [
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
......@@ -135,7 +102,7 @@ data = dict(
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline, interval=5)
evaluation = dict(interval=5)
# model settings
model = dict(
......
......@@ -28,11 +28,7 @@ train_pipeline = [
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......@@ -77,35 +73,6 @@ test_pipeline = [
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
......@@ -135,7 +102,7 @@ data = dict(
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline, interval=5)
evaluation = dict(interval=5)
# model settings
model = dict(
......
......@@ -9,6 +9,7 @@ point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(CLASSES=class_names)
# PointPillars adopts different sampling strategies among classes
db_sampler = dict(
data_root=data_root,
......@@ -18,7 +19,9 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
classes=class_names,
sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15))
sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15),
points_loader=dict(
type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
# PointPillars uses different augmentation hyper parameters
train_pipeline = [
......
......@@ -8,8 +8,18 @@ _base_ = [
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
metainfo = dict(CLASSES=class_names)
point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]
input_modality = dict(use_lidar=True, use_camera=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/waymo/':
's3://openmmlab/datasets/detection3d/waymo/',
'data/waymo/':
's3://openmmlab/datasets/detection3d/waymo/'
}))
db_sampler = dict(
data_root=data_root,
......@@ -27,9 +37,14 @@ db_sampler = dict(
use_dim=[0, 1, 2, 3, 4]))
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
# dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
......@@ -42,12 +57,18 @@ train_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
......@@ -62,51 +83,60 @@ test_pipeline = [
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
ann_file='waymo_infos_train.pkl',
data_prefix=dict(pts='training/velodyne'),
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
load_interval=5)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
data_prefix=dict(pts='training/velodyne'),
ann_file='waymo_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
metainfo=metainfo,
box_type_3d='LiDAR'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
data_prefix=dict(pts='training/velodyne'),
ann_file='waymo_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
metainfo=metainfo,
box_type_3d='LiDAR'))
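To sanity-check a config written in this new per-split style, the dataset can be built directly from the registry. A minimal sketch, assuming the standard MMEngine entry points (`mmengine.Config`, `init_default_scope`) and an assumed config path:

    from mmengine.config import Config
    from mmengine.registry import init_default_scope

    from mmdet3d.registry import DATASETS

    # Assumed path; any config containing the train_dataloader above works.
    cfg = Config.fromfile('configs/_base_/datasets/waymoD5-3d-3class.py')
    init_default_scope('mmdet3d')  # resolve 'WaymoDataset', transforms, etc.

    # The Runner would normally build the full dataloader from
    # cfg.train_dataloader (batch_size, sampler, ...); here we build just
    # the dataset for a quick check.
    train_set = DATASETS.build(cfg.train_dataloader.dataset)
    print(len(train_set))  # RepeatDataset(times=2) over the 1/5-sampled split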
......@@ -17,7 +17,7 @@ file_client_args = dict(
}))
train_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
......@@ -37,7 +37,7 @@ train_pipeline = [
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
dict(type='Pack3DDetInputs', keys=['img'])
]
......
......@@ -18,4 +18,4 @@ model = dict(
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
# dataset settings
data = dict(samples_per_gpu=1, workers_per_gpu=2)
train_dataloader = dict(batch_size=1, num_workers=2)
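As a quick reference for the 1.x → 2.0 migration pattern visible in this hunk, the old nested `data` dict maps onto per-split dataloader dicts. A minimal sketch; the val/test values below are assumptions, not part of this diff:

    # 1.x style (removed):
    #   data = dict(samples_per_gpu=1, workers_per_gpu=2)
    # 2.0 style (added): one dict per split, consumed by the MMEngine Runner.
    train_dataloader = dict(batch_size=1, num_workers=2)
    val_dataloader = dict(batch_size=1, num_workers=2)   # assumed
    test_dataloader = dict(batch_size=1, num_workers=2)  # assumed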
......@@ -2,6 +2,7 @@ _base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py'
# model settings
model = dict(
type='MVXFasterRCNN',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
......
......@@ -29,8 +29,9 @@ train_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
......@@ -48,20 +49,14 @@ test_pipeline = [
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(pipeline=train_pipeline, classes=class_names),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
train_dataloader = dict(
batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# model settings
model = dict(
......@@ -141,35 +136,37 @@ model = dict(
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=[
dict( # bicycle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # motorcycle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
......@@ -183,35 +180,35 @@ model = dict(
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # emergency vehicle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # bus
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # other vehicle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # truck
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......
......@@ -174,9 +174,7 @@ train_pipeline = [
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......
......@@ -2,7 +2,6 @@
import mmcv
import mmdet
import mmseg
from .version import __version__, short_version
......@@ -18,8 +17,8 @@ def digit_version(version_str):
return digit_version
mmcv_minimum_version = '1.4.8'
mmcv_maximum_version = '1.6.0'
mmcv_minimum_version = '2.0.0rc0'
mmcv_maximum_version = '2.0.0rc0'
mmcv_version = digit_version(mmcv.__version__)
......@@ -37,13 +36,4 @@ assert (mmdet_version >= digit_version(mmdet_minimum_version)
f'Please install mmdet>={mmdet_minimum_version}, ' \
f'<={mmdet_maximum_version}.'
mmseg_minimum_version = '0.20.0'
mmseg_maximum_version = '1.0.0'
mmseg_version = digit_version(mmseg.__version__)
assert (mmseg_version >= digit_version(mmseg_minimum_version)
and mmseg_version <= digit_version(mmseg_maximum_version)), \
f'MMSEG=={mmseg.__version__} is used but incompatible. ' \
f'Please install mmseg>={mmseg_minimum_version}, ' \
f'<={mmseg_maximum_version}.'
__all__ = ['__version__', 'short_version']
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Tuple, Union
......@@ -7,7 +8,8 @@ from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from mmdet3d.structures import points_cam2img
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
......@@ -165,6 +167,149 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
"""Get the 2D annotation records for a given info.
This function is used to get 2D annotations when loading annotations from
a dataset class. The original version in the data converter will be
deprecated in the future.
Args:
info (dict): Information of the given sample data.
cam_idx (int): Index of the camera whose calibration matrix is used
for the projection.
occluded (list[int]): Occlusion states to keep, where each integer
means: 0 = fully visible, 1 = partly occluded, 2 = largely
occluded, 3 = unknown, -1 = DontCare.
annos (dict, optional): Annotations of the sample. If None (e.g. a
test split without labels), an empty list is returned.
Defaults to None.
mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
Returns:
list[dict]: List of 2D annotation records for the given info.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
repro_recs = []
# if no annotations in info (test dataset), then return
if annos is None:
return repro_recs
# Keep only the annotations whose occlusion state is in `occluded`.
ann_dicts = annos
mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
for k in ann_dicts.keys():
ann_dicts[k] = ann_dicts[k][mask]
# convert dict of list to list of dict
ann_recs = []
for i in range(len(ann_dicts['occluded'])):
ann_rec = {}
for k in ann_dicts.keys():
ann_rec[k] = ann_dicts[k][i]
ann_recs.append(ann_rec)
for ann_idx, ann_rec in enumerate(ann_recs):
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
# transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
loc = loc + dim * (dst - src)
loc_3d = np.copy(loc)
gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box_np_ops.center_to_corner_box3d(
gt_bbox_3d[:, :3],
gt_bbox_3d[:, 3:6],
gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
axis=1)
corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8)
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(
corner_coords,
imsize=(info['image']['image_shape'][1],
info['image']['image_shape'][0]))
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
max_y, sample_data_token,
info['image']['image_path'])
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
repro_rec['bbox_3d'] = np.concatenate(
[loc_3d, dim, rot],
axis=1).astype(np.float32).squeeze().tolist()
repro_rec['velocity'] = -1 # no velocity in KITTI
center_3d = np.array(loc).reshape([1, 3])
center_2d_with_depth = box_np_ops.points_cam2img(
center_3d, camera_intrinsic, with_depth=True)
center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
repro_rec['center_2d'] = center_2d_with_depth[:2]
repro_rec['depth'] = center_2d_with_depth[2]
# normalized center2D + depth
# samples with depth <= 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
return repro_recs
def convert_annos(info: dict, cam_idx: int) -> dict:
"""Convert front-cam anns to i-th camera (KITTI-style info)."""
rect = info['calib']['R0_rect'].astype(np.float32)
lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
annos = info['annos']
converted_annos = copy.deepcopy(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
# convert gt_bboxes_3d to cam coordinates
gt_bboxes_3d = gt_bboxes_3d.convert_to(
Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).tensor.numpy()
converted_annos['location'] = gt_bboxes_3d[:, :3]
converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
return converted_annos
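# --- Illustration (not part of this diff): how the two helpers above
# compose when building per-camera 2D annotations. The five-camera loop
# and the occluded=[0] filter are assumptions based on typical usage.
def collect_waymo_2d_boxes(info):
    """Project one frame's annotations onto all five Waymo cameras."""
    all_recs = []
    for cam_idx in range(5):
        if cam_idx == 0:
            # front-camera annotations are stored directly in the info
            annos = info.get('annos')
        else:
            # rebase front-camera boxes into the i-th camera frame first
            annos = convert_annos(info, cam_idx)
        all_recs += get_waymo_2d_boxes(
            info, cam_idx, occluded=[0], annos=annos, mono3d=True)
    return all_recs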
def post_process_coords(
corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
......@@ -254,3 +399,67 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
coco_rec['bbox_3d_isvalid'] = True
return coco_rec
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str): The corresponding image file where the annotation
is present.
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
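For orientation, the rough shape of a record returned by `generate_waymo_mono3d_record`; the field names follow the function body above, while the concrete values are made up:

    coco_rec = dict(
        file_name='training/image_0/0000001.png',  # hypothetical path
        image_id='0000001',
        area=4800.0,                      # (y2 - y1) * (x2 - x1)
        category_name='Car',
        category_id=0,                    # index into ('Car', 'Pedestrian', 'Cyclist')
        bbox_label=0,
        bbox_label_3d=0,
        bbox=[640.0, 360.0, 80.0, 60.0],  # [x1, y1, x_size, y_size]
        iscrowd=0,
    )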
......@@ -22,6 +22,7 @@ class KittiDataset(Det3DDataset):
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the boxes
in their original format and then convert them to `box_type_3d`.
......@@ -49,7 +50,7 @@ class KittiDataset(Det3DDataset):
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
default_cam_key='CAM2',
default_cam_key: str = 'CAM2',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......
......@@ -193,9 +193,9 @@ class _S3DISSegDataset(Seg3DDataset):
[255, 0, 255], [100, 100, 255], [200, 200, 100],
[170, 120, 200], [255, 0, 0], [200, 100, 100],
[10, 200, 100], [200, 200, 200], [50, 50, 50]],
'valid_class_ids':
'seg_valid_class_ids':
tuple(range(13)),
'all_class_ids':
'seg_all_class_ids':
tuple(range(14)) # possibly with 'stair' class
}
......
......@@ -50,7 +50,12 @@ class ScanNetDataset(Det3DDataset):
'CLASSES':
('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin')
'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin'),
# the valid ids of segmentation annotations
'seg_valid_class_ids':
(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
'seg_all_class_ids':
tuple(range(1, 41))
}
def __init__(self,
......@@ -67,6 +72,17 @@ class ScanNetDataset(Det3DDataset):
filter_empty_gt: bool = True,
test_mode: bool = False,
**kwargs):
# construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
neg_label = len(seg_valid_cat_ids)
seg_label_mapping = np.ones(
seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
seg_label_mapping[cat_id] = cls_idx
self.seg_label_mapping = seg_label_mapping
super().__init__(
data_root=data_root,
ann_file=ann_file,
......@@ -78,6 +94,8 @@ class ScanNetDataset(Det3DDataset):
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
**kwargs)
self.metainfo['seg_label_mapping'] = self.seg_label_mapping
assert 'use_camera' in self.modality and \
'use_lidar' in self.modality
assert self.modality['use_camera'] or self.modality['use_lidar']
......@@ -122,6 +140,9 @@ class ScanNetDataset(Det3DDataset):
info['pts_semantic_mask_path'])
info = super().parse_data_info(info)
# only used in `PointSegClassMapping` in the pipeline
# to map the original semantic classes to valid category ids.
info['seg_label_mapping'] = self.seg_label_mapping
return info
def parse_ann_info(self, info: dict) -> dict:
......@@ -207,9 +228,9 @@ class ScanNetSegDataset(Seg3DDataset):
[227, 119, 194],
[82, 84, 163],
],
'valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
28, 33, 34, 36, 39),
'all_class_ids':
'seg_valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
24, 28, 33, 34, 36, 39),
'seg_all_class_ids':
tuple(range(41)),
}
......@@ -280,9 +301,9 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
[227, 119, 194],
[82, 84, 163],
],
'valid_class_ids':
'seg_valid_class_ids':
(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
'all_class_ids':
'seg_all_class_ids':
tuple(range(41))
}
......
......@@ -50,8 +50,8 @@ class Seg3DDataset(BaseDataset):
METAINFO = {
'CLASSES': None, # names of all classes used for the task
'PALETTE': None, # official color for visualization
'valid_class_ids': None, # class_ids used for training
'all_class_ids': None, # all possible class_ids in loaded seg mask
'seg_valid_class_ids': None, # class_ids used for training
'seg_all_class_ids': None, # all possible class_ids in loaded seg mask
}
def __init__(self,
......@@ -84,12 +84,12 @@ class Seg3DDataset(BaseDataset):
# Get label mapping for custom classes
new_classes = metainfo.get('CLASSES', None)
self.label_mapping, self.label2cat, valid_class_ids = \
self.label_mapping, self.label2cat, seg_valid_class_ids = \
self.get_label_mapping(new_classes)
metainfo['label_mapping'] = self.label_mapping
metainfo['label2cat'] = self.label2cat
metainfo['valid_class_ids'] = valid_class_ids
metainfo['seg_valid_class_ids'] = seg_valid_class_ids
# generate palette if it is not defined based on
# label mapping, otherwise directly use palette
......@@ -99,6 +99,16 @@ class Seg3DDataset(BaseDataset):
metainfo['PALETTE'] = updated_palette
# construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
neg_label = len(seg_valid_cat_ids)
seg_label_mapping = np.ones(
seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
seg_label_mapping[cat_id] = cls_idx
self.seg_label_mapping = seg_label_mapping
super().__init__(
ann_file=ann_file,
metainfo=metainfo,
......@@ -108,6 +118,7 @@ class Seg3DDataset(BaseDataset):
test_mode=test_mode,
**kwargs)
self.metainfo['seg_label_mapping'] = self.seg_label_mapping
self.scene_idxs = self.get_scene_idxs(scene_idxs)
# set group flag for the sampler
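To make the mapping construction concrete, here is the same loop run standalone on the ScanNet ids appearing earlier in this diff (a worked example, not new code in the PR):

    import numpy as np

    seg_all_class_ids = tuple(range(41))      # raw ScanNet ids 0..40
    seg_valid_class_ids = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
                           24, 28, 33, 34, 36, 39)

    seg_max_cat_id = len(seg_all_class_ids)
    neg_label = len(seg_valid_class_ids)      # 20: the "invalid" bucket
    seg_label_mapping = np.ones(seg_max_cat_id + 1, dtype=np.int64) * neg_label
    for cls_idx, cat_id in enumerate(seg_valid_class_ids):
        seg_label_mapping[cat_id] = cls_idx

    print(seg_label_mapping[1])   # 0  -> raw id 1 becomes train class 0
    print(seg_label_mapping[13])  # 20 -> raw id 13 is invalid, maps to neg_label
    print(seg_label_mapping[39])  # 19 -> last valid id becomes class 19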
......@@ -137,7 +148,6 @@ class Seg3DDataset(BaseDataset):
old_classes = self.METAINFO.get('CLASSES', None)
if (new_classes is not None and old_classes is not None
and list(new_classes) != list(old_classes)):
label_mapping = {}
if not set(new_classes).issubset(old_classes):
raise ValueError(
f'new classes {new_classes} is not a '
......@@ -145,12 +155,12 @@ class Seg3DDataset(BaseDataset):
# obtain true id from valid_class_ids
valid_class_ids = [
self.METAINFO['valid_class_ids'][old_classes.index(cls_name)]
for cls_name in new_classes
self.METAINFO['seg_valid_class_ids'][old_classes.index(
cls_name)] for cls_name in new_classes
]
label_mapping = {
cls_id: self.ignore_index
for cls_id in self.METAINFO['all_class_ids']
for cls_id in self.METAINFO['seg_all_class_ids']
}
label_mapping.update(
{cls_id: i
......@@ -159,18 +169,19 @@ class Seg3DDataset(BaseDataset):
else:
label_mapping = {
cls_id: self.ignore_index
for cls_id in self.METAINFO['all_class_ids']
for cls_id in self.METAINFO['seg_all_class_ids']
}
label_mapping.update({
cls_id: i
for i, cls_id in enumerate(self.METAINFO['valid_class_ids'])
for i, cls_id in enumerate(
self.METAINFO['seg_valid_class_ids'])
})
# map label to category name
label2cat = {
i: cat_name
for i, cat_name in enumerate(self.METAINFO['CLASSES'])
}
valid_class_ids = self.METAINFO['valid_class_ids']
valid_class_ids = self.METAINFO['seg_valid_class_ids']
return label_mapping, label2cat, valid_class_ids
......
......@@ -41,9 +41,9 @@ class SemanticKITTIDataset(Seg3DDataset):
'bus', 'person', 'bicyclist', 'motorcyclist', 'road',
'parking', 'sidewalk', 'other-ground', 'building', 'fence',
'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign'),
'valid_class_ids':
'seg_valid_class_ids':
tuple(range(20)),
'all_class_ids':
'seg_all_class_ids':
tuple(range(20))
}
......
......@@ -277,9 +277,8 @@ class PointSegClassMapping(BaseTransform):
Required Keys:
- lidar_points (dict)
- lidar_path (str)
- seg_label_mapping (np.ndarray)
- pts_semantic_mask (np.ndarray)
Added Keys:
......@@ -287,11 +286,6 @@ class PointSegClassMapping(BaseTransform):
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int]): A tuple of valid category.
max_cat_id (int, optional): The max possible cat_id in input
segmentation mask. Defaults to 40.
"""
def transform(self, results: dict) -> None:
......@@ -309,10 +303,9 @@ class PointSegClassMapping(BaseTransform):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
assert 'label_mapping' in results
label_mapping = results['label_mapping']
converted_pts_sem_mask = \
np.array([label_mapping[mask] for mask in pts_semantic_mask])
assert 'seg_label_mapping' in results
label_mapping = results['seg_label_mapping']
converted_pts_sem_mask = label_mapping[pts_semantic_mask]
results['pts_semantic_mask'] = converted_pts_sem_mask
......
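The refactored transform replaces the per-point Python dict lookup with a single NumPy fancy-indexing call; a toy illustration of the new path:

    import numpy as np

    seg_label_mapping = np.array([3, 0, 3, 1, 2])  # raw id -> train id (3 = invalid)
    pts_semantic_mask = np.array([1, 1, 4, 0, 3])  # raw per-point labels

    converted = seg_label_mapping[pts_semantic_mask]
    print(converted)  # [0 0 2 3 1]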