Unverified Commit 583c4acc authored by Qing Lian, committed by GitHub

Update waymo dataset, evaluation metrics and related configs for 2.0 (#1663)



* update waymo dataset

* [Fix] Fix all unittests and refactor tests directory and add circle ci in `test-1.x` (#1654)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>

* add waymo dataset and evaluation metrics

* convert second configs for v2.0

* [Refactor] Unify ceph config (#1677)

* refactor ceph in config

* support metric load ann file from ceph

* add doc string and remove useless code

* [Fix]Fix create data (#1659)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix part of uts

* fix comments

* change folder

* refactor test directory

* fix kitti metric ut

* fix all ut

* fix create data
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>

* [Fix] Fix seg mapping (#1681)

* [Doc]: fix markdown version (#1653)

* [CI] Add circle ci (#1647)

* add circle ci

* delete github ci

* fix ci

* fix ut

* fix markdown version

* rm

* fix seg mapping for scannet

* fix requirement

* fix all seg dataset

* resolve comments
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>

* [Fix] Fix SSN configs (#1686)

* modify doc string and evaluation file location

* add doc string

* remove path mapping in fileclient args
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
Co-authored-by: VVsssssk <shenkun@pjlab.org.cn>
Co-authored-by: VVsssssk <88368822+VVsssssk@users.noreply.github.com>
Co-authored-by: Shilong Zhang <61961338+jshilong@users.noreply.github.com>
parent a8f3ec5f
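Before the per-file diffs: the recurring pattern below is the v2.0 migration from the single `data = dict(...)` block to per-split dataloader dicts, and from `DefaultFormatBundle3D`/`Collect3D` to `Pack3DDetInputs`. A condensed before/after sketch (dataset fields elided, values taken from the Waymo config below):

```python
# mmdet3d 1.x style (removed): one `data` dict holding all splits
data = dict(
    samples_per_gpu=4,  # per-GPU batch size
    workers_per_gpu=4,
    train=dict(type='WaymoDataset'),  # dataset configs elided
    val=dict(type='WaymoDataset'),
    test=dict(type='WaymoDataset'))

# mmdet3d 2.0 style (added): one dataloader dict per split
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(type='WaymoDataset'))  # `ann_file` is now relative to `data_root`
```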
......@@ -82,7 +82,7 @@ file_client_args = dict(
}))
train_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
......@@ -101,7 +101,7 @@ train_pipeline = [
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(type='mmdet.Resize', scale_factor=1.0),
dict(type='Pack3DDetInputs', keys=['img'])
]
......
......@@ -18,7 +18,9 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
classes=class_names)
classes=class_names,
points_loader=dict(
type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......
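The hunk above makes the `db_sampler`'s `points_loader` explicit, so the GT-database points are read with the same dimensions as the main pipeline. A sketch of the resulting block under assumed KITTI paths (the `info_path` name is an assumption):

```python
db_sampler = dict(
    data_root='data/kitti/',  # assumed dataset root
    info_path='data/kitti/kitti_dbinfos_train.pkl',  # assumed info file
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
    sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
    classes=['Pedestrian', 'Cyclist', 'Car'],
    # explicit loader for 4-dim KITTI points (x, y, z, intensity)
    points_loader=dict(
        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
```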
......@@ -28,11 +28,7 @@ train_pipeline = [
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......@@ -77,35 +73,6 @@ test_pipeline = [
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
......@@ -135,7 +102,7 @@ data = dict(
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline, interval=5)
evaluation = dict(interval=5)
# model settings
model = dict(
......
......@@ -28,11 +28,7 @@ train_pipeline = [
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......@@ -77,35 +73,6 @@ test_pipeline = [
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
33, 34, 36, 39),
max_cat_id=40),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
......@@ -135,7 +102,7 @@ data = dict(
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline, interval=5)
evaluation = dict(interval=5)
# model settings
model = dict(
......
......@@ -9,6 +9,7 @@ point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(CLASSES=class_names)
# PointPillars adopts different sampling strategies among classes
db_sampler = dict(
data_root=data_root,
......@@ -18,7 +19,9 @@ db_sampler = dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
classes=class_names,
sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15))
sample_groups=dict(Car=15, Pedestrian=15, Cyclist=15),
points_loader=dict(
type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
# PointPillars uses different augmentation hyper parameters
train_pipeline = [
......
......@@ -8,8 +8,18 @@ _base_ = [
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
metainfo = dict(CLASSES=class_names)
point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4]
input_modality = dict(use_lidar=True, use_camera=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/waymo/':
's3://openmmlab/datasets/detection3d/waymo/',
'data/waymo/':
's3://openmmlab/datasets/detection3d/waymo/'
}))
db_sampler = dict(
data_root=data_root,
......@@ -27,9 +37,14 @@ db_sampler = dict(
use_dim=[0, 1, 2, 3, 4]))
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
# dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
......@@ -42,12 +57,18 @@ train_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
......@@ -62,51 +83,60 @@ test_pipeline = [
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
ann_file='waymo_infos_train.pkl',
data_prefix=dict(pts='training/velodyne'),
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
load_interval=5)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
data_prefix=dict(pts='training/velodyne'),
ann_file='waymo_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
metainfo=metainfo,
box_type_3d='LiDAR'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
data_prefix=dict(pts='training/velodyne'),
ann_file='waymo_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
metainfo=metainfo,
box_type_3d='LiDAR'))
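To sanity-check the updated Waymo config, the dataset can be built straight from the new-style dataloader dict via the mmengine config loader and the mmdet3d registry. A minimal sketch; the config path is an assumption:

```python
from mmengine.config import Config

from mmdet3d.registry import DATASETS

# Assumed config path for the updated Waymo PointPillars config.
cfg = Config.fromfile(
    'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py')

# mmdet3d datasets/transforms must be registered beforehand, e.g. via
# mmdet3d.utils.register_all_modules(), if available in this checkout.
train_dataset = DATASETS.build(cfg.train_dataloader.dataset)
print(len(train_dataset))  # number of (interval-subsampled) training frames
```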
......@@ -17,7 +17,7 @@ file_client_args = dict(
}))
train_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
......@@ -37,7 +37,7 @@ train_pipeline = [
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
dict(type='LoadImageFromFileMono3D'),
dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
dict(type='Pack3DDetInputs', keys=['img'])
]
......
......@@ -18,4 +18,4 @@ model = dict(
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
# dataset settings
data = dict(samples_per_gpu=1, workers_per_gpu=2)
train_dataloader = dict(batch_size=1, num_workers=2)
......@@ -2,6 +2,7 @@ _base_ = './hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py'
# model settings
model = dict(
type='MVXFasterRCNN',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
......
......@@ -29,8 +29,9 @@ train_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=5, use_dim=5),
......@@ -48,20 +49,14 @@ test_pipeline = [
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(pipeline=train_pipeline, classes=class_names),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
train_dataloader = dict(
batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# model settings
model = dict(
......@@ -141,35 +136,37 @@ model = dict(
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=[
dict( # bicycle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # motorcycle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
......@@ -183,35 +180,35 @@ model = dict(
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # emergency vehicle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # bus
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # other vehicle
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # truck
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......
......@@ -174,9 +174,7 @@ train_pipeline = [
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
type='PointSegClassMapping'),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
......
......@@ -2,7 +2,6 @@
import mmcv
import mmdet
import mmseg
from .version import __version__, short_version
......@@ -18,8 +17,8 @@ def digit_version(version_str):
return digit_version
mmcv_minimum_version = '1.4.8'
mmcv_maximum_version = '1.6.0'
mmcv_minimum_version = '2.0.0rc0'
mmcv_maximum_version = '2.0.0rc0'
mmcv_version = digit_version(mmcv.__version__)
......@@ -37,13 +36,4 @@ assert (mmdet_version >= digit_version(mmdet_minimum_version)
f'Please install mmdet>={mmdet_minimum_version}, ' \
f'<={mmdet_maximum_version}.'
mmseg_minimum_version = '0.20.0'
mmseg_maximum_version = '1.0.0'
mmseg_version = digit_version(mmseg.__version__)
assert (mmseg_version >= digit_version(mmseg_minimum_version)
and mmseg_version <= digit_version(mmseg_maximum_version)), \
f'MMSEG=={mmseg.__version__} is used but incompatible. ' \
f'Please install mmseg>={mmseg_minimum_version}, ' \
f'<={mmseg_maximum_version}.'
__all__ = ['__version__', 'short_version']
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from collections import OrderedDict
from typing import List, Tuple, Union
......@@ -7,7 +8,8 @@ from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from mmdet3d.structures import points_cam2img
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
......@@ -165,6 +167,149 @@ def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
return repro_recs
def get_waymo_2d_boxes(info, cam_idx, occluded, annos=None, mono3d=True):
"""Get the 2D annotation records for a given info.
This function is used to get 2D annotations when loading annotations from
a dataset class. The original version in the data converter will be
deprecated in the future.
Args:
info (dict): Information of the given sample data.
cam_idx (int): Index of the camera whose calibration is used.
occluded (tuple[int]): Occlusion states to keep. Each value is an
integer indicating an occlusion state: 0 = fully visible,
1 = partly occluded, 2 = largely occluded, 3 = unknown, -1 = DontCare.
annos (dict, optional): Annotations of the given sample data.
Defaults to None.
mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
Returns:
list[dict]: List of 2D annotation records for the given sample data.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
repro_recs = []
# if no annotations in info (test dataset), then return
if annos is None:
return repro_recs
# Get all the annotations with the specified visibilities.
# filter the annotation bboxes by the occluded attribute
ann_dicts = annos
mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
for k in ann_dicts.keys():
ann_dicts[k] = ann_dicts[k][mask]
# convert dict of list to list of dict
ann_recs = []
for i in range(len(ann_dicts['occluded'])):
ann_rec = {}
for k in ann_dicts.keys():
ann_rec[k] = ann_dicts[k][i]
ann_recs.append(ann_rec)
for ann_idx, ann_rec in enumerate(ann_recs):
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
# transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
loc = loc + dim * (dst - src)
loc_3d = np.copy(loc)
gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box_np_ops.center_to_corner_box3d(
gt_bbox_3d[:, :3],
gt_bbox_3d[:, 3:6],
gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
axis=1)
corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8)
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(
corner_coords,
imsize=(info['image']['image_shape'][1],
info['image']['image_shape'][0]))
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_waymo_mono3d_record(ann_rec, min_x, min_y, max_x,
max_y, sample_data_token,
info['image']['image_path'])
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
repro_rec['bbox_3d'] = np.concatenate(
[loc_3d, dim, rot],
axis=1).astype(np.float32).squeeze().tolist()
repro_rec['velocity'] = -1 # no velocity in KITTI
center_3d = np.array(loc).reshape([1, 3])
center_2d_with_depth = box_np_ops.points_cam2img(
center_3d, camera_intrinsic, with_depth=True)
center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
repro_rec['center_2d'] = center_2d_with_depth[:2]
repro_rec['depth'] = center_2d_with_depth[2]
# normalized center2D + depth
# samples with depth < 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
return repro_recs
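The origin shift in the loop above moves the KITTI-style bottom-center location (relative origin `[0.5, 1.0, 0.5]`) to the geometric center (`[0.5, 0.5, 0.5]`). A tiny numpy check of that one line, with made-up numbers:

```python
import numpy as np

loc = np.array([[2.0, 1.0, 10.0]])  # bottom-center of a box in camera coords
dim = np.array([[1.6, 1.5, 4.0]])   # box dimensions
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])

# only the axis whose relative origin changes (y here) moves, by -0.5 * height
loc = loc + dim * (dst - src)
print(loc)  # [[ 2.    0.25 10.  ]]
```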
def convert_annos(info: dict, cam_idx: int) -> dict:
"""Convert front-cam anns to i-th camera (KITTI-style info)."""
rect = info['calib']['R0_rect'].astype(np.float32)
lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
annos = info['annos']
converted_annos = copy.deepcopy(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
# convert gt_bboxes_3d to cam coordinates
gt_bboxes_3d = gt_bboxes_3d.convert_to(
Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).tensor.numpy()
converted_annos['location'] = gt_bboxes_3d[:, :3]
converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
return converted_annos
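`convert_annos` re-expresses front-camera (cam0) boxes in camera `i`'s frame by round-tripping through the LiDAR frame: undo `rect @ lidar2cam0`, then apply `rect @ lidar2cami`. A small numpy sketch of the equivalent point transform, with made-up extrinsics:

```python
import numpy as np

rect = np.eye(4, dtype=np.float32)
lidar2cam0 = np.eye(4, dtype=np.float32)
lidar2cam0[:3, 3] = [0.0, -0.1, -0.3]  # made-up cam0 extrinsics
lidar2cami = np.eye(4, dtype=np.float32)
lidar2cami[:3, 3] = [0.5, -0.1, -0.3]  # made-up cam_i extrinsics

p_cam0 = np.array([1.0, 2.0, 15.0, 1.0])  # homogeneous point in cam0

# cam0 -> lidar -> cam_i, mirroring the two convert_to() calls above
p_lidar = np.linalg.inv(rect @ lidar2cam0) @ p_cam0
p_cami = (rect @ lidar2cami) @ p_lidar
print(p_cami[:3])  # [ 1.5  2.  15. ]
```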
def post_process_coords(
corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
......@@ -254,3 +399,67 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
coco_rec['bbox_3d_isvalid'] = True
return coco_rec
def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
The original version in the data converter will be deprecated in the
future.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str): The corresponding image file where the annotation
is present.
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
kitti_categories = ('Car', 'Pedestrian', 'Cyclist')
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
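A quick usage sketch of the record generator above with a made-up annotation; only the keys listed in `key_mapping` survive into the intermediate record, and the returned dict is the COCO-style one:

```python
ann_rec = {
    'name': 'Car',
    'num_points_in_gt': 120,
    'sample_annotation_token': '1000000.0',
    'sample_data_token': 1000000,
    'occluded': 0,  # not in key_mapping, so dropped from the record
}
rec = generate_waymo_mono3d_record(ann_rec, 10.0, 20.0, 110.0, 220.0,
                                   1000000, 'training/image_0/0000000.png')
print(rec['bbox'])         # [10.0, 20.0, 100.0, 200.0] -> (x, y, w, h)
print(rec['area'])         # 20000.0
print(rec['category_id'])  # 0, i.e. 'Car'
```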
......@@ -22,6 +22,7 @@ class KittiDataset(Det3DDataset):
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_lidar=True)`.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert them to `box_type_3d`.
......@@ -49,7 +50,7 @@ class KittiDataset(Det3DDataset):
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
default_cam_key='CAM2',
default_cam_key: str = 'CAM2',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......
......@@ -193,9 +193,9 @@ class _S3DISSegDataset(Seg3DDataset):
[255, 0, 255], [100, 100, 255], [200, 200, 100],
[170, 120, 200], [255, 0, 0], [200, 100, 100],
[10, 200, 100], [200, 200, 200], [50, 50, 50]],
'valid_class_ids':
'seg_valid_class_ids':
tuple(range(13)),
'all_class_ids':
'seg_all_class_ids':
tuple(range(14)) # possibly with 'stair' class
}
......
......@@ -50,7 +50,12 @@ class ScanNetDataset(Det3DDataset):
'CLASSES':
('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain', 'refrigerator',
'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin')
'showercurtrain', 'toilet', 'sink', 'bathtub', 'garbagebin'),
# the valid ids of segmentation annotations
'seg_valid_class_ids':
(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
'seg_all_class_ids':
tuple(range(1, 41))
}
def __init__(self,
......@@ -67,6 +72,17 @@ class ScanNetDataset(Det3DDataset):
filter_empty_gt: bool = True,
test_mode: bool = False,
**kwargs):
# construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
neg_label = len(seg_valid_cat_ids)
seg_label_mapping = np.ones(
seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
seg_label_mapping[cat_id] = cls_idx
self.seg_label_mapping = seg_label_mapping
super().__init__(
data_root=data_root,
ann_file=ann_file,
......@@ -78,6 +94,8 @@ class ScanNetDataset(Det3DDataset):
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
**kwargs)
self.metainfo['seg_label_mapping'] = self.seg_label_mapping
assert 'use_camera' in self.modality and \
'use_lidar' in self.modality
assert self.modality['use_camera'] or self.modality['use_lidar']
......@@ -122,6 +140,9 @@ class ScanNetDataset(Det3DDataset):
info['pts_semantic_mask_path'])
info = super().parse_data_info(info)
# only be used in `PointSegClassMapping` in pipeline
# to map original semantic class to valid category ids.
info['seg_label_mapping'] = self.seg_label_mapping
return info
def parse_ann_info(self, info: dict) -> dict:
......@@ -207,9 +228,9 @@ class ScanNetSegDataset(Seg3DDataset):
[227, 119, 194],
[82, 84, 163],
],
'valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
28, 33, 34, 36, 39),
'all_class_ids':
'seg_valid_class_ids': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
24, 28, 33, 34, 36, 39),
'seg_all_class_ids':
tuple(range(41)),
}
......@@ -280,9 +301,9 @@ class ScanNetInstanceSegDataset(Seg3DDataset):
[227, 119, 194],
[82, 84, 163],
],
'valid_class_ids':
'seg_valid_class_ids':
(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
'all_class_ids':
'seg_all_class_ids':
tuple(range(41))
}
......
......@@ -50,8 +50,8 @@ class Seg3DDataset(BaseDataset):
METAINFO = {
'CLASSES': None, # names of all classes data used for the task
'PALETTE': None, # official color for visualization
'valid_class_ids': None, # class_ids used for training
'all_class_ids': None, # all possible class_ids in loaded seg mask
'seg_valid_class_ids': None, # class_ids used for training
'seg_all_class_ids': None, # all possible class_ids in loaded seg mask
}
def __init__(self,
......@@ -84,12 +84,12 @@ class Seg3DDataset(BaseDataset):
# Get label mapping for custom classes
new_classes = metainfo.get('CLASSES', None)
self.label_mapping, self.label2cat, valid_class_ids = \
self.label_mapping, self.label2cat, seg_valid_class_ids = \
self.get_label_mapping(new_classes)
metainfo['label_mapping'] = self.label_mapping
metainfo['label2cat'] = self.label2cat
metainfo['valid_class_ids'] = valid_class_ids
metainfo['seg_valid_class_ids'] = seg_valid_class_ids
# generate palette if it is not defined based on
# label mapping, otherwise directly use palette
......@@ -99,6 +99,16 @@ class Seg3DDataset(BaseDataset):
metainfo['PALETTE'] = updated_palette
# construct seg_label_mapping for semantic mask
seg_max_cat_id = len(self.METAINFO['seg_all_class_ids'])
seg_valid_cat_ids = self.METAINFO['seg_valid_class_ids']
neg_label = len(seg_valid_cat_ids)
seg_label_mapping = np.ones(
seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
seg_label_mapping[cat_id] = cls_idx
self.seg_label_mapping = seg_label_mapping
super().__init__(
ann_file=ann_file,
metainfo=metainfo,
......@@ -108,6 +118,7 @@ class Seg3DDataset(BaseDataset):
test_mode=test_mode,
**kwargs)
self.metainfo['seg_label_mapping'] = self.seg_label_mapping
self.scene_idxs = self.get_scene_idxs(scene_idxs)
# set group flag for the sampler
......@@ -137,7 +148,6 @@ class Seg3DDataset(BaseDataset):
old_classes = self.METAINFO.get('CLASSES', None)
if (new_classes is not None and old_classes is not None
and list(new_classes) != list(old_classes)):
label_mapping = {}
if not set(new_classes).issubset(old_classes):
raise ValueError(
f'new classes {new_classes} is not a '
......@@ -145,12 +155,12 @@ class Seg3DDataset(BaseDataset):
# obtain true id from valid_class_ids
valid_class_ids = [
self.METAINFO['valid_class_ids'][old_classes.index(cls_name)]
for cls_name in new_classes
self.METAINFO['seg_valid_class_ids'][old_classes.index(
cls_name)] for cls_name in new_classes
]
label_mapping = {
cls_id: self.ignore_index
for cls_id in self.METAINFO['all_class_ids']
for cls_id in self.METAINFO['seg_all_class_ids']
}
label_mapping.update(
{cls_id: i
......@@ -159,18 +169,19 @@ class Seg3DDataset(BaseDataset):
else:
label_mapping = {
cls_id: self.ignore_index
for cls_id in self.METAINFO['all_class_ids']
for cls_id in self.METAINFO['seg_all_class_ids']
}
label_mapping.update({
cls_id: i
for i, cls_id in enumerate(self.METAINFO['valid_class_ids'])
for i, cls_id in enumerate(
self.METAINFO['seg_valid_class_ids'])
})
# map label to category name
label2cat = {
i: cat_name
for i, cat_name in enumerate(self.METAINFO['CLASSES'])
}
valid_class_ids = self.METAINFO['valid_class_ids']
valid_class_ids = self.METAINFO['seg_valid_class_ids']
return label_mapping, label2cat, valid_class_ids
......
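The effect of `get_label_mapping` above: every raw id in `seg_all_class_ids` first maps to `ignore_index`, then the chosen `seg_valid_class_ids` are overwritten with contiguous train ids. A stripped-down sketch with toy ids:

```python
seg_all_class_ids = tuple(range(6))  # toy raw ids 0..5
seg_valid_class_ids = (1, 3, 4)      # toy training subset
ignore_index = 255

label_mapping = {cls_id: ignore_index for cls_id in seg_all_class_ids}
label_mapping.update(
    {cls_id: i for i, cls_id in enumerate(seg_valid_class_ids)})
print(label_mapping)  # {0: 255, 1: 0, 2: 255, 3: 1, 4: 2, 5: 255}
```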
......@@ -41,9 +41,9 @@ class SemanticKITTIDataset(Seg3DDataset):
'bus', 'person', 'bicyclist', 'motorcyclist', 'road',
'parking', 'sidewalk', 'other-ground', 'building', 'fence',
'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign'),
'valid_class_ids':
'seg_valid_class_ids':
tuple(range(20)),
'all_class_ids':
'seg_all_class_ids':
tuple(range(20))
}
......
......@@ -277,9 +277,8 @@ class PointSegClassMapping(BaseTransform):
Required Keys:
- lidar_points (dict)
- lidar_path (str)
- seg_label_mapping (np.ndarray)
- pts_semantic_mask (np.ndarray)
Added Keys:
......@@ -287,11 +286,6 @@ class PointSegClassMapping(BaseTransform):
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
valid_cat_ids (tuple[int]): A tuple of valid category.
max_cat_id (int, optional): The max possible cat_id in input
segmentation mask. Defaults to 40.
"""
def transform(self, results: dict) -> None:
......@@ -309,10 +303,9 @@ class PointSegClassMapping(BaseTransform):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
assert 'label_mapping' in results
label_mapping = results['label_mapping']
converted_pts_sem_mask = \
np.array([label_mapping[mask] for mask in pts_semantic_mask])
assert 'seg_label_mapping' in results
label_mapping = results['seg_label_mapping']
converted_pts_sem_mask = label_mapping[pts_semantic_mask]
results['pts_semantic_mask'] = converted_pts_sem_mask
......
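With this refactor `PointSegClassMapping` no longer takes `valid_cat_ids`/`max_cat_id`; it simply indexes the `seg_label_mapping` array that the datasets above now build. A numpy sketch of how that array is constructed and applied, with toy ids:

```python
import numpy as np

seg_valid_cat_ids = (1, 3)          # toy valid raw category ids
seg_max_cat_id = 4                  # toy max raw id in the seg masks
neg_label = len(seg_valid_cat_ids)  # every other raw id maps here

seg_label_mapping = np.ones(seg_max_cat_id + 1, dtype=np.int64) * neg_label
for cls_idx, cat_id in enumerate(seg_valid_cat_ids):
    seg_label_mapping[cat_id] = cls_idx

pts_semantic_mask = np.array([0, 1, 1, 3, 4, 2])
print(seg_label_mapping[pts_semantic_mask])  # [2 0 0 1 2 2]
```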
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from os import path as osp
import os.path as osp
from typing import Callable, List, Optional, Union
import mmcv
import numpy as np
import torch
from mmcv.utils import print_log
from mmdet3d.registry import DATASETS
from mmdet3d.structures import Box3DMode, points_cam2img
from mmdet3d.structures import CameraInstance3DBoxes
from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset
......@@ -26,524 +23,207 @@ class WaymoDataset(KittiDataset):
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
split (str): Split of input data.
pts_prefix (str, optional): Prefix of points files.
Defaults to 'velodyne'.
data_prefix (dict): Data prefix for point cloud and camera data.
Defaults to dict(
pts='velodyne',
CAM_FRONT='image_0',
CAM_FRONT_RIGHT='image_1',
CAM_FRONT_LEFT='image_2',
CAM_SIDE_RIGHT='image_3',
CAM_SIDE_LEFT='image_4')
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
as input. Defaults to `dict(use_lidar=True)`.
default_cam_key (str, optional): Default camera key for lidar2img
association. Defaults to 'CAM_FRONT'.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format and then convert them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options includes
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list(float), optional): The range of point cloud used
to filter invalid predicted boxes.
pcd_limit_range (list, optional): The range of point cloud used to
filter invalid predicted boxes.
Default: [-85, -85, -5, 85, 85, 5].
cam_sync_instances (bool, optional): Whether to use the camera-synced
labels supported from Waymo version 1.3.1. Defaults to False.
load_interval (int, optional): Interval for loading frames. Defaults to 1.
task (str, optional): Task for 3D detection, 'lidar' or 'mono3d'.
'lidar': take all the ground truth in the frame.
'mono3d': take only the ground truth visible in the camera.
Defaults to 'lidar'.
max_sweeps (int, optional): Maximum number of sweeps for each frame.
Defaults to 0.
"""
CLASSES = ('Car', 'Cyclist', 'Pedestrian')
CLASSES = ('Car', 'Pedestrian', 'Cyclist')
def __init__(self,
data_root,
ann_file,
split,
pts_prefix='velodyne',
pipeline=None,
classes=None,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
data_root: str,
ann_file: str,
data_prefix: dict = dict(
pts='velodyne',
CAM_FRONT='image_0',
CAM_FRONT_RIGHT='image_1',
CAM_FRONT_LEFT='image_2',
CAM_SIDE_RIGHT='image_3',
CAM_SIDE_LEFT='image_4'),
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
default_cam_key: str = 'CAM_FRONT',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
cam_sync_instances=False,
load_interval=1,
pcd_limit_range=[-85, -85, -5, 85, 85, 5],
task='lidar',
max_sweeps=0,
**kwargs):
self.load_interval = load_interval
# set loading mode for different task settings
self.cam_sync_instances = cam_sync_instances
# construct self.cat_ids for vision-only anns parsing
self.cat_ids = range(len(self.CLASSES))
self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
self.max_sweeps = max_sweeps
self.task = task
# We do not pass file_client_args to the custom_3d init because we
# want to load the info files from disk while using ceph loading for
# KITTI2Waymo.
super().__init__(
data_root=data_root,
ann_file=ann_file,
split=split,
pts_prefix=pts_prefix,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
pcd_limit_range=pcd_limit_range,
default_cam_key=default_cam_key,
data_prefix=data_prefix,
test_mode=test_mode,
**kwargs)
# to load a subset, just set the load_interval in the dataset config
self.data_infos = self.data_infos[::load_interval]
if hasattr(self, 'flag'):
self.flag = self.flag[::load_interval]
def _get_pts_filename(self, idx):
pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:07d}.bin')
return pts_filename
def get_data_info(self, index):
"""Get data info according to the given index.
def parse_ann_info(self, info: dict) -> dict:
"""Get annotation info according to the given index.
Args:
index (int): Index of the sample data to get.
info (dict): Data information of single data sample.
Returns:
dict: Standard input_dict consists of the
data information.
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- img_prefix (str): prefix of image files
- img_info (dict): image info
- lidar2img (list[np.ndarray], optional): transformations from
lidar to different cameras
- ann_info (dict): annotation info
dict: Annotation information consisting of the following keys:
- bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3D ground truth bboxes.
- bbox_labels_3d (np.ndarray): Labels of ground truths.
- gt_bboxes (np.ndarray): 2D ground truth bboxes.
- gt_labels (np.ndarray): Labels of ground truths.
- difficulty (int): Difficulty defined by KITTI.
0, 1, 2 represent easy, moderate and hard respectively.
"""
info = self.data_infos[index]
sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.data_root,
info['image']['image_path'])
# TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32)
lidar2img = P0 @ rect @ Trv2c
pts_filename = self._get_pts_filename(sample_idx)
input_dict = dict(
sample_idx=sample_idx,
pts_filename=pts_filename,
img_prefix=None,
img_info=dict(filename=img_filename),
lidar2img=lidar2img)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None,
data_format='waymo'):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
data_format (str, optional): Output data format.
Default: 'waymo'. Another supported choice is 'kitti'.
Returns:
tuple: (result_files, tmp_dir), result_files is a dict containing
the json filepaths, tmp_dir is the temporal directory created
for saving json files when jsonfile_prefix is not specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
ann_info = Det3DDataset.parse_ann_info(self, info)
if ann_info is None:
# empty instance
anns_results = {}
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
return anns_results
ann_info = self._remove_dontcare(ann_info)
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
# convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
if 'gt_bboxes' in ann_info:
gt_bboxes = ann_info['gt_bboxes']
gt_labels = ann_info['gt_labels']
else:
tmp_dir = None
assert ('waymo' in data_format or 'kitti' in data_format), \
f'invalid data_format {data_format}'
if (not isinstance(outputs[0], dict)) or 'img_bbox' in outputs[0]:
raise TypeError('Not supported type for reformat results.')
elif 'pts_bbox' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = f'{submission_prefix}_{name}'
else:
submission_prefix_ = None
result_files_ = self.bbox2result_kitti(results_, self.CLASSES,
pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if 'centers_2d' in ann_info:
centers_2d = ann_info['centers_2d']
depths = ann_info['depths']
else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
if 'waymo' in data_format:
from mmdet3d.evaluation.functional.waymo_utils import \
KITTI2Waymo # noqa
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if self.split == 'training':
waymo_tfrecords_dir = osp.join(waymo_root, 'validation')
prefix = '1'
elif self.split == 'testing':
waymo_tfrecords_dir = osp.join(waymo_root, 'testing')
prefix = '2'
else:
raise ValueError('Not supported split value.')
save_tmp_dir = tempfile.TemporaryDirectory()
waymo_results_save_dir = save_tmp_dir.name
waymo_results_final_path = f'{pklfile_prefix}.bin'
if 'pts_bbox' in result_files:
converter = KITTI2Waymo(result_files['pts_bbox'],
waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
else:
converter = KITTI2Waymo(result_files, waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
converter.convert()
save_tmp_dir.cleanup()
return result_files, tmp_dir
def evaluate(self,
results,
metric='waymo',
logger=None,
pklfile_prefix=None,
submission_prefix=None,
show=False,
out_dir=None,
pipeline=None):
"""Evaluation in KITTI protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str], optional): Metrics to be evaluated.
Default: 'waymo'. Another supported metric is 'kitti'.
logger (logging.Logger | str, optional): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str, optional): The prefix of pkl files including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool, optional): Whether to visualize.
Default: False.
out_dir (str, optional): Path to save the visualization results.
Default: None.
pipeline (list[dict], optional): raw data loading for showing.
Default: None.
Returns:
dict[str: float]: results of each evaluation metric
"""
assert ('waymo' in metric or 'kitti' in metric), \
f'invalid metric {metric}'
if 'kitti' in metric:
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='kitti')
from mmdet3d.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.data_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bev', '3d']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float(
'{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
ap_result_str, ap_dict = kitti_eval(
gt_annos,
result_files,
self.CLASSES,
eval_types=['bev', '3d'])
print_log('\n' + ap_result_str, logger=logger)
if 'waymo' in metric:
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if pklfile_prefix is None:
eval_tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')
else:
eval_tmp_dir = None
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='waymo')
import subprocess
ret_bytes = subprocess.check_output(
'mmdet3d/core/evaluation/waymo_utils/' +
f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
f'{waymo_root}/gt.bin',
shell=True)
ret_texts = ret_bytes.decode('utf-8')
print_log(ret_texts)
# parse the text to get ap_dict
ap_dict = {
'Vehicle/L1 mAP': 0,
'Vehicle/L1 mAPH': 0,
'Vehicle/L2 mAP': 0,
'Vehicle/L2 mAPH': 0,
'Pedestrian/L1 mAP': 0,
'Pedestrian/L1 mAPH': 0,
'Pedestrian/L2 mAP': 0,
'Pedestrian/L2 mAPH': 0,
'Sign/L1 mAP': 0,
'Sign/L1 mAPH': 0,
'Sign/L2 mAP': 0,
'Sign/L2 mAPH': 0,
'Cyclist/L1 mAP': 0,
'Cyclist/L1 mAPH': 0,
'Cyclist/L2 mAP': 0,
'Cyclist/L2 mAPH': 0,
'Overall/L1 mAP': 0,
'Overall/L1 mAPH': 0,
'Overall/L2 mAP': 0,
'Overall/L2 mAPH': 0
}
mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ')
for idx, key in enumerate(ap_dict.keys()):
split_idx = int(idx / 2) + 1
if idx % 2 == 0: # mAP
ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
else: # mAPH
ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
ap_dict['Overall/L1 mAP'] = \
(ap_dict['Vehicle/L1 mAP'] + ap_dict['Pedestrian/L1 mAP'] +
ap_dict['Cyclist/L1 mAP']) / 3
ap_dict['Overall/L1 mAPH'] = \
(ap_dict['Vehicle/L1 mAPH'] + ap_dict['Pedestrian/L1 mAPH'] +
ap_dict['Cyclist/L1 mAPH']) / 3
ap_dict['Overall/L2 mAP'] = \
(ap_dict['Vehicle/L2 mAP'] + ap_dict['Pedestrian/L2 mAP'] +
ap_dict['Cyclist/L2 mAP']) / 3
ap_dict['Overall/L2 mAPH'] = \
(ap_dict['Vehicle/L2 mAPH'] + ap_dict['Pedestrian/L2 mAPH'] +
ap_dict['Cyclist/L2 mAPH']) / 3
if eval_tmp_dir is not None:
eval_tmp_dir.cleanup()
if tmp_dir is not None:
tmp_dir.cleanup()
centers_2d = np.zeros((0, 2), dtype=np.float32)
depths = np.zeros((0), dtype=np.float32)
if show or out_dir:
self.show(results, out_dir, show=show, pipeline=pipeline)
return ap_dict
if self.task == 'mono3d':
gt_bboxes_3d = CameraInstance3DBoxes(
ann_info['gt_bboxes_3d'],
box_dim=ann_info['gt_bboxes_3d'].shape[-1],
origin=(0.5, 0.5, 0.5))
def bbox2result_kitti(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert results to kitti format for evaluation and test submission.
Args:
net_outputs (list[np.ndarray]): List of arrays storing the
bboxes and scores.
class_names (list[str]): A list of class names.
pklfile_prefix (str): The prefix of the pkl file.
submission_prefix (str): The prefix of the submission file.
Returns:
list[dict]: A list of dicts in the KITTI 3D format.
"""
assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
det_annos = []
print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.data_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
box_dict = self.convert_valid_bboxes(pred_dicts, info)
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:07d}.txt'
with open(curr_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
format(anno['name'][idx], anno['alpha'][idx],
bbox[idx][0], bbox[idx][1],
bbox[idx][2], bbox[idx][3],
dims[idx][1], dims[idx][2],
dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][2], anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
else:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')):
out = f'{pklfile_prefix}.pkl'
mmcv.dump(det_annos, out)
print(f'Result is saved to {out}.')
return det_annos
def convert_valid_bboxes(self, box_dict, info):
"""Convert the boxes into valid format.
Args:
box_dict (dict): Bounding boxes to be converted.
- boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
- scores_3d (np.ndarray): Scores of predicted boxes.
- labels_3d (np.ndarray): Class labels of predicted boxes.
info (dict): Dataset information dictionary.
Returns:
dict: Valid boxes after conversion.
- bbox (np.ndarray): 2D bounding boxes (in camera 0).
- box3d_camera (np.ndarray): 3D boxes in camera coordinates.
- box3d_lidar (np.ndarray): 3D boxes in lidar coordinates.
- scores (np.ndarray): Scores of predicted boxes.
- label_preds (np.ndarray): Class labels of predicted boxes.
- sample_idx (np.ndarray): Sample index.
"""
# TODO: refactor this function
box_preds = box_dict['boxes_3d']
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if len(box_preds) == 0:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32)
P0 = box_preds.tensor.new_tensor(P0)
box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
box_corners = box_preds_camera.corners
box_corners_in_image = points_cam2img(box_corners, P0)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check box_preds
limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
(box_preds.center < limit_range[3:]))
valid_inds = valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=box_preds[valid_inds].tensor.numpy(),
scores=scores[valid_inds].numpy(),
label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx,
)
else:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx,
)
lidar2cam = np.array(
info['images'][self.default_cam_key]['lidar2cam'])
gt_bboxes_3d = CameraInstance3DBoxes(
ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d,
np.linalg.inv(lidar2cam))
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=ann_info['gt_labels_3d'],
gt_bboxes=gt_bboxes,
gt_labels=gt_labels,
centers_2d=centers_2d,
depths=depths)
return anns_results
def load_data_list(self) -> List[dict]:
"""Add the load interval."""
data_list = super().load_data_list()
data_list = data_list[::self.load_interval]
return data_list
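`load_interval` just slices the parsed list; with `load_interval=5`, as in the Waymo config above, every fifth frame is kept:

```python
data_list = [dict(sample_id=i) for i in range(12)]  # toy parsed infos
load_interval = 5
print([d['sample_id'] for d in data_list[::load_interval]])  # [0, 5, 10]
```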
def parse_data_info(self, info: dict) -> dict:
"""if task is lidar or multiview det, use super() method elif task is
mono3d, split the info from frame-wise to img-wise."""
if self.task != 'mono3d':
if self.cam_sync_instances:
# use the cam sync labels
info['instances'] = info['cam_sync_instances']
return super().parse_data_info(info)
else:
# in the mono3d, the instances is from cam sync.
data_list = []
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
osp.join(
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
if self.modality['use_camera']:
for cam_key, img_info in info['images'].items():
if 'img_path' in img_info:
cam_prefix = self.data_prefix.get(cam_key, '')
img_info['img_path'] = osp.join(
cam_prefix, img_info['img_path'])
for (cam_key, img_info) in info['images'].items():
camera_info = dict()
camera_info['images'] = dict()
camera_info['images'][cam_key] = img_info
if 'cam_instances' in info \
and cam_key in info['cam_instances']:
camera_info['instances'] = info['cam_instances'][cam_key]
else:
camera_info['instances'] = []
camera_info['ego2global'] = info['ego2global']
if 'image_sweeps' in info:
camera_info['image_sweeps'] = info['image_sweeps']
# TODO: check whether the sample id needs to be modified.
# TODO: check when it is used other than for evaluation.
camera_info['sample_id'] = info['sample_id']
if not self.test_mode:
# used in training
camera_info['ann_info'] = self.parse_ann_info(camera_info)
if self.test_mode and self.load_eval_anns:
info['eval_ann_info'] = self.parse_ann_info(info)
data_list.append(camera_info)
return data_list
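In the `mono3d` branch above, one frame-level info fans out into one entry per camera. A toy sketch of the fan-out, keeping only the fields touched by the loop (contents made up):

```python
info = {
    'images': {
        'CAM_FRONT': {'img_path': '0000000.png'},
        'CAM_FRONT_LEFT': {'img_path': '0000000.png'},
    },
    'cam_instances': {'CAM_FRONT': [{'bbox_label': 0}]},
    'ego2global': [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
    'sample_id': 0,
}

data_list = []
for cam_key, img_info in info['images'].items():
    camera_info = {
        'images': {cam_key: img_info},
        'instances': info['cam_instances'].get(cam_key, []),
        'ego2global': info['ego2global'],
        'sample_id': info['sample_id'],
    }
    data_list.append(camera_info)

print(len(data_list))  # 2: one entry per camera
```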