Commit 3b8d508a authored by lishj6

init_0905

parent e968ab0f
# Copyright (c) OpenMMLab. All rights reserved.
import pickle
import numpy as np
from nuscenes import NuScenes
from nuscenes.utils.data_classes import Box
from pyquaternion import Quaternion
from tools.data_converter import nuscenes_converter as nuscenes_converter
map_name_from_general_to_detection = {
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.wheelchair': 'ignore',
'human.pedestrian.stroller': 'ignore',
'human.pedestrian.personal_mobility': 'ignore',
'human.pedestrian.police_officer': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'animal': 'ignore',
'vehicle.car': 'car',
'vehicle.motorcycle': 'motorcycle',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.truck': 'truck',
'vehicle.construction': 'construction_vehicle',
'vehicle.emergency.ambulance': 'ignore',
'vehicle.emergency.police': 'ignore',
'vehicle.trailer': 'trailer',
'movable_object.barrier': 'barrier',
'movable_object.trafficcone': 'traffic_cone',
'movable_object.pushable_pullable': 'ignore',
'movable_object.debris': 'ignore',
'static_object.bicycle_rack': 'ignore',
}
classes = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
def get_gt(info):
"""Generate gt labels from info.
Args:
info(dict): Infos needed to generate gt labels.
Returns:
Tensor: GT bboxes.
Tensor: GT labels.
"""
ego2global_rotation = info['cams']['CAM_FRONT']['ego2global_rotation']
ego2global_translation = info['cams']['CAM_FRONT'][
'ego2global_translation']
trans = -np.array(ego2global_translation)
rot = Quaternion(ego2global_rotation).inverse
gt_boxes = list()
gt_labels = list()
for ann_info in info['ann_infos']:
# Use ego coordinate.
if (map_name_from_general_to_detection[ann_info['category_name']]
not in classes
or ann_info['num_lidar_pts'] + ann_info['num_radar_pts'] <= 0):
continue
box = Box(
ann_info['translation'],
ann_info['size'],
Quaternion(ann_info['rotation']),
velocity=ann_info['velocity'],
)
box.translate(trans)
box.rotate(rot)
box_xyz = np.array(box.center)
box_dxdydz = np.array(box.wlh)[[1, 0, 2]]
box_yaw = np.array([box.orientation.yaw_pitch_roll[0]])
box_velo = np.array(box.velocity[:2])
gt_box = np.concatenate([box_xyz, box_dxdydz, box_yaw, box_velo])
gt_boxes.append(gt_box)
gt_labels.append(
classes.index(
map_name_from_general_to_detection[ann_info['category_name']]))
return gt_boxes, gt_labels
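# Illustrative sketch (hypothetical helper, not part of the original script):
# each entry returned by get_gt() is a 9-dim vector
# [x, y, z, dx, dy, dz, yaw, vx, vy] in the ego frame of CAM_FRONT; this just
# stacks the lists into arrays for downstream use.
def _example_stack_gt(info):
    gt_boxes, gt_labels = get_gt(info)
    boxes = np.asarray(gt_boxes, dtype=np.float32).reshape(-1, 9)
    labels = np.asarray(gt_labels, dtype=np.int64)
    return boxes, labels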
def nuscenes_data_prep(root_path, info_prefix, version, max_sweeps=10):
"""Prepare data related to nuScenes dataset.
Related data consists of '.pkl' files recording basic infos,
2D annotations and groundtruth database.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Dataset version.
max_sweeps (int, optional): Number of input consecutive frames.
Default: 10
"""
nuscenes_converter.create_nuscenes_infos(
root_path, info_prefix, version=version, max_sweeps=max_sweeps)
def add_ann_adj_info(extra_tag):
nuscenes_version = 'v1.0-trainval'
dataroot = './data/nuscenes/'
nuscenes = NuScenes(nuscenes_version, dataroot)
for set in ['train', 'val']:
dataset = pickle.load(
open('%s/%s_infos_%s.pkl' % (dataroot, extra_tag, set), 'rb'))
for id in range(len(dataset['infos'])):
if id % 10 == 0:
print('%d/%d' % (id, len(dataset['infos'])))
info = dataset['infos'][id]
# get sweep adjacent frame info
sample = nuscenes.get('sample', info['token'])
ann_infos = list()
for ann in sample['anns']:
ann_info = nuscenes.get('sample_annotation', ann)
velocity = nuscenes.box_velocity(ann_info['token'])
if np.any(np.isnan(velocity)):
velocity = np.zeros(3)
ann_info['velocity'] = velocity
ann_infos.append(ann_info)
dataset['infos'][id]['ann_infos'] = ann_infos
dataset['infos'][id]['ann_infos'] = get_gt(dataset['infos'][id])
dataset['infos'][id]['scene_token'] = sample['scene_token']
scene = nuscenes.get('scene', sample['scene_token'])
dataset['infos'][id]['scene_name'] = scene['name']
dataset['infos'][id]['occ_path'] = \
'./data/nuscenes/gts/%s/%s'%(scene['name'], info['token'])
with open('%s/%s_infos_%s.pkl' % (dataroot, extra_tag, set),
'wb') as fid:
pickle.dump(dataset, fid)
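# Illustrative sketch (hypothetical helper, never called by this script):
# after add_ann_adj_info() has run, every info dict carries 'ann_infos'
# (the gt boxes and labels produced by get_gt), 'scene_token', 'scene_name'
# and 'occ_path'. This shows how the enriched train pkl could be inspected,
# assuming the same dataroot and extra_tag as used below.
def _example_inspect_infos(dataroot='./data/nuscenes/',
                           extra_tag='bevdetv2-nuscenes'):
    with open('%s/%s_infos_train.pkl' % (dataroot, extra_tag), 'rb') as fid:
        dataset = pickle.load(fid)
    info = dataset['infos'][0]
    gt_boxes, gt_labels = info['ann_infos']
    print(len(gt_boxes), info['scene_name'], info['occ_path'])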
if __name__ == '__main__':
dataset = 'nuscenes'
version = 'v1.0'
train_version = f'{version}-trainval'
root_path = 'data/nuscenes'
extra_tag = 'bevdetv2-nuscenes'
nuscenes_data_prep(
root_path=root_path,
info_prefix=extra_tag,
version=train_version,
max_sweeps=0)
print('add_ann_infos')
add_ann_adj_info(extra_tag)
# Copyright (c) OpenMMLab. All rights reserved.
import pickle
from os import path as osp
import mmcv
import numpy as np
from mmcv import track_iter_progress
from mmcv.ops import roi_align
from pycocotools import mask as maskUtils
from pycocotools.coco import COCO
from mmdet3d.core.bbox import box_np_ops as box_np_ops
from mmdet3d.datasets import build_dataset
from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
def _poly2mask(mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
rle = maskUtils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = maskUtils.decode(rle)
return mask
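# Illustrative sketch (hypothetical values): decoding a single triangle given
# in COCO polygon format ([x1, y1, x2, y2, ...]) into a binary mask of a
# 4x6 image.
def _example_poly2mask():
    polygon = [[1.0, 1.0, 5.0, 1.0, 5.0, 3.0]]  # one polygon, flat x/y pairs
    mask = _poly2mask(polygon, img_h=4, img_w=6)
    return mask  # uint8 array of shape (4, 6), 1 inside the triangle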
def _parse_coco_ann_info(ann_info):
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks_ann = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0:
continue
bbox = [x1, y1, x1 + w, y1 + h]
if ann.get('iscrowd', False):
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_masks_ann.append(ann['segmentation'])
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)
return ann
def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
import torch
from torch.nn.modules.utils import _pair
device = pos_proposals.device
num_pos = pos_proposals.size(0)
fake_inds = (
torch.arange(num_pos,
device=device).to(dtype=pos_proposals.dtype)[:, None])
rois = torch.cat([fake_inds, pos_proposals], dim=1) # Nx5
mask_size = _pair(28)
rois = rois.to(device=device)
gt_masks_th = (
torch.from_numpy(gt_masks).to(device).index_select(
0, pos_assigned_gt_inds).to(dtype=rois.dtype))
    # Using RoIAlign here can apparently accelerate training (~0.1 s/iter).
targets = (
roi_align(gt_masks_th, rois, mask_size[::-1], 1.0, 0, True).squeeze(1))
return targets
def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
num_pos = pos_proposals.shape[0]
masks = []
img_patches = []
for i in range(num_pos):
gt_mask = gt_masks[pos_assigned_gt_inds[i]]
bbox = pos_proposals[i, :].astype(np.int32)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1 + 1, 1)
h = np.maximum(y2 - y1 + 1, 1)
mask_patch = gt_mask[y1:y1 + h, x1:x1 + w]
masked_img = gt_mask[..., None] * org_img
img_patch = masked_img[y1:y1 + h, x1:x1 + w]
img_patches.append(img_patch)
masks.append(mask_patch)
return img_patches, masks
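# Illustrative sketch (hypothetical data): cropping one mask/image patch for a
# single proposal; real inputs come from the 2D annotations handled above.
def _example_crop_patch():
    org_img = np.zeros((8, 8, 3), dtype=np.uint8)
    gt_masks = [np.ones((8, 8), dtype=np.uint8)]
    proposals = np.array([[1.0, 1.0, 4.0, 4.0]])  # x1, y1, x2, y2
    assigned_gt_inds = np.array([0])
    img_patches, mask_patches = crop_image_patch(
        proposals, gt_masks, assigned_gt_inds, org_img)
    return img_patches[0].shape, mask_patches[0].shape  # (4, 4, 3), (4, 4)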
def create_groundtruth_database(dataset_class_name,
data_path,
info_prefix,
info_path=None,
mask_anno_path=None,
used_classes=None,
database_save_path=None,
db_info_save_path=None,
relative_path=True,
add_rgb=False,
lidar_only=False,
bev_only=False,
coors_range=None,
with_mask=False):
"""Given the raw data, generate the ground truth database.
Args:
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str, optional): Path of the info file.
Default: None.
mask_anno_path (str, optional): Path of the mask_anno.
Default: None.
        used_classes (list[str], optional): Classes that are used.
            Default: None.
database_save_path (str, optional): Path to save database.
Default: None.
db_info_save_path (str, optional): Path to save db_info.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
with_mask (bool, optional): Whether to use mask.
Default: False.
"""
print(f'Create GT Database of {dataset_class_name}')
dataset_cfg = dict(
type=dataset_class_name, data_root=data_path, ann_file=info_path)
if dataset_class_name == 'KittiDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=with_mask,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
elif dataset_class_name == 'NuScenesDataset':
dataset_cfg.update(
use_valid_flag=True,
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True)
])
elif dataset_class_name == 'WaymoDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=6,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
dataset = build_dataset(dataset_cfg)
if database_save_path is None:
database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
if db_info_save_path is None:
db_info_save_path = osp.join(data_path,
f'{info_prefix}_dbinfos_train.pkl')
mmcv.mkdir_or_exist(database_save_path)
all_db_infos = dict()
if with_mask:
coco = COCO(osp.join(data_path, mask_anno_path))
imgIds = coco.getImgIds()
file2id = dict()
for i in imgIds:
info = coco.loadImgs([i])[0]
file2id.update({info['file_name']: i})
group_counter = 0
for j in track_iter_progress(list(range(len(dataset)))):
input_dict = dataset.get_data_info(j)
dataset.pre_pipeline(input_dict)
example = dataset.pipeline(input_dict)
annos = example['ann_info']
image_idx = example['sample_idx']
points = example['points'].tensor.numpy()
gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
names = annos['gt_names']
group_dict = dict()
if 'group_ids' in annos:
group_ids = annos['group_ids']
else:
group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
if 'difficulty' in annos:
difficulty = annos['difficulty']
num_obj = gt_boxes_3d.shape[0]
point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
if with_mask:
# prepare masks
gt_boxes = annos['gt_bboxes']
img_path = osp.split(example['img_info']['filename'])[-1]
if img_path not in file2id.keys():
print(f'skip image {img_path} for empty mask')
continue
img_id = file2id[img_path]
kins_annIds = coco.getAnnIds(imgIds=img_id)
kins_raw_info = coco.loadAnns(kins_annIds)
kins_ann_info = _parse_coco_ann_info(kins_raw_info)
h, w = annos['img_shape'][:2]
gt_masks = [
_poly2mask(mask, h, w) for mask in kins_ann_info['masks']
]
# get mask inds based on iou mapping
bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
mask_inds = bbox_iou.argmax(axis=0)
valid_inds = (bbox_iou.max(axis=0) > 0.5)
# mask the image
# use more precise crop when it is ready
# object_img_patches = np.ascontiguousarray(
# np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
# crop image patches using roi_align
# object_img_patches = crop_image_patch_v2(
# torch.Tensor(gt_boxes),
# torch.Tensor(mask_inds).long(), object_img_patches)
object_img_patches, object_masks = crop_image_patch(
gt_boxes, gt_masks, mask_inds, annos['img'])
for i in range(num_obj):
filename = f'{image_idx}_{names[i]}_{i}.bin'
abs_filepath = osp.join(database_save_path, filename)
rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)
# save point clouds and image patches for each object
gt_points = points[point_indices[:, i]]
gt_points[:, :3] -= gt_boxes_3d[i, :3]
if with_mask:
if object_masks[i].sum() == 0 or not valid_inds[i]:
# Skip object for empty or invalid mask
continue
img_patch_path = abs_filepath + '.png'
mask_patch_path = abs_filepath + '.mask.png'
mmcv.imwrite(object_img_patches[i], img_patch_path)
mmcv.imwrite(object_masks[i], mask_patch_path)
            with open(abs_filepath, 'wb') as f:
gt_points.tofile(f)
if (used_classes is None) or names[i] in used_classes:
db_info = {
'name': names[i],
'path': rel_filepath,
'image_idx': image_idx,
'gt_idx': i,
'box3d_lidar': gt_boxes_3d[i],
'num_points_in_gt': gt_points.shape[0],
'difficulty': difficulty[i],
}
local_group_id = group_ids[i]
# if local_group_id >= 0:
if local_group_id not in group_dict:
group_dict[local_group_id] = group_counter
group_counter += 1
db_info['group_id'] = group_dict[local_group_id]
if 'score' in annos:
db_info['score'] = annos['score'][i]
if with_mask:
db_info.update({'box2d_camera': gt_boxes[i]})
if names[i] in all_db_infos:
all_db_infos[names[i]].append(db_info)
else:
all_db_infos[names[i]] = [db_info]
for k, v in all_db_infos.items():
print(f'load {len(v)} {k} database infos')
with open(db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
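# Illustrative usage sketch (paths and prefix are assumptions; adapt to the
# local data layout): building the nuScenes GT database from an existing
# info pkl.
def _example_create_nuscenes_gt_database():
    create_groundtruth_database(
        'NuScenesDataset',
        data_path='./data/nuscenes',
        info_prefix='bevdetv2-nuscenes',
        info_path='./data/nuscenes/bevdetv2-nuscenes_infos_train.pkl')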
class GTDatabaseCreater:
"""Given the raw data, generate the ground truth database. This is the
parallel version. For serialized version, please refer to
`create_groundtruth_database`
Args:
dataset_class_name (str): Name of the input dataset.
data_path (str): Path of the data.
info_prefix (str): Prefix of the info file.
info_path (str, optional): Path of the info file.
Default: None.
mask_anno_path (str, optional): Path of the mask_anno.
Default: None.
        used_classes (list[str], optional): Classes that are used.
            Default: None.
database_save_path (str, optional): Path to save database.
Default: None.
db_info_save_path (str, optional): Path to save db_info.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
with_mask (bool, optional): Whether to use mask.
Default: False.
        num_worker (int, optional): The number of parallel workers to use.
            Default: 8.
"""
def __init__(self,
dataset_class_name,
data_path,
info_prefix,
info_path=None,
mask_anno_path=None,
used_classes=None,
database_save_path=None,
db_info_save_path=None,
relative_path=True,
add_rgb=False,
lidar_only=False,
bev_only=False,
coors_range=None,
with_mask=False,
num_worker=8) -> None:
self.dataset_class_name = dataset_class_name
self.data_path = data_path
self.info_prefix = info_prefix
self.info_path = info_path
self.mask_anno_path = mask_anno_path
self.used_classes = used_classes
self.database_save_path = database_save_path
self.db_info_save_path = db_info_save_path
self.relative_path = relative_path
self.add_rgb = add_rgb
self.lidar_only = lidar_only
self.bev_only = bev_only
self.coors_range = coors_range
self.with_mask = with_mask
self.num_worker = num_worker
self.pipeline = None
def create_single(self, input_dict):
group_counter = 0
single_db_infos = dict()
example = self.pipeline(input_dict)
annos = example['ann_info']
image_idx = example['sample_idx']
points = example['points'].tensor.numpy()
gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
names = annos['gt_names']
group_dict = dict()
if 'group_ids' in annos:
group_ids = annos['group_ids']
else:
group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
if 'difficulty' in annos:
difficulty = annos['difficulty']
num_obj = gt_boxes_3d.shape[0]
point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
if self.with_mask:
# prepare masks
gt_boxes = annos['gt_bboxes']
img_path = osp.split(example['img_info']['filename'])[-1]
if img_path not in self.file2id.keys():
print(f'skip image {img_path} for empty mask')
return single_db_infos
img_id = self.file2id[img_path]
kins_annIds = self.coco.getAnnIds(imgIds=img_id)
kins_raw_info = self.coco.loadAnns(kins_annIds)
kins_ann_info = _parse_coco_ann_info(kins_raw_info)
h, w = annos['img_shape'][:2]
gt_masks = [
_poly2mask(mask, h, w) for mask in kins_ann_info['masks']
]
# get mask inds based on iou mapping
bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
mask_inds = bbox_iou.argmax(axis=0)
valid_inds = (bbox_iou.max(axis=0) > 0.5)
# mask the image
# use more precise crop when it is ready
# object_img_patches = np.ascontiguousarray(
# np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
# crop image patches using roi_align
# object_img_patches = crop_image_patch_v2(
# torch.Tensor(gt_boxes),
# torch.Tensor(mask_inds).long(), object_img_patches)
object_img_patches, object_masks = crop_image_patch(
gt_boxes, gt_masks, mask_inds, annos['img'])
for i in range(num_obj):
filename = f'{image_idx}_{names[i]}_{i}.bin'
abs_filepath = osp.join(self.database_save_path, filename)
rel_filepath = osp.join(f'{self.info_prefix}_gt_database',
filename)
# save point clouds and image patches for each object
gt_points = points[point_indices[:, i]]
gt_points[:, :3] -= gt_boxes_3d[i, :3]
if self.with_mask:
if object_masks[i].sum() == 0 or not valid_inds[i]:
# Skip object for empty or invalid mask
continue
img_patch_path = abs_filepath + '.png'
mask_patch_path = abs_filepath + '.mask.png'
mmcv.imwrite(object_img_patches[i], img_patch_path)
mmcv.imwrite(object_masks[i], mask_patch_path)
            with open(abs_filepath, 'wb') as f:
gt_points.tofile(f)
if (self.used_classes is None) or names[i] in self.used_classes:
db_info = {
'name': names[i],
'path': rel_filepath,
'image_idx': image_idx,
'gt_idx': i,
'box3d_lidar': gt_boxes_3d[i],
'num_points_in_gt': gt_points.shape[0],
'difficulty': difficulty[i],
}
local_group_id = group_ids[i]
# if local_group_id >= 0:
if local_group_id not in group_dict:
group_dict[local_group_id] = group_counter
group_counter += 1
db_info['group_id'] = group_dict[local_group_id]
if 'score' in annos:
db_info['score'] = annos['score'][i]
if self.with_mask:
db_info.update({'box2d_camera': gt_boxes[i]})
if names[i] in single_db_infos:
single_db_infos[names[i]].append(db_info)
else:
single_db_infos[names[i]] = [db_info]
return single_db_infos
def create(self):
print(f'Create GT Database of {self.dataset_class_name}')
dataset_cfg = dict(
type=self.dataset_class_name,
data_root=self.data_path,
ann_file=self.info_path)
if self.dataset_class_name == 'KittiDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=self.with_mask,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
elif self.dataset_class_name == 'NuScenesDataset':
dataset_cfg.update(
use_valid_flag=True,
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True)
])
elif self.dataset_class_name == 'WaymoDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
),
pipeline=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=6,
use_dim=6,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
dataset = build_dataset(dataset_cfg)
self.pipeline = dataset.pipeline
if self.database_save_path is None:
self.database_save_path = osp.join(
self.data_path, f'{self.info_prefix}_gt_database')
if self.db_info_save_path is None:
self.db_info_save_path = osp.join(
self.data_path, f'{self.info_prefix}_dbinfos_train.pkl')
mmcv.mkdir_or_exist(self.database_save_path)
if self.with_mask:
self.coco = COCO(osp.join(self.data_path, self.mask_anno_path))
imgIds = self.coco.getImgIds()
self.file2id = dict()
for i in imgIds:
info = self.coco.loadImgs([i])[0]
self.file2id.update({info['file_name']: i})
def loop_dataset(i):
input_dict = dataset.get_data_info(i)
dataset.pre_pipeline(input_dict)
return input_dict
multi_db_infos = mmcv.track_parallel_progress(
self.create_single, ((loop_dataset(i)
for i in range(len(dataset))), len(dataset)),
self.num_worker)
print('Make global unique group id')
group_counter_offset = 0
all_db_infos = dict()
for single_db_infos in track_iter_progress(multi_db_infos):
group_id = -1
for name, name_db_infos in single_db_infos.items():
for db_info in name_db_infos:
group_id = max(group_id, db_info['group_id'])
db_info['group_id'] += group_counter_offset
if name not in all_db_infos:
all_db_infos[name] = []
all_db_infos[name].extend(name_db_infos)
group_counter_offset += (group_id + 1)
for k, v in all_db_infos.items():
print(f'load {len(v)} {k} database infos')
with open(self.db_info_save_path, 'wb') as f:
pickle.dump(all_db_infos, f)
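# Illustrative usage sketch: the parallel creater mirrors
# create_groundtruth_database(); the KITTI paths below are assumptions.
def _example_create_kitti_gt_database_parallel():
    GTDatabaseCreater(
        'KittiDataset',
        data_path='./data/kitti',
        info_prefix='kitti',
        info_path='./data/kitti/kitti_infos_train.pkl',
        with_mask=False,
        num_worker=8).create()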
# Copyright (c) OpenMMLab. All rights reserved.
import os
import mmcv
import numpy as np
from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData
from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_indoor_info_file(data_path,
pkl_prefix='sunrgbd',
save_path=None,
workers=4,
**kwargs):
"""Create indoor information file.
Get information of the raw data and save it to the pkl file.
Args:
data_path (str): Path of the data.
pkl_prefix (str, optional): Prefix of the pkl to be saved.
Default: 'sunrgbd'.
save_path (str, optional): Path of the pkl to be saved. Default: None.
workers (int, optional): Number of threads to be used. Default: 4.
kwargs (dict): Additional parameters for dataset-specific Data class.
May include `use_v1` for SUN RGB-D and `num_points`.
"""
assert os.path.exists(data_path)
assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
f'unsupported indoor dataset {pkl_prefix}'
save_path = data_path if save_path is None else save_path
assert os.path.exists(save_path)
# generate infos for both detection and segmentation task
if pkl_prefix in ['sunrgbd', 'scannet']:
train_filename = os.path.join(save_path,
f'{pkl_prefix}_infos_train.pkl')
val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
if pkl_prefix == 'sunrgbd':
# SUN RGB-D has a train-val split
num_points = kwargs.get('num_points', -1)
use_v1 = kwargs.get('use_v1', False)
train_dataset = SUNRGBDData(
root_path=data_path,
split='train',
use_v1=use_v1,
num_points=num_points)
val_dataset = SUNRGBDData(
root_path=data_path,
split='val',
use_v1=use_v1,
num_points=num_points)
else:
# ScanNet has a train-val-test split
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
test_dataset = ScanNetData(root_path=data_path, split='test')
test_filename = os.path.join(save_path,
f'{pkl_prefix}_infos_test.pkl')
infos_train = train_dataset.get_infos(
num_workers=workers, has_label=True)
mmcv.dump(infos_train, train_filename, 'pkl')
print(f'{pkl_prefix} info train file is saved to {train_filename}')
infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')
if pkl_prefix == 'scannet':
infos_test = test_dataset.get_infos(
num_workers=workers, has_label=False)
mmcv.dump(infos_test, test_filename, 'pkl')
print(f'{pkl_prefix} info test file is saved to {test_filename}')
# generate infos for the semantic segmentation task
# e.g. re-sampled scene indexes and label weights
# scene indexes are used to re-sample rooms with different number of points
# label weights are used to balance classes with different number of points
if pkl_prefix == 'scannet':
# label weight computation function is adopted from
# https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
num_points = kwargs.get('num_points', 8192)
train_dataset = ScanNetSegData(
data_root=data_path,
ann_file=train_filename,
split='train',
num_points=num_points,
label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
# TODO: do we need to generate on val set?
val_dataset = ScanNetSegData(
data_root=data_path,
ann_file=val_filename,
split='val',
num_points=num_points,
label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
# no need to generate for test set
train_dataset.get_seg_infos()
val_dataset.get_seg_infos()
elif pkl_prefix == 's3dis':
# S3DIS doesn't have a fixed train-val split
# it has 6 areas instead, so we generate info file for each of them
# in training, we will use dataset to wrap different areas
splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]]
for split in splits:
dataset = S3DISData(root_path=data_path, split=split)
info = dataset.get_infos(num_workers=workers, has_label=True)
filename = os.path.join(save_path,
f'{pkl_prefix}_infos_{split}.pkl')
mmcv.dump(info, filename, 'pkl')
print(f'{pkl_prefix} info {split} file is saved to {filename}')
num_points = kwargs.get('num_points', 4096)
seg_dataset = S3DISSegData(
data_root=data_path,
ann_file=filename,
split=split,
num_points=num_points,
label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
seg_dataset.get_seg_infos()
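# Illustrative usage sketch (paths are assumptions): generating ScanNet
# detection/segmentation infos with the helper above.
def _example_create_scannet_infos():
    create_indoor_info_file(
        data_path='./data/scannet',
        pkl_prefix='scannet',
        save_path='./data/scannet',
        workers=4)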
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from pathlib import Path
import mmcv
import numpy as np
from nuscenes.utils.geometry_utils import view_points
from mmdet3d.core.bbox import box_np_ops, points_cam2img
from .kitti_data_utils import WaymoInfoGatherer, get_kitti_image_info
from .nuscenes_converter import post_process_coords
kitti_categories = ('Pedestrian', 'Cyclist', 'Car')
def convert_to_kitti_info_version2(info):
"""convert kitti info v1 to v2 if possible.
Args:
info (dict): Info of the input kitti data.
- image (dict): image info
- calib (dict): calibration info
- point_cloud (dict): point cloud info
"""
if 'image' not in info or 'calib' not in info or 'point_cloud' not in info:
info['image'] = {
'image_shape': info['img_shape'],
'image_idx': info['image_idx'],
'image_path': info['img_path'],
}
info['calib'] = {
'R0_rect': info['calib/R0_rect'],
'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],
'P2': info['calib/P2'],
}
info['point_cloud'] = {
'velodyne_path': info['velodyne_path'],
}
def _read_imageset_file(path):
with open(path, 'r') as f:
lines = f.readlines()
return [int(line) for line in lines]
class _NumPointsInGTCalculater:
"""Calculate the number of points inside the ground truth box. This is the
parallel version. For the serialized version, please refer to
`_calculate_num_points_in_gt`.
Args:
data_path (str): Path of the data.
relative_path (bool): Whether to use relative path.
        remove_outside (bool, optional): Whether to remove points which are
            outside of the image. Default: True.
        num_features (int, optional): Number of features per point.
            Default: 4.
        num_worker (int, optional): The number of parallel workers to use.
            Default: 8.
"""
def __init__(self,
data_path,
relative_path,
remove_outside=True,
num_features=4,
num_worker=8) -> None:
self.data_path = data_path
self.relative_path = relative_path
self.remove_outside = remove_outside
self.num_features = num_features
self.num_worker = num_worker
def calculate_single(self, info):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
if self.relative_path:
v_path = str(Path(self.data_path) / pc_info['velodyne_path'])
else:
v_path = pc_info['velodyne_path']
points_v = np.fromfile(
v_path, dtype=np.float32,
count=-1).reshape([-1, self.num_features])
rect = calib['R0_rect']
Trv2c = calib['Tr_velo_to_cam']
P2 = calib['P2']
if self.remove_outside:
points_v = box_np_ops.remove_outside_points(
points_v, rect, Trv2c, P2, image_info['image_shape'])
annos = info['annos']
num_obj = len([n for n in annos['name'] if n != 'DontCare'])
dims = annos['dimensions'][:num_obj]
loc = annos['location'][:num_obj]
rots = annos['rotation_y'][:num_obj]
gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1)
gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
gt_boxes_camera, rect, Trv2c)
indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
num_points_in_gt = indices.sum(0)
num_ignored = len(annos['dimensions']) - num_obj
num_points_in_gt = np.concatenate(
[num_points_in_gt, -np.ones([num_ignored])])
annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
return info
def calculate(self, infos):
ret_infos = mmcv.track_parallel_progress(self.calculate_single, infos,
self.num_worker)
for i, ret_info in enumerate(ret_infos):
infos[i] = ret_info
def _calculate_num_points_in_gt(data_path,
infos,
relative_path,
remove_outside=True,
num_features=4):
for info in mmcv.track_iter_progress(infos):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
if relative_path:
v_path = str(Path(data_path) / pc_info['velodyne_path'])
else:
v_path = pc_info['velodyne_path']
points_v = np.fromfile(
v_path, dtype=np.float32, count=-1).reshape([-1, num_features])
rect = calib['R0_rect']
Trv2c = calib['Tr_velo_to_cam']
P2 = calib['P2']
if remove_outside:
points_v = box_np_ops.remove_outside_points(
points_v, rect, Trv2c, P2, image_info['image_shape'])
# points_v = points_v[points_v[:, 0] > 0]
annos = info['annos']
num_obj = len([n for n in annos['name'] if n != 'DontCare'])
# annos = kitti.filter_kitti_anno(annos, ['DontCare'])
dims = annos['dimensions'][:num_obj]
loc = annos['location'][:num_obj]
rots = annos['rotation_y'][:num_obj]
gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1)
gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
gt_boxes_camera, rect, Trv2c)
indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
num_points_in_gt = indices.sum(0)
num_ignored = len(annos['dimensions']) - num_obj
num_points_in_gt = np.concatenate(
[num_points_in_gt, -np.ones([num_ignored])])
annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
def create_kitti_info_file(data_path,
pkl_prefix='kitti',
with_plane=False,
save_path=None,
relative_path=True):
"""Create info file of KITTI dataset.
Given the raw data, generate its related info file in pkl format.
Args:
data_path (str): Path of the data root.
pkl_prefix (str, optional): Prefix of the info file to be generated.
Default: 'kitti'.
with_plane (bool, optional): Whether to use plane information.
Default: False.
save_path (str, optional): Path to save the info file.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
"""
imageset_folder = Path(data_path) / 'ImageSets'
train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))
    print('Generate info. This may take several minutes.')
if save_path is None:
save_path = Path(data_path)
else:
save_path = Path(save_path)
kitti_infos_train = get_kitti_image_info(
data_path,
training=True,
velodyne=True,
calib=True,
with_plane=with_plane,
image_ids=train_img_ids,
relative_path=relative_path)
_calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Kitti info train file is saved to {filename}')
mmcv.dump(kitti_infos_train, filename)
kitti_infos_val = get_kitti_image_info(
data_path,
training=True,
velodyne=True,
calib=True,
with_plane=with_plane,
image_ids=val_img_ids,
relative_path=relative_path)
_calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
filename = save_path / f'{pkl_prefix}_infos_val.pkl'
print(f'Kitti info val file is saved to {filename}')
mmcv.dump(kitti_infos_val, filename)
filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
print(f'Kitti info trainval file is saved to {filename}')
mmcv.dump(kitti_infos_train + kitti_infos_val, filename)
kitti_infos_test = get_kitti_image_info(
data_path,
training=False,
label_info=False,
velodyne=True,
calib=True,
with_plane=False,
image_ids=test_img_ids,
relative_path=relative_path)
filename = save_path / f'{pkl_prefix}_infos_test.pkl'
print(f'Kitti info test file is saved to {filename}')
mmcv.dump(kitti_infos_test, filename)
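# Illustrative usage sketch (root path is an assumption; ImageSets/train.txt,
# val.txt and test.txt must already exist under it).
def _example_create_kitti_infos():
    create_kitti_info_file(
        data_path='./data/kitti',
        pkl_prefix='kitti',
        with_plane=False,
        save_path='./data/kitti',
        relative_path=True)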
def create_waymo_info_file(data_path,
pkl_prefix='waymo',
save_path=None,
relative_path=True,
max_sweeps=5,
workers=8):
"""Create info file of waymo dataset.
Given the raw data, generate its related info file in pkl format.
Args:
data_path (str): Path of the data root.
pkl_prefix (str, optional): Prefix of the info file to be generated.
Default: 'waymo'.
save_path (str, optional): Path to save the info file.
Default: None.
relative_path (bool, optional): Whether to use relative path.
Default: True.
        max_sweeps (int, optional): Max sweeps before the detection frame
            to be used. Default: 5.
        workers (int, optional): Number of parallel processes to use.
            Default: 8.
"""
imageset_folder = Path(data_path) / 'ImageSets'
train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))
    print('Generate info. This may take several minutes.')
if save_path is None:
save_path = Path(data_path)
else:
save_path = Path(save_path)
waymo_infos_gatherer_trainval = WaymoInfoGatherer(
data_path,
training=True,
velodyne=True,
calib=True,
pose=True,
relative_path=relative_path,
max_sweeps=max_sweeps,
num_worker=workers)
waymo_infos_gatherer_test = WaymoInfoGatherer(
data_path,
training=False,
label_info=False,
velodyne=True,
calib=True,
pose=True,
relative_path=relative_path,
max_sweeps=max_sweeps,
num_worker=workers)
num_points_in_gt_calculater = _NumPointsInGTCalculater(
data_path,
relative_path,
num_features=6,
remove_outside=False,
num_worker=workers)
waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids)
num_points_in_gt_calculater.calculate(waymo_infos_train)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Waymo info train file is saved to {filename}')
mmcv.dump(waymo_infos_train, filename)
waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids)
num_points_in_gt_calculater.calculate(waymo_infos_val)
filename = save_path / f'{pkl_prefix}_infos_val.pkl'
print(f'Waymo info val file is saved to {filename}')
mmcv.dump(waymo_infos_val, filename)
filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
print(f'Waymo info trainval file is saved to {filename}')
mmcv.dump(waymo_infos_train + waymo_infos_val, filename)
waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids)
filename = save_path / f'{pkl_prefix}_infos_test.pkl'
print(f'Waymo info test file is saved to {filename}')
mmcv.dump(waymo_infos_test, filename)
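# Illustrative usage sketch (paths and worker count are assumptions): creating
# infos for the KITTI-format Waymo export.
def _example_create_waymo_infos():
    create_waymo_info_file(
        data_path='./data/waymo/kitti_format',
        pkl_prefix='waymo',
        save_path='./data/waymo/kitti_format',
        max_sweeps=5,
        workers=8)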
def _create_reduced_point_cloud(data_path,
info_path,
save_path=None,
back=False,
num_features=4,
front_camera_id=2):
"""Create reduced point clouds for given info.
Args:
data_path (str): Path of original data.
info_path (str): Path of data info.
save_path (str, optional): Path to save reduced point cloud
data. Default: None.
        back (bool, optional): Whether to flip the points to the back
            (negate the x coordinates). Default: False.
num_features (int, optional): Number of point features. Default: 4.
front_camera_id (int, optional): The referenced/front camera ID.
Default: 2.
"""
kitti_infos = mmcv.load(info_path)
for info in mmcv.track_iter_progress(kitti_infos):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
v_path = pc_info['velodyne_path']
v_path = Path(data_path) / v_path
points_v = np.fromfile(
str(v_path), dtype=np.float32,
count=-1).reshape([-1, num_features])
rect = calib['R0_rect']
if front_camera_id == 2:
P2 = calib['P2']
else:
P2 = calib[f'P{str(front_camera_id)}']
Trv2c = calib['Tr_velo_to_cam']
# first remove z < 0 points
# keep = points_v[:, -1] > 0
# points_v = points_v[keep]
# then remove outside.
if back:
points_v[:, 0] = -points_v[:, 0]
points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
image_info['image_shape'])
if save_path is None:
save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
if not save_dir.exists():
save_dir.mkdir()
save_filename = save_dir / v_path.name
# save_filename = str(v_path) + '_reduced'
            if back:
                save_filename = str(save_filename) + '_back'
else:
save_filename = str(Path(save_path) / v_path.name)
if back:
save_filename += '_back'
        with open(save_filename, 'wb') as f:
points_v.tofile(f)
def create_reduced_point_cloud(data_path,
pkl_prefix,
train_info_path=None,
val_info_path=None,
test_info_path=None,
save_path=None,
with_back=False):
"""Create reduced point clouds for training/validation/testing.
Args:
data_path (str): Path of original data.
pkl_prefix (str): Prefix of info files.
train_info_path (str, optional): Path of training set info.
Default: None.
val_info_path (str, optional): Path of validation set info.
Default: None.
test_info_path (str, optional): Path of test set info.
Default: None.
save_path (str, optional): Path to save reduced point cloud data.
Default: None.
        with_back (bool, optional): Whether to additionally generate point
            clouds flipped to the back. Default: False.
"""
if train_info_path is None:
train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'
if val_info_path is None:
val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl'
if test_info_path is None:
test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl'
print('create reduced point cloud for training set')
_create_reduced_point_cloud(data_path, train_info_path, save_path)
print('create reduced point cloud for validation set')
_create_reduced_point_cloud(data_path, val_info_path, save_path)
print('create reduced point cloud for testing set')
_create_reduced_point_cloud(data_path, test_info_path, save_path)
if with_back:
_create_reduced_point_cloud(
data_path, train_info_path, save_path, back=True)
_create_reduced_point_cloud(
data_path, val_info_path, save_path, back=True)
_create_reduced_point_cloud(
data_path, test_info_path, save_path, back=True)
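# Illustrative usage sketch (path is an assumption): producing reduced point
# clouds once the info pkls from create_kitti_info_file() exist.
def _example_create_reduced_clouds():
    create_reduced_point_cloud(
        data_path='./data/kitti', pkl_prefix='kitti', with_back=False)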
def export_2d_annotation(root_path, info_path, mono3d=True):
"""Export 2d annotation from the info file and raw data.
Args:
root_path (str): Root path of the raw data.
info_path (str): Path of the info file.
mono3d (bool, optional): Whether to export mono3d annotation.
Default: True.
"""
# get bbox annotations for camera
kitti_infos = mmcv.load(info_path)
cat2Ids = [
dict(id=kitti_categories.index(cat_name), name=cat_name)
for cat_name in kitti_categories
]
coco_ann_id = 0
coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
from os import path as osp
for info in mmcv.track_iter_progress(kitti_infos):
coco_infos = get_2d_boxes(info, occluded=[0, 1, 2, 3], mono3d=mono3d)
(height, width,
_) = mmcv.imread(osp.join(root_path,
info['image']['image_path'])).shape
coco_2d_dict['images'].append(
dict(
file_name=info['image']['image_path'],
id=info['image']['image_idx'],
Tri2v=info['calib']['Tr_imu_to_velo'],
Trv2c=info['calib']['Tr_velo_to_cam'],
rect=info['calib']['R0_rect'],
cam_intrinsic=info['calib']['P2'],
width=width,
height=height))
for coco_info in coco_infos:
if coco_info is None:
continue
# add an empty key for coco format
coco_info['segmentation'] = []
coco_info['id'] = coco_ann_id
coco_2d_dict['annotations'].append(coco_info)
coco_ann_id += 1
if mono3d:
json_prefix = f'{info_path[:-4]}_mono3d'
else:
json_prefix = f'{info_path[:-4]}'
mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
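# Illustrative usage sketch (path is an assumption): exporting 2D boxes plus
# mono3D fields to a COCO-style json next to the train info file.
def _example_export_kitti_2d():
    export_2d_annotation(
        root_path='./data/kitti',
        info_path='./data/kitti/kitti_infos_train.pkl',
        mono3d=True)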
def get_2d_boxes(info, occluded, mono3d=True):
"""Get the 2D annotation records for a given info.
Args:
info: Information of the given sample data.
occluded: Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare
mono3d (bool): Whether to get boxes with mono3d annotation.
Return:
list[dict]: List of 2D annotation record that belongs to the input
`sample_data_token`.
"""
# Get calibration information
P2 = info['calib']['P2']
repro_recs = []
# if no annotations in info (test dataset), then return
if 'annos' not in info:
return repro_recs
# Get all the annotation with the specified visibilties.
ann_dicts = info['annos']
mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
for k in ann_dicts.keys():
ann_dicts[k] = ann_dicts[k][mask]
# convert dict of list to list of dict
ann_recs = []
for i in range(len(ann_dicts['occluded'])):
ann_rec = {}
for k in ann_dicts.keys():
ann_rec[k] = ann_dicts[k][i]
ann_recs.append(ann_rec)
for ann_idx, ann_rec in enumerate(ann_recs):
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
sample_data_token = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
# transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
loc = loc + dim * (dst - src)
offset = (info['calib']['P2'][0, 3] - info['calib']['P0'][0, 3]) \
/ info['calib']['P2'][0, 0]
loc_3d = np.copy(loc)
loc_3d[0, 0] += offset
gt_bbox_3d = np.concatenate([loc, dim, rot], axis=1).astype(np.float32)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box_np_ops.center_to_corner_box3d(
gt_bbox_3d[:, :3],
gt_bbox_3d[:, 3:6],
gt_bbox_3d[:, 6], [0.5, 0.5, 0.5],
axis=1)
corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8)
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
camera_intrinsic = P2
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(corner_coords)
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token,
info['image']['image_path'])
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
repro_rec['bbox_cam3d'] = np.concatenate(
[loc_3d, dim, rot],
axis=1).astype(np.float32).squeeze().tolist()
repro_rec['velo_cam3d'] = -1 # no velocity in KITTI
center3d = np.array(loc).reshape([1, 3])
center2d = points_cam2img(
center3d, camera_intrinsic, with_depth=True)
repro_rec['center2d'] = center2d.squeeze().tolist()
# normalized center2D + depth
# samples with depth < 0 will be removed
if repro_rec['center2d'][2] <= 0:
continue
repro_rec['attribute_name'] = -1 # no attribute in KITTI
repro_rec['attribute_id'] = -1
repro_recs.append(repro_rec)
return repro_recs
def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, x_size, y_size of 2d box
- iscrowd (int): whether the area is crowd
"""
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
key_mapping = {
'name': 'category_name',
'num_points_in_gt': 'num_lidar_pts',
'sample_annotation_token': 'sample_annotation_token',
'sample_data_token': 'sample_data_token',
}
for key, value in ann_rec.items():
if key in key_mapping.keys():
repro_rec[key_mapping[key]] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
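# Illustrative sketch (hypothetical values): only categories listed in
# kitti_categories produce a record; the resulting bbox is [x, y, w, h].
def _example_generate_record():
    ann_rec = {'name': 'Pedestrian', 'num_points_in_gt': 42}
    rec = generate_record(ann_rec, 100., 120., 150., 220.,
                          sample_data_token=7,
                          filename='training/image_2/000007.png')
    return rec  # bbox [100., 120., 50., 100.], category_id 0, area 5000.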
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from concurrent import futures as futures
from os import path as osp
from pathlib import Path
import mmcv
import numpy as np
from PIL import Image
from skimage import io
def get_image_index_str(img_idx, use_prefix_id=False):
if use_prefix_id:
return '{:07d}'.format(img_idx)
else:
return '{:06d}'.format(img_idx)
def get_kitti_info_path(idx,
prefix,
info_type='image_2',
file_tail='.png',
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
img_idx_str = get_image_index_str(idx, use_prefix_id)
img_idx_str += file_tail
prefix = Path(prefix)
if training:
file_path = Path('training') / info_type / img_idx_str
else:
file_path = Path('testing') / info_type / img_idx_str
if exist_check and not (prefix / file_path).exists():
        raise ValueError('file does not exist: {}'.format(file_path))
if relative_path:
return str(file_path)
else:
return str(prefix / file_path)
def get_image_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
info_type='image_2',
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, info_type, '.png', training,
relative_path, exist_check, use_prefix_id)
def get_label_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
info_type='label_2',
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, info_type, '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_plane_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
info_type='planes',
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, info_type, '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_velodyne_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'velodyne', '.bin', training,
relative_path, exist_check, use_prefix_id)
def get_calib_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'calib', '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_pose_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'pose', '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_timestamp_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'timestamp', '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_label_anno(label_path):
annotations = {}
annotations.update({
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': []
})
with open(label_path, 'r') as f:
lines = f.readlines()
# if len(lines) == 0 or len(lines[0]) < 15:
# content = []
# else:
content = [line.strip().split(' ') for line in lines]
num_objects = len([x[0] for x in content if x[0] != 'DontCare'])
annotations['name'] = np.array([x[0] for x in content])
num_gt = len(annotations['name'])
annotations['truncated'] = np.array([float(x[1]) for x in content])
annotations['occluded'] = np.array([int(x[2]) for x in content])
annotations['alpha'] = np.array([float(x[3]) for x in content])
annotations['bbox'] = np.array([[float(info) for info in x[4:8]]
for x in content]).reshape(-1, 4)
# dimensions will convert hwl format to standard lhw(camera) format.
annotations['dimensions'] = np.array([[float(info) for info in x[8:11]]
for x in content
]).reshape(-1, 3)[:, [2, 0, 1]]
annotations['location'] = np.array([[float(info) for info in x[11:14]]
for x in content]).reshape(-1, 3)
annotations['rotation_y'] = np.array([float(x[14])
for x in content]).reshape(-1)
if len(content) != 0 and len(content[0]) == 16: # have score
annotations['score'] = np.array([float(x[15]) for x in content])
else:
annotations['score'] = np.zeros((annotations['bbox'].shape[0], ))
index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
annotations['index'] = np.array(index, dtype=np.int32)
annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
return annotations
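# Illustrative sketch (hypothetical label line): parsing a single KITTI label
# written to a temporary file; dimensions are reordered from hwl to lhw.
def _example_get_label_anno():
    import tempfile
    line = ('Car 0.00 0 -1.58 587.01 173.33 614.12 200.12 '
            '1.65 1.67 3.64 -0.65 1.71 46.70 -1.59')
    with tempfile.NamedTemporaryFile(
            'w', suffix='.txt', delete=False) as f:
        f.write(line)
        label_path = f.name
    anno = get_label_anno(label_path)
    return anno['name'], anno['dimensions']  # ['Car'], [[3.64, 1.65, 1.67]]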
def _extend_matrix(mat):
mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)
return mat
def get_kitti_image_info(path,
training=True,
label_info=True,
velodyne=False,
calib=False,
with_plane=False,
image_ids=7481,
extend_matrix=True,
num_worker=8,
relative_path=True,
with_imageshape=True):
"""
KITTI annotation format version 2:
{
[optional]points: [N, 3+] point cloud
[optional, for kitti]image: {
image_idx: ...
image_path: ...
image_shape: ...
}
point_cloud: {
num_features: 4
velodyne_path: ...
}
[optional, for kitti]calib: {
R0_rect: ...
Tr_velo_to_cam: ...
P2: ...
}
annos: {
location: [num_gt, 3] array
dimensions: [num_gt, 3] array
rotation_y: [num_gt] angle array
name: [num_gt] ground truth name array
[optional]difficulty: kitti difficulty
[optional]group_ids: used for multi-part object
}
}
"""
root_path = Path(path)
if not isinstance(image_ids, list):
image_ids = list(range(image_ids))
def map_func(idx):
info = {}
pc_info = {'num_features': 4}
calib_info = {}
image_info = {'image_idx': idx}
annotations = None
if velodyne:
pc_info['velodyne_path'] = get_velodyne_path(
idx, path, training, relative_path)
image_info['image_path'] = get_image_path(idx, path, training,
relative_path)
if with_imageshape:
img_path = image_info['image_path']
if relative_path:
img_path = str(root_path / img_path)
image_info['image_shape'] = np.array(
io.imread(img_path).shape[:2], dtype=np.int32)
if label_info:
label_path = get_label_path(idx, path, training, relative_path)
if relative_path:
label_path = str(root_path / label_path)
annotations = get_label_anno(label_path)
info['image'] = image_info
info['point_cloud'] = pc_info
if calib:
calib_path = get_calib_path(
idx, path, training, relative_path=False)
with open(calib_path, 'r') as f:
lines = f.readlines()
P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
]).reshape([3, 4])
P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]
]).reshape([3, 4])
P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]
]).reshape([3, 4])
P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
P0 = _extend_matrix(P0)
P1 = _extend_matrix(P1)
P2 = _extend_matrix(P2)
P3 = _extend_matrix(P3)
R0_rect = np.array([
float(info) for info in lines[4].split(' ')[1:10]
]).reshape([3, 3])
if extend_matrix:
rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
rect_4x4[3, 3] = 1.
rect_4x4[:3, :3] = R0_rect
else:
rect_4x4 = R0_rect
Tr_velo_to_cam = np.array([
float(info) for info in lines[5].split(' ')[1:13]
]).reshape([3, 4])
Tr_imu_to_velo = np.array([
float(info) for info in lines[6].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)
calib_info['P0'] = P0
calib_info['P1'] = P1
calib_info['P2'] = P2
calib_info['P3'] = P3
calib_info['R0_rect'] = rect_4x4
calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo
info['calib'] = calib_info
if with_plane:
plane_path = get_plane_path(idx, path, training, relative_path)
if relative_path:
plane_path = str(root_path / plane_path)
lines = mmcv.list_from_file(plane_path)
info['plane'] = np.array([float(i) for i in lines[3].split()])
if annotations is not None:
info['annos'] = annotations
add_difficulty_to_annos(info)
return info
with futures.ThreadPoolExecutor(num_worker) as executor:
image_infos = executor.map(map_func, image_ids)
return list(image_infos)
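# Illustrative usage sketch (data root and frame ids are assumptions;
# calibration, image and label files must exist for those ids).
def _example_gather_kitti_infos():
    infos = get_kitti_image_info(
        './data/kitti',
        training=True,
        velodyne=True,
        calib=True,
        image_ids=[0, 1],
        relative_path=True)
    return infos[0]['calib']['P2'].shape  # (4, 4) when extend_matrix=True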
class WaymoInfoGatherer:
"""
Parallel version of waymo dataset information gathering.
Waymo annotation format version like KITTI:
{
[optional]points: [N, 3+] point cloud
[optional, for kitti]image: {
image_idx: ...
image_path: ...
image_shape: ...
}
point_cloud: {
num_features: 6
velodyne_path: ...
}
[optional, for kitti]calib: {
R0_rect: ...
Tr_velo_to_cam0: ...
P0: ...
}
annos: {
location: [num_gt, 3] array
dimensions: [num_gt, 3] array
rotation_y: [num_gt] angle array
name: [num_gt] ground truth name array
[optional]difficulty: kitti difficulty
[optional]group_ids: used for multi-part object
}
}
"""
def __init__(self,
path,
training=True,
label_info=True,
velodyne=False,
calib=False,
pose=False,
extend_matrix=True,
num_worker=8,
relative_path=True,
with_imageshape=True,
max_sweeps=5) -> None:
self.path = path
self.training = training
self.label_info = label_info
self.velodyne = velodyne
self.calib = calib
self.pose = pose
self.extend_matrix = extend_matrix
self.num_worker = num_worker
self.relative_path = relative_path
self.with_imageshape = with_imageshape
self.max_sweeps = max_sweeps
def gather_single(self, idx):
root_path = Path(self.path)
info = {}
pc_info = {'num_features': 6}
calib_info = {}
image_info = {'image_idx': idx}
annotations = None
if self.velodyne:
pc_info['velodyne_path'] = get_velodyne_path(
idx,
self.path,
self.training,
self.relative_path,
use_prefix_id=True)
with open(
get_timestamp_path(
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)) as f:
info['timestamp'] = np.int64(f.read())
image_info['image_path'] = get_image_path(
idx,
self.path,
self.training,
self.relative_path,
info_type='image_0',
use_prefix_id=True)
if self.with_imageshape:
img_path = image_info['image_path']
if self.relative_path:
img_path = str(root_path / img_path)
# io using PIL is significantly faster than skimage
w, h = Image.open(img_path).size
image_info['image_shape'] = np.array((h, w), dtype=np.int32)
if self.label_info:
label_path = get_label_path(
idx,
self.path,
self.training,
self.relative_path,
info_type='label_all',
use_prefix_id=True)
if self.relative_path:
label_path = str(root_path / label_path)
annotations = get_label_anno(label_path)
info['image'] = image_info
info['point_cloud'] = pc_info
if self.calib:
calib_path = get_calib_path(
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
with open(calib_path, 'r') as f:
lines = f.readlines()
P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
]).reshape([3, 4])
P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]
]).reshape([3, 4])
P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]
]).reshape([3, 4])
P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]
]).reshape([3, 4])
P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]
]).reshape([3, 4])
if self.extend_matrix:
P0 = _extend_matrix(P0)
P1 = _extend_matrix(P1)
P2 = _extend_matrix(P2)
P3 = _extend_matrix(P3)
P4 = _extend_matrix(P4)
R0_rect = np.array([
float(info) for info in lines[5].split(' ')[1:10]
]).reshape([3, 3])
if self.extend_matrix:
rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
rect_4x4[3, 3] = 1.
rect_4x4[:3, :3] = R0_rect
else:
rect_4x4 = R0_rect
Tr_velo_to_cam = np.array([
float(info) for info in lines[6].split(' ')[1:13]
]).reshape([3, 4])
if self.extend_matrix:
Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
calib_info['P0'] = P0
calib_info['P1'] = P1
calib_info['P2'] = P2
calib_info['P3'] = P3
calib_info['P4'] = P4
calib_info['R0_rect'] = rect_4x4
calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
info['calib'] = calib_info
if self.pose:
pose_path = get_pose_path(
idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
info['pose'] = np.loadtxt(pose_path)
if annotations is not None:
info['annos'] = annotations
info['annos']['camera_id'] = info['annos'].pop('score')
add_difficulty_to_annos(info)
sweeps = []
prev_idx = idx
while len(sweeps) < self.max_sweeps:
prev_info = {}
prev_idx -= 1
prev_info['velodyne_path'] = get_velodyne_path(
prev_idx,
self.path,
self.training,
self.relative_path,
exist_check=False,
use_prefix_id=True)
if_prev_exists = osp.exists(
Path(self.path) / prev_info['velodyne_path'])
if if_prev_exists:
with open(
get_timestamp_path(
prev_idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)) as f:
prev_info['timestamp'] = np.int64(f.read())
prev_pose_path = get_pose_path(
prev_idx,
self.path,
self.training,
relative_path=False,
use_prefix_id=True)
prev_info['pose'] = np.loadtxt(prev_pose_path)
sweeps.append(prev_info)
else:
break
info['sweeps'] = sweeps
return info
def gather(self, image_ids):
if not isinstance(image_ids, list):
image_ids = list(range(image_ids))
image_infos = mmcv.track_parallel_progress(self.gather_single,
image_ids, self.num_worker)
return list(image_infos)
def kitti_anno_to_label_file(annos, folder):
folder = Path(folder)
for anno in annos:
image_idx = anno['metadata']['image_idx']
label_lines = []
for j in range(anno['bbox'].shape[0]):
label_dict = {
'name': anno['name'][j],
'alpha': anno['alpha'][j],
'bbox': anno['bbox'][j],
'location': anno['location'][j],
'dimensions': anno['dimensions'][j],
'rotation_y': anno['rotation_y'][j],
'score': anno['score'][j],
}
label_line = kitti_result_line(label_dict)
label_lines.append(label_line)
label_file = folder / f'{get_image_index_str(image_idx)}.txt'
label_str = '\n'.join(label_lines)
with open(label_file, 'w') as f:
f.write(label_str)
def add_difficulty_to_annos(info):
min_height = [40, 25,
25] # minimum height for evaluated groundtruth/detections
max_occlusion = [
0, 1, 2
] # maximum occlusion level of the groundtruth used for evaluation
max_trunc = [
0.15, 0.3, 0.5
] # maximum truncation level of the groundtruth used for evaluation
annos = info['annos']
dims = annos['dimensions'] # lhw format
bbox = annos['bbox']
height = bbox[:, 3] - bbox[:, 1]
occlusion = annos['occluded']
truncation = annos['truncated']
diff = []
easy_mask = np.ones((len(dims), ), dtype=bool)
moderate_mask = np.ones((len(dims), ), dtype=bool)
hard_mask = np.ones((len(dims), ), dtype=bool)
i = 0
for h, o, t in zip(height, occlusion, truncation):
if o > max_occlusion[0] or h <= min_height[0] or t > max_trunc[0]:
easy_mask[i] = False
if o > max_occlusion[1] or h <= min_height[1] or t > max_trunc[1]:
moderate_mask[i] = False
if o > max_occlusion[2] or h <= min_height[2] or t > max_trunc[2]:
hard_mask[i] = False
i += 1
is_easy = easy_mask
is_moderate = np.logical_xor(easy_mask, moderate_mask)
is_hard = np.logical_xor(hard_mask, moderate_mask)
for i in range(len(dims)):
if is_easy[i]:
diff.append(0)
elif is_moderate[i]:
diff.append(1)
elif is_hard[i]:
diff.append(2)
else:
diff.append(-1)
annos['difficulty'] = np.array(diff, np.int32)
return diff
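# Hedged illustration (not part of the original converter): a minimal sketch of
# how the cumulative easy/moderate/hard masks above turn into difficulty labels
# via XOR between consecutive masks. The boolean values below are made up.
def _demo_difficulty_from_masks():
    easy_mask = np.array([True, False, False, False])
    moderate_mask = np.array([True, True, False, False])
    hard_mask = np.array([True, True, True, False])
    diff = np.full(easy_mask.shape, -1, dtype=np.int32)
    diff[np.logical_xor(hard_mask, moderate_mask)] = 2  # hard only
    diff[np.logical_xor(easy_mask, moderate_mask)] = 1  # moderate only
    diff[easy_mask] = 0  # easy
    return diff  # expected: [0, 1, 2, -1]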
def kitti_result_line(result_dict, precision=4):
prec_float = '{' + ':.{}f'.format(precision) + '}'
res_line = []
all_field_default = OrderedDict([
('name', None),
('truncated', -1),
('occluded', -1),
('alpha', -10),
('bbox', None),
('dimensions', [-1, -1, -1]),
('location', [-1000, -1000, -1000]),
('rotation_y', -10),
('score', 0.0),
])
res_dict = [(key, None) for key, val in all_field_default.items()]
res_dict = OrderedDict(res_dict)
for key, val in result_dict.items():
if all_field_default[key] is None and val is None:
raise ValueError('you must specify a value for {}'.format(key))
res_dict[key] = val
for key, val in res_dict.items():
if key == 'name':
res_line.append(val)
elif key in ['truncated', 'alpha', 'rotation_y', 'score']:
if val is None:
res_line.append(str(all_field_default[key]))
else:
res_line.append(prec_float.format(val))
elif key == 'occluded':
if val is None:
res_line.append(str(all_field_default[key]))
else:
res_line.append('{}'.format(val))
elif key in ['bbox', 'dimensions', 'location']:
if val is None:
res_line += [str(v) for v in all_field_default[key]]
else:
res_line += [prec_float.format(v) for v in val]
else:
raise ValueError('unknown key. supported key:{}'.format(
res_dict.keys()))
return ' '.join(res_line)
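# Hedged usage sketch of kitti_result_line with hypothetical values; fields
# omitted from the dict ('truncated', 'occluded') fall back to their defaults.
def _demo_kitti_result_line():
    result = {
        'name': 'Car',
        'alpha': -1.57,
        'bbox': [100.0, 120.0, 200.0, 180.0],
        'location': [1.0, 1.5, 20.0],
        'dimensions': [3.9, 1.6, 1.5],
        'rotation_y': -1.6,
        'score': 0.95,
    }
    # e.g. 'Car -1 -1 -1.5700 100.0000 120.0000 200.0000 180.0000 ...'
    return kitti_result_line(result)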
# Copyright (c) OpenMMLab. All rights reserved.
import os
from logging import warning
from os import path as osp
import mmcv
import numpy as np
from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft
from pyquaternion import Quaternion
from mmdet3d.datasets import LyftDataset
from .nuscenes_converter import (get_2d_boxes, get_available_scenes,
obtain_sensor2top)
lyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle',
'motorcycle', 'bicycle', 'pedestrian', 'animal')
def create_lyft_infos(root_path,
info_prefix,
version='v1.01-train',
max_sweeps=10):
"""Create info file of lyft dataset.
Given the raw data, generate its related info file in pkl format.
Args:
root_path (str): Path of the data root.
info_prefix (str): Prefix of the info file to be generated.
version (str, optional): Version of the data.
Default: 'v1.01-train'.
max_sweeps (int, optional): Max number of sweeps.
Default: 10.
"""
lyft = Lyft(
data_path=osp.join(root_path, version),
json_path=osp.join(root_path, version, version),
verbose=True)
available_vers = ['v1.01-train', 'v1.01-test']
assert version in available_vers
if version == 'v1.01-train':
train_scenes = mmcv.list_from_file('data/lyft/train.txt')
val_scenes = mmcv.list_from_file('data/lyft/val.txt')
elif version == 'v1.01-test':
train_scenes = mmcv.list_from_file('data/lyft/test.txt')
val_scenes = []
else:
raise ValueError('unknown')
# filter existing scenes.
available_scenes = get_available_scenes(lyft)
available_scene_names = [s['name'] for s in available_scenes]
train_scenes = list(
filter(lambda x: x in available_scene_names, train_scenes))
val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
train_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in train_scenes
])
val_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in val_scenes
])
test = 'test' in version
if test:
print(f'test scene: {len(train_scenes)}')
else:
print(f'train scene: {len(train_scenes)}, \
val scene: {len(val_scenes)}')
train_lyft_infos, val_lyft_infos = _fill_trainval_infos(
lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
metadata = dict(version=version)
if test:
print(f'test sample: {len(train_lyft_infos)}')
data = dict(infos=train_lyft_infos, metadata=metadata)
info_name = f'{info_prefix}_infos_test'
info_path = osp.join(root_path, f'{info_name}.pkl')
mmcv.dump(data, info_path)
else:
print(f'train sample: {len(train_lyft_infos)}, \
val sample: {len(val_lyft_infos)}')
data = dict(infos=train_lyft_infos, metadata=metadata)
train_info_name = f'{info_prefix}_infos_train'
info_path = osp.join(root_path, f'{train_info_name}.pkl')
mmcv.dump(data, info_path)
data['infos'] = val_lyft_infos
val_info_name = f'{info_prefix}_infos_val'
info_val_path = osp.join(root_path, f'{val_info_name}.pkl')
mmcv.dump(data, info_val_path)
def _fill_trainval_infos(lyft,
train_scenes,
val_scenes,
test=False,
max_sweeps=10):
"""Generate the train/val infos from the raw data.
Args:
lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.
train_scenes (list[str]): Basic information of training scenes.
val_scenes (list[str]): Basic information of validation scenes.
test (bool, optional): Whether to use the test mode. In the test mode, no
annotations can be accessed. Default: False.
max_sweeps (int, optional): Max number of sweeps. Default: 10.
Returns:
tuple[list[dict]]: Information of training set and
validation set that will be saved to the info file.
"""
train_lyft_infos = []
val_lyft_infos = []
for sample in mmcv.track_iter_progress(lyft.sample):
lidar_token = sample['data']['LIDAR_TOP']
sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])
cs_record = lyft.get('calibrated_sensor',
sd_rec['calibrated_sensor_token'])
pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token'])
abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token)
# nuScenes devkit returns more convenient relative paths while
# lyft devkit returns absolute paths
abs_lidar_path = str(abs_lidar_path) # absolute path
lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1]
# relative path
mmcv.check_file_exist(lidar_path)
info = {
'lidar_path': lidar_path,
'token': sample['token'],
'sweeps': [],
'cams': dict(),
'lidar2ego_translation': cs_record['translation'],
'lidar2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
'ego2global_rotation': pose_record['rotation'],
'timestamp': sample['timestamp'],
}
l2e_r = info['lidar2ego_rotation']
l2e_t = info['lidar2ego_translation']
e2g_r = info['ego2global_rotation']
e2g_t = info['ego2global_translation']
l2e_r_mat = Quaternion(l2e_r).rotation_matrix
e2g_r_mat = Quaternion(e2g_r).rotation_matrix
# obtain information of the 6 camera images per frame
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
for cam in camera_types:
cam_token = sample['data'][cam]
cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token)
cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat,
e2g_t, e2g_r_mat, cam)
cam_info.update(cam_intrinsic=cam_intrinsic)
info['cams'].update({cam: cam_info})
# obtain sweeps for a single key-frame
sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP'])
sweeps = []
while len(sweeps) < max_sweeps:
if not sd_rec['prev'] == '':
sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t,
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
sweeps.append(sweep)
sd_rec = lyft.get('sample_data', sd_rec['prev'])
else:
break
info['sweeps'] = sweeps
# obtain annotation
if not test:
annotations = [
lyft.get('sample_annotation', token)
for token in sample['anns']
]
locs = np.array([b.center for b in boxes]).reshape(-1, 3)
dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
rots = np.array([b.orientation.yaw_pitch_roll[0]
for b in boxes]).reshape(-1, 1)
names = [b.name for b in boxes]
for i in range(len(names)):
if names[i] in LyftDataset.NameMapping:
names[i] = LyftDataset.NameMapping[names[i]]
names = np.array(names)
# we need to convert box size to
# the format of our lidar coordinate system
# which is x_size, y_size, z_size (corresponding to l, w, h)
gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
assert len(gt_boxes) == len(
annotations), f'{len(gt_boxes)}, {len(annotations)}'
info['gt_boxes'] = gt_boxes
info['gt_names'] = names
info['num_lidar_pts'] = np.array(
[a['num_lidar_pts'] for a in annotations])
info['num_radar_pts'] = np.array(
[a['num_radar_pts'] for a in annotations])
if sample['scene_token'] in train_scenes:
train_lyft_infos.append(info)
else:
val_lyft_infos.append(info)
return train_lyft_infos, val_lyft_infos
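# Hedged sketch of the box-size reordering used above: the devkit returns sizes
# as (w, l, h), while gt_boxes stores (x_size, y_size, z_size) = (l, w, h), so
# the columns are re-indexed with dims[:, [1, 0, 2]]. Numbers are illustrative.
def _demo_wlh_to_lwh():
    dims_wlh = np.array([[1.9, 4.5, 1.6]])  # (w, l, h)
    dims_lwh = dims_wlh[:, [1, 0, 2]]       # (l, w, h) -> [[4.5, 1.9, 1.6]]
    return dims_lwh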
def export_2d_annotation(root_path, info_path, version):
"""Export 2d annotation from the info file and raw data.
Args:
root_path (str): Root path of the raw data.
info_path (str): Path of the info file.
version (str): Dataset version.
"""
# `warning` is logging.warning (imported above), so it is called directly
warning('DeprecationWarning: 2D annotations are not used on the '
'Lyft dataset. The function export_2d_annotation will be '
'deprecated.')
# get bbox annotations for camera
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
lyft_infos = mmcv.load(info_path)['infos']
lyft = Lyft(
data_path=osp.join(root_path, version),
json_path=osp.join(root_path, version, version),
verbose=True)
# info_2d_list = []
cat2Ids = [
dict(id=lyft_categories.index(cat_name), name=cat_name)
for cat_name in lyft_categories
]
coco_ann_id = 0
coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
for info in mmcv.track_iter_progress(lyft_infos):
for cam in camera_types:
cam_info = info['cams'][cam]
coco_infos = get_2d_boxes(
lyft,
cam_info['sample_data_token'],
visibilities=['', '1', '2', '3', '4'])
(height, width, _) = mmcv.imread(cam_info['data_path']).shape
coco_2d_dict['images'].append(
dict(
file_name=cam_info['data_path'],
id=cam_info['sample_data_token'],
width=width,
height=height))
for coco_info in coco_infos:
if coco_info is None:
continue
# add an empty key for coco format
coco_info['segmentation'] = []
coco_info['id'] = coco_ann_id
coco_2d_dict['annotations'].append(coco_info)
coco_ann_id += 1
mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import numpy as np
def fix_lyft(root_folder='./data/lyft', version='v1.01'):
# refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa
lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
root_folder = os.path.join(root_folder, f'{version}-train')
lidar_path = os.path.join(root_folder, lidar_path)
assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
f'dataset and make sure {lidar_path} is present.'
points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
try:
points.reshape([-1, 5])
print(f'This fix is not required for version {version}.')
except ValueError:
new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
new_points.tofile(lidar_path)
print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
parser.add_argument(
'--root-folder',
type=str,
default='./data/lyft',
help='specify the root path of Lyft dataset')
parser.add_argument(
'--version',
type=str,
default='v1.01',
help='specify Lyft dataset version')
args = parser.parse_args()
if __name__ == '__main__':
fix_lyft(root_folder=args.root_folder, version=args.version)
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import base64
from os import path as osp
import mmcv
import numpy as np
from nuimages import NuImages
from nuimages.utils.utils import mask_decode, name_to_index_mapping
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
NAME_MAPPING = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck',
}
def parse_args():
parser = argparse.ArgumentParser(description='Data converter arg parser')
parser.add_argument(
'--data-root',
type=str,
default='./data/nuimages',
help='specify the root path of dataset')
parser.add_argument(
'--version',
type=str,
nargs='+',
default=['v1.0-mini'],
required=False,
help='specify the dataset version')
parser.add_argument(
'--out-dir',
type=str,
default='./data/nuimages/annotations/',
required=False,
help='path to save the exported json')
parser.add_argument(
'--nproc',
type=int,
default=4,
required=False,
help='workers to process semantic masks')
parser.add_argument('--extra-tag', type=str, default='nuimages')
args = parser.parse_args()
return args
def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root):
"""Get semantic segmentation map for an image.
Args:
nuim (obj:`NuImages`): NuImages dataset object
img_info (dict): Meta information of img
Returns:
np.ndarray: Semantic segmentation map of the image
"""
sd_token = img_info['token']
image_id = img_info['id']
name_to_index = name_to_index_mapping(nuim.category)
# Get image data.
width, height = img_info['width'], img_info['height']
semseg_mask = np.zeros((height, width)).astype('uint8')
# Load stuff / surface regions.
surface_anns = [
o for o in nuim.surface_ann if o['sample_data_token'] == sd_token
]
# Draw stuff / surface regions.
for ann in surface_anns:
# Get color and mask.
category_token = ann['category_token']
category_name = nuim.get('category', category_token)['name']
if ann['mask'] is None:
continue
mask = mask_decode(ann['mask'])
# Draw mask for semantic segmentation.
semseg_mask[mask == 1] = name_to_index[category_name]
# Load object instances.
object_anns = [
o for o in nuim.object_ann if o['sample_data_token'] == sd_token
]
# Sort by token to ensure that objects always appear in the
# instance mask in the same order.
object_anns = sorted(object_anns, key=lambda k: k['token'])
# Draw object instances.
# The 0 index is reserved for background; thus, the instances
# should start from index 1.
annotations = []
for i, ann in enumerate(object_anns, start=1):
# Get color, box, mask and name.
category_token = ann['category_token']
category_name = nuim.get('category', category_token)['name']
if ann['mask'] is None:
continue
mask = mask_decode(ann['mask'])
# Draw masks for semantic segmentation and instance segmentation.
semseg_mask[mask == 1] = name_to_index[category_name]
if category_name in NAME_MAPPING:
cat_name = NAME_MAPPING[category_name]
cat_id = cat2id[cat_name]
x_min, y_min, x_max, y_max = ann['bbox']
# encode calibrated instance mask
mask_anno = dict()
mask_anno['counts'] = base64.b64decode(
ann['mask']['counts']).decode()
mask_anno['size'] = ann['mask']['size']
data_anno = dict(
image_id=image_id,
category_id=cat_id,
bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
area=(x_max - x_min) * (y_max - y_min),
segmentation=mask_anno,
iscrowd=0)
annotations.append(data_anno)
# after process, save semantic masks
img_filename = img_info['file_name']
seg_filename = img_filename.replace('jpg', 'png')
seg_filename = osp.join(seg_root, seg_filename)
mmcv.imwrite(semseg_mask, seg_filename)
return annotations, np.max(semseg_mask)
def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc):
print('Process category information')
categories = [
dict(id=nus_categories.index(cat_name), name=cat_name)
for cat_name in nus_categories
]
cat2id = {k_v['name']: k_v['id'] for k_v in categories}
images = []
print('Process image meta information...')
for sample_info in mmcv.track_iter_progress(nuim.sample_data):
if sample_info['is_key_frame']:
img_idx = len(images)
images.append(
dict(
id=img_idx,
token=sample_info['token'],
file_name=sample_info['filename'],
width=sample_info['width'],
height=sample_info['height']))
seg_root = f'{out_dir}semantic_masks'
mmcv.mkdir_or_exist(seg_root)
mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated'))
global process_img_anno
def process_img_anno(img_info):
single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id,
out_dir, data_root,
seg_root)
return single_img_annos, max_cls_id
print('Process img annotations...')
if nproc > 1:
outputs = mmcv.track_parallel_progress(
process_img_anno, images, nproc=nproc)
else:
outputs = []
for img_info in mmcv.track_iter_progress(images):
outputs.append(process_img_anno(img_info))
# Determine the index of object annotation
print('Process annotation information...')
annotations = []
max_cls_ids = []
for single_img_annos, max_cls_id in outputs:
max_cls_ids.append(max_cls_id)
for img_anno in single_img_annos:
img_anno.update(id=len(annotations))
annotations.append(img_anno)
max_cls_id = max(max_cls_ids)
print(f'Max ID of class in the semantic map: {max_cls_id}')
coco_format_json = dict(
images=images, annotations=annotations, categories=categories)
mmcv.mkdir_or_exist(out_dir)
out_file = osp.join(out_dir, f'{extra_tag}_{version}.json')
print(f'Annotation dumped to {out_file}')
mmcv.dump(coco_format_json, out_file)
def main():
args = parse_args()
for version in args.version:
nuim = NuImages(
dataroot=args.data_root, version=version, verbose=True, lazy=True)
export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag,
version, args.nproc)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import os
from collections import OrderedDict
from os import path as osp
from typing import List, Tuple, Union
import mmcv
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.datasets import NuScenesDataset
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
'pedestrian.moving', 'pedestrian.standing',
'pedestrian.sitting_lying_down', 'vehicle.moving',
'vehicle.parked', 'vehicle.stopped', 'None')
def create_nuscenes_infos(root_path,
info_prefix,
version='v1.0-trainval',
max_sweeps=10):
"""Create info file of nuscene dataset.
Given the raw data, generate its related info file in pkl format.
Args:
root_path (str): Path of the data root.
info_prefix (str): Prefix of the info file to be generated.
version (str, optional): Version of the data.
Default: 'v1.0-trainval'.
max_sweeps (int, optional): Max number of sweeps.
Default: 10.
"""
from nuscenes.nuscenes import NuScenes
nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
from nuscenes.utils import splits
available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
assert version in available_vers
if version == 'v1.0-trainval':
train_scenes = splits.train
val_scenes = splits.val
elif version == 'v1.0-test':
train_scenes = splits.test
val_scenes = []
elif version == 'v1.0-mini':
train_scenes = splits.mini_train
val_scenes = splits.mini_val
else:
raise ValueError('unknown')
# filter existing scenes.
available_scenes = get_available_scenes(nusc)
available_scene_names = [s['name'] for s in available_scenes]
train_scenes = list(
filter(lambda x: x in available_scene_names, train_scenes))
val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
train_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in train_scenes
])
val_scenes = set([
available_scenes[available_scene_names.index(s)]['token']
for s in val_scenes
])
test = 'test' in version
if test:
print('test scene: {}'.format(len(train_scenes)))
else:
print('train scene: {}, val scene: {}'.format(
len(train_scenes), len(val_scenes)))
train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
metadata = dict(version=version)
if test:
print('test sample: {}'.format(len(train_nusc_infos)))
data = dict(infos=train_nusc_infos, metadata=metadata)
info_path = osp.join(root_path,
'{}_infos_test.pkl'.format(info_prefix))
mmcv.dump(data, info_path)
else:
print('train sample: {}, val sample: {}'.format(
len(train_nusc_infos), len(val_nusc_infos)))
data = dict(infos=train_nusc_infos, metadata=metadata)
info_path = osp.join(root_path,
'{}_infos_train.pkl'.format(info_prefix))
mmcv.dump(data, info_path)
data['infos'] = val_nusc_infos
info_val_path = osp.join(root_path,
'{}_infos_val.pkl'.format(info_prefix))
mmcv.dump(data, info_val_path)
def get_available_scenes(nusc):
"""Get available scenes from the input nuscenes class.
Given the raw data, get the information of available scenes for
further info generation.
Args:
nusc (class): Dataset class in the nuScenes dataset.
Returns:
available_scenes (list[dict]): List of basic information for the
available scenes.
"""
available_scenes = []
print('total scene num: {}'.format(len(nusc.scene)))
for scene in nusc.scene:
scene_token = scene['token']
scene_rec = nusc.get('scene', scene_token)
sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
has_more_frames = True
scene_not_exist = False
while has_more_frames:
lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token'])
lidar_path = str(lidar_path)
if os.getcwd() in lidar_path:
# path from lyftdataset is absolute path
lidar_path = lidar_path.split(f'{os.getcwd()}/')[-1]
# relative path
if not mmcv.is_filepath(lidar_path):
scene_not_exist = True
break
else:
break
if scene_not_exist:
continue
available_scenes.append(scene)
print('exist scene num: {}'.format(len(available_scenes)))
return available_scenes
def _fill_trainval_infos(nusc,
train_scenes,
val_scenes,
test=False,
max_sweeps=10):
"""Generate the train/val infos from the raw data.
Args:
nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
train_scenes (list[str]): Basic information of training scenes.
val_scenes (list[str]): Basic information of validation scenes.
test (bool, optional): Whether to use the test mode. In the test mode, no
annotations can be accessed. Default: False.
max_sweeps (int, optional): Max number of sweeps. Default: 10.
Returns:
tuple[list[dict]]: Information of training set and validation set
that will be saved to the info file.
"""
train_nusc_infos = []
val_nusc_infos = []
for sample in mmcv.track_iter_progress(nusc.sample):
lidar_token = sample['data']['LIDAR_TOP']
sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
cs_record = nusc.get('calibrated_sensor',
sd_rec['calibrated_sensor_token'])
pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
mmcv.check_file_exist(lidar_path)
info = {
'lidar_path': lidar_path,
'token': sample['token'],
'sweeps': [],
'cams': dict(),
'lidar2ego_translation': cs_record['translation'],
'lidar2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
'ego2global_rotation': pose_record['rotation'],
'timestamp': sample['timestamp'],
}
l2e_r = info['lidar2ego_rotation']
l2e_t = info['lidar2ego_translation']
e2g_r = info['ego2global_rotation']
e2g_t = info['ego2global_translation']
l2e_r_mat = Quaternion(l2e_r).rotation_matrix
e2g_r_mat = Quaternion(e2g_r).rotation_matrix
# obtain information of the 6 camera images per frame
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
for cam in camera_types:
cam_token = sample['data'][cam]
cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
e2g_t, e2g_r_mat, cam)
cam_info.update(cam_intrinsic=cam_intrinsic)
info['cams'].update({cam: cam_info})
# obtain sweeps for a single key-frame
sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
sweeps = []
while len(sweeps) < max_sweeps:
if not sd_rec['prev'] == '':
sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
sweeps.append(sweep)
sd_rec = nusc.get('sample_data', sd_rec['prev'])
else:
break
info['sweeps'] = sweeps
# obtain annotation
if not test:
annotations = [
nusc.get('sample_annotation', token)
for token in sample['anns']
]
locs = np.array([b.center for b in boxes]).reshape(-1, 3)
dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
rots = np.array([b.orientation.yaw_pitch_roll[0]
for b in boxes]).reshape(-1, 1)
velocity = np.array(
[nusc.box_velocity(token)[:2] for token in sample['anns']])
valid_flag = np.array(
[(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
for anno in annotations],
dtype=bool).reshape(-1)
# convert velo from global to lidar
for i in range(len(boxes)):
velo = np.array([*velocity[i], 0.0])
velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
l2e_r_mat).T
velocity[i] = velo[:2]
names = [b.name for b in boxes]
for i in range(len(names)):
if names[i] in NuScenesDataset.NameMapping:
names[i] = NuScenesDataset.NameMapping[names[i]]
names = np.array(names)
# we need to convert box size to
# the format of our lidar coordinate system
# which is x_size, y_size, z_size (corresponding to l, w, h)
gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
assert len(gt_boxes) == len(
annotations), f'{len(gt_boxes)}, {len(annotations)}'
info['gt_boxes'] = gt_boxes
info['gt_names'] = names
info['gt_velocity'] = velocity.reshape(-1, 2)
info['num_lidar_pts'] = np.array(
[a['num_lidar_pts'] for a in annotations])
info['num_radar_pts'] = np.array(
[a['num_radar_pts'] for a in annotations])
info['valid_flag'] = valid_flag
if sample['scene_token'] in train_scenes:
train_nusc_infos.append(info)
else:
val_nusc_infos.append(info)
return train_nusc_infos, val_nusc_infos
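# Hedged sketch of the velocity conversion above: a global-frame 2D velocity is
# padded to 3D, rotated through global->ego->lidar with the inverse rotation
# matrices, and the first two components are kept. The rotations are invented
# purely for illustration.
def _demo_global_velocity_to_lidar():
    e2g_r_mat = Quaternion(axis=[0, 0, 1], angle=0.3).rotation_matrix
    l2e_r_mat = Quaternion(axis=[0, 0, 1], angle=-0.1).rotation_matrix
    velo = np.array([5.0, 0.0, 0.0])  # global-frame velocity, z padded with 0
    velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
    return velo[:2]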
def obtain_sensor2top(nusc,
sensor_token,
l2e_t,
l2e_r_mat,
e2g_t,
e2g_r_mat,
sensor_type='lidar'):
"""Obtain the info with RT matric from general sensor to Top LiDAR.
Args:
nusc (class): Dataset class in the nuScenes dataset.
sensor_token (str): Sample data token corresponding to the
specific sensor type.
l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
in shape (3, 3).
e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
e2g_r_mat (np.ndarray): Rotation matrix from ego to global
in shape (3, 3).
sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.
Returns:
sweep (dict): Sweep information after transformation.
"""
sd_rec = nusc.get('sample_data', sensor_token)
cs_record = nusc.get('calibrated_sensor',
sd_rec['calibrated_sensor_token'])
pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
data_path = str(nusc.get_sample_data_path(sd_rec['token']))
if os.getcwd() in data_path: # path from lyftdataset is absolute path
data_path = data_path.split(f'{os.getcwd()}/')[-1] # relative path
sweep = {
'data_path': data_path,
'type': sensor_type,
'sample_data_token': sd_rec['token'],
'sensor2ego_translation': cs_record['translation'],
'sensor2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
'ego2global_rotation': pose_record['rotation'],
'timestamp': sd_rec['timestamp']
}
l2e_r_s = sweep['sensor2ego_rotation']
l2e_t_s = sweep['sensor2ego_translation']
e2g_r_s = sweep['ego2global_rotation']
e2g_t_s = sweep['ego2global_translation']
# obtain the RT from sensor to Top LiDAR
# sweep->ego->global->ego'->lidar
l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
) + l2e_t @ np.linalg.inv(l2e_r_mat).T
sweep['sensor2lidar_rotation'] = R.T # points @ R.T + T
sweep['sensor2lidar_translation'] = T
return sweep
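# Hedged sketch of how the returned rotation/translation are meant to be
# consumed (cf. the "points @ R.T + T" note above): sensor-frame points are
# mapped into the Top LiDAR frame. `sweep` and `pts_sensor` are placeholders
# for a dict produced by obtain_sensor2top and an (N, 3) point array.
def _demo_apply_sensor2lidar(sweep, pts_sensor):
    rot = sweep['sensor2lidar_rotation']       # (3, 3)
    trans = sweep['sensor2lidar_translation']  # (3, )
    return pts_sensor @ rot.T + trans          # (N, 3) points in lidar frame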
def export_2d_annotation(root_path, info_path, version, mono3d=True):
"""Export 2d annotation from the info file and raw data.
Args:
root_path (str): Root path of the raw data.
info_path (str): Path of the info file.
version (str): Dataset version.
mono3d (bool, optional): Whether to export mono3d annotation.
Default: True.
"""
# get bbox annotations for camera
camera_types = [
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_FRONT_LEFT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_BACK_RIGHT',
]
nusc_infos = mmcv.load(info_path)['infos']
nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
# info_2d_list = []
cat2Ids = [
dict(id=nus_categories.index(cat_name), name=cat_name)
for cat_name in nus_categories
]
coco_ann_id = 0
coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
for info in mmcv.track_iter_progress(nusc_infos):
for cam in camera_types:
cam_info = info['cams'][cam]
coco_infos = get_2d_boxes(
nusc,
cam_info['sample_data_token'],
visibilities=['', '1', '2', '3', '4'],
mono3d=mono3d)
(height, width, _) = mmcv.imread(cam_info['data_path']).shape
coco_2d_dict['images'].append(
dict(
file_name=cam_info['data_path'].split('data/nuscenes/')
[-1],
id=cam_info['sample_data_token'],
token=info['token'],
cam2ego_rotation=cam_info['sensor2ego_rotation'],
cam2ego_translation=cam_info['sensor2ego_translation'],
ego2global_rotation=info['ego2global_rotation'],
ego2global_translation=info['ego2global_translation'],
cam_intrinsic=cam_info['cam_intrinsic'],
width=width,
height=height))
for coco_info in coco_infos:
if coco_info is None:
continue
# add an empty key for coco format
coco_info['segmentation'] = []
coco_info['id'] = coco_ann_id
coco_2d_dict['annotations'].append(coco_info)
coco_ann_id += 1
if mono3d:
json_prefix = f'{info_path[:-4]}_mono3d'
else:
json_prefix = f'{info_path[:-4]}'
mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')
def get_2d_boxes(nusc,
sample_data_token: str,
visibilities: List[str],
mono3d=True):
"""Get the 2D annotation records for a given `sample_data_token`.
Args:
sample_data_token (str): Sample data token belonging to a camera
keyframe.
visibilities (list[str]): Visibility filter.
mono3d (bool): Whether to get boxes with mono3d annotation.
Return:
list[dict]: List of 2D annotation record that belongs to the input
`sample_data_token`.
"""
# Get the sample data and the sample corresponding to that sample data.
sd_rec = nusc.get('sample_data', sample_data_token)
assert sd_rec[
'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
' for camera sample_data!'
if not sd_rec['is_key_frame']:
raise ValueError(
'The 2D re-projections are available only for keyframes.')
s_rec = nusc.get('sample', sd_rec['sample_token'])
# Get the calibrated sensor and ego pose
# record to get the transformation matrices.
cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
camera_intrinsic = np.array(cs_rec['camera_intrinsic'])
# Get all the annotation with the specified visibilties.
ann_recs = [
nusc.get('sample_annotation', token) for token in s_rec['anns']
]
ann_recs = [
ann_rec for ann_rec in ann_recs
if (ann_rec['visibility_token'] in visibilities)
]
repro_recs = []
for ann_rec in ann_recs:
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = ann_rec['token']
ann_rec['sample_data_token'] = sample_data_token
# Get the box in global coordinates.
box = nusc.get_box(ann_rec['token'])
# Move them to the ego-pose frame.
box.translate(-np.array(pose_rec['translation']))
box.rotate(Quaternion(pose_rec['rotation']).inverse)
# Move them to the calibrated sensor frame.
box.translate(-np.array(cs_rec['translation']))
box.rotate(Quaternion(cs_rec['rotation']).inverse)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box.corners()
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(corner_coords)
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token, sd_rec['filename'])
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
loc = box.center.tolist()
dim = box.wlh
dim[[0, 1, 2]] = dim[[1, 2, 0]] # convert wlh to our lhw
dim = dim.tolist()
rot = box.orientation.yaw_pitch_roll[0]
rot = [-rot] # convert the rot to our cam coordinate
global_velo2d = nusc.box_velocity(box.token)[:2]
global_velo3d = np.array([*global_velo2d, 0.0])
e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
cam_velo3d = global_velo3d @ np.linalg.inv(
e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
velo = cam_velo3d[0::2].tolist()
repro_rec['bbox_cam3d'] = loc + dim + rot
repro_rec['velo_cam3d'] = velo
center3d = np.array(loc).reshape([1, 3])
center2d = points_cam2img(
center3d, camera_intrinsic, with_depth=True)
repro_rec['center2d'] = center2d.squeeze().tolist()
# normalized center2d (u, v) + depth
# samples with depth <= 0 are skipped
if repro_rec['center2d'][2] <= 0:
continue
ann_token = nusc.get('sample_annotation',
box.token)['attribute_tokens']
if len(ann_token) == 0:
attr_name = 'None'
else:
attr_name = nusc.get('attribute', ann_token[0])['name']
attr_id = nus_attributes.index(attr_name)
repro_rec['attribute_name'] = attr_name
repro_rec['attribute_id'] = attr_id
repro_recs.append(repro_rec)
return repro_recs
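# Hedged sketch of the size reordering used for mono3d above: the devkit's
# (w, l, h) is rearranged into the camera-coordinate (l, h, w) order before
# being written to 'bbox_cam3d'. Values are illustrative only.
def _demo_wlh_to_lhw():
    dim = np.array([1.9, 4.5, 1.6])   # (w, l, h)
    dim[[0, 1, 2]] = dim[[1, 2, 0]]   # -> [4.5, 1.6, 1.9] = (l, h, w)
    return dim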
def post_process_coords(
corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
"""Get the intersection of the convex hull of the reprojected bbox corners
and the image canvas, return None if no intersection.
Args:
corner_coords (list[int]): Corner coordinates of reprojected
bounding box.
imsize (tuple[int]): Size of the image canvas.
Return:
tuple [float]: Intersection of the convex hull of the 2D box
corners and the image canvas.
"""
polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
img_canvas = box(0, 0, imsize[0], imsize[1])
if polygon_from_2d_box.intersects(img_canvas):
img_intersection = polygon_from_2d_box.intersection(img_canvas)
intersection_coords = np.array(
[coord for coord in img_intersection.exterior.coords])
min_x = min(intersection_coords[:, 0])
min_y = min(intersection_coords[:, 1])
max_x = max(intersection_coords[:, 0])
max_y = max(intersection_coords[:, 1])
return min_x, min_y, max_x, max_y
else:
return None
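# Hedged usage sketch of post_process_coords with hypothetical corners: a box
# whose left part falls outside the canvas gets clipped at the image boundary.
def _demo_post_process_coords():
    corner_coords = [[-100.0, 100.0], [200.0, 100.0], [200.0, 300.0],
                     [-100.0, 300.0]]
    return post_process_coords(corner_coords)  # -> (0.0, 100.0, 200.0, 300.0)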
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
sample_data_token: str, filename: str) -> OrderedDict:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
sample_data_token (str): Sample data token.
filename (str): The corresponding image file where the annotation
is present.
Returns:
dict: A sample 2D annotation record.
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
- category_id (int): category id
- bbox (list[float]): left x, top y, dx, dy of 2d box
- iscrowd (int): whether the area is crowd
"""
repro_rec = OrderedDict()
repro_rec['sample_data_token'] = sample_data_token
coco_rec = dict()
relevant_keys = [
'attribute_tokens',
'category_name',
'instance_token',
'next',
'num_lidar_pts',
'num_radar_pts',
'prev',
'sample_annotation_token',
'sample_data_token',
'visibility_token',
]
for key, value in ann_rec.items():
if key in relevant_keys:
repro_rec[key] = value
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename
coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)
if repro_rec['category_name'] not in NuScenesDataset.NameMapping:
return None
cat_name = NuScenesDataset.NameMapping[repro_rec['category_name']]
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = nus_categories.index(cat_name)
coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
coco_rec['iscrowd'] = 0
return coco_rec
# Copyright (c) OpenMMLab. All rights reserved.
import os
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
class S3DISData(object):
"""S3DIS data.
Generate s3dis infos for s3dis_converter.
Args:
root_path (str): Root path of the raw data.
split (str, optional): Set split type of the data. Default: 'Area_1'.
"""
def __init__(self, root_path, split='Area_1'):
self.root_dir = root_path
self.split = split
self.data_dir = osp.join(root_path,
'Stanford3dDataset_v1.2_Aligned_Version')
# Following `GSDN <https://arxiv.org/abs/2006.12356>`_, use 5 furniture
# classes for detection: table, chair, sofa, bookcase, board.
self.cat_ids = np.array([7, 8, 9, 10, 11])
self.cat_ids2class = {
cat_id: i
for i, cat_id in enumerate(list(self.cat_ids))
}
assert split in [
'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6'
]
# keep only scene directories (e.g. conferenceRoom_1); building the list
# with a comprehension avoids removing items while iterating over them
self.sample_id_list = [
sample_id for sample_id in os.listdir(osp.join(self.data_dir, split))
if not osp.isfile(osp.join(self.data_dir, split, sample_id))
]
def __len__(self):
return len(self.sample_id_list)
def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
"""Get data infos.
This method gets information from the raw data.
Args:
num_workers (int, optional): Number of threads to be used.
Default: 4.
has_label (bool, optional): Whether the data has label.
Default: True.
sample_id_list (list[int], optional): Index list of the sample.
Default: None.
Returns:
infos (list[dict]): Information of the raw data.
"""
def process_single_scene(sample_idx):
print(f'{self.split} sample_idx: {sample_idx}')
info = dict()
pc_info = {
'num_features': 6,
'lidar_idx': f'{self.split}_{sample_idx}'
}
info['point_cloud'] = pc_info
pts_filename = osp.join(self.root_dir, 's3dis_data',
f'{self.split}_{sample_idx}_point.npy')
pts_instance_mask_path = osp.join(
self.root_dir, 's3dis_data',
f'{self.split}_{sample_idx}_ins_label.npy')
pts_semantic_mask_path = osp.join(
self.root_dir, 's3dis_data',
f'{self.split}_{sample_idx}_sem_label.npy')
points = np.load(pts_filename).astype(np.float32)
pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int64)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int64)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
points.tofile(
osp.join(self.root_dir, 'points',
f'{self.split}_{sample_idx}.bin'))
pts_instance_mask.tofile(
osp.join(self.root_dir, 'instance_mask',
f'{self.split}_{sample_idx}.bin'))
pts_semantic_mask.tofile(
osp.join(self.root_dir, 'semantic_mask',
f'{self.split}_{sample_idx}.bin'))
info['pts_path'] = osp.join('points',
f'{self.split}_{sample_idx}.bin')
info['pts_instance_mask_path'] = osp.join(
'instance_mask', f'{self.split}_{sample_idx}.bin')
info['pts_semantic_mask_path'] = osp.join(
'semantic_mask', f'{self.split}_{sample_idx}.bin')
info['annos'] = self.get_bboxes(points, pts_instance_mask,
pts_semantic_mask)
return info
sample_id_list = sample_id_list if sample_id_list is not None \
else self.sample_id_list
with futures.ThreadPoolExecutor(num_workers) as executor:
infos = executor.map(process_single_scene, sample_id_list)
return list(infos)
def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask):
"""Convert instance masks to axis-aligned bounding boxes.
Args:
points (np.array): Scene points of shape (n, 6).
pts_instance_mask (np.ndarray): Instance labels of shape (n,).
pts_semantic_mask (np.ndarray): Semantic labels of shape (n,).
Returns:
dict: A dict containing detection infos with following keys:
- gt_boxes_upright_depth (np.ndarray): Bounding boxes
of shape (n, 6)
- class (np.ndarray): Box labels of shape (n,)
- gt_num (int): Number of boxes.
"""
bboxes, labels = [], []
for i in range(1, pts_instance_mask.max() + 1):
ids = pts_instance_mask == i
# check if all instance points have same semantic label
assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max()
label = pts_semantic_mask[ids][0]
# keep only furniture objects
if label in self.cat_ids2class:
labels.append(self.cat_ids2class[pts_semantic_mask[ids][0]])
pts = points[:, :3][ids]
min_pts = pts.min(axis=0)
max_pts = pts.max(axis=0)
locations = (min_pts + max_pts) / 2
dimensions = max_pts - min_pts
bboxes.append(np.concatenate((locations, dimensions)))
annotation = dict()
# follow ScanNet and SUN RGB-D keys
annotation['gt_boxes_upright_depth'] = np.array(bboxes)
annotation['class'] = np.array(labels)
annotation['gt_num'] = len(labels)
return annotation
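# Hedged sketch of the box construction in S3DISData.get_bboxes: an instance's
# points are reduced to an axis-aligned box stored as (center, size). The point
# coordinates below are made up.
def _demo_points_to_aabb():
    pts = np.array([[0.0, 0.0, 0.0], [2.0, 1.0, 0.5], [1.0, 0.5, 0.25]])
    min_pts = pts.min(axis=0)
    max_pts = pts.max(axis=0)
    location = (min_pts + max_pts) / 2   # [1.0, 0.5, 0.25]
    dimensions = max_pts - min_pts       # [2.0, 1.0, 0.5]
    return np.concatenate((location, dimensions))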
class S3DISSegData(object):
"""S3DIS dataset used to generate infos for semantic segmentation task.
Args:
data_root (str): Root path of the raw data.
ann_file (str): The generated S3DIS infos.
split (str, optional): Set split type of the data. Default: 'train'.
num_points (int, optional): Number of points in each data input.
Default: 8192.
label_weight_func (function, optional): Function to compute the
label weight. Default: None.
"""
def __init__(self,
data_root,
ann_file,
split='Area_1',
num_points=4096,
label_weight_func=None):
self.data_root = data_root
self.data_infos = mmcv.load(ann_file)
self.split = split
self.num_points = num_points
self.all_ids = np.arange(13) # all possible ids
self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12]) # used for seg task
self.ignore_index = len(self.cat_ids)
self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int64) * \
self.ignore_index
for i, cat_id in enumerate(self.cat_ids):
self.cat_id2class[cat_id] = i
# label weighting function is taken from
# https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
label_weight_func is None else label_weight_func
def get_seg_infos(self):
scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
save_folder = osp.join(self.data_root, 'seg_info')
mmcv.mkdir_or_exist(save_folder)
np.save(
osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
scene_idxs)
np.save(
osp.join(save_folder, f'{self.split}_label_weight.npy'),
label_weight)
print(f'{self.split} resampled scene index and label weight saved')
def _convert_to_label(self, mask):
"""Convert class_id in loaded segmentation mask to label."""
if isinstance(mask, str):
if mask.endswith('npy'):
mask = np.load(mask)
else:
mask = np.fromfile(mask, dtype=np.int64)
label = self.cat_id2class[mask]
return label
def get_scene_idxs_and_label_weight(self):
"""Compute scene_idxs for data sampling and label weight for loss
calculation.
We sample more times for scenes with more points. Label_weight is
inversely proportional to number of class points.
"""
num_classes = len(self.cat_ids)
num_point_all = []
label_weight = np.zeros((num_classes + 1, )) # ignore_index
for data_info in self.data_infos:
label = self._convert_to_label(
osp.join(self.data_root, data_info['pts_semantic_mask_path']))
num_point_all.append(label.shape[0])
class_count, _ = np.histogram(label, range(num_classes + 2))
label_weight += class_count
# repeat scene_idx for num_scene_point // num_sample_point times
sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
num_iter = int(np.sum(num_point_all) / float(self.num_points))
scene_idxs = []
for idx in range(len(self.data_infos)):
scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
scene_idxs = np.array(scene_idxs).astype(np.int32)
# calculate label weight, adopted from PointNet++
label_weight = label_weight[:-1].astype(np.float32)
label_weight = label_weight / label_weight.sum()
label_weight = self.label_weight_func(label_weight).astype(np.float32)
return scene_idxs, label_weight
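# Hedged sketch of the label weighting above: per-class point counts are
# normalized to frequencies and passed through 1 / log(1.2 + x), so rare
# classes receive larger loss weights. The counts are illustrative only.
def _demo_label_weight():
    class_count = np.array([9000.0, 900.0, 100.0], dtype=np.float32)
    freq = class_count / class_count.sum()
    return (1.0 / np.log(1.2 + freq)).astype(np.float32)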
# Copyright (c) OpenMMLab. All rights reserved.
import os
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
class ScanNetData(object):
"""ScanNet data.
Generate scannet infos for scannet_converter.
Args:
root_path (str): Root path of the raw data.
split (str, optional): Set split type of the data. Default: 'train'.
"""
def __init__(self, root_path, split='train'):
self.root_dir = root_path
self.split = split
self.split_dir = osp.join(root_path)
self.classes = [
'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin'
]
self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
self.label2cat = {self.cat2label[t]: t for t in self.cat2label}
self.cat_ids = np.array(
[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
self.cat_ids2class = {
nyu40id: i
for i, nyu40id in enumerate(list(self.cat_ids))
}
assert split in ['train', 'val', 'test']
split_file = osp.join(self.root_dir, 'meta_data',
f'scannetv2_{split}.txt')
mmcv.check_file_exist(split_file)
self.sample_id_list = mmcv.list_from_file(split_file)
self.test_mode = (split == 'test')
def __len__(self):
return len(self.sample_id_list)
def get_aligned_box_label(self, idx):
box_file = osp.join(self.root_dir, 'scannet_instance_data',
f'{idx}_aligned_bbox.npy')
mmcv.check_file_exist(box_file)
return np.load(box_file)
def get_unaligned_box_label(self, idx):
box_file = osp.join(self.root_dir, 'scannet_instance_data',
f'{idx}_unaligned_bbox.npy')
mmcv.check_file_exist(box_file)
return np.load(box_file)
def get_axis_align_matrix(self, idx):
matrix_file = osp.join(self.root_dir, 'scannet_instance_data',
f'{idx}_axis_align_matrix.npy')
mmcv.check_file_exist(matrix_file)
return np.load(matrix_file)
def get_images(self, idx):
paths = []
path = osp.join(self.root_dir, 'posed_images', idx)
for file in sorted(os.listdir(path)):
if file.endswith('.jpg'):
paths.append(osp.join('posed_images', idx, file))
return paths
def get_extrinsics(self, idx):
extrinsics = []
path = osp.join(self.root_dir, 'posed_images', idx)
for file in sorted(os.listdir(path)):
if file.endswith('.txt') and not file == 'intrinsic.txt':
extrinsics.append(np.loadtxt(osp.join(path, file)))
return extrinsics
def get_intrinsics(self, idx):
matrix_file = osp.join(self.root_dir, 'posed_images', idx,
'intrinsic.txt')
mmcv.check_file_exist(matrix_file)
return np.loadtxt(matrix_file)
def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
"""Get data infos.
This method gets information from the raw data.
Args:
num_workers (int, optional): Number of threads to be used.
Default: 4.
has_label (bool, optional): Whether the data has label.
Default: True.
sample_id_list (list[int], optional): Index list of the sample.
Default: None.
Returns:
infos (list[dict]): Information of the raw data.
"""
def process_single_scene(sample_idx):
print(f'{self.split} sample_idx: {sample_idx}')
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
pts_filename = osp.join(self.root_dir, 'scannet_instance_data',
f'{sample_idx}_vert.npy')
points = np.load(pts_filename)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
points.tofile(
osp.join(self.root_dir, 'points', f'{sample_idx}.bin'))
info['pts_path'] = osp.join('points', f'{sample_idx}.bin')
# update with RGB image paths if exist
if os.path.exists(osp.join(self.root_dir, 'posed_images')):
info['intrinsics'] = self.get_intrinsics(sample_idx)
all_extrinsics = self.get_extrinsics(sample_idx)
all_img_paths = self.get_images(sample_idx)
# some poses in ScanNet are invalid
extrinsics, img_paths = [], []
for extrinsic, img_path in zip(all_extrinsics, all_img_paths):
if np.all(np.isfinite(extrinsic)):
img_paths.append(img_path)
extrinsics.append(extrinsic)
info['extrinsics'] = extrinsics
info['img_paths'] = img_paths
if not self.test_mode:
pts_instance_mask_path = osp.join(
self.root_dir, 'scannet_instance_data',
f'{sample_idx}_ins_label.npy')
pts_semantic_mask_path = osp.join(
self.root_dir, 'scannet_instance_data',
f'{sample_idx}_sem_label.npy')
pts_instance_mask = np.load(pts_instance_mask_path).astype(
np.int64)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(
np.int64)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
pts_instance_mask.tofile(
osp.join(self.root_dir, 'instance_mask',
f'{sample_idx}.bin'))
pts_semantic_mask.tofile(
osp.join(self.root_dir, 'semantic_mask',
f'{sample_idx}.bin'))
info['pts_instance_mask_path'] = osp.join(
'instance_mask', f'{sample_idx}.bin')
info['pts_semantic_mask_path'] = osp.join(
'semantic_mask', f'{sample_idx}.bin')
if has_label:
annotations = {}
# box is of shape [k, 6 + class]
aligned_box_label = self.get_aligned_box_label(sample_idx)
unaligned_box_label = self.get_unaligned_box_label(sample_idx)
annotations['gt_num'] = aligned_box_label.shape[0]
if annotations['gt_num'] != 0:
aligned_box = aligned_box_label[:, :-1] # k, 6
unaligned_box = unaligned_box_label[:, :-1]
classes = aligned_box_label[:, -1] # k
annotations['name'] = np.array([
self.label2cat[self.cat_ids2class[classes[i]]]
for i in range(annotations['gt_num'])
])
# default names are given to aligned bbox for compatibility
# we also save unaligned bbox info with marked names
annotations['location'] = aligned_box[:, :3]
annotations['dimensions'] = aligned_box[:, 3:6]
annotations['gt_boxes_upright_depth'] = aligned_box
annotations['unaligned_location'] = unaligned_box[:, :3]
annotations['unaligned_dimensions'] = unaligned_box[:, 3:6]
annotations[
'unaligned_gt_boxes_upright_depth'] = unaligned_box
annotations['index'] = np.arange(
annotations['gt_num'], dtype=np.int32)
annotations['class'] = np.array([
self.cat_ids2class[classes[i]]
for i in range(annotations['gt_num'])
])
axis_align_matrix = self.get_axis_align_matrix(sample_idx)
annotations['axis_align_matrix'] = axis_align_matrix # 4x4
info['annos'] = annotations
return info
sample_id_list = sample_id_list if sample_id_list is not None \
else self.sample_id_list
with futures.ThreadPoolExecutor(num_workers) as executor:
infos = executor.map(process_single_scene, sample_id_list)
return list(infos)
class ScanNetSegData(object):
"""ScanNet dataset used to generate infos for semantic segmentation task.
Args:
data_root (str): Root path of the raw data.
ann_file (str): The generated scannet infos.
split (str, optional): Set split type of the data. Default: 'train'.
num_points (int, optional): Number of points in each data input.
Default: 8192.
label_weight_func (function, optional): Function to compute the
label weight. Default: None.
"""
def __init__(self,
data_root,
ann_file,
split='train',
num_points=8192,
label_weight_func=None):
self.data_root = data_root
self.data_infos = mmcv.load(ann_file)
self.split = split
assert split in ['train', 'val', 'test']
self.num_points = num_points
self.all_ids = np.arange(41) # all possible ids
self.cat_ids = np.array([
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
39
]) # used for seg task
self.ignore_index = len(self.cat_ids)
self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int64) * \
self.ignore_index
for i, cat_id in enumerate(self.cat_ids):
self.cat_id2class[cat_id] = i
# label weighting function is taken from
# https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
label_weight_func is None else label_weight_func
def get_seg_infos(self):
if self.split == 'test':
return
scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
save_folder = osp.join(self.data_root, 'seg_info')
mmcv.mkdir_or_exist(save_folder)
np.save(
osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
scene_idxs)
np.save(
osp.join(save_folder, f'{self.split}_label_weight.npy'),
label_weight)
print(f'{self.split} resampled scene index and label weight saved')
def _convert_to_label(self, mask):
"""Convert class_id in loaded segmentation mask to label."""
if isinstance(mask, str):
if mask.endswith('npy'):
mask = np.load(mask)
else:
mask = np.fromfile(mask, dtype=np.int64)
label = self.cat_id2class[mask]
return label
def get_scene_idxs_and_label_weight(self):
"""Compute scene_idxs for data sampling and label weight for loss
calculation.
We sample more times for scenes with more points. Label_weight is
inversely proportional to number of class points.
"""
num_classes = len(self.cat_ids)
num_point_all = []
label_weight = np.zeros((num_classes + 1, )) # ignore_index
for data_info in self.data_infos:
label = self._convert_to_label(
osp.join(self.data_root, data_info['pts_semantic_mask_path']))
num_point_all.append(label.shape[0])
class_count, _ = np.histogram(label, range(num_classes + 2))
label_weight += class_count
# repeat scene_idx for num_scene_point // num_sample_point times
sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
num_iter = int(np.sum(num_point_all) / float(self.num_points))
scene_idxs = []
for idx in range(len(self.data_infos)):
scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter)))
scene_idxs = np.array(scene_idxs).astype(np.int32)
# calculate label weight, adopted from PointNet++
label_weight = label_weight[:-1].astype(np.float32)
label_weight = label_weight / label_weight.sum()
label_weight = self.label_weight_func(label_weight).astype(np.float32)
return scene_idxs, label_weight
# Copyright (c) OpenMMLab. All rights reserved.
from concurrent import futures as futures
from os import path as osp
import mmcv
import numpy as np
from scipy import io as sio
def random_sampling(points, num_points, replace=None):
"""Random sampling.
Sampling point cloud to a certain number of points.
Args:
points (ndarray): Point cloud.
num_points (int): The number of samples.
replace (bool): Whether the sample is with or without replacement.
Returns:
points (ndarray): Point cloud after sampling.
"""
if num_points < 0:
return points
if replace is None:
replace = (points.shape[0] < num_points)
choices = np.random.choice(points.shape[0], num_points, replace=replace)
return points[choices]
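# Illustrative usage of random_sampling (shapes are assumptions, not taken
# from the raw data):
#
#   pts = np.random.rand(120000, 6)        # e.g. an xyzrgb point cloud
#   sub = random_sampling(pts, 50000)      # without replacement (enough points)
#   aug = random_sampling(pts, 200000)     # with replacement (too few points)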
class SUNRGBDInstance(object):
def __init__(self, line):
data = line.split(' ')
data[1:] = [float(x) for x in data[1:]]
self.classname = data[0]
self.xmin = data[1]
self.ymin = data[2]
self.xmax = data[1] + data[3]
self.ymax = data[2] + data[4]
self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
self.centroid = np.array([data[5], data[6], data[7]])
self.width = data[8]
self.length = data[9]
self.height = data[10]
# data[9] is x_size (length), data[8] is y_size (width), data[10] is
# z_size (height) in our depth coordinate system,
# l corresponds to the size along the x axis
self.size = np.array([data[9], data[8], data[10]]) * 2
self.orientation = np.zeros((3, ))
self.orientation[0] = data[11]
self.orientation[1] = data[12]
self.heading_angle = np.arctan2(self.orientation[1],
self.orientation[0])
self.box3d = np.concatenate(
[self.centroid, self.size, self.heading_angle[None]])
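# A SUNRGBD label line is parsed as:
#   classname xmin ymin dx dy cx cy cz w l h ori_x ori_y
# Illustrative example (values are made up):
#
#   inst = SUNRGBDInstance('bed 10 20 100 80 1.0 2.0 0.5 0.8 1.0 0.4 0.0 1.0')
#   # inst.box2d -> [10, 20, 110, 100]
#   # inst.box3d -> [cx, cy, cz, 2*l, 2*w, 2*h, heading]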
class SUNRGBDData(object):
"""SUNRGBD data.
Generate sunrgbd infos for sunrgbd_converter.
Args:
root_path (str): Root path of the raw data.
split (str, optional): Set split type of the data. Default: 'train'.
use_v1 (bool, optional): Whether to use v1. Default: False.
num_points (int, optional): Number of points to sample. Set to -1
to utilize all points. Defaults to -1.
"""
def __init__(self, root_path, split='train', use_v1=False, num_points=-1):
self.root_dir = root_path
self.split = split
self.split_dir = osp.join(root_path, 'sunrgbd_trainval')
self.num_points = num_points
self.classes = [
'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'
]
self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
self.label2cat = {
label: self.classes[label]
for label in range(len(self.classes))
}
assert split in ['train', 'val', 'test']
split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')
mmcv.check_file_exist(split_file)
self.sample_id_list = list(map(int, mmcv.list_from_file(split_file)))
self.image_dir = osp.join(self.split_dir, 'image')
self.calib_dir = osp.join(self.split_dir, 'calib')
self.depth_dir = osp.join(self.split_dir, 'depth')
if use_v1:
self.label_dir = osp.join(self.split_dir, 'label_v1')
else:
self.label_dir = osp.join(self.split_dir, 'label')
def __len__(self):
return len(self.sample_id_list)
def get_image(self, idx):
img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')
return mmcv.imread(img_filename)
def get_image_shape(self, idx):
image = self.get_image(idx)
return np.array(image.shape[:2], dtype=np.int32)
def get_depth(self, idx):
depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')
depth = sio.loadmat(depth_filename)['instance']
return depth
def get_calibration(self, idx):
calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')
lines = [line.rstrip() for line in open(calib_filepath)]
Rt = np.array([float(x) for x in lines[0].split(' ')])
Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)
K = np.array([float(x) for x in lines[1].split(' ')])
K = np.reshape(K, (3, 3), order='F').astype(np.float32)
return K, Rt
def get_label_objects(self, idx):
label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')
lines = [line.rstrip() for line in open(label_filename)]
objects = [SUNRGBDInstance(line) for line in lines]
return objects
def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
"""Get data infos.
This method gets information from the raw data.
Args:
num_workers (int, optional): Number of threads to be used.
Default: 4.
has_label (bool, optional): Whether the data has label.
Default: True.
sample_id_list (list[int], optional): Index list of the sample.
Default: None.
Returns:
infos (list[dict]): Information of the raw data.
"""
def process_single_scene(sample_idx):
print(f'{self.split} sample_idx: {sample_idx}')
# convert depth to points
pc_upright_depth = self.get_depth(sample_idx)
pc_upright_depth_subsampled = random_sampling(
pc_upright_depth, self.num_points)
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
pc_upright_depth_subsampled.tofile(
osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))
info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')
img_path = osp.join('image', f'{sample_idx:06d}.jpg')
image_info = {
'image_idx': sample_idx,
'image_shape': self.get_image_shape(sample_idx),
'image_path': img_path
}
info['image'] = image_info
K, Rt = self.get_calibration(sample_idx)
calib_info = {'K': K, 'Rt': Rt}
info['calib'] = calib_info
if has_label:
obj_list = self.get_label_objects(sample_idx)
annotations = {}
annotations['gt_num'] = len([
obj.classname for obj in obj_list
if obj.classname in self.cat2label.keys()
])
if annotations['gt_num'] != 0:
annotations['name'] = np.array([
obj.classname for obj in obj_list
if obj.classname in self.cat2label.keys()
])
annotations['bbox'] = np.concatenate([
obj.box2d.reshape(1, 4) for obj in obj_list
if obj.classname in self.cat2label.keys()
],
axis=0)
annotations['location'] = np.concatenate([
obj.centroid.reshape(1, 3) for obj in obj_list
if obj.classname in self.cat2label.keys()
],
axis=0)
annotations['dimensions'] = 2 * np.array([
[obj.length, obj.width, obj.height] for obj in obj_list
if obj.classname in self.cat2label.keys()
]) # lwh (depth) format
annotations['rotation_y'] = np.array([
obj.heading_angle for obj in obj_list
if obj.classname in self.cat2label.keys()
])
annotations['index'] = np.arange(
len(obj_list), dtype=np.int32)
annotations['class'] = np.array([
self.cat2label[obj.classname] for obj in obj_list
if obj.classname in self.cat2label.keys()
])
annotations['gt_boxes_upright_depth'] = np.stack(
[
obj.box3d for obj in obj_list
if obj.classname in self.cat2label.keys()
],
axis=0)  # (K, 7): centroid (3) + size (3) + heading (1)
info['annos'] = annotations
return info
sample_id_list = sample_id_list if \
sample_id_list is not None else self.sample_id_list
with futures.ThreadPoolExecutor(num_workers) as executor:
infos = executor.map(process_single_scene, sample_id_list)
return list(infos)
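# Illustrative end-to-end usage of SUNRGBDData (the dataset root and output
# path below are assumptions):
#
#   sunrgbd = SUNRGBDData(root_path='./data/sunrgbd', split='train')
#   infos = sunrgbd.get_infos(num_workers=4, has_label=True)
#   mmcv.dump(infos, './data/sunrgbd/sunrgbd_infos_train.pkl')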
# Copyright (c) OpenMMLab. All rights reserved.
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
try:
from waymo_open_dataset import dataset_pb2
except ImportError:
raise ImportError(
'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
'to install the official devkit first.')
from glob import glob
from os.path import join
import mmcv
import numpy as np
import tensorflow as tf
from waymo_open_dataset.utils import range_image_utils, transform_utils
from waymo_open_dataset.utils.frame_utils import \
parse_range_image_and_camera_projection
class Waymo2KITTI(object):
"""Waymo to KITTI converter.
This class serves as the converter to change the waymo raw data to KITTI
format.
Args:
load_dir (str): Directory to load waymo raw data.
save_dir (str): Directory to save data in KITTI format.
prefix (str): Prefix of filename. In general, 0 for training, 1 for
validation and 2 for testing.
workers (int, optional): Number of workers for the parallel process.
test_mode (bool, optional): Whether in the test_mode. Default: False.
"""
def __init__(self,
load_dir,
save_dir,
prefix,
workers=64,
test_mode=False):
self.filter_empty_3dboxes = True
self.filter_no_label_zone_points = True
self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']
# Only data collected in specific locations will be converted
# If set None, this filter is disabled
# Available options: location_sf (main dataset)
self.selected_waymo_locations = None
self.save_track_id = False
# turn on eager execution for older tensorflow versions
if int(tf.__version__.split('.')[0]) < 2:
tf.enable_eager_execution()
self.lidar_list = [
'_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
'_SIDE_LEFT'
]
self.type_list = [
'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'
]
self.waymo_to_kitti_class_map = {
'UNKNOWN': 'DontCare',
'PEDESTRIAN': 'Pedestrian',
'VEHICLE': 'Car',
'CYCLIST': 'Cyclist',
'SIGN': 'Sign' # not in kitti
}
self.load_dir = load_dir
self.save_dir = save_dir
self.prefix = prefix
self.workers = int(workers)
self.test_mode = test_mode
self.tfrecord_pathnames = sorted(
glob(join(self.load_dir, '*.tfrecord')))
self.label_save_dir = f'{self.save_dir}/label_'
self.label_all_save_dir = f'{self.save_dir}/label_all'
self.image_save_dir = f'{self.save_dir}/image_'
self.calib_save_dir = f'{self.save_dir}/calib'
self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
self.pose_save_dir = f'{self.save_dir}/pose'
self.timestamp_save_dir = f'{self.save_dir}/timestamp'
self.create_folder()
def convert(self):
"""Convert action."""
print('Start converting ...')
mmcv.track_parallel_progress(self.convert_one, range(len(self)),
self.workers)
print('\nFinished ...')
def convert_one(self, file_idx):
"""Convert action for single file.
Args:
file_idx (int): Index of the file to be converted.
"""
pathname = self.tfrecord_pathnames[file_idx]
dataset = tf.data.TFRecordDataset(pathname, compression_type='')
for frame_idx, data in enumerate(dataset):
frame = dataset_pb2.Frame()
frame.ParseFromString(bytearray(data.numpy()))
if (self.selected_waymo_locations is not None
and frame.context.stats.location
not in self.selected_waymo_locations):
continue
self.save_image(frame, file_idx, frame_idx)
self.save_calib(frame, file_idx, frame_idx)
self.save_lidar(frame, file_idx, frame_idx)
self.save_pose(frame, file_idx, frame_idx)
self.save_timestamp(frame, file_idx, frame_idx)
if not self.test_mode:
self.save_label(frame, file_idx, frame_idx)
def __len__(self):
"""Length of the filename list."""
return len(self.tfrecord_pathnames)
def save_image(self, frame, file_idx, frame_idx):
"""Parse and save the images in png format.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
for img in frame.images:
img_path = f'{self.image_save_dir}{str(img.name - 1)}/' + \
f'{self.prefix}{str(file_idx).zfill(3)}' + \
f'{str(frame_idx).zfill(3)}.png'
img = mmcv.imfrombytes(img.image)
mmcv.imwrite(img, img_path)
def save_calib(self, frame, file_idx, frame_idx):
"""Parse and save the calibration data.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
# waymo front camera to kitti reference camera
T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],
[1.0, 0.0, 0.0]])
camera_calibs = []
R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]
Tr_velo_to_cams = []
calib_context = ''
for camera in frame.context.camera_calibrations:
# extrinsic parameters
T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(
4, 4)
T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)
Tr_velo_to_cam = \
self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam
if camera.name == 1: # FRONT = 1, see dataset.proto for details
self.T_velo_to_front_cam = Tr_velo_to_cam.copy()
Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))
Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])
# intrinsic parameters
camera_calib = np.zeros((3, 4))
camera_calib[0, 0] = camera.intrinsic[0]
camera_calib[1, 1] = camera.intrinsic[1]
camera_calib[0, 2] = camera.intrinsic[2]
camera_calib[1, 2] = camera.intrinsic[3]
camera_calib[2, 2] = 1
camera_calib = list(camera_calib.reshape(12))
camera_calib = [f'{i:e}' for i in camera_calib]
camera_calibs.append(camera_calib)
# all camera ids are saved as id-1 in the result because
# camera 0 is unknown in the proto
for i in range(5):
calib_context += 'P' + str(i) + ': ' + \
' '.join(camera_calibs[i]) + '\n'
calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\n'
for i in range(5):
calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \
' '.join(Tr_velo_to_cams[i]) + '\n'
with open(
f'{self.calib_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt',
'w+') as fp_calib:
fp_calib.write(calib_context)
def save_lidar(self, frame, file_idx, frame_idx):
"""Parse and save the lidar data in psd format.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
range_images, camera_projections, range_image_top_pose = \
parse_range_image_and_camera_projection(frame)
# First return
points_0, cp_points_0, intensity_0, elongation_0, mask_indices_0 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=0
)
points_0 = np.concatenate(points_0, axis=0)
intensity_0 = np.concatenate(intensity_0, axis=0)
elongation_0 = np.concatenate(elongation_0, axis=0)
mask_indices_0 = np.concatenate(mask_indices_0, axis=0)
# Second return
points_1, cp_points_1, intensity_1, elongation_1, mask_indices_1 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=1
)
points_1 = np.concatenate(points_1, axis=0)
intensity_1 = np.concatenate(intensity_1, axis=0)
elongation_1 = np.concatenate(elongation_1, axis=0)
mask_indices_1 = np.concatenate(mask_indices_1, axis=0)
points = np.concatenate([points_0, points_1], axis=0)
intensity = np.concatenate([intensity_0, intensity_1], axis=0)
elongation = np.concatenate([elongation_0, elongation_1], axis=0)
mask_indices = np.concatenate([mask_indices_0, mask_indices_1], axis=0)
# timestamp = frame.timestamp_micros * np.ones_like(intensity)
# concatenate x, y, z, intensity, elongation, mask_indices (6-dim)
point_cloud = np.column_stack(
(points, intensity, elongation, mask_indices))
pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
point_cloud.astype(np.float32).tofile(pc_path)
def save_label(self, frame, file_idx, frame_idx):
"""Parse and save the label data in txt format.
The relation between waymo and kitti coordinates is noteworthy:
1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
2. x-y-z: front-left-up (waymo) -> right-down-front(kitti)
3. bbox origin at volumetric center (waymo) -> bottom center (kitti)
4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
fp_label_all = open(
f'{self.label_all_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')
id_to_bbox = dict()
id_to_name = dict()
for labels in frame.projected_lidar_labels:
name = labels.name
for label in labels.labels:
# TODO: need a workaround as bbox may not belong to front cam
bbox = [
label.box.center_x - label.box.length / 2,
label.box.center_y - label.box.width / 2,
label.box.center_x + label.box.length / 2,
label.box.center_y + label.box.width / 2
]
id_to_bbox[label.id] = bbox
id_to_name[label.id] = name - 1
for obj in frame.laser_labels:
bounding_box = None
name = None
id = obj.id
for lidar in self.lidar_list:
if id + lidar in id_to_bbox:
bounding_box = id_to_bbox.get(id + lidar)
name = str(id_to_name.get(id + lidar))
break
if bounding_box is None or name is None:
name = '0'
bounding_box = (0, 0, 0, 0)
my_type = self.type_list[obj.type]
if my_type not in self.selected_waymo_classes:
continue
if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:
continue
my_type = self.waymo_to_kitti_class_map[my_type]
height = obj.box.height
width = obj.box.width
length = obj.box.length
x = obj.box.center_x
y = obj.box.center_y
z = obj.box.center_z - height / 2
# project bounding box to the virtual reference frame
pt_ref = self.T_velo_to_front_cam @ \
np.array([x, y, z, 1]).reshape((4, 1))
x, y, z, _ = pt_ref.flatten().tolist()
rotation_y = -obj.box.heading - np.pi / 2
track_id = obj.id
# not available
truncated = 0
occluded = 0
alpha = -10
line = my_type + \
' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format(
round(truncated, 2), occluded, round(alpha, 2),
round(bounding_box[0], 2), round(bounding_box[1], 2),
round(bounding_box[2], 2), round(bounding_box[3], 2),
round(height, 2), round(width, 2), round(length, 2),
round(x, 2), round(y, 2), round(z, 2),
round(rotation_y, 2))
if self.save_track_id:
line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n'
else:
line_all = line[:-1] + ' ' + name + '\n'
fp_label = open(
f'{self.label_save_dir}{name}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')
fp_label.write(line)
fp_label.close()
fp_label_all.write(line_all)
fp_label_all.close()
def save_pose(self, frame, file_idx, frame_idx):
"""Parse and save the pose data.
Note that SDC's own pose is not included in the regular KITTI
training data. The KITTI raw dataset contains ego-motion files,
but they are rarely used. Pose is important for algorithms that
take advantage of temporal information.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
pose = np.array(frame.pose.transform).reshape(4, 4)
np.savetxt(
join(f'{self.pose_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
pose)
def save_timestamp(self, frame, file_idx, frame_idx):
"""Save the timestamp data in a separate file instead of the
pointcloud.
Note that SDC's own pose is not included in the regular training
of KITTI dataset. KITTI raw dataset contains ego motion files
but are not often used. Pose is important for algorithms that
take advantage of the temporal information.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
with open(
join(f'{self.timestamp_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
'w') as f:
f.write(str(frame.timestamp_micros))
def create_folder(self):
"""Create folder for data preprocessing."""
if not self.test_mode:
dir_list1 = [
self.label_all_save_dir, self.calib_save_dir,
self.point_cloud_save_dir, self.pose_save_dir,
self.timestamp_save_dir
]
dir_list2 = [self.label_save_dir, self.image_save_dir]
else:
dir_list1 = [
self.calib_save_dir, self.point_cloud_save_dir,
self.pose_save_dir, self.timestamp_save_dir
]
dir_list2 = [self.image_save_dir]
for d in dir_list1:
mmcv.mkdir_or_exist(d)
for d in dir_list2:
for i in range(5):
mmcv.mkdir_or_exist(f'{d}{str(i)}')
def convert_range_image_to_point_cloud(self,
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=0):
"""Convert range images to point cloud.
Args:
frame (:obj:`Frame`): Open dataset frame.
range_images (dict): Mapping from laser_name to list of two
range images corresponding with two returns.
camera_projections (dict): Mapping from laser_name to list of two
camera projections corresponding with two returns.
range_image_top_pose (:obj:`Transform`): Range image pixel pose for
top lidar.
ri_index (int, optional): 0 for the first return,
1 for the second return. Default: 0.
Returns:
tuple[list[np.ndarray]]: (List of points with shape [N, 3],
camera projections of points with shape [N, 6], intensity
with shape [N, 1], elongation with shape [N, 1], points'
position in the depth map (element offset if points come from
the main lidar otherwise -1) with shape[N, 1]). All the
lists have the length of lidar numbers (5).
"""
calibrations = sorted(
frame.context.laser_calibrations, key=lambda c: c.name)
points = []
cp_points = []
intensity = []
elongation = []
mask_indices = []
frame_pose = tf.convert_to_tensor(
value=np.reshape(np.array(frame.pose.transform), [4, 4]))
# [H, W, 6]
range_image_top_pose_tensor = tf.reshape(
tf.convert_to_tensor(value=range_image_top_pose.data),
range_image_top_pose.shape.dims)
# [H, W, 3, 3]
range_image_top_pose_tensor_rotation = \
transform_utils.get_rotation_matrix(
range_image_top_pose_tensor[..., 0],
range_image_top_pose_tensor[..., 1],
range_image_top_pose_tensor[..., 2])
range_image_top_pose_tensor_translation = \
range_image_top_pose_tensor[..., 3:]
range_image_top_pose_tensor = transform_utils.get_transform(
range_image_top_pose_tensor_rotation,
range_image_top_pose_tensor_translation)
for c in calibrations:
range_image = range_images[c.name][ri_index]
if len(c.beam_inclinations) == 0:
beam_inclinations = range_image_utils.compute_inclination(
tf.constant(
[c.beam_inclination_min, c.beam_inclination_max]),
height=range_image.shape.dims[0])
else:
beam_inclinations = tf.constant(c.beam_inclinations)
beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])
range_image_tensor = tf.reshape(
tf.convert_to_tensor(value=range_image.data),
range_image.shape.dims)
pixel_pose_local = None
frame_pose_local = None
if c.name == dataset_pb2.LaserName.TOP:
pixel_pose_local = range_image_top_pose_tensor
pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
frame_pose_local = tf.expand_dims(frame_pose, axis=0)
range_image_mask = range_image_tensor[..., 0] > 0
if self.filter_no_label_zone_points:
nlz_mask = range_image_tensor[..., 3] != 1.0 # 1.0: in NLZ
range_image_mask = range_image_mask & nlz_mask
range_image_cartesian = \
range_image_utils.extract_point_cloud_from_range_image(
tf.expand_dims(range_image_tensor[..., 0], axis=0),
tf.expand_dims(extrinsic, axis=0),
tf.expand_dims(tf.convert_to_tensor(
value=beam_inclinations), axis=0),
pixel_pose=pixel_pose_local,
frame_pose=frame_pose_local)
mask_index = tf.where(range_image_mask)
range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
points_tensor = tf.gather_nd(range_image_cartesian, mask_index)
cp = camera_projections[c.name][ri_index]
cp_tensor = tf.reshape(
tf.convert_to_tensor(value=cp.data), cp.shape.dims)
cp_points_tensor = tf.gather_nd(cp_tensor, mask_index)
points.append(points_tensor.numpy())
cp_points.append(cp_points_tensor.numpy())
intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
mask_index)
intensity.append(intensity_tensor.numpy())
elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
mask_index)
elongation.append(elongation_tensor.numpy())
if c.name == 1:
mask_index = (ri_index * range_image_mask.shape[0] +
mask_index[:, 0]
) * range_image_mask.shape[1] + mask_index[:, 1]
mask_index = mask_index.numpy().astype(elongation[-1].dtype)
else:
mask_index = np.full_like(elongation[-1], -1)
mask_indices.append(mask_index)
return points, cp_points, intensity, elongation, mask_indices
def cart_to_homo(self, mat):
"""Convert transformation matrix in Cartesian coordinates to
homogeneous format.
Args:
mat (np.ndarray): Transformation matrix in Cartesian.
The input matrix shape is 3x3 or 3x4.
Returns:
np.ndarray: Transformation matrix in homogeneous format.
The matrix shape is 4x4.
"""
ret = np.eye(4)
if mat.shape == (3, 3):
ret[:3, :3] = mat
elif mat.shape == (3, 4):
ret[:3, :] = mat
else:
raise ValueError(mat.shape)
return ret
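# Illustrative usage of the converter (the directories below are assumptions;
# prefix '0' denotes the training split, as described in the class docstring):
#
#   converter = Waymo2KITTI(
#       load_dir='./data/waymo/waymo_format/training',
#       save_dir='./data/waymo/kitti_format/training',
#       prefix='0',
#       workers=64,
#       test_mode=False)
#   converter.convert()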
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser, Namespace
from pathlib import Path
from tempfile import TemporaryDirectory
import mmcv
try:
from model_archiver.model_packaging import package_model
from model_archiver.model_packaging_utils import ModelExportUtils
except ImportError:
package_model = None
def mmdet3d2torchserve(
config_file: str,
checkpoint_file: str,
output_folder: str,
model_name: str,
model_version: str = '1.0',
force: bool = False,
):
"""Converts MMDetection3D model (config + checkpoint) to TorchServe `.mar`.
Args:
config_file (str):
In MMDetection3D config format.
The contents vary for each task repository.
checkpoint_file (str):
In MMDetection3D checkpoint format.
The contents vary for each task repository.
output_folder (str):
Folder where `{model_name}.mar` will be created.
The file created will be in TorchServe archive format.
model_name (str):
If not None, used for naming the `{model_name}.mar` file
that will be created under `output_folder`.
If None, `{Path(checkpoint_file).stem}` will be used.
model_version (str, optional):
Model's version. Default: '1.0'.
force (bool, optional):
If True, if there is an existing `{model_name}.mar`
file under `output_folder` it will be overwritten.
Default: False.
"""
mmcv.mkdir_or_exist(output_folder)
config = mmcv.Config.fromfile(config_file)
with TemporaryDirectory() as tmpdir:
config.dump(f'{tmpdir}/config.py')
args = Namespace(
**{
'model_file': f'{tmpdir}/config.py',
'serialized_file': checkpoint_file,
'handler': f'{Path(__file__).parent}/mmdet3d_handler.py',
'model_name': model_name or Path(checkpoint_file).stem,
'version': model_version,
'export_path': output_folder,
'force': force,
'requirements_file': None,
'extra_files': None,
'runtime': 'python',
'archive_format': 'default'
})
manifest = ModelExportUtils.generate_manifest_json(args)
package_model(args, manifest)
def parse_args():
parser = ArgumentParser(
description='Convert MMDetection3D models to TorchServe `.mar` format.')
parser.add_argument('config', type=str, help='config file path')
parser.add_argument('checkpoint', type=str, help='checkpoint file path')
parser.add_argument(
'--output-folder',
type=str,
required=True,
help='Folder where `{model_name}.mar` will be created.')
parser.add_argument(
'--model-name',
type=str,
default=None,
help='If not None, used for naming the `{model_name}.mar` '
'file that will be created under `output_folder`. '
'If None, `{Path(checkpoint_file).stem}` will be used.')
parser.add_argument(
'--model-version',
type=str,
default='1.0',
help='Number used for versioning.')
parser.add_argument(
'-f',
'--force',
action='store_true',
help='overwrite the existing `{model_name}.mar`')
args = parser.parse_args()
return args
if __name__ == '__main__':
args = parse_args()
if package_model is None:
raise ImportError('`torch-model-archiver` is required. '
'Try: pip install torch-model-archiver')
mmdet3d2torchserve(args.config, args.checkpoint, args.output_folder,
args.model_name, args.model_version, args.force)
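# Illustrative CLI usage, assuming this file is saved as
# tools/deployment/mmdet3d2torchserve.py (the config and checkpoint paths are
# placeholders):
#
#   python tools/deployment/mmdet3d2torchserve.py \
#       configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py \
#       checkpoints/second.pth \
#       --output-folder ./model_store --model-name second_kitti -f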
# Copyright (c) OpenMMLab. All rights reserved.
import base64
import os
import numpy as np
import torch
from ts.torch_handler.base_handler import BaseHandler
from mmdet3d.apis import inference_detector, init_model
from mmdet3d.core.points import get_points_type
class MMdet3dHandler(BaseHandler):
"""MMDetection3D Handler used in TorchServe.
Handler that loads an MMDetection3D model and processes input point
clouds to produce predictions. For now, it only supports SECOND.
"""
threshold = 0.5
load_dim = 4
use_dim = [0, 1, 2, 3]
coord_type = 'LIDAR'
attribute_dims = None
def initialize(self, context):
"""Initialize function loads the model in MMDetection3D.
Args:
context (context): It is a JSON Object containing information
pertaining to the model artifacts parameters.
"""
properties = context.system_properties
self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
self.device = torch.device(self.map_location + ':' +
str(properties.get('gpu_id')) if torch.cuda.
is_available() else self.map_location)
self.manifest = context.manifest
model_dir = properties.get('model_dir')
serialized_file = self.manifest['model']['serializedFile']
checkpoint = os.path.join(model_dir, serialized_file)
self.config_file = os.path.join(model_dir, 'config.py')
self.model = init_model(self.config_file, checkpoint, self.device)
self.initialized = True
def preprocess(self, data):
"""Preprocess function converts data into LiDARPoints class.
Args:
data (List): Input data from the request.
Returns:
`LiDARPoints`: The input point cloud data, converted to a
LiDARPoints instance.
"""
for row in data:
# Compat layer: normally the envelope should just return the data
# directly, but older versions of Torchserve didn't have envelope.
pts = row.get('data') or row.get('body')
if isinstance(pts, str):
pts = base64.b64decode(pts)
points = np.frombuffer(pts, dtype=np.float32)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
points_class = get_points_type(self.coord_type)
points = points_class(
points,
points_dim=points.shape[-1],
attribute_dims=self.attribute_dims)
return points
def inference(self, data):
"""Inference Function.
This function is used to make a prediction call on the
given input request.
Args:
data (`LiDARPoints`): LiDARPoints class passed to make
the inference request.
Returns:
list[dict]: The predicted results.
"""
results, _ = inference_detector(self.model, data)
return results
def postprocess(self, data):
"""Postprocess function.
This function makes use of the output from the inference and
converts it into a torchserve supported response output.
Args:
data (List[dict]): The data received from the prediction
output of the model.
Returns:
list: A list of the post-processed predictions.
"""
output = []
for pts_index, result in enumerate(data):
output.append([])
if 'pts_bbox' in result.keys():
pred_bboxes = result['pts_bbox']['boxes_3d'].tensor.numpy()
pred_scores = result['pts_bbox']['scores_3d'].numpy()
else:
pred_bboxes = result['boxes_3d'].tensor.numpy()
pred_scores = result['scores_3d'].numpy()
index = pred_scores > self.threshold
bbox_coords = pred_bboxes[index].tolist()
score = pred_scores[index].tolist()
output[pts_index].append({'3dbbox': bbox_coords, 'score': score})
return output
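# Illustrative client-side request matching preprocess() above: the request
# body is the raw float32 buffer of an N x 4 point cloud. The .bin path, model
# name and server address below are assumptions:
#
#   import requests
#   with open('demo/data/kitti/kitti_000008.bin', 'rb') as f:
#       raw = f.read()
#   resp = requests.post(
#       'http://127.0.0.1:8080/predictions/second_kitti', data=raw)
#   print(resp.json())   # [{'3dbbox': [...], 'score': [...]}]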
from argparse import ArgumentParser
import numpy as np
import requests
from mmdet3d.apis import inference_detector, init_model
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument('model_name', help='The model name in the server')
parser.add_argument(
'--inference-addr',
default='127.0.0.1:8080',
help='Address and port of the inference server')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--score-thr', type=float, default=0.5, help='3d bbox score threshold')
args = parser.parse_args()
return args
def parse_result(response_json):
bbox = response_json[0]['3dbbox']
result = np.array(bbox)
return result
def main(args):
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single point cloud file
model_result, _ = inference_detector(model, args.pcd)
# filter the 3d bboxes whose scores exceed the threshold (0.5 by default,
# which should match the server-side threshold in MMdet3dHandler)
if 'pts_bbox' in model_result[0].keys():
pred_bboxes = model_result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
pred_scores = model_result[0]['pts_bbox']['scores_3d'].numpy()
else:
pred_bboxes = model_result[0]['boxes_3d'].tensor.numpy()
pred_scores = model_result[0]['scores_3d'].numpy()
model_result = pred_bboxes[pred_scores > args.score_thr]
url = 'http://' + args.inference_addr + '/predictions/' + args.model_name
with open(args.pcd, 'rb') as points:
response = requests.post(url, points)
server_result = parse_result(response.json())
assert np.allclose(model_result, server_result)
if __name__ == '__main__':
args = parse_args()
main(args)
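# Illustrative invocation, assuming this file is saved as
# tools/deployment/test_torchserver.py and the packaged model is already being
# served (paths and model name are placeholders):
#
#   python tools/deployment/test_torchserver.py \
#       demo/data/kitti/kitti_000008.bin \
#       configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py \
#       checkpoints/second.pth second_kitti --inference-addr 127.0.0.1:8080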
#!/usr/bin/env bash
CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29501}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/test.py \
$CONFIG \
$CHECKPOINT \
--launcher pytorch \
${@:4}
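# Example usage, assuming this script is saved as tools/dist_test.sh
# (config/checkpoint paths and GPU count are placeholders):
#   bash ./tools/dist_test.sh configs/some_config.py work_dirs/some_exp/latest.pth 8 --eval bbox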
#!/usr/bin/env bash
#export PYTORCH_MIOPEN_SUGGEST_NHWC=1
#export MIOPEN_FIND_MODE=1
export TORCHINDUCTOR_LAYOUT_OPTIMIZATION=1
export PYTORCH_MIOPEN_SUGGEST_NHWC=1
export TORCHINDUCTOR_COORDINATE_DESCENT_TUNING=1
export MIOPEN_PRECISION_FP32_FP32_FP32_TF32_FP32=1
export LD_LIBRARY_PATH=/root/rocblas-install/lib:$LD_LIBRARY_PATH
CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
torchrun \
--nnodes=$NNODES \
--node_rank=$NODE_RANK \
--master_addr=$MASTER_ADDR \
--nproc_per_node=$GPUS \
--master_port=$PORT \
$(dirname "$0")/train.py \
$CONFIG \
--seed 0 \
--launcher pytorch ${@:3}
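# Example usage, assuming this script is saved as tools/dist_train.sh
# (config path and GPU count are placeholders):
#   bash ./tools/dist_train.sh configs/some_config.py 8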
import argparse
import sys
import os
sys.path.insert(0, os.getcwd())
import torch.onnx
from mmcv import Config
from mmdeploy.backend.tensorrt.utils import save, search_cuda_version
try:
# If mmdet version > 2.23.0, compat_cfg would be imported and
# used from mmdet instead of mmdet3d.
from mmdet.utils import compat_cfg
except ImportError:
from mmdet3d.utils import compat_cfg
from typing import Dict, Optional, Sequence, Union
import h5py
import mmcv
import numpy as np
import onnx
import pycuda.driver as cuda
import tensorrt as trt
import torch
import tqdm
from mmcv.runner import load_checkpoint
from mmdeploy.apis.core import no_mp
from mmdeploy.backend.tensorrt.calib_utils import HDF5Calibrator
from mmdeploy.backend.tensorrt.init_plugins import load_tensorrt_plugin
from mmdeploy.utils import load_config
from packaging import version
from torch.utils.data import DataLoader
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model
from mmdet.datasets import replace_ImageToTensor
from tools.misc.fuse_conv_bn import fuse_module
class HDF5CalibratorBEVDet(HDF5Calibrator):
def get_batch(self, names: Sequence[str], **kwargs) -> list:
"""Get batch data."""
if self.count < self.dataset_length:
if self.count % 100 == 0:
print('%d/%d' % (self.count, self.dataset_length))
ret = []
for name in names:
input_group = self.calib_data[name]
if name == 'img':
data_np = input_group[str(self.count)][...].astype(
np.float32)
else:
data_np = input_group[str(self.count)][...].astype(
np.int32)
# tile the tensor so we can keep the same distribution
opt_shape = self.input_shapes[name]['opt_shape']
data_shape = data_np.shape
reps = [
int(np.ceil(opt_s / data_s))
for opt_s, data_s in zip(opt_shape, data_shape)
]
data_np = np.tile(data_np, reps)
slice_list = tuple(slice(0, end) for end in opt_shape)
data_np = data_np[slice_list]
data_np_cuda_ptr = cuda.mem_alloc(data_np.nbytes)
cuda.memcpy_htod(data_np_cuda_ptr,
np.ascontiguousarray(data_np))
self.buffers[name] = data_np_cuda_ptr
ret.append(self.buffers[name])
self.count += 1
return ret
else:
return None
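# Minimal sketch of the tile-then-crop step used in get_batch() above (shapes
# are invented for illustration):
#
#   import numpy as np
#   data_np = np.zeros((35000,), dtype=np.int32)   # e.g. one ranks_bev sample
#   opt_shape = (50000,)                           # profile opt_shape
#   reps = [int(np.ceil(o / d)) for o, d in zip(opt_shape, data_np.shape)]
#   tiled = np.tile(data_np, reps)[tuple(slice(0, e) for e in opt_shape)]
#   # tiled.shape == (50000,), preserving the value distribution of the sample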
def parse_args():
parser = argparse.ArgumentParser(description='Deploy BEVDet with Tensorrt')
parser.add_argument('config', help='deploy config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument('work_dir', help='work dir to save file')
parser.add_argument(
'--prefix', default='bevdet', help='prefix of the save file name')
parser.add_argument(
'--fp16', action='store_true', help='Whether to use tensorrt fp16')
parser.add_argument(
'--int8', action='store_true', help='Whether to use tensorrt int8')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn; this will slightly increase '
'the inference speed')
parser.add_argument('--calib_num', type=int, help='num to calib')
args = parser.parse_args()
return args
def get_plugin_names():
return [pc.name for pc in trt.get_plugin_registry().plugin_creator_list]
def create_calib_input_data_impl(calib_file: str,
dataloader: DataLoader,
model_partition: bool = False,
metas: list = [],
calib_num=None) -> None:
with h5py.File(calib_file, mode='w') as file:
calib_data_group = file.create_group('calib_data')
assert not model_partition
# create end2end group
input_data_group = calib_data_group.create_group('end2end')
input_group_img = input_data_group.create_group('img')
input_keys = [
'ranks_bev', 'ranks_depth', 'ranks_feat', 'interval_starts',
'interval_lengths'
]
input_groups = []
for input_key in input_keys:
input_groups.append(input_data_group.create_group(input_key))
metas = [
metas[i].int().detach().cpu().numpy() for i in range(len(metas))
]
for data_id, input_data in enumerate(tqdm.tqdm(dataloader)):
# save end2end data
if (calib_num is not None) and (data_id > calib_num):
break
input_tensor = input_data['img_inputs'][0][0]
input_ndarray = input_tensor.squeeze(0).detach().cpu().numpy()
# print(input_ndarray.shape, input_ndarray.dtype)
input_group_img.create_dataset(
str(data_id),
shape=input_ndarray.shape,
compression='gzip',
compression_opts=4,
data=input_ndarray)
for kid, input_key in enumerate(input_keys):
input_groups[kid].create_dataset(
str(data_id),
shape=metas[kid].shape,
compression='gzip',
compression_opts=4,
data=metas[kid])
file.flush()
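# Resulting HDF5 layout (sketch), as consumed by HDF5CalibratorBEVDet above:
#
#   calib_data/
#     end2end/
#       img/{0, 1, ...}           float32 image tensors, one dataset per sample
#       ranks_bev/{0, 1, ...}     int32 bev-pool indices
#       ranks_depth/..., ranks_feat/...,
#       interval_starts/..., interval_lengths/...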
def create_calib_input_data(calib_file: str,
deploy_cfg: Union[str, mmcv.Config],
model_cfg: Union[str, mmcv.Config],
model_checkpoint: Optional[str] = None,
dataset_cfg: Optional[Union[str,
mmcv.Config]] = None,
dataset_type: str = 'val',
device: str = 'cpu',
metas: list = [None],
calib_num=None) -> None:
"""Create dataset for post-training quantization.
Args:
calib_file (str): The output calibration data file.
deploy_cfg (str | mmcv.Config): Deployment config file or
Config object.
model_cfg (str | mmcv.Config): Model config file or Config object.
model_checkpoint (str): A checkpoint path of PyTorch model,
defaults to `None`.
dataset_cfg (Optional[Union[str, mmcv.Config]], optional): Model
config to provide calibration dataset. If none, use `model_cfg`
as the dataset config. Defaults to None.
dataset_type (str, optional): The dataset type. Defaults to 'val'.
device (str, optional): Device to create dataset. Defaults to 'cpu'.
"""
with no_mp():
if dataset_cfg is None:
dataset_cfg = model_cfg
# load cfg if necessary
deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg)
if dataset_cfg is None:
dataset_cfg = model_cfg
# load dataset_cfg if necessary
dataset_cfg = load_config(dataset_cfg)[0]
from mmdeploy.apis.utils import build_task_processor
task_processor = build_task_processor(model_cfg, deploy_cfg, device)
dataset = task_processor.build_dataset(dataset_cfg, dataset_type)
dataloader = task_processor.build_dataloader(
dataset, 1, 1, dist=False, shuffle=False)
create_calib_input_data_impl(
calib_file, dataloader, model_partition=False, metas=metas, calib_num=calib_num)
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
output_file_prefix: str,
input_shapes: Dict[str, Sequence[int]],
max_workspace_size: int = 0,
fp16_mode: bool = False,
int8_mode: bool = False,
int8_param: Optional[dict] = None,
device_id: int = 0,
log_level: trt.Logger.Severity = trt.Logger.ERROR,
**kwargs) -> trt.ICudaEngine:
"""Create a tensorrt engine from ONNX.
Modified from mmdeploy.backend.tensorrt.utils.from_onnx
"""
import os
old_cuda_device = os.environ.get('CUDA_DEVICE', None)
os.environ['CUDA_DEVICE'] = str(device_id)
import pycuda.autoinit # noqa:F401
if old_cuda_device is not None:
os.environ['CUDA_DEVICE'] = old_cuda_device
else:
os.environ.pop('CUDA_DEVICE')
load_tensorrt_plugin()
# create builder and network
logger = trt.Logger(log_level)
builder = trt.Builder(logger)
EXPLICIT_BATCH = 1 << (int)(
trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(EXPLICIT_BATCH)
# parse onnx
parser = trt.OnnxParser(network, logger)
if isinstance(onnx_model, str):
onnx_model = onnx.load(onnx_model)
if not parser.parse(onnx_model.SerializeToString()):
error_msgs = ''
for error in range(parser.num_errors):
error_msgs += f'{parser.get_error(error)}\n'
raise RuntimeError(f'Failed to parse onnx, {error_msgs}')
# config builder
if version.parse(trt.__version__) < version.parse('8'):
builder.max_workspace_size = max_workspace_size
config = builder.create_builder_config()
config.max_workspace_size = max_workspace_size
cuda_version = search_cuda_version()
if cuda_version is not None:
version_major = int(cuda_version.split('.')[0])
if version_major < 11:
# cuBLASLt requires CUDA >= 11, so disable the CUBLAS_LT tactic source for older CUDA versions # noqa E501
tactic_source = config.get_tactic_sources() - (
1 << int(trt.TacticSource.CUBLAS_LT))
config.set_tactic_sources(tactic_source)
profile = builder.create_optimization_profile()
for input_name, param in input_shapes.items():
min_shape = param['min_shape']
opt_shape = param['opt_shape']
max_shape = param['max_shape']
profile.set_shape(input_name, min_shape, opt_shape, max_shape)
config.add_optimization_profile(profile)
if fp16_mode:
if version.parse(trt.__version__) < version.parse('8'):
builder.fp16_mode = fp16_mode
config.set_flag(trt.BuilderFlag.FP16)
if int8_mode:
config.set_flag(trt.BuilderFlag.INT8)
assert int8_param is not None
config.int8_calibrator = HDF5CalibratorBEVDet(
int8_param['calib_file'],
input_shapes,
model_type=int8_param['model_type'],
device_id=device_id,
algorithm=int8_param.get(
'algorithm', trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2))
if version.parse(trt.__version__) < version.parse('8'):
builder.int8_mode = int8_mode
builder.int8_calibrator = config.int8_calibrator
# create engine
engine = builder.build_engine(network, config)
assert engine is not None, 'Failed to create TensorRT engine'
save(engine, output_file_prefix + '.engine')
print('Saved engine at', output_file_prefix + '.engine')
return engine
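# Illustrative call of from_onnx (file names and shapes are assumptions; a
# complete input_shapes dict would list every network input, as done in main()
# below):
#
#   input_shapes = dict(
#       img=dict(min_shape=[6, 3, 256, 704],
#                opt_shape=[6, 3, 256, 704],
#                max_shape=[6, 3, 256, 704]),
#       ranks_bev=dict(min_shape=[1], opt_shape=[200000], max_shape=[400000]))
#   from_onnx('work_dir/bevdet.onnx', 'work_dir/bevdet',
#             input_shapes=input_shapes, fp16_mode=True,
#             max_workspace_size=1 << 30)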
def main():
args = parse_args()
max_workspace_size = 200*200*256*(2**8)
if not os.path.exists(args.work_dir):
os.makedirs(args.work_dir)
load_tensorrt_plugin()
# assert 'bev_pool_v2' in get_plugin_names(), \
# 'bev_pool_v2 is not in the plugin list of tensorrt, ' \
# 'please install mmdeploy from ' \
# 'https://github.com/HuangJunJie2017/mmdeploy.git'
# if args.int8:
# assert args.fp16
model_prefix = args.prefix
if args.int8:
model_prefix = model_prefix + '_int8'
elif args.fp16:
model_prefix = model_prefix + '_fp16'
cfg = Config.fromfile(args.config)
cfg.model.pretrained = None
cfg.model.type = cfg.model.type + 'TRT'
cfg = compat_cfg(cfg)
cfg.gpu_ids = [0]
# import modules from plugin/xx, registry will be updated
if hasattr(cfg, 'plugin'):
if cfg.plugin:
import importlib
if hasattr(cfg, 'plugin_dir'):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
plg_lib = importlib.import_module(_module_path)
# build the dataloader
test_dataloader_default_args = dict(
samples_per_gpu=1, workers_per_gpu=0, dist=False, shuffle=False)
if isinstance(cfg.data.test, dict):
cfg.data.test.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(
cfg.data.test.pipeline)
elif isinstance(cfg.data.test, list):
for ds_cfg in cfg.data.test:
ds_cfg.test_mode = True
if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:
for ds_cfg in cfg.data.test:
ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)
test_loader_cfg = {
**test_dataloader_default_args,
**cfg.data.get('test_dataloader', {})
}
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(dataset, **test_loader_cfg)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
# assert model.img_view_transformer.grid_size[0] == 128
# assert model.img_view_transformer.grid_size[1] == 128
# assert model.img_view_transformer.grid_size[2] == 1
if os.path.exists(args.checkpoint):
load_checkpoint(model, args.checkpoint, map_location='cpu')
else:
print(args.checkpoint, 'does not exist!')
if args.fuse_conv_bn:
model_prefix = model_prefix + '_fuse'
model = fuse_module(model)
model.cuda()
model.eval()
for i, data in enumerate(data_loader):
inputs = [t.cuda() for t in data['img_inputs'][0]]
img = inputs[0].squeeze(0)
if img.shape[0] > 6:
img = img[:6]
if model.__class__.__name__ in ['FBOCCTRT', 'FBOCC2DTRT']:
metas = model.get_bev_pool_input(inputs, img_metas=data['img_metas'])
else:
if model.__class__.__name__ in ['BEVDetOCCTRT']:
metas = model.get_bev_pool_input(inputs)
elif model.__class__.__name__ in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
metas, mlp_input = model.get_bev_pool_input(inputs)
if model.__class__.__name__ in ['FBOCCTRT', 'FBOCC2DTRT', 'BEVDetOCCTRT']:
onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
metas[2].int().contiguous(), metas[0].int().contiguous(),
metas[3].int().contiguous(), metas[4].int().contiguous())
dynamic_axes={
"ranks_depth" : {0: 'M'},
"ranks_feat" : {0: 'M'},
"ranks_bev" : {0: 'M'},
"interval_starts" : {0: 'N'},
"interval_lengths" : {0: 'N'},
}
input_names=[
'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
'interval_starts', 'interval_lengths'
]
elif model.__class__.__name__ in ['BEVDepthOCCTRT', 'BEVDepthPanoTRT']:
onnx_input = (img.float().contiguous(), metas[1].int().contiguous(),
metas[2].int().contiguous(), metas[0].int().contiguous(),
metas[3].int().contiguous(), metas[4].int().contiguous(), mlp_input)
dynamic_axes={
"ranks_depth" : {0: 'M'},
"ranks_feat" : {0: 'M'},
"ranks_bev" : {0: 'M'},
"interval_starts" : {0: 'N'},
"interval_lengths" : {0: 'N'},
# "mlp_input" : {0: 'K'},
}
input_names=[
'img', 'ranks_depth', 'ranks_feat', 'ranks_bev',
'interval_starts', 'interval_lengths', 'mlp_input',
]
with torch.no_grad():
if model.wdet3d and not model.wocc:
output_names = [f'output_{j}' for j in range(6 * len(model.pts_bbox_head.task_heads))]
elif model.wdet3d and model.wocc:
output_names = [f'output_{j}' for j in range(1 + 6 * len(model.pts_bbox_head.task_heads))]
elif not model.wdet3d and model.wocc:
output_names = [f'output_{j}' for j in range(1)]
else:
raise ValueError('At least one of wdet3d and wocc must be True!')
# part1
from functools import partial
model.forward = partial(model.forward_part1,
mlp_input = mlp_input
)
onnx_path = args.work_dir + 'part1.onnx'
torch.onnx.export(
model,
(img.float().contiguous(),),
onnx_path,
export_params=True,
opset_version=11,
input_names=['img'],
output_names=['tran_feat','depth'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
tran_feat, depth = model.forward(img)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
# part2
from functools import partial
model.forward = partial(model.forward_part2,
ranks_depth = onnx_input[1],
ranks_feat = onnx_input[2],
ranks_bev = onnx_input[3],
interval_starts = onnx_input[4],
interval_lengths = onnx_input[5]
)
onnx_path = args.work_dir + 'part2.onnx'
torch.onnx.export(
model,
(tran_feat.float().contiguous(), depth.float().contiguous()),
onnx_path,
export_params=True,
opset_version=11,
input_names=['tran_feat','depth'],
output_names=['bev_feat'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
bev_pool_feat = model.forward(tran_feat, depth)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
# part3
model.forward = model.forward_part3
onnx_path = args.work_dir + 'part3.onnx'
torch.onnx.export(
model,
(bev_pool_feat,),
onnx_path,
export_params=True,
opset_version=11,
input_names=['bev_feat'],
output_names=['occ_pred', 'inst_center_reg', 'inst_center_height', 'inst_center_heatmap'])
# check onnx model
onnx_model = onnx.load(onnx_path)
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
model_file = 'model.onnx'
onnx.save(onnx.shape_inference.infer_shapes(onnx_model), onnx_path)
print('====== onnx is saved at : ', onnx_path)
# # from onnxsim import simplify
# # model_simp, check = simplify(onnx_model)
# # assert check, "Simplified ONNX model could not be validated"
# # onnx.save(model_simp, onnx_path)
# # print('====== onnx is saved at : ', onnx_path)
from functools import partial
model.forward = partial(model.forward_ori,
ranks_depth = onnx_input[1],
ranks_feat = onnx_input[2],
ranks_bev = onnx_input[3],
interval_starts = onnx_input[4],
interval_lengths = onnx_input[5],
mlp_input = mlp_input
)
torch.onnx.export(
model,
(onnx_input[0],),
args.work_dir + model_prefix + '.onnx',
opset_version=11,
dynamic_axes=dynamic_axes,
input_names=['img'],
output_names=['occ_pred', 'inst_center_reg', 'inst_center_height', 'inst_center_heatmap']
)
print('output_names:', output_names)
print('====== onnx is saved at : ', args.work_dir + model_prefix + '.onnx')
# check onnx model
onnx_model = onnx.load(args.work_dir + model_prefix + '.onnx')
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
outs = model.forward(onnx_input[0])
model.forward = model.forward_with_argmax
out = model(*onnx_input)
output_names = ['cls_occ_label']
torch.onnx.export(
model,
onnx_input,
args.work_dir + model_prefix + '_with_argmax.onnx',
opset_version=11,
dynamic_axes=dynamic_axes,
input_names=input_names,
output_names=output_names)
print('output_names:', output_names)
print('====== onnx is saved at : ', args.work_dir + model_prefix + '_with_argmax.onnx')
# check onnx model
onnx_model = onnx.load(args.work_dir + model_prefix + '_with_argmax.onnx')
try:
onnx.checker.check_model(onnx_model)
except Exception:
print('ONNX Model Incorrect')
else:
print('ONNX Model Correct')
break
return
if __name__ == '__main__':
main()