Unverified Commit 579b0799 authored by Wenwei Zhang, committed by GitHub

Bump to V0.6.0 (#118)



* Add gitlab CI back

* clean isort

* Update gitlab CI version

* Update mmcv install

* fix unit test bug

* waymo

* Use new flake8

* Update mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py, tools/data_converter/waymo_converter.py files

* Add baseline configs for waymo

* fix linting

* yapf reformat

* update waymo results

* Update waymo model zoo and docs

* Bump v0.6.0

* Fix a minor bug when converting waymo data

* Fix cmds in the waymo doc

* Fix setup.cfg to pass isort test

* Fix waymo configs

* Update model zoo link & doc

* update version date

* clean ci
Co-authored-by: wangtai <wangtai@sensetime.com>
Co-authored-by: Tai-Wang <tab_wang@outlook.com>
parent 62ce67c0
......@@ -251,7 +251,6 @@ class DataBaseSampler(object):
file_path = os.path.join(
self.data_root,
info['path']) if self.data_root else info['path']
results = dict(pts_filename=file_path)
s_points = self.points_loader(results)['points']
s_points[:, :3] += info['box3d_lidar'][:3]
......
import mmcv
import numpy as np
import os
import tempfile
import torch
from mmcv.utils import print_log
from os import path as osp
from mmdet.datasets import DATASETS
from ..core.bbox import Box3DMode, points_cam2img
from ..core.evaluation.waymo_utils.prediction_kitti_to_waymo import KITTI2Waymo
from .kitti_dataset import KittiDataset
@DATASETS.register_module()
class WaymoDataset(KittiDataset):
"""Waymo Dataset.
This class serves as the API for experiments on the Waymo Dataset.
Please refer to `<https://waymo.com/open/download/>`_ for data downloading.
It is recommended to symlink the dataset root to $MMDETECTION3D/data and
organize them as the doc shows.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
split (str): Split of input data.
pts_prefix (str, optional): Prefix of points files.
Defaults to 'velodyne'.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then convert them to `box_type_3d`.
Defaults to 'LiDAR' in this dataset. Available options include
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
pcd_limit_range (list): The range of point cloud used to filter
invalid predicted boxes. Default: [-85, -85, -5, 85, 85, 5].
"""
CLASSES = ('Car', 'Cyclist', 'Pedestrian')
def __init__(self,
data_root,
ann_file,
split,
pts_prefix='velodyne',
pipeline=None,
classes=None,
modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
load_interval=1,
pcd_limit_range=[-85, -85, -5, 85, 85, 5]):
super().__init__(
data_root=data_root,
ann_file=ann_file,
split=split,
pts_prefix=pts_prefix,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
pcd_limit_range=pcd_limit_range)
# to load a subset, just set the load_interval in the dataset config
self.data_infos = self.data_infos[::load_interval]
if hasattr(self, 'flag'):
self.flag = self.flag[::load_interval]
def _get_pts_filename(self, idx):
pts_filename = osp.join(self.root_split, self.pts_prefix,
f'{idx:07d}.bin')
return pts_filename
def get_data_info(self, index):
"""Get data info according to the given index.
Args:
index (int): Index of the sample data to get.
Returns:
dict: Standard input_dict containing the data information.
- sample_idx (str): sample index
- pts_filename (str): filename of point clouds
- img_prefix (str | None): prefix of image files
- img_info (dict): image info
- lidar2img (list[np.ndarray], optional): transformations from
lidar to different cameras
- ann_info (dict): annotation info
"""
info = self.data_infos[index]
sample_idx = info['image']['image_idx']
img_filename = os.path.join(self.data_root,
info['image']['image_path'])
# TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32)
lidar2img = P0 @ rect @ Trv2c
pts_filename = self._get_pts_filename(sample_idx)
input_dict = dict(
sample_idx=sample_idx,
pts_filename=pts_filename,
img_prefix=None,
img_info=dict(filename=img_filename),
lidar2img=lidar2img)
if not self.test_mode:
annos = self.get_ann_info(index)
input_dict['ann_info'] = annos
return input_dict
def format_results(self,
outputs,
pklfile_prefix=None,
submission_prefix=None,
data_format='waymo'):
"""Format the results to pkl file.
Args:
outputs (list[dict]): Testing results of the dataset.
pklfile_prefix (str | None): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str | None): The prefix of submitted files. It
includes the file path and the prefix of filename, e.g.,
"a/b/prefix". If not specified, a temp file will be created.
Default: None.
data_format (str | None): Output data format. Default: 'waymo'.
Another supported choice is 'kitti'.
Returns:
tuple: (result_files, tmp_dir), where result_files is a dict
containing the output file paths and tmp_dir is the temporary
directory created for saving files when pklfile_prefix is not
specified.
"""
if pklfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
assert ('waymo' in data_format or 'kitti' in data_format), \
f'invalid data_format {data_format}'
if (not isinstance(outputs[0], dict)) or 'img_bbox' in outputs[0]:
raise TypeError('Not supported type for reformat results.')
elif 'pts_bbox' in outputs[0]:
result_files = dict()
for name in outputs[0]:
results_ = [out[name] for out in outputs]
pklfile_prefix_ = pklfile_prefix + name
if submission_prefix is not None:
submission_prefix_ = f'{submission_prefix}_{name}'
else:
submission_prefix_ = None
result_files_ = self.bbox2result_kitti(results_, self.CLASSES,
pklfile_prefix_,
submission_prefix_)
result_files[name] = result_files_
else:
result_files = self.bbox2result_kitti(outputs, self.CLASSES,
pklfile_prefix,
submission_prefix)
if 'waymo' in data_format:
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if self.split == 'training':
waymo_tfrecords_dir = osp.join(waymo_root, 'validation')
prefix = '1'
elif self.split == 'testing':
waymo_tfrecords_dir = osp.join(waymo_root, 'testing')
prefix = '2'
else:
raise ValueError('Not supported split value.')
save_tmp_dir = tempfile.TemporaryDirectory()
waymo_results_save_dir = save_tmp_dir.name
waymo_results_final_path = f'{pklfile_prefix}.bin'
if 'pts_bbox' in result_files:
converter = KITTI2Waymo(result_files['pts_bbox'],
waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
else:
converter = KITTI2Waymo(result_files, waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path, prefix)
converter.convert()
save_tmp_dir.cleanup()
return result_files, tmp_dir
def evaluate(self,
results,
metric='waymo',
logger=None,
pklfile_prefix=None,
submission_prefix=None,
show=False,
out_dir=None):
"""Evaluation in KITTI protocol.
Args:
results (list[dict]): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
Default: 'waymo'. Another supported metric is 'kitti'.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
pklfile_prefix (str | None): The prefix of pkl files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str | None): The prefix of submission data.
If not specified, the submission data will not be generated.
show (bool): Whether to visualize.
Default: False.
out_dir (str): Path to save the visualization results.
Default: None.
Returns:
dict[str, float]: Results of each evaluation metric.
"""
assert ('waymo' in metric or 'kitti' in metric), \
f'invalid metric {metric}'
if 'kitti' in metric:
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='kitti')
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.data_infos]
if isinstance(result_files, dict):
ap_dict = dict()
for name, result_files_ in result_files.items():
eval_types = ['bev', '3d']
ap_result_str, ap_dict_ = kitti_eval(
gt_annos,
result_files_,
self.CLASSES,
eval_types=eval_types)
for ap_type, ap in ap_dict_.items():
ap_dict[f'{name}/{ap_type}'] = float(
'{:.4f}'.format(ap))
print_log(
f'Results of {name}:\n' + ap_result_str, logger=logger)
else:
ap_result_str, ap_dict = kitti_eval(
gt_annos,
result_files,
self.CLASSES,
eval_types=['bev', '3d'])
print_log('\n' + ap_result_str, logger=logger)
if 'waymo' in metric:
waymo_root = osp.join(
self.data_root.split('kitti_format')[0], 'waymo_format')
if pklfile_prefix is None:
eval_tmp_dir = tempfile.TemporaryDirectory()
pklfile_prefix = osp.join(eval_tmp_dir.name, 'results')
else:
eval_tmp_dir = None
result_files, tmp_dir = self.format_results(
results,
pklfile_prefix,
submission_prefix,
data_format='waymo')
import subprocess
ret_bytes = subprocess.check_output(
'mmdet3d/core/evaluation/waymo_utils/' +
f'compute_detection_metrics_main {pklfile_prefix}.bin ' +
f'{waymo_root}/gt.bin',
shell=True)
ret_texts = ret_bytes.decode('utf-8')
print_log(ret_texts)
# parse the text to get ap_dict
ap_dict = {
'Vehicle/L1 mAP': 0,
'Vehicle/L1 mAPH': 0,
'Vehicle/L2 mAP': 0,
'Vehicle/L2 mAPH': 0,
'Pedestrian/L1 mAP': 0,
'Pedestrian/L1 mAPH': 0,
'Pedestrian/L2 mAP': 0,
'Pedestrian/L2 mAPH': 0,
'Sign/L1 mAP': 0,
'Sign/L1 mAPH': 0,
'Sign/L2 mAP': 0,
'Sign/L2 mAPH': 0,
'Cyclist/L1 mAP': 0,
'Cyclist/L1 mAPH': 0,
'Cyclist/L2 mAP': 0,
'Cyclist/L2 mAPH': 0
}
mAP_splits = ret_texts.split('mAP ')
mAPH_splits = ret_texts.split('mAPH ')
for idx, key in enumerate(ap_dict.keys()):
split_idx = int(idx / 2) + 1
if idx % 2 == 0: # mAP
ap_dict[key] = float(mAP_splits[split_idx].split(']')[0])
else: # mAPH
ap_dict[key] = float(mAPH_splits[split_idx].split(']')[0])
if eval_tmp_dir is not None:
eval_tmp_dir.cleanup()
if tmp_dir is not None:
tmp_dir.cleanup()
if show:
self.show(results, out_dir)
return ap_dict
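The parsing above assumes each metric appears in the tool's stdout as bracketed `[mAP x] [mAPH y]` pairs, in the same order as the keys of `ap_dict`. A minimal sketch of that assumption (the sample string is illustrative, not captured from the real binary):

# illustrative output line; the compute_detection_metrics_main format
# is inferred from the split logic above, not verified here
sample = 'OBJECT_TYPE_TYPE_VEHICLE_LEVEL_1: [mAP 0.7] [mAPH 0.69]'
# split on 'mAP ' / 'mAPH ' and cut at the closing bracket, as done above
assert float(sample.split('mAP ')[1].split(']')[0]) == 0.7
assert float(sample.split('mAPH ')[1].split(']')[0]) == 0.69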
def bbox2result_kitti(self,
net_outputs,
class_names,
pklfile_prefix=None,
submission_prefix=None):
"""Convert results to kitti format for evaluation and test submission.
Args:
net_outputs (List[np.ndarray]): List of arrays storing the
bbox and score.
class_names (List[str]): A list of class names.
pklfile_prefix (str | None): The prefix of pkl file.
submission_prefix (str | None): The prefix of submission file.
Returns:
List[dict]: A list of dicts in the kitti 3d format.
"""
assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
det_annos = []
print('\nConverting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.data_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
box_dict = self.convert_valid_bboxes(pred_dicts, info)
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
anno['score'].append(score)
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
if submission_prefix is not None:
curr_file = f'{submission_prefix}/{sample_idx:07d}.txt'
with open(curr_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
format(anno['name'][idx], anno['alpha'][idx],
bbox[idx][0], bbox[idx][1],
bbox[idx][2], bbox[idx][3],
dims[idx][1], dims[idx][2],
dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][2], anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
else:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
if pklfile_prefix is not None:
if not pklfile_prefix.endswith(('.pkl', '.pickle')):
out = f'{pklfile_prefix}.pkl'
mmcv.dump(det_annos, out)
print(f'Result is saved to {out}.')
return det_annos
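The submission writer above emits one standard 16-field KITTI label line per box; a sketch of the layout it produces (values illustrative):

# name trunc occl alpha x1 y1 x2 y2 h w l x y z rotation_y score
line = ('Car -1 -1 -1.5708 100.0000 120.0000 200.0000 180.0000 '
        '1.5000 1.8000 4.2000 5.0000 1.0000 20.0000 -1.5708 0.9000')
assert len(line.split()) == 16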
def convert_valid_bboxes(self, box_dict, info):
"""Convert the boxes into valid format.
Args:
box_dict (dict): Bounding boxes to be converted.
- boxes_3d (:obj:``LiDARInstance3DBoxes``): 3D bounding boxes.
- scores_3d (np.ndarray): Scores of predicted boxes.
- labels_3d (np.ndarray): Class labels of predicted boxes.
info (dict): Dataset information dictionary.
Returns:
dict: Valid boxes after conversion.
- bbox (np.ndarray): 2D bounding boxes (in camera 0).
- box3d_camera (np.ndarray): 3D boxes in camera coordinates.
- box3d_lidar (np.ndarray): 3D boxes in lidar coordinates.
- scores (np.ndarray): Scores of predicted boxes.
- label_preds (np.ndarray): Class labels of predicted boxes.
- sample_idx (np.ndarray): Sample index.
"""
# TODO: refactor this function
box_preds = box_dict['boxes_3d']
scores = box_dict['scores_3d']
labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
# TODO: remove the hack of yaw
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if len(box_preds) == 0:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx)
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P0 = info['calib']['P0'].astype(np.float32)
P0 = box_preds.tensor.new_tensor(P0)
box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
box_corners = box_preds_camera.corners
box_corners_in_image = points_cam2img(box_corners, P0)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check box_preds
limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
(box_preds.center < limit_range[3:]))
valid_inds = valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=box_preds[valid_inds].tensor.numpy(),
scores=scores[valid_inds].numpy(),
label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx,
)
else:
return dict(
bbox=np.zeros([0, 4]),
box3d_camera=np.zeros([0, 7]),
box3d_lidar=np.zeros([0, 7]),
scores=np.zeros([0]),
label_preds=np.zeros([0, 4]),
sample_idx=sample_idx,
)
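For orientation, a minimal sketch of how this new dataset class might be exercised; the paths, the empty pipeline, and the data layout are illustrative assumptions, not taken from this diff:

# hypothetical usage sketch of the new WaymoDataset
from mmdet3d.datasets import WaymoDataset

dataset = WaymoDataset(
    data_root='data/waymo/kitti_format/',  # assumed layout
    ann_file='data/waymo/kitti_format/waymo_infos_val.pkl',
    split='training',  # validation frames live under the 'training' split
    pipeline=[],       # real configs define a full loading pipeline
    load_interval=5,   # keep every 5th frame, per the comment in __init__
    classes=('Car', 'Cyclist', 'Pedestrian'))
# `results` would come from a test run; evaluate() then converts the
# predictions to a .bin and shells out to compute_detection_metrics_main:
# ap_dict = dataset.evaluate(results, metric='waymo')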
......@@ -48,8 +48,8 @@ class Base3DDetector(BaseDetector):
Note this setting will change the expected inputs. When
`return_loss=True`, img and img_metas are single-nested (i.e.
torch.Tensor and list[dict]), and when `return_loss=False`, img
and img_metas should be double nested (i.e. list[torch.Tensor],
list[list[dict]]), with the outer list indicating test time
augmentations.
"""
......
......@@ -7,8 +7,8 @@ class TwoStage3DDetector(Base3DDetector, TwoStageDetector):
"""Base class of two-stage 3D detector.
It inherits original ``:class:TwoStageDetector`` and
``:class:Base3DDetector``. This class could serve as a base class
for all two-stage 3D detectors.
"""
def __init__(self, **kwargs):
......
......@@ -93,7 +93,7 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
"""Test with augmentations.
If rescale is False, then returned bboxes and masks will fit the
scale of imgs[0].
"""
pass
......@@ -7,8 +7,8 @@ from . import furthest_point_sample_ext
class FurthestPointSampling(Function):
"""Furthest Point Sampling.
Uses iterative furthest point sampling to select a set of features
whose corresponding points have the furthest distance.
"""
@staticmethod
......
......@@ -5,8 +5,9 @@ import torch
def scatter_nd(indices, updates, shape):
"""pytorch edition of tensorflow scatter_nd.
this function don't contain except handle code. so use this carefully when
indice repeats, don't support repeat add which is supported in tensorflow.
this function don't contain except handle code. so use this
carefully when indice repeats, don't support repeat add which is
supported in tensorflow.
"""
ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device)
ndim = indices.shape[-1]
......
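A small usage sketch of scatter_nd as described above (assumes the function defined above is in scope; duplicate indices would silently overwrite rather than accumulate):

import torch

# indices: [N, ndim] integer coordinates into the output tensor
indices = torch.tensor([[0, 1], [2, 3]])
updates = torch.tensor([9.0, 5.0])
out = scatter_nd(indices, updates, shape=[4, 4])
# out[0, 1] == 9.0 and out[2, 3] == 5.0; all other entries stay 0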
......@@ -7,3 +7,5 @@ plyfile
# by default we also use tensorboard to log results
tensorboard
trimesh>=2.35.39,<2.35.40
scikit-image
waymo-open-dataset-tf-2-1-0==1.2.0
......@@ -8,6 +8,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet,mmdet3d
known_third_party = load_scannet_data,lyft_dataset_sdk,m2r,matplotlib,mmcv,nuimages,numba,numpy,nuscenes,pandas,plyfile,pycocotools,pyquaternion,pytest,recommonmark,scannet_utils,scipy,seaborn,shapely,skimage,tensorflow,terminaltables,torch,trimesh,waymo_open_dataset
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
......@@ -44,8 +44,8 @@ def _get_config_module(fname):
def _get_model_cfg(fname):
"""Grab configs necessary to create a model.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
......@@ -56,8 +56,8 @@ def _get_model_cfg(fname):
def _get_detector_cfg(fname):
"""Grab configs necessary to create a detector.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......
......@@ -37,8 +37,8 @@ def _get_config_module(fname):
def _get_detector_cfg(fname):
"""Grab configs necessary to create a detector.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......
......@@ -46,8 +46,8 @@ def _get_config_module(fname):
def _get_head_cfg(fname):
"""Grab configs necessary to create a bbox_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......@@ -64,8 +64,8 @@ def _get_head_cfg(fname):
def _get_rpn_head_cfg(fname):
"""Grab configs necessary to create a rpn_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......@@ -82,8 +82,8 @@ def _get_rpn_head_cfg(fname):
def _get_roi_head_cfg(fname):
"""Grab configs necessary to create a roi_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......@@ -100,8 +100,8 @@ def _get_roi_head_cfg(fname):
def _get_pts_bbox_head_cfg(fname):
"""Grab configs necessary to create a pts_bbox_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......@@ -118,8 +118,8 @@ def _get_pts_bbox_head_cfg(fname):
def _get_vote_head_cfg(fname):
"""Grab configs necessary to create a vote_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
......@@ -136,8 +136,8 @@ def _get_vote_head_cfg(fname):
def _get_parta2_bbox_head_cfg(fname):
"""Grab configs necessary to create a parta2_bbox_head.
These are deep copied to allow for safe modification of parameters
without influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
......
......@@ -5,6 +5,7 @@ from tools.data_converter import indoor_converter as indoor
from tools.data_converter import kitti_converter as kitti
from tools.data_converter import lyft_converter as lyft_converter
from tools.data_converter import nuscenes_converter as nuscenes_converter
from tools.data_converter import waymo_converter as waymo
from tools.data_converter.create_gt_database import create_groundtruth_database
......@@ -133,6 +134,48 @@ def sunrgbd_data_prep(root_path, info_prefix, out_dir, workers):
root_path, info_prefix, out_dir, workers=workers)
def waymo_data_prep(root_path,
info_prefix,
version,
out_dir,
workers,
max_sweeps=5):
"""Prepare the info file for waymo dataset.
Args:
root_path (str): Path of dataset root.
info_prefix (str): The prefix of info filenames.
version (str): Version of the dataset.
out_dir (str): Output directory of the generated info file.
workers (int): Number of threads to be used.
max_sweeps (int): Number of input consecutive frames. Default: 5.
Here we store pose information of these frames for later use.
"""
splits = ['training', 'validation', 'testing']
for i, split in enumerate(splits):
load_dir = osp.join(root_path, 'waymo_format', split)
if split == 'validation':
save_dir = osp.join(out_dir, 'kitti_format', 'training')
else:
save_dir = osp.join(out_dir, 'kitti_format', split)
converter = waymo.Waymo2KITTI(
load_dir,
save_dir,
prefix=str(i),
workers=workers,
test_mode=(split == 'testing'))
converter.convert()
# Generate waymo infos
out_dir = osp.join(out_dir, 'kitti_format')
kitti.create_waymo_info_file(out_dir, info_prefix, max_sweeps=max_sweeps)
create_groundtruth_database(
'WaymoDataset',
out_dir,
info_prefix,
f'{out_dir}/{info_prefix}_infos_train.pkl',
relative_path=False,
with_mask=False)
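As a usage note, this converter is driven through tools/create_data.py; a hedged sketch of an equivalent direct call (argument values are illustrative, and `version` is forwarded but unused by the function above):

# roughly what `python tools/create_data.py waymo --root-path ./data/waymo
# --out-dir ./data/waymo --extra-tag waymo` ends up invoking (values assumed)
waymo_data_prep(
    root_path='./data/waymo',
    info_prefix='waymo',
    version='v1.2',
    out_dir='./data/waymo',
    workers=64,
    max_sweeps=5)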
parser = argparse.ArgumentParser(description='Data converter arg parser')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
......@@ -213,6 +256,14 @@ if __name__ == '__main__':
dataset_name='LyftDataset',
out_dir=args.out_dir,
max_sweeps=args.max_sweeps)
elif args.dataset == 'waymo':
waymo_data_prep(
root_path=args.root_path,
info_prefix=args.extra_tag,
version=args.version,
out_dir=args.out_dir,
workers=args.workers,
max_sweeps=args.max_sweeps)
elif args.dataset == 'scannet':
scannet_data_prep(
root_path=args.root_path,
......
......@@ -183,6 +183,31 @@ def create_groundtruth_database(dataset_class_name,
with_bbox_3d=True,
with_label_3d=True)
])
elif dataset_class_name == 'WaymoDataset':
file_client_args = dict(backend='disk')
dataset_cfg.update(
test_mode=False,
split='training',
modality=dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
),
pipeline=[
dict(
type='LoadPointsFromFile',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args)
])
dataset = build_dataset(dataset_cfg)
if database_save_path is None:
......
import mmcv
import numpy as np
from pathlib import Path
from mmdet3d.core.bbox import box_np_ops
from .kitti_data_utils import get_kitti_image_info, get_waymo_image_info
def convert_to_kitti_info_version2(info):
......@@ -43,7 +42,7 @@ def _calculate_num_points_in_gt(data_path,
relative_path,
remove_outside=True,
num_features=4):
for info in mmcv.track_iter_progress(infos):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
......@@ -80,7 +79,7 @@ def _calculate_num_points_in_gt(data_path,
def create_kitti_info_file(data_path,
pkl_prefix='kitti',
save_path=None,
relative_path=True):
"""Create info file of KITTI dataset.
......@@ -113,8 +112,7 @@ def create_kitti_info_file(data_path,
_calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Kitti info train file is saved to {filename}')
mmcv.dump(kitti_infos_train, filename)
kitti_infos_val = get_kitti_image_info(
data_path,
training=True,
......@@ -125,12 +123,10 @@ def create_kitti_info_file(data_path,
_calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
filename = save_path / f'{pkl_prefix}_infos_val.pkl'
print(f'Kitti info val file is saved to {filename}')
mmcv.dump(kitti_infos_val, filename)
filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
print(f'Kitti info trainval file is saved to {filename}')
mmcv.dump(kitti_infos_train + kitti_infos_val, filename)
kitti_infos_test = get_kitti_image_info(
data_path,
......@@ -142,18 +138,109 @@ def create_kitti_info_file(data_path,
relative_path=relative_path)
filename = save_path / f'{pkl_prefix}_infos_test.pkl'
print(f'Kitti info test file is saved to {filename}')
mmcv.dump(kitti_infos_test, filename)
def create_waymo_info_file(data_path,
pkl_prefix='waymo',
save_path=None,
relative_path=True,
max_sweeps=5):
"""Create info file of waymo dataset.
Given the raw data, generate its related info file in pkl format.
Args:
data_path (str): Path of the data root.
pkl_prefix (str): Prefix of the info file to be generated.
save_path (str | None): Path to save the info file.
relative_path (bool): Whether to use relative path.
max_sweeps (int): Max sweeps before the detection frame to be used.
"""
imageset_folder = Path(data_path) / 'ImageSets'
train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt'))
val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt'))
test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))
print('Generating info. This may take several minutes.')
if save_path is None:
save_path = Path(data_path)
else:
save_path = Path(save_path)
waymo_infos_train = get_waymo_image_info(
data_path,
training=True,
velodyne=True,
calib=True,
pose=True,
image_ids=train_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
_calculate_num_points_in_gt(
data_path,
waymo_infos_train,
relative_path,
num_features=6,
remove_outside=False)
filename = save_path / f'{pkl_prefix}_infos_train.pkl'
print(f'Waymo info train file is saved to {filename}')
mmcv.dump(waymo_infos_train, filename)
waymo_infos_val = get_waymo_image_info(
data_path,
training=True,
velodyne=True,
calib=True,
pose=True,
image_ids=val_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
_calculate_num_points_in_gt(
data_path,
waymo_infos_val,
relative_path,
num_features=6,
remove_outside=False)
filename = save_path / f'{pkl_prefix}_infos_val.pkl'
print(f'Waymo info val file is saved to {filename}')
mmcv.dump(waymo_infos_val, filename)
filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
print(f'Waymo info trainval file is saved to {filename}')
mmcv.dump(waymo_infos_train + waymo_infos_val, filename)
waymo_infos_test = get_waymo_image_info(
data_path,
training=False,
label_info=False,
velodyne=True,
calib=True,
pose=True,
image_ids=test_img_ids,
relative_path=relative_path,
max_sweeps=max_sweeps)
filename = save_path / f'{pkl_prefix}_infos_test.pkl'
print(f'Waymo info test file is saved to {filename}')
mmcv.dump(waymo_infos_test, filename)
def _create_reduced_point_cloud(data_path,
info_path,
save_path=None,
back=False,
num_features=4,
front_camera_id=2):
"""Create reduced point clouds for given info.
Args:
data_path (str): Path of original data.
info_path (str): Path of data info.
save_path (str | None): Path to save reduced point cloud data.
Default: None.
back (bool): Whether to flip the points to back.
num_features (int): Number of point features. Default: 4.
front_camera_id (int): The referenced/front camera ID. Default: 2.
"""
kitti_infos = mmcv.load(info_path)
for info in mmcv.track_iter_progress(kitti_infos):
pc_info = info['point_cloud']
image_info = info['image']
calib = info['calib']
......@@ -161,9 +248,13 @@ def _create_reduced_point_cloud(data_path,
v_path = pc_info['velodyne_path']
v_path = Path(data_path) / v_path
points_v = np.fromfile(
str(v_path), dtype=np.float32,
count=-1).reshape([-1, num_features])
rect = calib['R0_rect']
if front_camera_id == 2:
P2 = calib['P2']
else:
P2 = calib[f'P{str(front_camera_id)}']
Trv2c = calib['Tr_velo_to_cam']
# first remove z < 0 points
# keep = points_v[:, -1] > 0
......@@ -196,10 +287,10 @@ def create_reduced_point_cloud(data_path,
test_info_path=None,
save_path=None,
with_back=False):
"""Create reduced point cloud info file.
"""Create reduced point clouds for training/validation/testing.
Args:
data_path (str): Path of original data.
pkl_prefix (str): Prefix of info files.
train_info_path (str | None): Path of training set info.
Default: None.
......@@ -207,8 +298,8 @@ def create_reduced_point_cloud(data_path,
Default: None.
test_info_path (str | None): Path of test set info.
Default: None.
save_path (str | None): Path to save reduced point cloud data.
with_back (bool): Whether to flip the points to back.
"""
if train_info_path is None:
train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'
......@@ -219,7 +310,7 @@ def create_reduced_point_cloud(data_path,
print('create reduced point cloud for training set')
_create_reduced_point_cloud(data_path, train_info_path, save_path)
print('create reduced point cloud for validation set')
_create_reduced_point_cloud(data_path, val_info_path, save_path)
print('create reduced point cloud for testing set')
_create_reduced_point_cloud(data_path, test_info_path, save_path)
......
import numpy as np
from collections import OrderedDict
from concurrent import futures as futures
from os import path as osp
from pathlib import Path
from skimage import io
def get_image_index_str(img_idx, use_prefix_id=False):
if use_prefix_id:
return '{:07d}'.format(img_idx)
else:
return '{:06d}'.format(img_idx)
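The prefixed seven-digit ids encode the split prefix (0 for training, 1 for validation, 2 for testing, per the converter below), the tfrecord file index, and the frame index; a quick sketch (assumes get_image_index_str above is in scope):

# prefix '1' (validation) + file 003 + frame 042 -> sample id 1003042
idx = int('1' + '003' + '042')
assert get_image_index_str(idx, use_prefix_id=True) == '1003042'
assert get_image_index_str(7) == '000007'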
def get_kitti_info_path(idx,
......@@ -15,8 +19,9 @@ def get_kitti_info_path(idx,
file_tail='.png',
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
img_idx_str = get_image_index_str(idx, use_prefix_id)
img_idx_str += file_tail
prefix = Path(prefix)
if training:
......@@ -35,36 +40,52 @@ def get_image_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
info_type='image_2',
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, info_type, '.png', training,
relative_path, exist_check, use_prefix_id)
def get_label_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
info_type='label_2',
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, info_type, '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_velodyne_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'velodyne', '.bin', training,
relative_path, exist_check, use_prefix_id)
def get_calib_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'calib', '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_pose_path(idx,
prefix,
training=True,
relative_path=True,
exist_check=True,
use_prefix_id=False):
return get_kitti_info_path(idx, prefix, 'pose', '.txt', training,
relative_path, exist_check, use_prefix_id)
def get_label_anno(label_path):
......@@ -126,7 +147,6 @@ def get_kitti_image_info(path,
num_worker=8,
relative_path=True,
with_imageshape=True):
"""
KITTI annotation format version 2:
{
......@@ -241,6 +261,185 @@ def get_kitti_image_info(path,
return list(image_infos)
def get_waymo_image_info(path,
training=True,
label_info=True,
velodyne=False,
calib=False,
pose=False,
image_ids=7481,
extend_matrix=True,
num_worker=8,
relative_path=True,
with_imageshape=True,
max_sweeps=5):
"""
Waymo annotation format version like KITTI:
{
[optional]points: [N, 3+] point cloud
[optional, for kitti]image: {
image_idx: ...
image_path: ...
image_shape: ...
}
point_cloud: {
num_features: 6
velodyne_path: ...
}
[optional, for kitti]calib: {
R0_rect: ...
Tr_velo_to_cam0: ...
P0: ...
}
annos: {
location: [num_gt, 3] array
dimensions: [num_gt, 3] array
rotation_y: [num_gt] angle array
name: [num_gt] ground truth name array
[optional]difficulty: kitti difficulty
[optional]group_ids: used for multi-part object
}
}
"""
root_path = Path(path)
if not isinstance(image_ids, list):
image_ids = list(range(image_ids))
def map_func(idx):
info = {}
pc_info = {'num_features': 6}
calib_info = {}
image_info = {'image_idx': idx}
annotations = None
if velodyne:
pc_info['velodyne_path'] = get_velodyne_path(
idx, path, training, relative_path, use_prefix_id=True)
points = np.fromfile(
Path(path) / pc_info['velodyne_path'], dtype=np.float32)
points = np.copy(points).reshape(-1, pc_info['num_features'])
info['timestamp'] = np.int64(points[0, -1])
# values of the last dim are all the timestamp
image_info['image_path'] = get_image_path(
idx,
path,
training,
relative_path,
info_type='image_0',
use_prefix_id=True)
if with_imageshape:
img_path = image_info['image_path']
if relative_path:
img_path = str(root_path / img_path)
image_info['image_shape'] = np.array(
io.imread(img_path).shape[:2], dtype=np.int32)
if label_info:
label_path = get_label_path(
idx,
path,
training,
relative_path,
info_type='label_all',
use_prefix_id=True)
if relative_path:
label_path = str(root_path / label_path)
annotations = get_label_anno(label_path)
info['image'] = image_info
info['point_cloud'] = pc_info
if calib:
calib_path = get_calib_path(
idx, path, training, relative_path=False, use_prefix_id=True)
with open(calib_path, 'r') as f:
lines = f.readlines()
P0 = np.array([float(info) for info in lines[0].split(' ')[1:13]
]).reshape([3, 4])
P1 = np.array([float(info) for info in lines[1].split(' ')[1:13]
]).reshape([3, 4])
P2 = np.array([float(info) for info in lines[2].split(' ')[1:13]
]).reshape([3, 4])
P3 = np.array([float(info) for info in lines[3].split(' ')[1:13]
]).reshape([3, 4])
P4 = np.array([float(info) for info in lines[4].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
P0 = _extend_matrix(P0)
P1 = _extend_matrix(P1)
P2 = _extend_matrix(P2)
P3 = _extend_matrix(P3)
P4 = _extend_matrix(P4)
R0_rect = np.array([
float(info) for info in lines[5].split(' ')[1:10]
]).reshape([3, 3])
if extend_matrix:
rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
rect_4x4[3, 3] = 1.
rect_4x4[:3, :3] = R0_rect
else:
rect_4x4 = R0_rect
Tr_velo_to_cam = np.array([
float(info) for info in lines[6].split(' ')[1:13]
]).reshape([3, 4])
if extend_matrix:
Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
calib_info['P0'] = P0
calib_info['P1'] = P1
calib_info['P2'] = P2
calib_info['P3'] = P3
calib_info['P4'] = P4
calib_info['R0_rect'] = rect_4x4
calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
info['calib'] = calib_info
if pose:
pose_path = get_pose_path(
idx, path, training, relative_path=False, use_prefix_id=True)
info['pose'] = np.loadtxt(pose_path)
if annotations is not None:
info['annos'] = annotations
info['annos']['camera_id'] = info['annos'].pop('score')
add_difficulty_to_annos(info)
sweeps = []
prev_idx = idx
while len(sweeps) < max_sweeps:
prev_info = {}
prev_idx -= 1
prev_info['velodyne_path'] = get_velodyne_path(
prev_idx,
path,
training,
relative_path,
exist_check=False,
use_prefix_id=True)
if_prev_exists = osp.exists(
Path(path) / prev_info['velodyne_path'])
if if_prev_exists:
prev_points = np.fromfile(
Path(path) / prev_info['velodyne_path'], dtype=np.float32)
prev_points = np.copy(prev_points).reshape(
-1, pc_info['num_features'])
prev_info['timestamp'] = np.int64(prev_points[0, -1])
prev_pose_path = get_pose_path(
prev_idx,
path,
training,
relative_path=False,
use_prefix_id=True)
prev_info['pose'] = np.loadtxt(prev_pose_path)
sweeps.append(prev_info)
else:
break
info['sweeps'] = sweeps
return info
with futures.ThreadPoolExecutor(num_worker) as executor:
image_infos = executor.map(map_func, image_ids)
return list(image_infos)
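The sweep loop above collects previous frames into a list of dicts; a sketch of the structure of one entry, using the keys from the code above (values illustrative):

import numpy as np

# shape of one entry in info['sweeps'] as built above
sweep = {
    'velodyne_path': 'training/velodyne/1003041.bin',  # assumed relative path
    'timestamp': np.int64(1550000000000000),           # last column of the points
    'pose': np.eye(4),                                 # 4x4 ego pose from pose/*.txt
}
# the loop appends up to max_sweeps entries, stopping at the first missing frame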
def kitti_anno_to_label_file(annos, folder):
folder = Path(folder)
for anno in annos:
......
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
import mmcv
import numpy as np
import tensorflow as tf
from glob import glob
from os.path import join
from waymo_open_dataset import dataset_pb2
from waymo_open_dataset import dataset_pb2 as open_dataset
from waymo_open_dataset.utils import range_image_utils, transform_utils
from waymo_open_dataset.utils.frame_utils import \
parse_range_image_and_camera_projection
class Waymo2KITTI(object):
"""Waymo to KITTI converter.
This class serves as the converter to change the waymo raw data to KITTI
format.
Args:
load_dir (str): Directory to load waymo raw data.
save_dir (str): Directory to save data in KITTI format.
prefix (str): Prefix of filename. In general, 0 for training, 1 for
validation and 2 for testing.
workers (int): Number of workers for the parallel process.
test_mode (bool): Whether in the test_mode. Default: False.
"""
def __init__(self,
load_dir,
save_dir,
prefix,
workers=64,
test_mode=False):
self.filter_empty_3dboxes = True
self.filter_no_label_zone_points = True
self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST']
# Only data collected in specific locations will be converted
# If set None, this filter is disabled
# Available options: location_sf (main dataset)
self.selected_waymo_locations = None
self.save_track_id = False
# turn on eager execution for older tensorflow versions
if int(tf.__version__.split('.')[0]) < 2:
tf.enable_eager_execution()
self.lidar_list = [
'_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
'_SIDE_LEFT'
]
self.type_list = [
'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST'
]
self.waymo_to_kitti_class_map = {
'UNKNOWN': 'DontCare',
'PEDESTRIAN': 'Pedestrian',
'VEHICLE': 'Car',
'CYCLIST': 'Cyclist',
'SIGN': 'Sign' # not in kitti
}
self.load_dir = load_dir
self.save_dir = save_dir
self.prefix = prefix
self.workers = int(workers)
self.test_mode = test_mode
self.tfrecord_pathnames = sorted(
glob(join(self.load_dir, '*.tfrecord')))
self.label_save_dir = f'{self.save_dir}/label_'
self.label_all_save_dir = f'{self.save_dir}/label_all'
self.image_save_dir = f'{self.save_dir}/image_'
self.calib_save_dir = f'{self.save_dir}/calib'
self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
self.pose_save_dir = f'{self.save_dir}/pose'
self.create_folder()
def convert(self):
"""Convert action."""
print('Start converting ...')
mmcv.track_parallel_progress(self.convert_one, range(len(self)),
self.workers)
print('\nFinished ...')
def convert_one(self, file_idx):
"""Convert action for single file.
Args:
file_idx (int): Index of the file to be converted.
"""
pathname = self.tfrecord_pathnames[file_idx]
dataset = tf.data.TFRecordDataset(pathname, compression_type='')
for frame_idx, data in enumerate(dataset):
frame = open_dataset.Frame()
frame.ParseFromString(bytearray(data.numpy()))
if (self.selected_waymo_locations is not None
and frame.context.stats.location
not in self.selected_waymo_locations):
continue
self.save_image(frame, file_idx, frame_idx)
self.save_calib(frame, file_idx, frame_idx)
self.save_lidar(frame, file_idx, frame_idx)
self.save_pose(frame, file_idx, frame_idx)
if not self.test_mode:
self.save_label(frame, file_idx, frame_idx)
def __len__(self):
"""Length of the filename list."""
return len(self.tfrecord_pathnames)
def save_image(self, frame, file_idx, frame_idx):
"""Parse and save the images in png format.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
for img in frame.images:
img_path = f'{self.image_save_dir}{str(img.name - 1)}/' + \
f'{self.prefix}{str(file_idx).zfill(3)}' + \
f'{str(frame_idx).zfill(3)}.png'
img = mmcv.imfrombytes(img.image)
mmcv.imwrite(img, img_path)
def save_calib(self, frame, file_idx, frame_idx):
"""Parse and save the calibration data.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
# waymo front camera to kitti reference camera
T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0],
[1.0, 0.0, 0.0]])
camera_calibs = []
R0_rect = [f'{i:e}' for i in np.eye(3).flatten()]
Tr_velo_to_cams = []
calib_context = ''
for camera in frame.context.camera_calibrations:
# extrinsic parameters
T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape(
4, 4)
T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle)
Tr_velo_to_cam = \
self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam
if camera.name == 1: # FRONT = 1, see dataset.proto for details
self.T_velo_to_front_cam = Tr_velo_to_cam.copy()
Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, ))
Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam])
# intrinsic parameters
camera_calib = np.zeros((3, 4))
camera_calib[0, 0] = camera.intrinsic[0]
camera_calib[1, 1] = camera.intrinsic[1]
camera_calib[0, 2] = camera.intrinsic[2]
camera_calib[1, 2] = camera.intrinsic[3]
camera_calib[2, 2] = 1
camera_calib = list(camera_calib.reshape(12))
camera_calib = [f'{i:e}' for i in camera_calib]
camera_calibs.append(camera_calib)
# all camera ids are saved as id-1 in the result because
# camera 0 is unknown in the proto
for i in range(5):
calib_context += 'P' + str(i) + ': ' + \
' '.join(camera_calibs[i]) + '\n'
calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\n'
for i in range(5):
calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \
' '.join(Tr_velo_to_cams[i]) + '\n'
with open(
f'{self.calib_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt',
'w+') as fp_calib:
fp_calib.write(calib_context)
fp_calib.close()
def save_lidar(self, frame, file_idx, frame_idx):
"""Parse and save the lidar data in psd format.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
range_images, camera_projections, range_image_top_pose = \
parse_range_image_and_camera_projection(frame)
# First return
points_0, cp_points_0, intensity_0, elongation_0 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=0
)
points_0 = np.concatenate(points_0, axis=0)
intensity_0 = np.concatenate(intensity_0, axis=0)
elongation_0 = np.concatenate(elongation_0, axis=0)
# Second return
points_1, cp_points_1, intensity_1, elongation_1 = \
self.convert_range_image_to_point_cloud(
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=1
)
points_1 = np.concatenate(points_1, axis=0)
intensity_1 = np.concatenate(intensity_1, axis=0)
elongation_1 = np.concatenate(elongation_1, axis=0)
points = np.concatenate([points_0, points_1], axis=0)
intensity = np.concatenate([intensity_0, intensity_1], axis=0)
elongation = np.concatenate([elongation_0, elongation_1], axis=0)
timestamp = frame.timestamp_micros * np.ones_like(intensity)
# concatenate x,y,z, intensity, elongation, timestamp (6-dim)
point_cloud = np.column_stack(
(points, intensity, elongation, timestamp))
pc_path = f'{self.point_cloud_save_dir}/{self.prefix}' + \
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
point_cloud.astype(np.float32).tofile(pc_path)
def save_label(self, frame, file_idx, frame_idx):
"""Parse and save the label data in txt format.
The relation between waymo and kitti coordinates is noteworthy:
1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
2. x-y-z: front-left-up (waymo) -> right-down-front (kitti)
3. bbox origin at volumetric center (waymo) -> bottom center (kitti)
4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo)
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
fp_label_all = open(
f'{self.label_all_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')
id_to_bbox = dict()
id_to_name = dict()
for labels in frame.projected_lidar_labels:
name = labels.name
for label in labels.labels:
# TODO: need a workaround as bbox may not belong to front cam
bbox = [
label.box.center_x - label.box.length / 2,
label.box.center_y - label.box.width / 2,
label.box.center_x + label.box.length / 2,
label.box.center_y + label.box.width / 2
]
id_to_bbox[label.id] = bbox
id_to_name[label.id] = name - 1
for obj in frame.laser_labels:
bounding_box = None
name = None
id = obj.id
for lidar in self.lidar_list:
if id + lidar in id_to_bbox:
bounding_box = id_to_bbox.get(id + lidar)
name = str(id_to_name.get(id + lidar))
break
if bounding_box is None or name is None:
name = '0'
bounding_box = (0, 0, 0, 0)
my_type = self.type_list[obj.type]
if my_type not in self.selected_waymo_classes:
continue
if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1:
continue
my_type = self.waymo_to_kitti_class_map[my_type]
height = obj.box.height
width = obj.box.width
length = obj.box.length
x = obj.box.center_x
y = obj.box.center_y
z = obj.box.center_z - height / 2
# project bounding box to the virtual reference frame
pt_ref = self.T_velo_to_front_cam @ \
np.array([x, y, z, 1]).reshape((4, 1))
x, y, z, _ = pt_ref.flatten().tolist()
rotation_y = -obj.box.heading - np.pi / 2
track_id = obj.id
# not available
truncated = 0
occluded = 0
alpha = -10
line = my_type + \
' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format(
round(truncated, 2), occluded, round(alpha, 2),
round(bounding_box[0], 2), round(bounding_box[1], 2),
round(bounding_box[2], 2), round(bounding_box[3], 2),
round(height, 2), round(width, 2), round(length, 2),
round(x, 2), round(y, 2), round(z, 2),
round(rotation_y, 2))
if self.save_track_id:
line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n'
else:
line_all = line[:-1] + ' ' + name + '\n'
fp_label = open(
f'{self.label_save_dir}{name}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')
fp_label.write(line)
fp_label.close()
fp_label_all.write(line_all)
fp_label_all.close()
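A minimal numeric sketch of the coordinate handling in save_label (standalone values, not from a real frame):

import numpy as np

# waymo box: center (x, y, z), sizes (length, width, height), yaw `heading`
x, y, z, length, width, height, heading = 10.0, -2.0, 1.0, 4.2, 1.8, 1.5, 0.3
z -= height / 2                    # waymo volumetric center -> kitti bottom center
rotation_y = -heading - np.pi / 2  # axis-convention swap for the yaw angle
# (x, y, z) is then mapped through T_velo_to_front_cam before writing the label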
def save_pose(self, frame, file_idx, frame_idx):
"""Parse and save the pose data.
Note that the SDC's own pose is not included in the regular training
of the KITTI dataset. The KITTI raw dataset contains ego-motion files,
but they are not often used. Pose is important for algorithms that
take advantage of temporal information.
Args:
frame (:obj:`Frame`): Open dataset frame proto.
file_idx (int): Current file index.
frame_idx (int): Current frame index.
"""
pose = np.array(frame.pose.transform).reshape(4, 4)
np.savetxt(
join(f'{self.pose_save_dir}/{self.prefix}' +
f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'),
pose)
def create_folder(self):
"""Create folder for data preprocessing."""
if not self.test_mode:
dir_list1 = [
self.label_all_save_dir, self.calib_save_dir,
self.point_cloud_save_dir, self.pose_save_dir
]
dir_list2 = [self.label_save_dir, self.image_save_dir]
else:
dir_list1 = [
self.calib_save_dir, self.point_cloud_save_dir,
self.pose_save_dir
]
dir_list2 = [self.image_save_dir]
for d in dir_list1:
mmcv.mkdir_or_exist(d)
for d in dir_list2:
for i in range(5):
mmcv.mkdir_or_exist(f'{d}{str(i)}')
def convert_range_image_to_point_cloud(self,
frame,
range_images,
camera_projections,
range_image_top_pose,
ri_index=0):
"""Convert range images to point cloud.
Args:
frame (:obj:`Frame`): Open dataset frame.
range_images (dict): Mapping from laser_name to list of two
range images corresponding with two returns.
camera_projections (dict): Mapping from laser_name to list of two
camera projections corresponding with two returns.
range_image_top_pose (:obj:`Transform`): Range image pixel pose for
top lidar.
ri_index (int): 0 for the first return, 1 for the second return.
Default: 0.
Returns:
tuple[list[np.ndarray]]: (List of points with shape [N, 3],
camera projections of points with shape [N, 6], intensity
with shape [N, 1], elongation with shape [N, 1]). All the
lists have the length of lidar numbers (5).
"""
calibrations = sorted(
frame.context.laser_calibrations, key=lambda c: c.name)
points = []
cp_points = []
intensity = []
elongation = []
frame_pose = tf.convert_to_tensor(
value=np.reshape(np.array(frame.pose.transform), [4, 4]))
# [H, W, 6]
range_image_top_pose_tensor = tf.reshape(
tf.convert_to_tensor(value=range_image_top_pose.data),
range_image_top_pose.shape.dims)
# [H, W, 3, 3]
range_image_top_pose_tensor_rotation = \
transform_utils.get_rotation_matrix(
range_image_top_pose_tensor[..., 0],
range_image_top_pose_tensor[..., 1],
range_image_top_pose_tensor[..., 2])
range_image_top_pose_tensor_translation = \
range_image_top_pose_tensor[..., 3:]
range_image_top_pose_tensor = transform_utils.get_transform(
range_image_top_pose_tensor_rotation,
range_image_top_pose_tensor_translation)
for c in calibrations:
range_image = range_images[c.name][ri_index]
if len(c.beam_inclinations) == 0:
beam_inclinations = range_image_utils.compute_inclination(
tf.constant(
[c.beam_inclination_min, c.beam_inclination_max]),
height=range_image.shape.dims[0])
else:
beam_inclinations = tf.constant(c.beam_inclinations)
beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])
range_image_tensor = tf.reshape(
tf.convert_to_tensor(value=range_image.data),
range_image.shape.dims)
pixel_pose_local = None
frame_pose_local = None
if c.name == dataset_pb2.LaserName.TOP:
pixel_pose_local = range_image_top_pose_tensor
pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
frame_pose_local = tf.expand_dims(frame_pose, axis=0)
range_image_mask = range_image_tensor[..., 0] > 0
if self.filter_no_label_zone_points:
nlz_mask = range_image_tensor[..., 3] != 1.0 # 1.0: in NLZ
range_image_mask = range_image_mask & nlz_mask
range_image_cartesian = \
range_image_utils.extract_point_cloud_from_range_image(
tf.expand_dims(range_image_tensor[..., 0], axis=0),
tf.expand_dims(extrinsic, axis=0),
tf.expand_dims(tf.convert_to_tensor(
value=beam_inclinations), axis=0),
pixel_pose=pixel_pose_local,
frame_pose=frame_pose_local)
range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
points_tensor = tf.gather_nd(range_image_cartesian,
tf.compat.v1.where(range_image_mask))
cp = camera_projections[c.name][ri_index]
cp_tensor = tf.reshape(
tf.convert_to_tensor(value=cp.data), cp.shape.dims)
cp_points_tensor = tf.gather_nd(
cp_tensor, tf.compat.v1.where(range_image_mask))
points.append(points_tensor.numpy())
cp_points.append(cp_points_tensor.numpy())
intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
tf.where(range_image_mask))
intensity.append(intensity_tensor.numpy())
elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
tf.where(range_image_mask))
elongation.append(elongation_tensor.numpy())
return points, cp_points, intensity, elongation
def cart_to_homo(self, mat):
"""Convert transformation matrix in Cartesian coordinates to
homogeneous format.
Args:
mat (np.ndarray): Transformation matrix in Cartesian.
The input matrix shape is 3x3 or 3x4.
Returns:
np.ndarray: Transformation matrix in homogeneous format.
The matrix shape is 4x4.
"""
ret = np.eye(4)
if mat.shape == (3, 3):
ret[:3, :3] = mat
elif mat.shape == (3, 4):
ret[:3, :] = mat
else:
raise ValueError(mat.shape)
return ret
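And a quick check of cart_to_homo semantics (the instance name `converter` is a hypothetical Waymo2KITTI object):

import numpy as np

rot = np.eye(3)                  # a 3x3 rotation
T = converter.cart_to_homo(rot)  # -> 4x4 homogeneous transform
# T[:3, :3] == rot and the last row is [0, 0, 0, 1]
assert T.shape == (4, 4) and np.allclose(T[3], [0, 0, 0, 1])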
import argparse
import torch
import torch.nn as nn
from mmcv.runner import save_checkpoint
......