Commit d1aac35d authored by zhangwenwei

Initial commit
from functools import partial
import mmcv
import numpy as np
import torch
import torch.nn.functional as F
from six.moves import map, zip
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
num_imgs = tensor.size(0)
mean = np.array(mean, dtype=np.float32)
std = np.array(std, dtype=np.float32)
imgs = []
for img_id in range(num_imgs):
img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
img = mmcv.imdenormalize(
img, mean, std, to_bgr=to_rgb).astype(np.uint8)
imgs.append(np.ascontiguousarray(img))
return imgs
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
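# Example (illustrative sketch): multi_apply maps a multi-output function
# over the zipped args and transposes the per-call results into per-output
# lists.
#   def add_sub(a, b):
#       return a + b, a - b
#   sums, diffs = multi_apply(add_sub, [1, 2], [3, 4])
#   # sums == [4, 6], diffs == [-2, -2]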
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
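# Example (illustrative sketch): scatter two values back into a set of five
# items, filling the rest with zeros.
#   unmap(torch.tensor([1., 2.]), 5, torch.tensor([0, 3]))
#   # -> tensor([1., 0., 0., 2., 0.])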
def merge_batch(data):
for key, elems in data.items():
if key in ['voxels', 'num_points', 'voxel_labels', 'voxel_centers']:
data[key]._data[0] = torch.cat(elems._data[0], dim=0)
elif key == 'coors':
coors = []
for i, coor in enumerate(elems._data[0]):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors.append(coor_pad)
data[key]._data[0] = torch.cat(coors, dim=0)
return data
def merge_hook_batch(data):
for key, elems in data.items():
if key in ['voxels', 'num_points', 'voxel_labels', 'voxel_centers']:
data[key] = torch.cat(elems, dim=0)
elif key == 'coors':
coors = []
for i, coor in enumerate(elems):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors.append(coor_pad)
data[key] = torch.cat(coors, dim=0)
return data
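# Note on the 'coors' handling above: each (N_i, 3) coordinate tensor of
# sample i is left-padded with a constant batch-index column, giving an
# (N_i, 4) tensor whose first column equals i, so per-sample voxels stay
# distinguishable after concatenation along dim 0.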
from .builder import build_voxel_generator
from .voxel_generator import VoxelGenerator
__all__ = ['build_voxel_generator', 'VoxelGenerator']
import mmcv
from . import voxel_generator
def build_voxel_generator(cfg, **kwargs):
if isinstance(cfg, voxel_generator.VoxelGenerator):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(
cfg, voxel_generator, default_args=kwargs)
else:
        raise TypeError('Invalid type {} for building a voxel generator'.
                        format(type(cfg)))
import numba
import numpy as np
class VoxelGenerator(object):
def __init__(self,
voxel_size,
point_cloud_range,
max_num_points,
max_voxels=20000):
point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32)
grid_size = (point_cloud_range[3:] -
point_cloud_range[:3]) / voxel_size
grid_size = np.round(grid_size).astype(np.int64)
self._voxel_size = voxel_size
self._point_cloud_range = point_cloud_range
self._max_num_points = max_num_points
self._max_voxels = max_voxels
self._grid_size = grid_size
def generate(self, points):
return points_to_voxel(points, self._voxel_size,
self._point_cloud_range, self._max_num_points,
True, self._max_voxels)
@property
def voxel_size(self):
return self._voxel_size
@property
def max_num_points_per_voxel(self):
return self._max_num_points
@property
def point_cloud_range(self):
return self._point_cloud_range
@property
def grid_size(self):
return self._grid_size
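# Example (illustrative sketch; the values are assumptions matching the
# point-cloud-range comment above, not prescribed defaults):
#   voxel_generator = VoxelGenerator(
#       voxel_size=[0.05, 0.05, 0.1],
#       point_cloud_range=[0, -40, -3, 70.4, 40, 1],
#       max_num_points=5)
#   voxels, coors, num_points_per_voxel = voxel_generator.generate(points)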
def points_to_voxel(points,
voxel_size,
coors_range,
max_points=35,
reverse_index=True,
max_voxels=20000):
"""convert kitti points(N, >=3) to voxels. This version calculate
everything in one loop. now it takes only 4.2ms(complete point cloud)
with jit and 3.2ghz cpu.(don't calculate other features)
Args:
points: [N, ndim] float tensor. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
coors_range: [6] list/tuple or array, float. indicate voxel range.
format: xyzxyz, minmax
max_points: int. indicate maximum points contained in a voxel.
        reverse_index: boolean. indicate whether to return reversed
            coordinates. if points are in xyz format and reverse_index is
            True, output coordinates will be in zyx format, but points in
            features always stay in xyz format.
        max_voxels: int. indicate the maximum number of voxels this function
            creates. for SECOND, 20000 is a good choice. you should shuffle
            points before calling this function because max_voxels may drop
            points.
Returns:
voxels: [M, max_points, ndim] float tensor. only contain points.
coordinates: [M, 3] int32 tensor.
num_points_per_voxel: [M] int32 tensor.
"""
if not isinstance(voxel_size, np.ndarray):
voxel_size = np.array(voxel_size, dtype=points.dtype)
if not isinstance(coors_range, np.ndarray):
coors_range = np.array(coors_range, dtype=points.dtype)
voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
if reverse_index:
voxelmap_shape = voxelmap_shape[::-1]
# don't create large array in jit(nopython=True) code.
num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)
voxels = np.zeros(
shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
if reverse_index:
voxel_num = _points_to_voxel_reverse_kernel(
points, voxel_size, coors_range, num_points_per_voxel,
coor_to_voxelidx, voxels, coors, max_points, max_voxels)
else:
voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,
num_points_per_voxel,
coor_to_voxelidx, voxels, coors,
max_points, max_voxels)
coors = coors[:voxel_num]
voxels = voxels[:voxel_num]
num_points_per_voxel = num_points_per_voxel[:voxel_num]
return voxels, coors, num_points_per_voxel
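# Shape summary (follows from the code above): for an (N, ndim) cloud,
# voxels.shape == (M, max_points, ndim), coors.shape == (M, 3) (zyx order
# when reverse_index=True) and num_points_per_voxel.shape == (M,), with
# M <= max_voxels.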
@numba.jit(nopython=True)
def _points_to_voxel_reverse_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
    # put all computations in one loop.
    # we shouldn't create large arrays in the main jit code, otherwise
    # performance will decrease
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
ndim_minus_1 = ndim - 1
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # round in place (np.round(arr, decimals, out) form) to stay
    # numba-friendly
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[ndim_minus_1 - j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
break
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
@numba.jit(nopython=True)
def _points_to_voxel_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
    # a mutex would be needed to write in CUDA, but numba.cuda does not
    # support mutexes. In addition, PyTorch does not support CUDA in the
    # dataloader.
    # put all computations in one loop.
    # we shouldn't create large arrays in the main jit code, otherwise
    # performance will decrease
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # round in place (np.round(arr, decimals, out) form) to stay
    # numba-friendly
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
# lower_bound = coors_range[:3]
# upper_bound = coors_range[3:]
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
break
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
from mmdet.datasets.registry import DATASETS
from .builder import build_dataset
from .coco import CocoDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .nuscenes2d_dataset import NuScenes2DDataset
from .nuscenes_dataset import NuScenesDataset
__all__ = [
'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'DATASETS',
'build_dataset', 'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset',
'NuScenes2DDataset'
]
import copy
from mmdet.datasets import ConcatDataset, RepeatDataset
from mmdet.utils import build_from_cfg
from .dataset_wrappers import RepeatFactorDataset
from .registry import DATASETS
def _concat_dataset(cfg, default_args=None):
ann_files = cfg['ann_file']
img_prefixes = cfg.get('img_prefix', None)
seg_prefixes = cfg.get('seg_prefix', None)
proposal_files = cfg.get('proposal_file', None)
datasets = []
num_dset = len(ann_files)
for i in range(num_dset):
data_cfg = copy.deepcopy(cfg)
data_cfg['ann_file'] = ann_files[i]
if isinstance(img_prefixes, (list, tuple)):
data_cfg['img_prefix'] = img_prefixes[i]
if isinstance(seg_prefixes, (list, tuple)):
data_cfg['seg_prefix'] = seg_prefixes[i]
if isinstance(proposal_files, (list, tuple)):
data_cfg['proposal_file'] = proposal_files[i]
datasets.append(build_dataset(data_cfg, default_args))
return ConcatDataset(datasets)
def build_dataset(cfg, default_args=None):
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'RepeatDataset':
dataset = RepeatDataset(
build_dataset(cfg['dataset'], default_args), cfg['times'])
elif cfg['type'] == 'RepeatFactorDataset':
dataset = RepeatFactorDataset(
build_dataset(cfg['dataset'], default_args), cfg['repeat_thr'])
elif isinstance(cfg.get('ann_file'), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
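# Example (illustrative sketch; the inner dataset fields are placeholders):
#   cfg = dict(
#       type='RepeatDataset',
#       times=2,
#       dataset=dict(type='KittiDataset', ...))
#   dataset = build_dataset(cfg)  # the wrapped dataset is iterated twice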
import math
from collections import defaultdict
import numpy as np
from mmdet.datasets import DATASETS
# Modified from https://github.com/facebookresearch/detectron2/blob/41d475b75a230221e21d9cac5d69655e3415e3a4/detectron2/data/samplers/distributed_sampler.py#L57 # noqa
@DATASETS.register_module
class RepeatFactorDataset(object):
"""A wrapper of repeated dataset with repeat factor.
Suitable for training on class imbalanced datasets like LVIS. In each
epoch, an image may appear multiple times based on its "repeat factor".
    The repeat factor for an image is a function of the frequency of the
    rarest category labeled in that image. The "frequency of category c" in
    [0, 1]
is defined as the fraction of images in the training set (without repeats)
in which category c appears.
This wrapper will finally be merged into LVIS dataset.
See https://arxiv.org/abs/1908.03195 (>= v2) Appendix B.2.
Args:
dataset (:obj:`Dataset`): The dataset to be repeated.
repeat_thr (float): frequency threshold below which data is repeated.
"""
def __init__(self, dataset, repeat_thr):
self.dataset = dataset
self.repeat_thr = repeat_thr
self.CLASSES = dataset.CLASSES
repeat_factors = self._get_repeat_factors(dataset, repeat_thr)
repeat_indices = []
for dataset_index, repeat_factor in enumerate(repeat_factors):
repeat_indices.extend([dataset_index] * math.ceil(repeat_factor))
self.repeat_indices = repeat_indices
flags = []
if hasattr(self.dataset, 'flag'):
for flag, repeat_factor in zip(self.dataset.flag, repeat_factors):
flags.extend([flag] * int(math.ceil(repeat_factor)))
assert len(flags) == len(repeat_indices)
self.flag = np.asarray(flags, dtype=np.uint8)
def _get_repeat_factors(self, dataset, repeat_thr):
# 1. For each category c, compute the fraction # of images
# that contain it: f(c)
category_freq = defaultdict(int)
for idx, img_info in enumerate(dataset.data_infos):
if 'category_ids' in img_info:
cat_ids = set(img_info['category_ids'])
elif 'gt_names' in img_info:
cat_ids = set([
gt for gt in img_info['gt_names']
if gt in dataset.class_names
])
else:
labels = dataset.get_ann_info(idx)['labels']
cat_ids = set([label for label in labels])
for cat_id in cat_ids:
category_freq[cat_id] += 1
num_images = len(dataset)
for k, v in category_freq.items():
category_freq[k] = v / num_images
# 2. For each category c, compute the category-level repeat factor:
# r(c) = max(1, sqrt(t / f(c)))
category_repeat = {
cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))
for cat_id, cat_freq in category_freq.items()
}
# 3. For each image I, compute the image-level repeat factor:
# r(I) = max_{c in I} r(c)
repeat_factors = []
for idx, img_info in enumerate(dataset.data_infos):
if 'category_ids' in img_info:
cat_ids = set(img_info['category_ids'])
elif 'gt_names' in img_info:
cat_ids = set([
gt for gt in img_info['gt_names']
if gt in dataset.class_names
])
else:
labels = dataset.get_ann_info(idx)['labels']
cat_ids = set([label for label in labels])
if len(cat_ids) == 0:
repeat_factor = 1
else:
repeat_factor = max(
{category_repeat[cat_id]
for cat_id in cat_ids})
repeat_factors.append(repeat_factor)
return repeat_factors
def __getitem__(self, idx):
ori_index = self.repeat_indices[idx]
return self.dataset[ori_index]
def __len__(self):
return len(self.repeat_indices)
import mmcv
import numpy as np
from mmdet.datasets import DATASETS, CustomDataset
@DATASETS.register_module
class Kitti2DDataset(CustomDataset):
    """
    Annotation format:
    [
        {
            'image': {
                'image_idx': 0,
                'image_path': 'training/image_2/000000.png',
                'image_shape': array([ 370, 1224], dtype=int32)
            },
            'point_cloud': {
                'num_features': 4,
                'velodyne_path': 'training/velodyne/000000.bin'
            },
            'calib': {
                'P0': <np.ndarray> (4, 4),
                'P1': <np.ndarray> (4, 4),
                'P2': <np.ndarray> (4, 4),
                'P3': <np.ndarray> (4, 4),
                'R0_rect': 4x4 np.array,
                'Tr_velo_to_cam': 4x4 np.array,
                'Tr_imu_to_velo': 4x4 np.array
            },
            'annos': {
                'name': <np.ndarray> (n),
                'truncated': <np.ndarray> (n),
                'occluded': <np.ndarray> (n),
                'alpha': <np.ndarray> (n),
                'bbox': <np.ndarray> (n, 4),
                'dimensions': <np.ndarray> (n, 3),
                'location': <np.ndarray> (n, 3),
                'rotation_y': <np.ndarray> (n),
                'score': <np.ndarray> (n),
                'index': array([0], dtype=int32),
                'group_ids': array([0], dtype=int32),
                'difficulty': array([0], dtype=int32),
                'num_points_in_gt': <np.ndarray> (n),
            }
        }
    ]
    """
    CLASSES = ('car', 'pedestrian', 'cyclist')
def load_annotations(self, ann_file):
self.data_infos = mmcv.load(ann_file)
self.cat2label = {
cat_name: i
for i, cat_name in enumerate(self.class_names)
}
return self.data_infos
def _filter_imgs(self, min_size=32):
"""Filter images without ground truths."""
valid_inds = []
for i, img_info in enumerate(self.data_infos):
if len(img_info['annos']['name']) > 0:
valid_inds.append(i)
return valid_inds
def get_ann_info(self, index):
        # use the index to get the annos so that the eval hook can also use
        # this API
info = self.data_infos[index]
annos = info['annos']
gt_names = annos['name']
gt_bboxes = annos['bbox']
difficulty = annos['difficulty']
        # remove classes that are not needed
selected = self.keep_arrays_by_name(gt_names, self.CLASSES)
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_labels = np.array([self.cat2label[n] for n in gt_names])
anns_results = dict(
bboxes=gt_bboxes.astype(np.float32),
labels=gt_labels,
)
return anns_results
def prepare_train_img(self, idx):
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
ann_info = self.get_ann_info(idx)
if len(ann_info['bboxes']) == 0:
return None
results = dict(img_info=img_info, ann_info=ann_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def prepare_test_img(self, idx):
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
results = dict(img_info=img_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def drop_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def reformat_bbox(self, outputs, out=None):
from mmdet3d.core.bbox.transforms import bbox2result_kitti2d
sample_idx = [info['image']['image_idx'] for info in self.data_infos]
result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,
out)
return result_files
def evaluate(self, result_files, eval_types=None):
from mmdet3d.core.evaluation import kitti_eval
eval_types = ['bbox'] if not eval_types else eval_types
        assert eval_types in ('bbox', ['bbox']), \
            'KITTI 2D dataset only evaluates bbox'
gt_annos = [info['annos'] for info in self.data_infos]
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
return ap_result_str, ap_dict
import copy
import os
import pickle
import mmcv
import numpy as np
import torch
import torch.utils.data as torch_data
from mmdet.datasets.registry import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
from .utils import remove_dontcare
@DATASETS.register_module
class KittiDataset(torch_data.Dataset):
CLASSES = ('car', 'pedestrian', 'cyclist')
def __init__(self,
root_path,
ann_file,
split,
pipeline=None,
training=False,
class_names=None,
modality=None,
with_label=True,
test_mode=False):
"""
:param root_path: KITTI data path
:param split:
"""
super().__init__()
self.root_path = root_path
self.root_split_path = os.path.join(
self.root_path, 'training' if split != 'test' else 'testing')
self.class_names = class_names if class_names else self.CLASSES
        self.modality = modality
        self.with_label = with_label
        assert self.modality is not None
self.test_mode = test_mode
# TODO: rm the key training if it is not needed
self.training = training
self.pcd_limit_range = [0, -40, -3, 70.4, 40, 0.0]
self.ann_file = ann_file
with open(ann_file, 'rb') as f:
self.kitti_infos = pickle.load(f)
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
difficulty = input_dict['difficulty']
input_dict['bbox_fields'] = []
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
# selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_bboxes_mask = np.array([n in self.class_names for n in gt_names],
dtype=np.bool_)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes'] = gt_bboxes.astype('float32')
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
input_dict['difficulty'] = difficulty
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['gt_bboxes_3d_mask'] = copy.deepcopy(gt_bboxes_mask)
input_dict['bbox_fields'].append('gt_bboxes')
if len(gt_bboxes) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
if gt_bboxes_3d is not None:
selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_bboxes'] = gt_bboxes
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def get_lidar(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path,
'velodyne_depth_reduced', '%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_pure_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'depth_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_depth(self, idx):
depth_file = os.path.join(self.root_split_path, 'depth_completion',
'%06d.png' % idx)
assert os.path.exists(depth_file)
depth_img = mmcv.imread(depth_file, -1) / 256.0
return depth_img
def __len__(self):
return len(self.kitti_infos)
def get_sensor_data(self, index):
info = self.kitti_infos[index]
sample_idx = info['image']['image_idx']
# TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
lidar2img = P2 @ rect @ Trv2c
if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']:
points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx)
else:
assert (self.modality['use_depth'] or self.modality['use_lidar'])
if not self.modality['use_lidar_intensity']:
points = points[:, :3]
input_dict = dict(
sample_idx=sample_idx,
points=points,
lidar2img=lidar2img,
)
# TODO: support image input
if self.modality['use_camera']:
image_info = info['image']
image_path = image_info['image_path']
image_path = os.path.join(self.root_path, image_path)
img = mmcv.imread(image_path)
input_dict.update(
dict(
img=img,
img_shape=img.shape,
ori_shape=img.shape,
filename=image_path))
else:
input_dict.update(dict(img_shape=info['image']['image_shape']))
if self.with_label:
annos = self.get_ann_info(index)
input_dict.update(annos)
return input_dict
def get_ann_info(self, index):
        # use the index to get the annos so that the eval hook can also use
        # this API
info = self.kitti_infos[index]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
# P2 = info['calib']['P2'].astype(np.float32)
annos = info['annos']
        # we need other objects to avoid collisions when sampling
annos = remove_dontcare(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_names = annos['name']
# print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
difficulty = annos['difficulty']
        # this converts gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c)
# only center format is allowed. so we need to convert
# kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
# box_np_ops.change_box3d_center_(gt_bboxes, [0.5, 0.5, 0],
# [0.5, 0.5, 0.5])
# For simplicity gt_bboxes means 2D gt bboxes
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_bboxes=annos['bbox'],
gt_names=gt_names,
difficulty=difficulty)
return anns_results
def drop_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def reformat_bbox(self, outputs, out=None):
if not isinstance(outputs[0][0], dict):
sample_idx = [
info['image']['image_idx'] for info in self.kitti_infos
]
result_files = self.bbox2result_kitti2d(outputs, self.class_names,
sample_idx, out)
else:
result_files = self.bbox2result_kitti(outputs, self.class_names,
out)
return result_files
def evaluate(self, result_files, eval_types=None):
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.kitti_infos]
if eval_types == 'img_bbox':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.class_names, eval_types=['bbox'])
else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.class_names)
return ap_result_str, ap_dict
def bbox2result_kitti(self, net_outputs, class_names, out=None):
if out:
output_dir = out[:-4] if out.endswith(('.pkl', '.pickle')) else out
result_dir = output_dir + '/data'
mmcv.mkdir_or_exist(result_dir)
det_annos = []
print('Converting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.kitti_infos[idx]
image_shape = info['image']['image_shape'][:2]
for i, box_dict in enumerate(pred_dicts):
num_example = 0
sample_idx = box_dict['sample_idx']
box_dict = self.convert_valid_bboxes(box_dict, info)
                if box_dict['bbox'] is not None and box_dict[
                        'bbox'].size != 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
gt_iou = scores * 0
for box, box_lidar, bbox, score, label, cur_gt_iou in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds, gt_iou):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
# anno["gt_iou"].append(cur_gt_iou)
anno['score'].append(score)
num_example += 1
if num_example != 0:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
if out:
cur_det_file = result_dir + '/%06d.txt' % sample_idx
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'
.format(anno['name'][idx],
anno['alpha'][idx], bbox[idx][0],
bbox[idx][1], bbox[idx][2],
bbox[idx][3], dims[idx][1],
dims[idx][2], dims[idx][0],
loc[idx][0], loc[idx][1],
loc[idx][2],
anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
if num_example == 0:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if out:
if not out.endswith(('.pkl', '.pickle')):
out = '{}.pkl'.format(out)
mmcv.dump(det_annos, out)
print('Result is saved to %s' % out)
return det_annos
def bbox2result_kitti2d(self,
net_outputs,
class_names,
sample_ids,
out=None):
"""Convert results to kitti format for evaluation and test submission
Args:
net_outputs (List[array]): list of array storing the bbox and score
class_nanes (List[String]): A list of class names
sample_idx (List[Int]): A list of samples' index,
should have the same length as net_outputs.
Return:
List([dict]): A list of dict have the kitti format
"""
assert len(net_outputs) == len(sample_ids)
det_annos = []
print('Converting prediction to KITTI format')
for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
anno = dict(
name=[],
truncated=[],
occluded=[],
alpha=[],
bbox=[],
dimensions=[],
location=[],
rotation_y=[],
score=[])
sample_idx = sample_ids[i]
num_example = 0
for label in range(len(bboxes_per_sample)):
bbox = bboxes_per_sample[label]
                # use a separate index to avoid shadowing the sample index i
                for j in range(bbox.shape[0]):
                    anno['name'].append(class_names[int(label)])
                    anno['truncated'].append(0.0)
                    anno['occluded'].append(0)
                    anno['alpha'].append(0.0)
                    anno['bbox'].append(bbox[j, :4])
                    # set dimensions (height, width, length) to zero
                    anno['dimensions'].append(
                        np.zeros(shape=[3], dtype=np.float32))
                    # set the 3D translation to (-1000, -1000, -1000)
                    anno['location'].append(
                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                    anno['rotation_y'].append(0.0)
                    anno['score'].append(bbox[j, 4])
                    num_example += 1
if num_example == 0:
annos.append(
dict(
name=np.array([]),
truncated=np.array([]),
occluded=np.array([]),
alpha=np.array([]),
bbox=np.zeros([0, 4]),
dimensions=np.zeros([0, 3]),
location=np.zeros([0, 3]),
rotation_y=np.array([]),
score=np.array([]),
))
else:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if out:
# save file in submission format
output_dir = out[:-4] if out.endswith(('.pkl', '.pickle')) else out
result_dir = output_dir + '/data'
mmcv.mkdir_or_exist(result_dir)
out = '{}.pkl'.format(result_dir)
mmcv.dump(det_annos, out)
print('Result is saved to {}'.format(out))
for i, anno in enumerate(det_annos):
sample_idx = sample_ids[i]
cur_det_file = result_dir + '/%06d.txt' % sample_idx
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
                dims = anno['dimensions'][:, [1, 2, 0]]  # lhw -> hwl
for idx in range(len(bbox)):
print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f}'.format(
anno['name'][idx],
anno['alpha'][idx],
*bbox[idx], # 4 float
*dims[idx], # 3 float
*loc[idx], # 3 float
anno['rotation_y'][idx],
anno['score'][idx]),
file=f,
)
print('Result is saved to {}'.format(result_dir))
return det_annos
def convert_valid_bboxes(self, box_dict, info):
# TODO: refactor this function
final_box_preds = box_dict['box3d_lidar']
final_scores = box_dict['scores']
final_labels = box_dict['label_preds']
sample_idx = info['image']['image_idx']
final_box_preds[:, -1] = box_np_ops.limit_period(
final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2)
if final_box_preds.shape[0] == 0:
return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
scores=final_box_preds.new_zeros([0]).numpy(),
                label_preds=final_box_preds.new_zeros([0]).numpy(),
sample_idx=sample_idx,
)
from mmdet3d.core.bbox import box_torch_ops
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
img_shape = info['image']['image_shape']
rect = final_box_preds.new_tensor(rect)
Trv2c = final_box_preds.new_tensor(Trv2c)
P2 = final_box_preds.new_tensor(P2)
final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
final_box_preds, rect, Trv2c)
locs = final_box_preds_camera[:, :3]
dims = final_box_preds_camera[:, 3:6]
angles = final_box_preds_camera[:, 6]
camera_box_origin = [0.5, 1.0, 0.5]
box_corners = box_torch_ops.center_to_corner_box3d(
locs, dims, angles, camera_box_origin, axis=1)
box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check final_box_preds_camera
image_shape = final_box_preds.new_tensor(img_shape)
valid_cam_inds = ((final_box_preds_camera[:, 0] < image_shape[1]) &
(final_box_preds_camera[:, 1] < image_shape[0]) &
(final_box_preds_camera[:, 2] > 0) &
(final_box_preds_camera[:, 3] > 0))
# check final_box_preds
limit_range = final_box_preds.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((final_box_preds[:, :3] > limit_range[:3]) &
(final_box_preds[:, :3] < limit_range[3:]))
valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=final_box_preds_camera[valid_inds, :].numpy(),
box3d_lidar=final_box_preds[valid_inds, :].numpy(),
scores=final_scores[valid_inds].numpy(),
label_preds=final_labels[valid_inds].numpy(),
sample_idx=sample_idx,
)
else:
return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
scores=final_box_preds.new_zeros([0]).numpy(),
                label_preds=final_box_preds.new_zeros([0]).numpy(),
sample_idx=sample_idx,
)
from .build_loader import build_dataloader
from .sampler import DistributedGroupSampler, GroupSampler
__all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from torch.utils.data import DataLoader
from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
def build_dataloader(dataset,
samples_per_gpu,
workers_per_gpu,
num_gpus=1,
dist=True,
seed=None,
**kwargs):
shuffle = kwargs.get('shuffle', True)
if dist:
rank, world_size = get_dist_info()
if shuffle:
sampler = DistributedGroupSampler(dataset, samples_per_gpu,
world_size, rank)
else:
sampler = DistributedSampler(
dataset, world_size, rank, shuffle=False)
batch_size = samples_per_gpu
num_workers = workers_per_gpu
else:
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
batch_size = num_gpus * samples_per_gpu
num_workers = num_gpus * workers_per_gpu
data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
num_workers=num_workers,
collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
pin_memory=False,
        worker_init_fn=partial(worker_init_fn, seed=seed)
        if seed is not None else None,
**kwargs)
return data_loader
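# Example (illustrative sketch; the argument values are assumptions):
#   data_loader = build_dataloader(
#       dataset, samples_per_gpu=2, workers_per_gpu=2, num_gpus=1,
#       dist=False, seed=0)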
def worker_init_fn(worker_id, seed):
    # the DataLoader passes the worker id; combine it with the user seed so
    # that each worker gets a distinct but deterministic seed
    np.random.seed(worker_id + seed)
    random.seed(worker_id + seed)
from __future__ import division
import math
import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import DistributedSampler as _DistributedSampler
from torch.utils.data import Sampler
class DistributedSampler(_DistributedSampler):
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
super().__init__(dataset, num_replicas=num_replicas, rank=rank)
self.shuffle = shuffle
def __iter__(self):
# deterministically shuffle based on epoch
if self.shuffle:
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g).tolist()
else:
indices = torch.arange(len(self.dataset)).tolist()
# add extra samples to make it evenly divisible
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
# subsample
indices = indices[self.rank:self.total_size:self.num_replicas]
assert len(indices) == self.num_samples
return iter(indices)
class GroupSampler(Sampler):
def __init__(self, dataset, samples_per_gpu=1):
assert hasattr(dataset, 'flag')
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.flag = dataset.flag.astype(np.int64)
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, size in enumerate(self.group_sizes):
self.num_samples += int(np.ceil(
size / self.samples_per_gpu)) * self.samples_per_gpu
def __iter__(self):
indices = []
for i, size in enumerate(self.group_sizes):
if size == 0:
continue
indice = np.where(self.flag == i)[0]
assert len(indice) == size
np.random.shuffle(indice)
num_extra = int(np.ceil(size / self.samples_per_gpu)
) * self.samples_per_gpu - len(indice)
indice = np.concatenate(
[indice, np.random.choice(indice, num_extra)])
indices.append(indice)
indices = np.concatenate(indices)
indices = [
indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
for i in np.random.permutation(
range(len(indices) // self.samples_per_gpu))
]
indices = np.concatenate(indices)
indices = indices.astype(np.int64).tolist()
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
class DistributedGroupSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self,
dataset,
samples_per_gpu=1,
num_replicas=None,
rank=None):
_rank, _num_replicas = get_dist_info()
if num_replicas is None:
num_replicas = _num_replicas
if rank is None:
rank = _rank
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
assert hasattr(self.dataset, 'flag')
self.flag = self.dataset.flag
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, j in enumerate(self.group_sizes):
self.num_samples += int(
math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
self.num_replicas)) * self.samples_per_gpu
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = []
for i, size in enumerate(self.group_sizes):
if size > 0:
indice = np.where(self.flag == i)[0]
assert len(indice) == size
indice = indice[list(torch.randperm(int(size),
generator=g))].tolist()
extra = int(
math.ceil(
size * 1.0 / self.samples_per_gpu / self.num_replicas)
) * self.samples_per_gpu * self.num_replicas - len(indice)
# pad indice
tmp = indice.copy()
for _ in range(extra // size):
indice.extend(tmp)
indice.extend(tmp[:extra % size])
indices.extend(indice)
assert len(indices) == self.total_size
indices = [
indices[j] for i in list(
torch.randperm(
len(indices) // self.samples_per_gpu, generator=g))
for j in range(i * self.samples_per_gpu, (i + 1) *
self.samples_per_gpu)
]
# subsample
offset = self.num_samples * self.rank
indices = indices[offset:offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
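# Usage note: __iter__ seeds its generator with self.epoch, so the runner
# should call sampler.set_epoch(epoch) at the start of every epoch to get a
# different but rank-consistent shuffle each epoch.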
from pycocotools.coco import COCO
from mmdet3d.core.evaluation.coco_utils import getImgIds
from mmdet.datasets import DATASETS, CocoDataset
@DATASETS.register_module
class NuScenes2DDataset(CocoDataset):
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
def load_annotations(self, ann_file):
if not self.class_names:
self.class_names = self.CLASSES
self.coco = COCO(ann_file)
# send class_names into the get id
# in case we only need to train on several classes
# by default self.class_names = CLASSES
self.cat_ids = self.coco.getCatIds(catNms=self.class_names)
self.cat2label = {
cat_id: i # + 1 rm +1 here thus the 0-79 are fg, 80 is bg
for i, cat_id in enumerate(self.cat_ids)
}
# send cat ids to the get img id
# in case we only need to train on several classes
if len(self.cat_ids) < len(self.CLASSES):
self.img_ids = getImgIds(self.coco, catIds=self.cat_ids)
else:
self.img_ids = self.coco.getImgIds()
img_infos = []
for i in self.img_ids:
info = self.coco.loadImgs([i])[0]
info['filename'] = info['file_name']
img_infos.append(info)
return img_infos
import copy
import os.path as osp
import tempfile
import mmcv
import numpy as np
import pyquaternion
import torch.utils.data as torch_data
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
@DATASETS.register_module
class NuScenesDataset(torch_data.Dataset):
NumPointFeatures = 4 # xyz, timestamp. set 4 to use kitti pretrain
NameMapping = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck'
}
DefaultAttribute = {
'car': 'vehicle.parked',
'pedestrian': 'pedestrian.moving',
'trailer': 'vehicle.parked',
'truck': 'vehicle.parked',
'bus': 'vehicle.moving',
'motorcycle': 'cycle.without_rider',
'construction_vehicle': 'vehicle.parked',
'bicycle': 'cycle.without_rider',
'barrier': '',
'traffic_cone': '',
}
AttrMapping = {
'cycle.with_rider': 0,
'cycle.without_rider': 1,
'pedestrian.moving': 2,
'pedestrian.standing': 3,
'pedestrian.sitting_lying_down': 4,
'vehicle.moving': 5,
'vehicle.parked': 6,
'vehicle.stopped': 7,
}
AttrMapping_rev = [
'cycle.with_rider',
'cycle.without_rider',
'pedestrian.moving',
'pedestrian.standing',
'pedestrian.sitting_lying_down',
'vehicle.moving',
'vehicle.parked',
'vehicle.stopped',
]
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
def __init__(self,
ann_file,
pipeline=None,
root_path=None,
class_names=None,
load_interval=1,
with_velocity=True,
test_mode=False,
modality=None,
eval_version='detection_cvpr_2019',
with_label=True,
max_sweeps=10,
filter_empty_gt=True):
super().__init__()
self.data_root = root_path
self.class_names = class_names if class_names else self.CLASSES
self.test_mode = test_mode
self.load_interval = load_interval
self.with_label = with_label
self.max_sweeps = max_sweeps
self.ann_file = ann_file
data = mmcv.load(ann_file)
self.infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
self.infos = self.infos[::load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
self.with_velocity = with_velocity
self.eval_version = eval_version
from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version)
if modality is None:
modality = dict(
use_camera=False,
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
)
self.modality = modality
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
# kitti map: nusc det name -> kitti eval name
self._kitti_name_mapping = {
'car': 'car',
'pedestrian': 'pedestrian',
} # we only eval these classes in kitti
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __len__(self):
return len(self.infos)
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
if len(input_dict['gt_bboxes_3d']) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_names = input_dict['gt_names']
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def get_sensor_data(self, index):
info = self.infos[index]
points = np.fromfile(
info['lidar_path'], dtype=np.float32, count=-1).reshape([-1, 5])
        # standard protocol modified from SECOND.Pytorch
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = info['timestamp'] / 1e6
for idx, sweep in enumerate(info['sweeps']):
if idx >= self.max_sweeps:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
input_dict = dict(
points=points,
sample_idx=info['token'],
)
if self.modality['use_camera']:
# TODO support image
imgs = []
ori_shapes = []
image_paths = []
lidar2img_rts = []
for cam_type, cam_info in info['cams'].items():
image_path = cam_info['data_path']
# image_path = osp.join(self.data_root, image_path)
img = mmcv.imread(image_path)
imgs.append(img)
ori_shapes.append(img.shape)
image_paths.append(image_path)
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
input_dict.update(
dict(
img=imgs,
img_shape=ori_shapes,
ori_shape=ori_shapes,
filename=image_paths,
lidar2img=lidar2img_rts,
))
if self.with_label:
annos = self.get_ann_info(index)
input_dict.update(annos)
return input_dict
def get_ann_info(self, index):
info = self.infos[index]
# filter out bbox containing no points
mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask]
        # the nuscenes box center is [0.5, 0.5, 0.5]; convert it to
        # the same convention as KITTI, [0.5, 0.5, 0]
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask]
if self.with_velocity:
gt_velocity = info['gt_velocity'][mask]
nan_mask = np.isnan(gt_velocity[:, 0])
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
gt_bboxes_3d_mask = np.array(
[n in self.class_names for n in gt_names_3d], dtype=np.bool_)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_names_3d=gt_names_3d,
gt_bboxes_3d_mask=gt_bboxes_3d_mask,
)
return anns_results
def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {}
mapped_class_names = self.class_names
token2info = {}
for info in self.infos:
token2info[info['token']] = info
print('Start to convert detection format...')
for det in mmcv.track_iter_progress(results):
annos = []
boxes = output_to_nusc_box(det[0])
boxes = lidar_nusc_box_to_global(token2info[det[0]['sample_idx']],
boxes, mapped_class_names,
self.eval_detection_configs,
self.eval_version)
for i, box in enumerate(boxes):
name = mapped_class_names[box.label]
if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
if name in [
'car',
'construction_vehicle',
'bus',
'truck',
'trailer',
]:
attr = 'vehicle.moving'
elif name in ['bicycle', 'motorcycle']:
attr = 'cycle.with_rider'
else:
attr = NuScenesDataset.DefaultAttribute[name]
else:
if name in ['pedestrian']:
attr = 'pedestrian.standing'
elif name in ['bus']:
attr = 'vehicle.stopped'
else:
attr = NuScenesDataset.DefaultAttribute[name]
nusc_anno = dict(
sample_token=det[0]['sample_idx'],
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
velocity=box.velocity[:2].tolist(),
detection_name=name,
detection_score=box.score,
attribute_name=attr)
annos.append(nusc_anno)
nusc_annos[det[0]['sample_idx']] = annos
nusc_submissions = {
'meta': self.modality,
'results': nusc_annos,
}
mmcv.mkdir_or_exist(jsonfile_prefix)
res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
        print('Results written to', res_path)
mmcv.dump(nusc_submissions, res_path)
return res_path
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
from nuscenes import NuScenes
from nuscenes.eval.detection.evaluate import NuScenesEval
output_dir = osp.join(*osp.split(result_path)[:-1])
nusc = NuScenes(
version=self.version, dataroot=self.data_root, verbose=False)
eval_set_map = {
'v1.0-mini': 'mini_train',
'v1.0-trainval': 'val',
}
nusc_eval = NuScenesEval(
nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=False)
nusc_eval.main(render_curves=False)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = '{}_NuScenes'.format(result_name)
for name in self.class_names:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
def format_results(self, results, jsonfile_prefix=None):
"""Format the results to json (standard format for COCO evaluation).
Args:
results (list): Testing results of the dataset.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
            tuple: (result_files, tmp_dir), result_files is a dict containing
                the json filepaths, tmp_dir is the temporary directory created
                for saving json files when jsonfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'
assert len(results) == len(self), (
'The length of results is not equal to the dataset len: {} != {}'.
format(len(results), len(self)))
if jsonfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
jsonfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
if not isinstance(results[0], dict):
result_files = self._format_bbox(results, jsonfile_prefix)
else:
result_files = dict()
for name in results[0]:
                print('Formatting bboxes of {}'.format(name))
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
{name: self._format_bbox(results_, tmp_file_)})
return result_files, tmp_dir
def evaluate(self,
results,
metric='bbox',
logger=None,
jsonfile_prefix=None,
result_names=['pts_bbox']):
"""Evaluation in nuScenes protocol.
Args:
results (list): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
            dict[str, float]: evaluation results.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
if isinstance(result_files, dict):
results_dict = dict()
for name in result_names:
print('Evaluating bboxes of {}'.format(name))
ret_dict = self._evaluate_single(result_files[name])
results_dict.update(ret_dict)
elif isinstance(result_files, str):
results_dict = self._evaluate_single(result_files)
if tmp_dir is not None:
tmp_dir.cleanup()
return results_dict
def output_to_nusc_box(detection):
box3d = detection['box3d_lidar'].numpy()
scores = detection['scores'].numpy()
labels = detection['label_preds'].numpy()
# TODO: check whether this is necessary
# with dir_offset & dir_limit in the head
box3d[:, 6] = -box3d[:, 6] - np.pi / 2
# the trained model is in [0.5, 0.5, 0],
# change them back to nuscenes [0.5, 0.5, 0.5]
box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
box_list = []
for i in range(box3d.shape[0]):
quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6])
velocity = (*box3d[i, 7:9], 0.0)
# velo_val = np.linalg.norm(box3d[i, 7:9])
# velo_ori = box3d[i, 6]
# velocity = (
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box = NuScenesBox(
box3d[i, :3],
box3d[i, 3:6],
quat,
label=labels[i],
score=scores[i],
velocity=velocity)
box_list.append(box)
return box_list
def lidar_nusc_box_to_global(info,
boxes,
classes,
eval_configs,
eval_version='detection_cvpr_2019'):
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))
box.translate(np.array(info['lidar2ego_translation']))
# filter det in ego.
cls_range_map = eval_configs.class_range
radius = np.linalg.norm(box.center[:2], 2)
det_range = cls_range_map[classes[box.label]]
if radius > det_range:
continue
# Move box to global coord system
box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
box.translate(np.array(info['ego2global_translation']))
box_list.append(box)
return box_list
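# Coordinate chain implemented above:
#   lidar frame --(lidar2ego rotation/translation)--> ego frame,
#   where detections beyond the per-class eval range are dropped,
#   then --(ego2global rotation/translation)--> global frame.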
from mmdet.datasets.pipelines import Compose
from .formating import (Collect, Collect3D, ImageToTensor, ToDataContainer,
ToTensor, Transpose, to_tensor)
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
__all__ = [
    'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
    'Transpose', 'Collect', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise',
    'GlobalRotScale', 'PointShuffle', 'ObjectRangeFilter',
    'PointsRangeFilter', 'Collect3D'
]
import numba
import numpy as np
from mmdet3d.core.bbox import box_np_ops
@numba.njit
def _rotation_box2d_jit_(corners, angle, rot_mat_T):
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
corners[:] = corners @ rot_mat_T
@numba.jit(nopython=True)
def box_collision_test(boxes, qboxes, clockwise=True):
N = boxes.shape[0]
K = qboxes.shape[0]
ret = np.zeros((N, K), dtype=np.bool_)
slices = np.array([1, 2, 3, 0])
lines_boxes = np.stack((boxes, boxes[:, slices, :]),
axis=2) # [N, 4, 2(line), 2(xy)]
lines_qboxes = np.stack((qboxes, qboxes[:, slices, :]), axis=2)
# vec = np.zeros((2,), dtype=boxes.dtype)
boxes_standup = box_np_ops.corner_to_standup_nd_jit(boxes)
qboxes_standup = box_np_ops.corner_to_standup_nd_jit(qboxes)
for i in range(N):
for j in range(K):
# calculate standup first
iw = (
min(boxes_standup[i, 2], qboxes_standup[j, 2]) -
max(boxes_standup[i, 0], qboxes_standup[j, 0]))
if iw > 0:
ih = (
min(boxes_standup[i, 3], qboxes_standup[j, 3]) -
max(boxes_standup[i, 1], qboxes_standup[j, 1]))
if ih > 0:
for k in range(4):
for l in range(4):
A = lines_boxes[i, k, 0]
B = lines_boxes[i, k, 1]
C = lines_qboxes[j, l, 0]
D = lines_qboxes[j, l, 1]
                            # segments AB and CD intersect iff A and B lie
                            # on opposite sides of line CD (acd != bcd) and
                            # C and D lie on opposite sides of line AB
                            # (abc != abd)
                            acd = ((D[1] - A[1]) * (C[0] - A[0]) >
                                   (C[1] - A[1]) * (D[0] - A[0]))
                            bcd = ((D[1] - B[1]) * (C[0] - B[0]) >
                                   (C[1] - B[1]) * (D[0] - B[0]))
                            if acd != bcd:
                                abc = ((C[1] - A[1]) * (B[0] - A[0]) >
                                       (B[1] - A[1]) * (C[0] - A[0]))
                                abd = ((D[1] - A[1]) * (B[0] - A[0]) >
                                       (B[1] - A[1]) * (D[0] - A[0]))
                                if abc != abd:
                                    ret[i, j] = True  # collision.
                                    break
                        if ret[i, j]:
                            break
                    if not ret[i, j]:
# now check complete overlap.
# box overlap qbox:
box_overlap_qbox = True
for l in range(4): # point l in qboxes
for k in range(4): # corner k in boxes
vec = boxes[i, k] - boxes[i, (k + 1) % 4]
if clockwise:
vec = -vec
cross = vec[1] * (
boxes[i, k, 0] - qboxes[j, l, 0])
cross -= vec[0] * (
boxes[i, k, 1] - qboxes[j, l, 1])
if cross >= 0:
box_overlap_qbox = False
break
                            if not box_overlap_qbox:
                                break
                        if not box_overlap_qbox:
qbox_overlap_box = True
for l in range(4): # point l in boxes
for k in range(4): # corner k in qboxes
vec = qboxes[j, k] - qboxes[j, (k + 1) % 4]
if clockwise:
vec = -vec
cross = vec[1] * (
qboxes[j, k, 0] - boxes[i, l, 0])
cross -= vec[0] * (
qboxes[j, k, 1] - boxes[i, l, 1])
                                    if cross >= 0:
                                        qbox_overlap_box = False
                                        break
                                if not qbox_overlap_box:
                                    break
if qbox_overlap_box:
ret[i, j] = True # collision.
else:
ret[i, j] = True # collision.
return ret
@numba.njit
def noise_per_box(boxes, valid_mask, loc_noises, rot_noises):
# boxes: [N, 5]
# valid_mask: [N]
# loc_noises: [N, M, 3]
# rot_noises: [N, M]
num_boxes = boxes.shape[0]
num_tests = loc_noises.shape[1]
box_corners = box_np_ops.box2d_to_corner_jit(boxes)
current_corners = np.zeros((4, 2), dtype=boxes.dtype)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
success_mask = -np.ones((num_boxes, ), dtype=np.int64)
# print(valid_mask)
for i in range(num_boxes):
if valid_mask[i]:
for j in range(num_tests):
current_corners[:] = box_corners[i]
current_corners -= boxes[i, :2]
_rotation_box2d_jit_(current_corners, rot_noises[i, j],
rot_mat_T)
current_corners += boxes[i, :2] + loc_noises[i, j, :2]
coll_mat = box_collision_test(
current_corners.reshape(1, 4, 2), box_corners)
coll_mat[0, i] = False
# print(coll_mat)
if not coll_mat.any():
success_mask[i] = j
box_corners[i] = current_corners
break
return success_mask
@numba.njit
def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,
global_rot_noises):
# boxes: [N, 5]
# valid_mask: [N]
# loc_noises: [N, M, 3]
# rot_noises: [N, M]
num_boxes = boxes.shape[0]
num_tests = loc_noises.shape[1]
box_corners = box_np_ops.box2d_to_corner_jit(boxes)
current_corners = np.zeros((4, 2), dtype=boxes.dtype)
current_box = np.zeros((1, 5), dtype=boxes.dtype)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
dst_pos = np.zeros((2, ), dtype=boxes.dtype)
success_mask = -np.ones((num_boxes, ), dtype=np.int64)
corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
corners_norm[1, 1] = 1.0
corners_norm[2] = 1.0
corners_norm[3, 0] = 1.0
corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
corners_norm = corners_norm.reshape(4, 2)
for i in range(num_boxes):
if valid_mask[i]:
for j in range(num_tests):
current_box[0, :] = boxes[i]
current_radius = np.sqrt(boxes[i, 0]**2 + boxes[i, 1]**2)
current_grot = np.arctan2(boxes[i, 0], boxes[i, 1])
dst_grot = current_grot + global_rot_noises[i, j]
dst_pos[0] = current_radius * np.sin(dst_grot)
dst_pos[1] = current_radius * np.cos(dst_grot)
current_box[0, :2] = dst_pos
current_box[0, -1] += (dst_grot - current_grot)
rot_sin = np.sin(current_box[0, -1])
rot_cos = np.cos(current_box[0, -1])
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
current_corners[:] = current_box[
0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2]
current_corners -= current_box[0, :2]
_rotation_box2d_jit_(current_corners, rot_noises[i, j],
rot_mat_T)
current_corners += current_box[0, :2] + loc_noises[i, j, :2]
coll_mat = box_collision_test(
current_corners.reshape(1, 4, 2), box_corners)
coll_mat[0, i] = False
if not coll_mat.any():
success_mask[i] = j
box_corners[i] = current_corners
loc_noises[i, j, :2] += (dst_pos - boxes[i, :2])
rot_noises[i, j] += (dst_grot - current_grot)
break
return success_mask
def _select_transform(transform, indices):
result = np.zeros((transform.shape[0], *transform.shape[2:]),
dtype=transform.dtype)
for i in range(transform.shape[0]):
if indices[i] != -1:
result[i] = transform[i, indices[i]]
return result
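# Sketch of how box noise selection composes (hypothetical shapes; boxes_bev
# is an [N, 5] array of x, y, w, l, yaw as consumed by noise_per_box):
#   loc_noises = np.random.normal(scale=0.25, size=(num_boxes, num_try, 3))
#   rot_noises = np.random.uniform(-0.1, 0.1, size=(num_boxes, num_try))
#   sel = noise_per_box(boxes_bev, valid_mask, loc_noises, rot_noises)
#   loc_t = _select_transform(loc_noises, sel)  # [num_boxes, 3]
#   rot_t = _select_transform(rot_noises, sel)  # [num_boxes]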
@numba.njit
def _rotation_matrix_3d_(rot_mat_T, angle, axis):
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
rot_mat_T[:] = np.eye(3)
if axis == 1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 2] = -rot_sin
rot_mat_T[2, 0] = rot_sin
rot_mat_T[2, 2] = rot_cos
elif axis == 2 or axis == -1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
elif axis == 0:
rot_mat_T[1, 1] = rot_cos
rot_mat_T[1, 2] = -rot_sin
rot_mat_T[2, 1] = rot_sin
rot_mat_T[2, 2] = rot_cos
@numba.njit
def points_transform_(points, centers, point_masks, loc_transform,
rot_transform, valid_mask):
num_box = centers.shape[0]
num_points = points.shape[0]
rot_mat_T = np.zeros((num_box, 3, 3), dtype=points.dtype)
for i in range(num_box):
_rotation_matrix_3d_(rot_mat_T[i], rot_transform[i], 2)
for i in range(num_points):
for j in range(num_box):
if valid_mask[j]:
if point_masks[i, j] == 1:
points[i, :3] -= centers[j, :3]
points[i:i + 1, :3] = points[i:i + 1, :3] @ rot_mat_T[j]
points[i, :3] += centers[j, :3]
points[i, :3] += loc_transform[j]
break # only apply first box's transform
@numba.njit
def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):
num_box = boxes.shape[0]
for i in range(num_box):
if valid_mask[i]:
boxes[i, :3] += loc_transform[i]
boxes[i, 6] += rot_transform[i]
def noise_per_object_v3_(gt_boxes,
points=None,
valid_mask=None,
rotation_perturb=np.pi / 4,
center_noise_std=1.0,
global_random_rot_range=np.pi / 4,
num_try=100):
"""random rotate or remove each groundtrutn independently.
use kitti viewer to test this function points_transform_
Args:
gt_boxes: [N, 7], gt box in lidar.points_transform_
points: [M, 4], point cloud in lidar.
"""
num_boxes = gt_boxes.shape[0]
if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):
rotation_perturb = [-rotation_perturb, rotation_perturb]
if not isinstance(global_random_rot_range, (list, tuple, np.ndarray)):
global_random_rot_range = [
-global_random_rot_range, global_random_rot_range
]
enable_grot = np.abs(global_random_rot_range[0] -
global_random_rot_range[1]) >= 1e-3
if not isinstance(center_noise_std, (list, tuple, np.ndarray)):
center_noise_std = [
center_noise_std, center_noise_std, center_noise_std
]
if valid_mask is None:
valid_mask = np.ones((num_boxes, ), dtype=np.bool_)
center_noise_std = np.array(center_noise_std, dtype=gt_boxes.dtype)
loc_noises = np.random.normal(
scale=center_noise_std, size=[num_boxes, num_try, 3])
rot_noises = np.random.uniform(
rotation_perturb[0], rotation_perturb[1], size=[num_boxes, num_try])
gt_grots = np.arctan2(gt_boxes[:, 0], gt_boxes[:, 1])
grot_lowers = global_random_rot_range[0] - gt_grots
grot_uppers = global_random_rot_range[1] - gt_grots
global_rot_noises = np.random.uniform(
grot_lowers[..., np.newaxis],
grot_uppers[..., np.newaxis],
size=[num_boxes, num_try])
origin = [0.5, 0.5, 0]
gt_box_corners = box_np_ops.center_to_corner_box3d(
gt_boxes[:, :3],
gt_boxes[:, 3:6],
gt_boxes[:, 6],
origin=origin,
axis=2)
# TODO: rewrite this noise box function?
if not enable_grot:
selected_noise = noise_per_box(gt_boxes[:, [0, 1, 3, 4, 6]],
valid_mask, loc_noises, rot_noises)
else:
selected_noise = noise_per_box_v2_(gt_boxes[:, [0, 1, 3, 4, 6]],
valid_mask, loc_noises, rot_noises,
global_rot_noises)
loc_transforms = _select_transform(loc_noises, selected_noise)
rot_transforms = _select_transform(rot_noises, selected_noise)
surfaces = box_np_ops.corner_to_surfaces_3d_jit(gt_box_corners)
if points is not None:
# TODO: replace this points_in_convex function by my tools?
point_masks = box_np_ops.points_in_convex_polygon_3d_jit(
points[:, :3], surfaces)
points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,
rot_transforms, valid_mask)
box3d_transform_(gt_boxes, loc_transforms, rot_transforms, valid_mask)
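# In this repo the function is driven by the ObjectNoise pipeline further
# below; a direct call sketch with default-like values (hypothetical):
#   noise_per_object_v3_(gt_boxes, points, valid_mask,
#                        rotation_perturb=[-0.157, 0.157],
#                        center_noise_std=[0.25, 0.25, 0.25],
#                        global_random_rot_range=[0.0, 0.0],
#                        num_try=100)
# gt_boxes and points are modified in place.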
import copy
import os
import pickle
import cv2
import mmcv
import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.datasets.pipelines import data_augment_utils
from ..registry import OBJECTSAMPLERS
class BatchSampler:
def __init__(self,
sampled_list,
name=None,
epoch=None,
shuffle=True,
drop_reminder=False):
self._sampled_list = sampled_list
self._indices = np.arange(len(sampled_list))
if shuffle:
np.random.shuffle(self._indices)
self._idx = 0
self._example_num = len(sampled_list)
self._name = name
self._shuffle = shuffle
self._epoch = epoch
self._epoch_counter = 0
self._drop_reminder = drop_reminder
def _sample(self, num):
if self._idx + num >= self._example_num:
ret = self._indices[self._idx:].copy()
self._reset()
else:
ret = self._indices[self._idx:self._idx + num]
self._idx += num
return ret
def _reset(self):
assert self._name is not None
# print("reset", self._name)
if self._shuffle:
np.random.shuffle(self._indices)
self._idx = 0
def sample(self, num):
indices = self._sample(num)
return [self._sampled_list[i] for i in indices]
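# Usage sketch (hypothetical items): the sampler iterates over a list without
# replacement and reshuffles whenever it wraps around, e.g.
#   sampler = BatchSampler(list(range(5)), name='Car')
#   batch = sampler.sample(3)  # 3 distinct items from the current pass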
@OBJECTSAMPLERS.register_module
class DataBaseSampler(object):
def __init__(self, info_path, root_path, rate, prepare, object_rot_range,
sample_groups, use_road_plane):
super().__init__()
self.root_path = root_path
self.info_path = info_path
self.rate = rate
self.prepare = prepare
self.object_rot_range = object_rot_range
with open(info_path, 'rb') as f:
db_infos = pickle.load(f)
# filter database infos
from mmdet3d.apis import get_root_logger
logger = get_root_logger()
for k, v in db_infos.items():
logger.info(f'load {len(v)} {k} database infos')
for prep_func, val in prepare.items():
db_infos = getattr(self, prep_func)(db_infos, val)
logger.info('After filter database:')
for k, v in db_infos.items():
logger.info(f'load {len(v)} {k} database infos')
self.db_infos = db_infos
# load sample groups
# TODO: more elegant way to load sample groups
self.sample_groups = []
for name, num in sample_groups.items():
self.sample_groups.append({name: int(num)})
self.group_db_infos = self.db_infos # just use db_infos
self.sample_classes = []
self.sample_max_nums = []
for group_info in self.sample_groups:
self.sample_classes += list(group_info.keys())
self.sample_max_nums += list(group_info.values())
self.sampler_dict = {}
for k, v in self.group_db_infos.items():
self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)
self.object_rot_range = object_rot_range
self.object_rot_enable = np.abs(self.object_rot_range[0] -
self.object_rot_range[1]) >= 1e-3
# TODO: No group_sampling currently
@staticmethod
def filter_by_difficulty(db_infos, removed_difficulty):
new_db_infos = {}
for key, dinfos in db_infos.items():
new_db_infos[key] = [
info for info in dinfos
if info['difficulty'] not in removed_difficulty
]
return new_db_infos
@staticmethod
def filter_by_min_points(db_infos, min_gt_points_dict):
for name, min_num in min_gt_points_dict.items():
min_num = int(min_num)
if min_num > 0:
filtered_infos = []
for info in db_infos[name]:
if info['num_points_in_gt'] >= min_num:
filtered_infos.append(info)
db_infos[name] = filtered_infos
return db_infos
def sample_all(self, gt_bboxes, gt_names, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes = []
avoid_coll_boxes = gt_bboxes
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
else:
sampled_gt_box = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_bboxes += [sampled_gt_box]
avoid_coll_boxes = np.concatenate(
[avoid_coll_boxes, sampled_gt_box], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)
# center = sampled_gt_bboxes[:, 0:3]
num_sampled = len(sampled)
s_points_list = []
count = 0
for info in sampled:
file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4])
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
s_points[:, :3] += info['box3d_lidar'][:3]
count += 1
s_points_list.append(s_points)
ret = {
'gt_names':
np.array([s['name'] for s in sampled]),
'difficulty':
np.array([s['difficulty'] for s in sampled]),
'gt_bboxes_3d':
sampled_gt_bboxes,
'points':
np.concatenate(s_points_list, axis=0),
'gt_masks':
np.ones((num_sampled, ), dtype=np.bool_),
'group_ids':
np.arange(gt_bboxes.shape[0],
gt_bboxes.shape[0] + len(sampled))
}
return ret
def sample_class_v2(self, name, num, gt_bboxes):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes.shape[0]
num_sampled = len(sampled)
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
valid_mask = np.zeros([gt_bboxes.shape[0]], dtype=np.bool_)
valid_mask = np.concatenate(
[valid_mask,
np.ones([sp_boxes.shape[0]], dtype=np.bool_)], axis=0)
boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()
if self.object_rot_enable:
assert False, 'This part needs to be checked'
# place samples to any place in a circle.
# TODO: rm it if not needed
data_augment_utils.noise_per_object_v3_(
boxes,
None,
valid_mask,
0,
0,
                self.object_rot_range,
num_try=100)
sp_boxes_new = boxes[gt_bboxes.shape[0]:]
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes_new[:, 0:2], sp_boxes_new[:, 3:5], sp_boxes_new[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
if self.object_rot_enable:
assert False, 'This part needs to be checked'
sampled[i - num_gt]['box3d_lidar'][:2] = boxes[i, :2]
sampled[i - num_gt]['box3d_lidar'][-1] = boxes[i, -1]
sampled[i - num_gt]['rot_transform'] = (
boxes[i, -1] - sp_boxes[i - num_gt, -1])
valid_samples.append(sampled[i - num_gt])
return valid_samples
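# Config sketch matching __init__ above (hypothetical paths and values):
#   db_sampler = dict(
#       type='DataBaseSampler',
#       info_path='data/kitti/kitti_dbinfos_train.pkl',
#       root_path='data/kitti',
#       rate=1.0,
#       prepare=dict(
#           filter_by_difficulty=[-1],
#           filter_by_min_points=dict(Car=5)),
#       object_rot_range=[0.0, 0.0],
#       sample_groups=dict(Car=15),
#       use_road_plane=False)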
@OBJECTSAMPLERS.register_module
class MMDataBaseSampler(DataBaseSampler):
def __init__(self,
info_path,
root_path,
rate,
prepare,
object_rot_range,
sample_groups,
check_2D_collision=False,
collision_thr=0,
collision_in_classes=False,
depth_consistent=False,
blending_type=None):
super(MMDataBaseSampler, self).__init__(
info_path=info_path,
root_path=root_path,
rate=rate,
prepare=prepare,
object_rot_range=object_rot_range,
sample_groups=sample_groups,
use_road_plane=False,
)
self.blending_type = blending_type
self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision
self.collision_thr = collision_thr
self.collision_in_classes = collision_in_classes
def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes_3d = []
sampled_gt_bboxes_2d = []
avoid_coll_boxes_3d = gt_bboxes_3d
avoid_coll_boxes_2d = gt_bboxes_2d
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes_3d,
avoid_coll_boxes_2d)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
np.newaxis, ...]
else:
sampled_gt_box_3d = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_box_2d = np.stack(
[s['box2d_camera'] for s in sampled_cls], axis=0)
sampled_gt_bboxes_3d += [sampled_gt_box_3d]
sampled_gt_bboxes_2d += [sampled_gt_box_2d]
if self.collision_in_classes:
                        # TODO: check whether the collision check among
                        # classes is necessary
avoid_coll_boxes_3d = np.concatenate(
[avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
avoid_coll_boxes_2d = np.concatenate(
[avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
num_sampled = len(sampled)
s_points_list = []
count = 0
if self.depth_consistent:
                # paste objects in order of decreasing distance so that
                # nearer objects overlay farther ones
center = sampled_gt_bboxes_3d[:, 0:3]
paste_order = np.argsort(
-np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
axis=-1)
for idx in range(len(sampled)):
if self.depth_consistent:
inds = np.where(paste_order == idx)[0][0]
info = sampled[inds]
else:
info = sampled[idx]
pcd_file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile(
pcd_file_path, dtype=np.float32).reshape([-1, 4])
s_patch = mmcv.imread(img_file_path)
s_mask = mmcv.imread(mask_file_path, 'grayscale')
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
# TODO: might need to rot 2d bbox in the future
                # the object center was already subtracted from each
                # sample's points, so add the offset back here
s_points[:, :3] += info['box3d_lidar'][:3]
img = self.paste_obj(
img,
s_patch,
s_mask,
bbox_2d=info['box2d_camera'].astype(np.int32))
count += 1
s_points_list.append(s_points)
ret = dict(
img=img,
gt_names=np.array([s['name'] for s in sampled]),
difficulty=np.array([s['difficulty'] for s in sampled]),
gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0),
gt_masks=np.ones((num_sampled, ), dtype=np.bool_),
group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled)))
return ret
def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
# paste the image patch back
x1, y1, x2, y2 = bbox_2d
        # the bbox may exceed the image boundary, so clip the patch size
img_h, img_w = img.shape[:2]
w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
obj_mask = obj_mask[:h, :w]
obj_img = obj_img[:h, :w]
# choose a blend option
if not self.blending_type:
blending_op = 'none'
else:
blending_choice = np.random.randint(len(self.blending_type))
blending_op = self.blending_type[blending_choice]
if blending_op.find('poisson') != -1:
# options: cv2.NORMAL_CLONE=1, or cv2.MONOCHROME_TRANSFER=3
            # cv2.MIXED_CLONE mixes the textures, so it is not used
if blending_op == 'poisson':
mode = np.random.choice([1, 3], 1)[0]
elif blending_op == 'poisson_normal':
mode = cv2.NORMAL_CLONE
elif blending_op == 'poisson_transfer':
mode = cv2.MONOCHROME_TRANSFER
else:
raise NotImplementedError
center = (int(x1 + w / 2), int(y1 + h / 2))
img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
else:
if blending_op == 'gaussian':
obj_mask = cv2.GaussianBlur(
obj_mask.astype(np.float32), (5, 5), 2)
elif blending_op == 'box':
obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
paste_mask = 1 - obj_mask
img[y1:y1 + h,
x1:x1 + w] = (img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
paste_mask[..., None]).astype(np.uint8)
img[y1:y1 + h, x1:x1 + w] += (obj_img.astype(np.float32) *
obj_mask[..., None]).astype(np.uint8)
return img
def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes_3d.shape[0]
num_sampled = len(sampled)
# avoid collision in BEV first
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
# Then avoid collision in 2D space
if self.check_2D_collision:
sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
axis=0)
total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
axis=0) # Nx4
            # randomly select a collision threshold
if isinstance(self.collision_thr, float):
collision_thr = self.collision_thr
elif isinstance(self.collision_thr, list):
collision_thr = np.random.choice(self.collision_thr)
elif isinstance(self.collision_thr, dict):
mode = self.collision_thr.get('mode', 'value')
if mode == 'value':
collision_thr = np.random.choice(
self.collision_thr['thr_range'])
elif mode == 'range':
collision_thr = np.random.uniform(
self.collision_thr['thr_range'][0],
self.collision_thr['thr_range'][1])
if collision_thr == 0:
                # use a corner-based collision test similar to the BEV one
# Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
# ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
x1y1 = total_bbox_2d[:, :2]
x2y2 = total_bbox_2d[:, 2:]
x1y2 = np.stack([total_bbox_2d[:, 0], total_bbox_2d[:, 3]],
axis=-1)
x2y1 = np.stack([total_bbox_2d[:, 2], total_bbox_2d[:, 1]],
axis=-1)
total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
coll_mat_2d = data_augment_utils.box_collision_test(
total_2d, total_2d)
else:
# use iof rather than iou to protect the foreground
overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
'iof')
coll_mat_2d = overlaps > collision_thr
coll_mat = coll_mat + coll_mat_2d
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
valid_samples.append(sampled[i - num_gt])
return valid_samples
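# `collision_thr` sketch: it may be a float, a list of candidate values, or a
# dict, e.g. (hypothetical values):
#   collision_thr = 0.0  # corner-based test, as in BEV
#   collision_thr = [0.0, 0.3, 0.5, 0.7]  # random choice per call
#   collision_thr = dict(mode='range', thr_range=[0.0, 0.7])  # uniform sample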
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet.datasets.pipelines import PIPELINES, to_tensor
PIPELINES._module_dict.pop('DefaultFormatBundle')
@PIPELINES.register_module
class DefaultFormatBundle(object):
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields, including "img",
"proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- proposals: (1)to tensor, (2)to DataContainer
- gt_bboxes: (1)to tensor, (2)to DataContainer
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
(3)to DataContainer (stack=True)
"""
    def __init__(self):
        pass
def __call__(self, results):
if 'img' in results:
if isinstance(results['img'], list):
# process multiple imgs in single frame
imgs = [img.transpose(2, 0, 1) for img in results['img']]
imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
results['img'] = DC(to_tensor(imgs), stack=True)
else:
img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True)
for key in [
'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore',
'gt_labels', 'gt_labels_3d'
]:
if key not in results:
continue
if isinstance(results[key], list):
results[key] = DC([to_tensor(res) for res in results[key]])
else:
results[key] = DC(to_tensor(results[key]))
if 'gt_masks' in results:
results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
if 'gt_semantic_seg' in results:
results['gt_semantic_seg'] = DC(
to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)
return results
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class Collect3D(object):
def __init__(self,
keys,
pcd_shape=[1, 1600, 1408],
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
'sample_idx', 'pcd_scale_factor', 'pcd_rotation')):
self.keys = keys
self.meta_keys = meta_keys
self.pcd_shape = pcd_shape
def __call__(self, results):
data = {}
img_meta = {}
for key in self.meta_keys:
if key in results:
img_meta[key] = results[key]
img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
data['img_meta'] = DC(img_meta, cpu_only=True)
for key in self.keys:
data[key] = results[key]
return data
def __repr__(self):
return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(
self.keys, self.meta_keys)
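# Pipeline config sketch (hypothetical keys): Collect3D packs the listed keys
# plus the meta fields found in results into the final sample, e.g.
#   dict(type='Collect3D',
#        keys=['voxels', 'coors', 'num_points', 'gt_bboxes_3d',
#              'gt_labels_3d'])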
@PIPELINES.register_module
class DefaultFormatBundle3D(DefaultFormatBundle):
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields for voxels,
including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and
"gt_semantic_seg".
These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- proposals: (1)to tensor, (2)to DataContainer
- gt_bboxes: (1)to tensor, (2)to DataContainer
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
"""
def __init__(self, class_names, with_gt=True, with_label=True):
super(DefaultFormatBundle3D, self).__init__()
self.class_names = class_names
self.with_gt = with_gt
self.with_label = with_label
def __call__(self, results):
# Format 3D data
for key in [
'voxels', 'coors', 'voxel_centers', 'num_points', 'points'
]:
if key not in results:
continue
results[key] = DC(to_tensor(results[key]), stack=False)
if self.with_gt:
            # filter GT bboxes by their masks as the final step
if 'gt_bboxes_3d_mask' in results:
gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']
results['gt_bboxes_3d'] = results['gt_bboxes_3d'][
gt_bboxes_3d_mask]
results['gt_names_3d'] = results['gt_names_3d'][
gt_bboxes_3d_mask]
if 'gt_bboxes_mask' in results:
gt_bboxes_mask = results['gt_bboxes_mask']
if 'gt_bboxes' in results:
results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]
results['gt_names'] = results['gt_names'][gt_bboxes_mask]
if self.with_label:
if 'gt_names' in results and len(results['gt_names']) == 0:
results['gt_labels'] = np.array([], dtype=np.int64)
elif 'gt_names' in results and isinstance(
results['gt_names'][0], list):
# gt_labels might be a list of list in multi-view setting
results['gt_labels'] = [
np.array([self.class_names.index(n) for n in res],
dtype=np.int64) for res in results['gt_names']
]
elif 'gt_names' in results:
results['gt_labels'] = np.array([
self.class_names.index(n) for n in results['gt_names']
],
dtype=np.int64)
            # we still assume one pipeline processes one frame of LiDAR,
            # so gt_names_3d is a flat list of strings
results['gt_labels_3d'] = np.array([
self.class_names.index(n) for n in results['gt_names_3d']
],
dtype=np.int64)
results = super(DefaultFormatBundle3D, self).__call__(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(class_names={}, '.format(self.class_names)
repr_str += 'with_gt={}, with_label={})'.format(
self.with_gt, self.with_label)
return repr_str
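# Formatting stage sketch (hypothetical class names): DefaultFormatBundle3D
# converts voxel fields and GT to tensors/DataContainers and maps names to
# labels, e.g.
#   dict(type='DefaultFormatBundle3D',
#        class_names=['Car', 'Pedestrian', 'Cyclist'])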
import os.path as osp
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
from mmdet.datasets.pipelines import PIPELINES
@PIPELINES.register_module
class LoadImageFromFile(object):
def __init__(self, to_float32=False):
self.to_float32 = to_float32
def __call__(self, results):
if results['img_prefix'] is not None:
filename = osp.join(results['img_prefix'],
results['img_info']['filename'])
else:
filename = results['img_info']['filename']
img = mmcv.imread(filename)
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
return results
def __repr__(self):
return self.__class__.__name__ + '(to_float32={})'.format(
self.to_float32)
@PIPELINES.register_module
class LoadAnnotations(object):
def __init__(self,
with_bbox=True,
with_label=True,
with_mask=False,
with_seg=False,
poly2mask=True):
self.with_bbox = with_bbox
self.with_label = with_label
self.with_mask = with_mask
self.with_seg = with_seg
self.poly2mask = poly2mask
def _load_bboxes(self, results):
ann_info = results['ann_info']
results['gt_bboxes'] = ann_info['bboxes']
gt_bboxes_ignore = ann_info.get('bboxes_ignore', None)
if gt_bboxes_ignore is not None:
results['gt_bboxes_ignore'] = gt_bboxes_ignore
results['bbox_fields'].append('gt_bboxes_ignore')
results['bbox_fields'].append('gt_bboxes')
return results
def _load_labels(self, results):
results['gt_labels'] = results['ann_info']['labels']
return results
def _poly2mask(self, mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
rle = maskUtils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = maskUtils.decode(rle)
return mask
def _load_masks(self, results):
h, w = results['img_info']['height'], results['img_info']['width']
gt_masks = results['ann_info']['masks']
if self.poly2mask:
gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks]
results['gt_masks'] = gt_masks
results['mask_fields'].append('gt_masks')
return results
def _load_semantic_seg(self, results):
results['gt_semantic_seg'] = mmcv.imread(
osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
flag='unchanged').squeeze()
results['seg_fields'].append('gt_semantic_seg')
return results
def __call__(self, results):
if self.with_bbox:
results = self._load_bboxes(results)
if results is None:
return None
if self.with_label:
results = self._load_labels(results)
if self.with_mask:
results = self._load_masks(results)
if self.with_seg:
results = self._load_semantic_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += ('(with_bbox={}, with_label={}, with_mask={},'
' with_seg={})').format(self.with_bbox, self.with_label,
self.with_mask, self.with_seg)
return repr_str
@PIPELINES.register_module
class LoadProposals(object):
def __init__(self, num_max_proposals=None):
self.num_max_proposals = num_max_proposals
def __call__(self, results):
proposals = results['proposals']
if proposals.shape[1] not in (4, 5):
raise AssertionError(
'proposals should have shapes (n, 4) or (n, 5), '
'but found {}'.format(proposals.shape))
proposals = proposals[:, :4]
if self.num_max_proposals is not None:
proposals = proposals[:self.num_max_proposals]
if len(proposals) == 0:
proposals = np.array([[0, 0, 0, 0]], dtype=np.float32)
results['proposals'] = proposals
results['bbox_fields'].append('proposals')
return results
def __repr__(self):
return self.__class__.__name__ + '(num_max_proposals={})'.format(
self.num_max_proposals)
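# Loading stage sketch for an image branch (hypothetical config):
#   dict(type='LoadImageFromFile', to_float32=True),
#   dict(type='LoadAnnotations', with_bbox=True, with_label=True),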
import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.utils import build_from_cfg
from mmdet.datasets.registry import PIPELINES
from ..registry import OBJECTSAMPLERS
from .data_augment_utils import noise_per_object_v3_
from .transforms import RandomFlip
@PIPELINES.register_module
class RandomFlip3D(RandomFlip):
"""Flip the points & bbox.
If the input dict contains the key "flip", then the flag will be used,
otherwise it will be randomly decided by a ratio specified in the init
method.
Args:
flip_ratio (float, optional): The flipping probability.
"""
def __init__(self, sync_2d=True, **kwargs):
super(RandomFlip3D, self).__init__(**kwargs)
self.sync_2d = sync_2d
def random_flip_points(self, gt_bboxes_3d, points):
gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1]
gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
points[:, 1] = -points[:, 1]
if gt_bboxes_3d.shape[1] == 9:
            # flip velocities at the same time
gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
return gt_bboxes_3d, points
def __call__(self, input_dict):
super(RandomFlip3D, self).__call__(input_dict)
if self.sync_2d:
input_dict['pcd_flip'] = input_dict['flip']
else:
            flip = bool(np.random.rand() < self.flip_ratio)
input_dict['pcd_flip'] = flip
if input_dict['pcd_flip']:
            # flip points and 3D boxes
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d, points = self.random_flip_points(
gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['points'] = points
return input_dict
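# Usage sketch: with sync_2d=True the point cloud flip simply follows the 2D
# image flip decided by the parent RandomFlip, e.g. (hypothetical ratio):
#   dict(type='RandomFlip3D', sync_2d=True, flip_ratio=0.5)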
@PIPELINES.register_module
class ObjectSample(object):
def __init__(self, db_sampler, sample_2d=False):
self.sampler_cfg = db_sampler
self.sample_2d = sample_2d
if 'type' not in db_sampler.keys():
db_sampler['type'] = 'DataBaseSampler'
self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)
@staticmethod
def remove_points_in_boxes(points, boxes):
masks = box_np_ops.points_in_rbbox(points, boxes)
points = points[np.logical_not(masks.any(-1))]
return points
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# change to float for blending operation
points = input_dict['points']
# rect = input_dict['rect']
# Trv2c = input_dict['Trv2c']
# P2 = input_dict['P2']
if self.sample_2d:
img = input_dict['img'] # .astype(np.float32)
gt_bboxes_2d = input_dict['gt_bboxes']
gt_bboxes_mask = input_dict['gt_bboxes_mask']
gt_names = input_dict['gt_names']
# Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
else:
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, img=None)
if sampled_dict is not None:
sampled_gt_names = sampled_dict['gt_names']
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
sampled_points = sampled_dict['points']
sampled_gt_masks = sampled_dict['gt_masks']
gt_names_3d = np.concatenate([gt_names_3d, sampled_gt_names],
axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
]).astype(np.float32)
gt_bboxes_3d_mask = np.concatenate(
[gt_bboxes_3d_mask, sampled_gt_masks], axis=0)
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension
dim_inds = points.shape[-1]
points = np.concatenate([sampled_points[:, :dim_inds], points],
axis=0)
if self.sample_2d:
sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
gt_bboxes_2d = np.concatenate(
[gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
gt_bboxes_mask = np.concatenate(
[gt_bboxes_mask, sampled_gt_masks], axis=0)
gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0)
input_dict['gt_names'] = gt_names
input_dict['gt_bboxes'] = gt_bboxes_2d
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['img'] = sampled_dict['img'] # .astype(np.uint8)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_names_3d'] = gt_names_3d
input_dict['points'] = points
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class ObjectNoise(object):
def __init__(self,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267],
num_try=100):
self.loc_noise_std = loc_noise_std
self.global_rot_range = global_rot_range
self.rot_uniform_noise = rot_uniform_noise
self.num_try = num_try
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# TODO: check this inplace function
noise_per_object_v3_(
gt_bboxes_3d,
points,
gt_bboxes_3d_mask,
rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range,
num_try=self.num_try)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['points'] = points
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_try={},'.format(self.num_try)
repr_str += ' loc_noise_std={},'.format(self.loc_noise_std)
repr_str += ' global_rot_range={},'.format(self.global_rot_range)
repr_str += ' rot_uniform_noise={})'.format(self.rot_uniform_noise)
return repr_str
@PIPELINES.register_module
class GlobalRotScale(object):
def __init__(self,
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]):
self.rot_uniform_noise = rot_uniform_noise
self.scaling_uniform_noise = scaling_uniform_noise
self.trans_normal_noise = trans_normal_noise
def _trans_bbox_points(self, gt_boxes, points):
        # sample per-axis translation noise
        noise_trans = np.random.normal(0, self.trans_normal_noise, 3)
points[:, :3] += noise_trans
gt_boxes[:, :3] += noise_trans
return gt_boxes, points, noise_trans
def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):
if not isinstance(rotation, list):
rotation = [-rotation, rotation]
noise_rotation = np.random.uniform(rotation[0], rotation[1])
points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
points[:, :3], noise_rotation, axis=2)
gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle(
gt_boxes[:, :3], noise_rotation, axis=2)
gt_boxes[:, 6] += noise_rotation
if gt_boxes.shape[1] == 9:
            # rotate the velocity vector as well
rot_cos = np.cos(noise_rotation)
rot_sin = np.sin(noise_rotation)
rot_mat_T_bev = np.array([[rot_cos, -rot_sin], [rot_sin, rot_cos]],
dtype=points.dtype)
gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
return gt_boxes, points, rot_mat_T
def _scale_bbox_points(self,
gt_boxes,
points,
min_scale=0.95,
max_scale=1.05):
noise_scale = np.random.uniform(min_scale, max_scale)
points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale
if gt_boxes.shape[1] == 9:
gt_boxes[:, 7:] *= noise_scale
return gt_boxes, points, noise_scale
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d, points, rotation_factor = self._rot_bbox_points(
gt_bboxes_3d, points, rotation=self.rot_uniform_noise)
gt_bboxes_3d, points, scale_factor = self._scale_bbox_points(
gt_bboxes_3d, points, *self.scaling_uniform_noise)
gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['points'] = points
input_dict['pcd_scale_factor'] = scale_factor
input_dict['pcd_rotation'] = rotation_factor
input_dict['pcd_trans'] = trans_factor
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(rot_uniform_noise={},'.format(self.rot_uniform_noise)
repr_str += ' scaling_uniform_noise={},'.format(
self.scaling_uniform_noise)
repr_str += ' trans_normal_noise={})'.format(self.trans_normal_noise)
return repr_str
@PIPELINES.register_module
class PointShuffle(object):
def __call__(self, input_dict):
np.random.shuffle(input_dict['points'])
return input_dict
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class ObjectRangeFilter(object):
def __init__(self, point_cloud_range):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
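    # e.g. limit_period(3.5 * np.pi, offset=0.5, period=2 * np.pi)
    #   = 3.5 * np.pi - np.floor(1.75 + 0.5) * 2 * np.pi = -0.5 * np.pi,
    # i.e. yaw angles are wrapped into [-pi, pi)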
@staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range.
this function should be applied after other prep functions
Args:
gt_bboxes_3d ([type]): [description]
limit_range ([type]): [description]
"""
gt_bboxes_3d_bv = box_np_ops.center_to_corner_box2d(
            gt_bboxes_3d[:, [0, 1]], gt_bboxes_3d[:, [3, 4]],
gt_bboxes_3d[:, 6])
bounding_box = box_np_ops.minmax_to_corner_2d(
np.asarray(limit_range)[np.newaxis, ...])
ret = box_np_ops.points_in_convex_polygon_jit(
gt_bboxes_3d_bv.reshape(-1, 2), bounding_box)
return np.any(ret.reshape(-1, 4), axis=1)
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask]
gt_names_3d = gt_names_3d[mask]
# the mask should also be updated
gt_bboxes_3d_mask = gt_bboxes_3d_mask[mask]
        # limit yaw to [-pi, pi)
gt_bboxes_3d[:, 6] = self.limit_period(
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_names_3d'] = gt_names_3d
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str
@PIPELINES.register_module
class PointsRangeFilter(object):
def __init__(self, point_cloud_range):
self.pcd_range = np.array(
point_cloud_range, dtype=np.float32)[np.newaxis, :]
def __call__(self, input_dict):
points = input_dict['points']
points_mask = ((points[:, :3] >= self.pcd_range[:, :3])
& (points[:, :3] < self.pcd_range[:, 3:]))
points_mask = points_mask[:, 0] & points_mask[:, 1] & points_mask[:, 2]
clean_points = points[points_mask, :]
input_dict['points'] = clean_points
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str
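# A hedged end-to-end train-augmentation sketch combining the transforms in
# this file (hypothetical ranges for a KITTI-like setup; `db_sampler` as
# sketched earlier):
#   train_aug = [
#       dict(type='ObjectSample', db_sampler=db_sampler),
#       dict(type='ObjectNoise', num_try=100),
#       dict(type='RandomFlip3D', sync_2d=True, flip_ratio=0.5),
#       dict(type='GlobalRotScale'),
#       dict(type='PointShuffle'),
#       dict(type='ObjectRangeFilter',
#            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
#       dict(type='PointsRangeFilter',
#            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
#   ]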