Commit d1aac35d authored by zhangwenwei

Initial commit
from functools import partial
import mmcv
import numpy as np
import torch
import torch.nn.functional as F
from six.moves import map, zip
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
num_imgs = tensor.size(0)
mean = np.array(mean, dtype=np.float32)
std = np.array(std, dtype=np.float32)
imgs = []
for img_id in range(num_imgs):
img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
img = mmcv.imdenormalize(
img, mean, std, to_bgr=to_rgb).astype(np.uint8)
imgs.append(np.ascontiguousarray(img))
return imgs
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
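# Example (illustrative sketch): multi_apply maps a multi-output function
# over the zipped args and transposes the per-call results into per-output
# lists.
#   def add_sub(a, b):
#       return a + b, a - b
#   sums, diffs = multi_apply(add_sub, [1, 2], [3, 4])
#   # sums == [4, 6], diffs == [-2, -2]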
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
size count) """
if data.dim() == 1:
ret = data.new_full((count, ), fill)
ret[inds] = data
else:
new_size = (count, ) + data.size()[1:]
ret = data.new_full(new_size, fill)
ret[inds, :] = data
return ret
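# Example (illustrative sketch): scatter two values back into a set of five
# items, filling the rest with zeros.
#   unmap(torch.tensor([1., 2.]), 5, torch.tensor([0, 3]))
#   # -> tensor([1., 0., 0., 2., 0.])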
def merge_batch(data):
for key, elems in data.items():
if key in ['voxels', 'num_points', 'voxel_labels', 'voxel_centers']:
data[key]._data[0] = torch.cat(elems._data[0], dim=0)
elif key == 'coors':
coors = []
for i, coor in enumerate(elems._data[0]):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors.append(coor_pad)
data[key]._data[0] = torch.cat(coors, dim=0)
return data
def merge_hook_batch(data):
for key, elems in data.items():
if key in ['voxels', 'num_points', 'voxel_labels', 'voxel_centers']:
data[key] = torch.cat(elems, dim=0)
elif key == 'coors':
coors = []
for i, coor in enumerate(elems):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors.append(coor_pad)
data[key] = torch.cat(coors, dim=0)
return data
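# Note on the 'coors' handling above: each (N_i, 3) coordinate tensor of
# sample i is left-padded with a constant batch-index column, giving an
# (N_i, 4) tensor whose first column equals i, so per-sample voxels stay
# distinguishable after concatenation along dim 0.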
from .builder import build_voxel_generator
from .voxel_generator import VoxelGenerator
__all__ = ['build_voxel_generator', 'VoxelGenerator']
import mmcv
from . import voxel_generator
def build_voxel_generator(cfg, **kwargs):
if isinstance(cfg, voxel_generator.VoxelGenerator):
return cfg
elif isinstance(cfg, dict):
return mmcv.runner.obj_from_dict(
cfg, voxel_generator, default_args=kwargs)
else:
        raise TypeError('Invalid type {} for building a voxel generator'.
                        format(type(cfg)))
import numba
import numpy as np
class VoxelGenerator(object):
def __init__(self,
voxel_size,
point_cloud_range,
max_num_points,
max_voxels=20000):
point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
# [0, -40, -3, 70.4, 40, 1]
voxel_size = np.array(voxel_size, dtype=np.float32)
grid_size = (point_cloud_range[3:] -
point_cloud_range[:3]) / voxel_size
grid_size = np.round(grid_size).astype(np.int64)
self._voxel_size = voxel_size
self._point_cloud_range = point_cloud_range
self._max_num_points = max_num_points
self._max_voxels = max_voxels
self._grid_size = grid_size
def generate(self, points):
return points_to_voxel(points, self._voxel_size,
self._point_cloud_range, self._max_num_points,
True, self._max_voxels)
@property
def voxel_size(self):
return self._voxel_size
@property
def max_num_points_per_voxel(self):
return self._max_num_points
@property
def point_cloud_range(self):
return self._point_cloud_range
@property
def grid_size(self):
return self._grid_size
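# Example (illustrative sketch; the values are assumptions matching the
# point-cloud-range comment above, not prescribed defaults):
#   voxel_generator = VoxelGenerator(
#       voxel_size=[0.05, 0.05, 0.1],
#       point_cloud_range=[0, -40, -3, 70.4, 40, 1],
#       max_num_points=5)
#   voxels, coors, num_points_per_voxel = voxel_generator.generate(points)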
def points_to_voxel(points,
voxel_size,
coors_range,
max_points=35,
reverse_index=True,
max_voxels=20000):
"""convert kitti points(N, >=3) to voxels. This version calculate
everything in one loop. now it takes only 4.2ms(complete point cloud)
with jit and 3.2ghz cpu.(don't calculate other features)
Args:
points: [N, ndim] float tensor. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size
coors_range: [6] list/tuple or array, float. indicate voxel range.
format: xyzxyz, minmax
max_points: int. indicate maximum points contained in a voxel.
        reverse_index: boolean. indicate whether to return reversed
            coordinates. if points are in xyz format and reverse_index is
            True, output coordinates will be in zyx format, but points in
            features always stay in xyz format.
        max_voxels: int. indicate the maximum number of voxels this function
            creates. for SECOND, 20000 is a good choice. you should shuffle
            points before calling this function because max_voxels may drop
            points.
Returns:
voxels: [M, max_points, ndim] float tensor. only contain points.
coordinates: [M, 3] int32 tensor.
num_points_per_voxel: [M] int32 tensor.
"""
if not isinstance(voxel_size, np.ndarray):
voxel_size = np.array(voxel_size, dtype=points.dtype)
if not isinstance(coors_range, np.ndarray):
coors_range = np.array(coors_range, dtype=points.dtype)
voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size
voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())
if reverse_index:
voxelmap_shape = voxelmap_shape[::-1]
# don't create large array in jit(nopython=True) code.
num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32)
coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)
voxels = np.zeros(
shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype)
coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32)
if reverse_index:
voxel_num = _points_to_voxel_reverse_kernel(
points, voxel_size, coors_range, num_points_per_voxel,
coor_to_voxelidx, voxels, coors, max_points, max_voxels)
else:
voxel_num = _points_to_voxel_kernel(points, voxel_size, coors_range,
num_points_per_voxel,
coor_to_voxelidx, voxels, coors,
max_points, max_voxels)
coors = coors[:voxel_num]
voxels = voxels[:voxel_num]
num_points_per_voxel = num_points_per_voxel[:voxel_num]
return voxels, coors, num_points_per_voxel
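# Shape summary (follows from the code above): for an (N, ndim) cloud,
# voxels.shape == (M, max_points, ndim), coors.shape == (M, 3) (zyx order
# when reverse_index=True) and num_points_per_voxel.shape == (M,), with
# M <= max_voxels.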
@numba.jit(nopython=True)
def _points_to_voxel_reverse_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
    # put all computations in one loop.
    # we shouldn't create large arrays in the main jit code, otherwise
    # performance will decrease
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
ndim_minus_1 = ndim - 1
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # round in place (np.round(arr, decimals, out) form) to stay
    # numba-friendly
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[ndim_minus_1 - j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
break
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
@numba.jit(nopython=True)
def _points_to_voxel_kernel(points,
voxel_size,
coors_range,
num_points_per_voxel,
coor_to_voxelidx,
voxels,
coors,
max_points=35,
max_voxels=20000):
    # a mutex would be needed to write in CUDA, but numba.cuda does not
    # support mutexes. In addition, PyTorch does not support CUDA in the
    # dataloader.
    # put all computations in one loop.
    # we shouldn't create large arrays in the main jit code, otherwise
    # performance will decrease
N = points.shape[0]
# ndim = points.shape[1] - 1
ndim = 3
grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size
    # round in place (np.round(arr, decimals, out) form) to stay
    # numba-friendly
    grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)
# lower_bound = coors_range[:3]
# upper_bound = coors_range[3:]
coor = np.zeros(shape=(3, ), dtype=np.int32)
voxel_num = 0
failed = False
for i in range(N):
failed = False
for j in range(ndim):
c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])
if c < 0 or c >= grid_size[j]:
failed = True
break
coor[j] = c
if failed:
continue
voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]
if voxelidx == -1:
voxelidx = voxel_num
if voxel_num >= max_voxels:
break
voxel_num += 1
coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx
coors[voxelidx] = coor
num = num_points_per_voxel[voxelidx]
if num < max_points:
voxels[voxelidx, num] = points[i]
num_points_per_voxel[voxelidx] += 1
return voxel_num
from mmdet.datasets.registry import DATASETS
from .builder import build_dataset
from .coco import CocoDataset
from .dataset_wrappers import ConcatDataset, RepeatDataset
from .kitti2d_dataset import Kitti2DDataset
from .kitti_dataset import KittiDataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .nuscenes2d_dataset import NuScenes2DDataset
from .nuscenes_dataset import NuScenesDataset
__all__ = [
'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'DATASETS',
'build_dataset', 'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset',
'NuScenes2DDataset'
]
import copy
from mmdet.datasets import ConcatDataset, RepeatDataset
from mmdet.utils import build_from_cfg
from .dataset_wrappers import RepeatFactorDataset
from .registry import DATASETS
def _concat_dataset(cfg, default_args=None):
ann_files = cfg['ann_file']
img_prefixes = cfg.get('img_prefix', None)
seg_prefixes = cfg.get('seg_prefix', None)
proposal_files = cfg.get('proposal_file', None)
datasets = []
num_dset = len(ann_files)
for i in range(num_dset):
data_cfg = copy.deepcopy(cfg)
data_cfg['ann_file'] = ann_files[i]
if isinstance(img_prefixes, (list, tuple)):
data_cfg['img_prefix'] = img_prefixes[i]
if isinstance(seg_prefixes, (list, tuple)):
data_cfg['seg_prefix'] = seg_prefixes[i]
if isinstance(proposal_files, (list, tuple)):
data_cfg['proposal_file'] = proposal_files[i]
datasets.append(build_dataset(data_cfg, default_args))
return ConcatDataset(datasets)
def build_dataset(cfg, default_args=None):
if isinstance(cfg, (list, tuple)):
dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
elif cfg['type'] == 'RepeatDataset':
dataset = RepeatDataset(
build_dataset(cfg['dataset'], default_args), cfg['times'])
elif cfg['type'] == 'RepeatFactorDataset':
dataset = RepeatFactorDataset(
build_dataset(cfg['dataset'], default_args), cfg['repeat_thr'])
elif isinstance(cfg.get('ann_file'), (list, tuple)):
dataset = _concat_dataset(cfg, default_args)
else:
dataset = build_from_cfg(cfg, DATASETS, default_args)
return dataset
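# Example (illustrative sketch; the inner dataset fields are placeholders):
#   cfg = dict(
#       type='RepeatDataset',
#       times=2,
#       dataset=dict(type='KittiDataset', ...))
#   dataset = build_dataset(cfg)  # the wrapped dataset is iterated twice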
import math
from collections import defaultdict
import numpy as np
from mmdet.datasets import DATASETS
# Modified from https://github.com/facebookresearch/detectron2/blob/41d475b75a230221e21d9cac5d69655e3415e3a4/detectron2/data/samplers/distributed_sampler.py#L57 # noqa
@DATASETS.register_module
class RepeatFactorDataset(object):
"""A wrapper of repeated dataset with repeat factor.
Suitable for training on class imbalanced datasets like LVIS. In each
epoch, an image may appear multiple times based on its "repeat factor".
    The repeat factor for an image is a function of the frequency of the
    rarest category labeled in that image. The "frequency of category c" in
    [0, 1]
is defined as the fraction of images in the training set (without repeats)
in which category c appears.
This wrapper will finally be merged into LVIS dataset.
See https://arxiv.org/abs/1908.03195 (>= v2) Appendix B.2.
Args:
dataset (:obj:`Dataset`): The dataset to be repeated.
repeat_thr (float): frequency threshold below which data is repeated.
"""
def __init__(self, dataset, repeat_thr):
self.dataset = dataset
self.repeat_thr = repeat_thr
self.CLASSES = dataset.CLASSES
repeat_factors = self._get_repeat_factors(dataset, repeat_thr)
repeat_indices = []
for dataset_index, repeat_factor in enumerate(repeat_factors):
repeat_indices.extend([dataset_index] * math.ceil(repeat_factor))
self.repeat_indices = repeat_indices
flags = []
if hasattr(self.dataset, 'flag'):
for flag, repeat_factor in zip(self.dataset.flag, repeat_factors):
flags.extend([flag] * int(math.ceil(repeat_factor)))
assert len(flags) == len(repeat_indices)
self.flag = np.asarray(flags, dtype=np.uint8)
def _get_repeat_factors(self, dataset, repeat_thr):
# 1. For each category c, compute the fraction # of images
# that contain it: f(c)
category_freq = defaultdict(int)
for idx, img_info in enumerate(dataset.data_infos):
if 'category_ids' in img_info:
cat_ids = set(img_info['category_ids'])
elif 'gt_names' in img_info:
cat_ids = set([
gt for gt in img_info['gt_names']
if gt in dataset.class_names
])
else:
labels = dataset.get_ann_info(idx)['labels']
cat_ids = set([label for label in labels])
for cat_id in cat_ids:
category_freq[cat_id] += 1
num_images = len(dataset)
for k, v in category_freq.items():
category_freq[k] = v / num_images
# 2. For each category c, compute the category-level repeat factor:
# r(c) = max(1, sqrt(t / f(c)))
category_repeat = {
cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))
for cat_id, cat_freq in category_freq.items()
}
# 3. For each image I, compute the image-level repeat factor:
# r(I) = max_{c in I} r(c)
repeat_factors = []
for idx, img_info in enumerate(dataset.data_infos):
if 'category_ids' in img_info:
cat_ids = set(img_info['category_ids'])
elif 'gt_names' in img_info:
cat_ids = set([
gt for gt in img_info['gt_names']
if gt in dataset.class_names
])
else:
labels = dataset.get_ann_info(idx)['labels']
cat_ids = set([label for label in labels])
if len(cat_ids) == 0:
repeat_factor = 1
else:
repeat_factor = max(
{category_repeat[cat_id]
for cat_id in cat_ids})
repeat_factors.append(repeat_factor)
return repeat_factors
def __getitem__(self, idx):
ori_index = self.repeat_indices[idx]
return self.dataset[ori_index]
def __len__(self):
return len(self.repeat_indices)
import mmcv
import numpy as np
from mmdet.datasets import DATASETS, CustomDataset
@DATASETS.register_module
class Kitti2DDataset(CustomDataset):
    """
    Annotation format:
    [
        {
            'image': {
                'image_idx': 0,
                'image_path': 'training/image_2/000000.png',
                'image_shape': array([ 370, 1224], dtype=int32)
            },
            'point_cloud': {
                'num_features': 4,
                'velodyne_path': 'training/velodyne/000000.bin'
            },
            'calib': {
                'P0': <np.ndarray> (4, 4),
                'P1': <np.ndarray> (4, 4),
                'P2': <np.ndarray> (4, 4),
                'P3': <np.ndarray> (4, 4),
                'R0_rect': 4x4 np.array,
                'Tr_velo_to_cam': 4x4 np.array,
                'Tr_imu_to_velo': 4x4 np.array
            },
            'annos': {
                'name': <np.ndarray> (n),
                'truncated': <np.ndarray> (n),
                'occluded': <np.ndarray> (n),
                'alpha': <np.ndarray> (n),
                'bbox': <np.ndarray> (n, 4),
                'dimensions': <np.ndarray> (n, 3),
                'location': <np.ndarray> (n, 3),
                'rotation_y': <np.ndarray> (n),
                'score': <np.ndarray> (n),
                'index': array([0], dtype=int32),
                'group_ids': array([0], dtype=int32),
                'difficulty': array([0], dtype=int32),
                'num_points_in_gt': <np.ndarray> (n),
            }
        }
    ]
    """
    CLASSES = ('car', 'pedestrian', 'cyclist')
def load_annotations(self, ann_file):
self.data_infos = mmcv.load(ann_file)
self.cat2label = {
cat_name: i
for i, cat_name in enumerate(self.class_names)
}
return self.data_infos
def _filter_imgs(self, min_size=32):
"""Filter images without ground truths."""
valid_inds = []
for i, img_info in enumerate(self.data_infos):
if len(img_info['annos']['name']) > 0:
valid_inds.append(i)
return valid_inds
def get_ann_info(self, index):
        # use the index to get the annos so that the eval hook can also use
        # this API
info = self.data_infos[index]
annos = info['annos']
gt_names = annos['name']
gt_bboxes = annos['bbox']
difficulty = annos['difficulty']
        # remove classes that are not needed
selected = self.keep_arrays_by_name(gt_names, self.CLASSES)
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_labels = np.array([self.cat2label[n] for n in gt_names])
anns_results = dict(
bboxes=gt_bboxes.astype(np.float32),
labels=gt_labels,
)
return anns_results
def prepare_train_img(self, idx):
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
ann_info = self.get_ann_info(idx)
if len(ann_info['bboxes']) == 0:
return None
results = dict(img_info=img_info, ann_info=ann_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def prepare_test_img(self, idx):
img_raw_info = self.data_infos[idx]['image']
img_info = dict(filename=img_raw_info['image_path'])
results = dict(img_info=img_info)
if self.proposals is not None:
results['proposals'] = self.proposals[idx]
self.pre_pipeline(results)
return self.pipeline(results)
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def drop_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def reformat_bbox(self, outputs, out=None):
from mmdet3d.core.bbox.transforms import bbox2result_kitti2d
sample_idx = [info['image']['image_idx'] for info in self.data_infos]
result_files = bbox2result_kitti2d(outputs, self.CLASSES, sample_idx,
out)
return result_files
def evaluate(self, result_files, eval_types=None):
from mmdet3d.core.evaluation import kitti_eval
eval_types = ['bbox'] if not eval_types else eval_types
        assert eval_types in ('bbox', ['bbox']), \
            'KITTI 2D dataset only evaluates bbox'
gt_annos = [info['annos'] for info in self.data_infos]
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.CLASSES, eval_types=['bbox'])
return ap_result_str, ap_dict
import copy
import os
import pickle
import mmcv
import numpy as np
import torch
import torch.utils.data as torch_data
from mmdet.datasets.registry import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
from .utils import remove_dontcare
@DATASETS.register_module
class KittiDataset(torch_data.Dataset):
CLASSES = ('car', 'pedestrian', 'cyclist')
def __init__(self,
root_path,
ann_file,
split,
pipeline=None,
training=False,
class_names=None,
modality=None,
with_label=True,
test_mode=False):
"""
:param root_path: KITTI data path
:param split:
"""
super().__init__()
self.root_path = root_path
self.root_split_path = os.path.join(
self.root_path, 'training' if split != 'test' else 'testing')
self.class_names = class_names if class_names else self.CLASSES
        self.modality = modality
        self.with_label = with_label
        assert self.modality is not None
self.test_mode = test_mode
# TODO: rm the key training if it is not needed
self.training = training
self.pcd_limit_range = [0, -40, -3, 70.4, 40, 0.0]
self.ann_file = ann_file
with open(ann_file, 'rb') as f:
self.kitti_infos = pickle.load(f)
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if example is None or len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
difficulty = input_dict['difficulty']
input_dict['bbox_fields'] = []
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
# selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
difficulty = difficulty[selected]
gt_bboxes_mask = np.array([n in self.class_names for n in gt_names],
dtype=np.bool_)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes'] = gt_bboxes.astype('float32')
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
input_dict['difficulty'] = difficulty
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['gt_bboxes_3d_mask'] = copy.deepcopy(gt_bboxes_mask)
input_dict['bbox_fields'].append('gt_bboxes')
if len(gt_bboxes) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_bboxes = input_dict['gt_bboxes']
gt_names = input_dict['gt_names']
if gt_bboxes_3d is not None:
selected = self.keep_arrays_by_name(gt_names, self.class_names)
gt_bboxes_3d = gt_bboxes_3d[selected]
gt_bboxes = gt_bboxes[selected]
gt_names = gt_names[selected]
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_bboxes'] = gt_bboxes
input_dict['gt_names'] = gt_names
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def get_lidar(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'velodyne_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_lidar_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path,
'velodyne_depth_reduced', '%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_pure_depth_reduced(self, idx):
lidar_file = os.path.join(self.root_split_path, 'depth_reduced',
'%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_depth(self, idx):
depth_file = os.path.join(self.root_split_path, 'depth_completion',
'%06d.png' % idx)
assert os.path.exists(depth_file)
depth_img = mmcv.imread(depth_file, -1) / 256.0
return depth_img
def __len__(self):
return len(self.kitti_infos)
def get_sensor_data(self, index):
info = self.kitti_infos[index]
sample_idx = info['image']['image_idx']
# TODO: consider use torch.Tensor only
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
lidar2img = P2 @ rect @ Trv2c
if self.modality['use_depth'] and self.modality['use_lidar']:
points = self.get_lidar_depth_reduced(sample_idx)
elif self.modality['use_lidar']:
points = self.get_lidar_reduced(sample_idx)
elif self.modality['use_depth']:
points = self.get_pure_depth_reduced(sample_idx)
else:
assert (self.modality['use_depth'] or self.modality['use_lidar'])
if not self.modality['use_lidar_intensity']:
points = points[:, :3]
input_dict = dict(
sample_idx=sample_idx,
points=points,
lidar2img=lidar2img,
)
# TODO: support image input
if self.modality['use_camera']:
image_info = info['image']
image_path = image_info['image_path']
image_path = os.path.join(self.root_path, image_path)
img = mmcv.imread(image_path)
input_dict.update(
dict(
img=img,
img_shape=img.shape,
ori_shape=img.shape,
filename=image_path))
else:
input_dict.update(dict(img_shape=info['image']['image_shape']))
if self.with_label:
annos = self.get_ann_info(index)
input_dict.update(annos)
return input_dict
def get_ann_info(self, index):
        # use the index to get the annos so that the eval hook can also use
        # this API
info = self.kitti_infos[index]
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
# P2 = info['calib']['P2'].astype(np.float32)
annos = info['annos']
        # we need other objects to avoid collisions when sampling
annos = remove_dontcare(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_names = annos['name']
# print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
difficulty = annos['difficulty']
        # this converts gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c)
# only center format is allowed. so we need to convert
# kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
# box_np_ops.change_box3d_center_(gt_bboxes, [0.5, 0.5, 0],
# [0.5, 0.5, 0.5])
# For simplicity gt_bboxes means 2D gt bboxes
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_bboxes=annos['bbox'],
gt_names=gt_names,
difficulty=difficulty)
return anns_results
def drop_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x not in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def keep_arrays_by_name(self, gt_names, used_classes):
inds = [i for i, x in enumerate(gt_names) if x in used_classes]
inds = np.array(inds, dtype=np.int64)
return inds
def reformat_bbox(self, outputs, out=None):
if not isinstance(outputs[0][0], dict):
sample_idx = [
info['image']['image_idx'] for info in self.kitti_infos
]
result_files = self.bbox2result_kitti2d(outputs, self.class_names,
sample_idx, out)
else:
result_files = self.bbox2result_kitti(outputs, self.class_names,
out)
return result_files
def evaluate(self, result_files, eval_types=None):
from mmdet3d.core.evaluation import kitti_eval
gt_annos = [info['annos'] for info in self.kitti_infos]
if eval_types == 'img_bbox':
ap_result_str, ap_dict = kitti_eval(
gt_annos, result_files, self.class_names, eval_types=['bbox'])
else:
ap_result_str, ap_dict = kitti_eval(gt_annos, result_files,
self.class_names)
return ap_result_str, ap_dict
def bbox2result_kitti(self, net_outputs, class_names, out=None):
if out:
output_dir = out[:-4] if out.endswith(('.pkl', '.pickle')) else out
result_dir = output_dir + '/data'
mmcv.mkdir_or_exist(result_dir)
det_annos = []
print('Converting prediction to KITTI format')
for idx, pred_dicts in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.kitti_infos[idx]
image_shape = info['image']['image_shape'][:2]
for i, box_dict in enumerate(pred_dicts):
num_example = 0
sample_idx = box_dict['sample_idx']
box_dict = self.convert_valid_bboxes(box_dict, info)
                if box_dict['bbox'] is not None and box_dict[
                        'bbox'].size != 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
box_preds_lidar = box_dict['box3d_lidar']
label_preds = box_dict['label_preds']
anno = {
'name': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': []
}
gt_iou = scores * 0
for box, box_lidar, bbox, score, label, cur_gt_iou in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds, gt_iou):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
anno['truncated'].append(0.0)
anno['occluded'].append(0)
anno['alpha'].append(
-np.arctan2(-box_lidar[1], box_lidar[0]) + box[6])
anno['bbox'].append(bbox)
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
# anno["gt_iou"].append(cur_gt_iou)
anno['score'].append(score)
num_example += 1
if num_example != 0:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
if out:
cur_det_file = result_dir + '/%06d.txt' % sample_idx
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
dims = anno['dimensions'] # lhw -> hwl
for idx in range(len(bbox)):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'
.format(anno['name'][idx],
anno['alpha'][idx], bbox[idx][0],
bbox[idx][1], bbox[idx][2],
bbox[idx][3], dims[idx][1],
dims[idx][2], dims[idx][0],
loc[idx][0], loc[idx][1],
loc[idx][2],
anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
if num_example == 0:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
'occluded': np.array([]),
'alpha': np.array([]),
'bbox': np.zeros([0, 4]),
'dimensions': np.zeros([0, 3]),
'location': np.zeros([0, 3]),
'rotation_y': np.array([]),
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if out:
if not out.endswith(('.pkl', '.pickle')):
out = '{}.pkl'.format(out)
mmcv.dump(det_annos, out)
print('Result is saved to %s' % out)
return det_annos
def bbox2result_kitti2d(self,
net_outputs,
class_names,
sample_ids,
out=None):
"""Convert results to kitti format for evaluation and test submission
Args:
net_outputs (List[array]): list of array storing the bbox and score
class_nanes (List[String]): A list of class names
sample_idx (List[Int]): A list of samples' index,
should have the same length as net_outputs.
Return:
List([dict]): A list of dict have the kitti format
"""
assert len(net_outputs) == len(sample_ids)
det_annos = []
print('Converting prediction to KITTI format')
for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
anno = dict(
name=[],
truncated=[],
occluded=[],
alpha=[],
bbox=[],
dimensions=[],
location=[],
rotation_y=[],
score=[])
sample_idx = sample_ids[i]
num_example = 0
for label in range(len(bboxes_per_sample)):
bbox = bboxes_per_sample[label]
                # use a separate index to avoid shadowing the sample index i
                for j in range(bbox.shape[0]):
                    anno['name'].append(class_names[int(label)])
                    anno['truncated'].append(0.0)
                    anno['occluded'].append(0)
                    anno['alpha'].append(0.0)
                    anno['bbox'].append(bbox[j, :4])
                    # set dimensions (height, width, length) to zero
                    anno['dimensions'].append(
                        np.zeros(shape=[3], dtype=np.float32))
                    # set the 3D translation to (-1000, -1000, -1000)
                    anno['location'].append(
                        np.ones(shape=[3], dtype=np.float32) * (-1000.0))
                    anno['rotation_y'].append(0.0)
                    anno['score'].append(bbox[j, 4])
                    num_example += 1
if num_example == 0:
annos.append(
dict(
name=np.array([]),
truncated=np.array([]),
occluded=np.array([]),
alpha=np.array([]),
bbox=np.zeros([0, 4]),
dimensions=np.zeros([0, 3]),
location=np.zeros([0, 3]),
rotation_y=np.array([]),
score=np.array([]),
))
else:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
det_annos += annos
if out:
# save file in submission format
output_dir = out[:-4] if out.endswith(('.pkl', '.pickle')) else out
result_dir = output_dir + '/data'
mmcv.mkdir_or_exist(result_dir)
out = '{}.pkl'.format(result_dir)
mmcv.dump(det_annos, out)
print('Result is saved to {}'.format(out))
for i, anno in enumerate(det_annos):
sample_idx = sample_ids[i]
cur_det_file = result_dir + '/%06d.txt' % sample_idx
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
loc = anno['location']
                dims = anno['dimensions'][:, [1, 2, 0]]  # lhw -> hwl
for idx in range(len(bbox)):
print(
                        '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '
                        '{:.4f}'.format(
anno['name'][idx],
anno['alpha'][idx],
*bbox[idx], # 4 float
*dims[idx], # 3 float
*loc[idx], # 3 float
anno['rotation_y'][idx],
anno['score'][idx]),
file=f,
)
print('Result is saved to {}'.format(result_dir))
return det_annos
def convert_valid_bboxes(self, box_dict, info):
# TODO: refactor this function
final_box_preds = box_dict['box3d_lidar']
final_scores = box_dict['scores']
final_labels = box_dict['label_preds']
sample_idx = info['image']['image_idx']
final_box_preds[:, -1] = box_np_ops.limit_period(
final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2)
if final_box_preds.shape[0] == 0:
return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
scores=final_box_preds.new_zeros([0]).numpy(),
                label_preds=final_box_preds.new_zeros([0]).numpy(),
sample_idx=sample_idx,
)
from mmdet3d.core.bbox import box_torch_ops
rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32)
img_shape = info['image']['image_shape']
rect = final_box_preds.new_tensor(rect)
Trv2c = final_box_preds.new_tensor(Trv2c)
P2 = final_box_preds.new_tensor(P2)
final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
final_box_preds, rect, Trv2c)
locs = final_box_preds_camera[:, :3]
dims = final_box_preds_camera[:, 3:6]
angles = final_box_preds_camera[:, 6]
camera_box_origin = [0.5, 1.0, 0.5]
box_corners = box_torch_ops.center_to_corner_box3d(
locs, dims, angles, camera_box_origin, axis=1)
box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing
# check final_box_preds_camera
image_shape = final_box_preds.new_tensor(img_shape)
valid_cam_inds = ((final_box_preds_camera[:, 0] < image_shape[1]) &
(final_box_preds_camera[:, 1] < image_shape[0]) &
(final_box_preds_camera[:, 2] > 0) &
(final_box_preds_camera[:, 3] > 0))
# check final_box_preds
limit_range = final_box_preds.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((final_box_preds[:, :3] > limit_range[:3]) &
(final_box_preds[:, :3] < limit_range[3:]))
valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
if valid_inds.sum() > 0:
return dict(
bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=final_box_preds_camera[valid_inds, :].numpy(),
box3d_lidar=final_box_preds[valid_inds, :].numpy(),
scores=final_scores[valid_inds].numpy(),
label_preds=final_labels[valid_inds].numpy(),
sample_idx=sample_idx,
)
else:
return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
scores=final_box_preds.new_zeros([0]).numpy(),
                label_preds=final_box_preds.new_zeros([0]).numpy(),
sample_idx=sample_idx,
)
from .build_loader import build_dataloader
from .sampler import DistributedGroupSampler, GroupSampler
__all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
import platform
import random
from functools import partial
import numpy as np
from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from torch.utils.data import DataLoader
from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler
if platform.system() != 'Windows':
# https://github.com/pytorch/pytorch/issues/973
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
def build_dataloader(dataset,
samples_per_gpu,
workers_per_gpu,
num_gpus=1,
dist=True,
seed=None,
**kwargs):
shuffle = kwargs.get('shuffle', True)
if dist:
rank, world_size = get_dist_info()
if shuffle:
sampler = DistributedGroupSampler(dataset, samples_per_gpu,
world_size, rank)
else:
sampler = DistributedSampler(
dataset, world_size, rank, shuffle=False)
batch_size = samples_per_gpu
num_workers = workers_per_gpu
else:
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
batch_size = num_gpus * samples_per_gpu
num_workers = num_gpus * workers_per_gpu
data_loader = DataLoader(
dataset,
batch_size=batch_size,
sampler=sampler,
num_workers=num_workers,
collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
pin_memory=False,
        worker_init_fn=partial(worker_init_fn, seed=seed)
        if seed is not None else None,
**kwargs)
return data_loader
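# Example (illustrative sketch; the argument values are assumptions):
#   data_loader = build_dataloader(
#       dataset, samples_per_gpu=2, workers_per_gpu=2, num_gpus=1,
#       dist=False, seed=0)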
def worker_init_fn(worker_id, seed):
    # the DataLoader passes the worker id; combine it with the user seed so
    # that each worker gets a distinct but deterministic seed
    np.random.seed(worker_id + seed)
    random.seed(worker_id + seed)
from __future__ import division
import math
import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import DistributedSampler as _DistributedSampler
from torch.utils.data import Sampler
class DistributedSampler(_DistributedSampler):
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
super().__init__(dataset, num_replicas=num_replicas, rank=rank)
self.shuffle = shuffle
def __iter__(self):
# deterministically shuffle based on epoch
if self.shuffle:
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g).tolist()
else:
indices = torch.arange(len(self.dataset)).tolist()
# add extra samples to make it evenly divisible
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
# subsample
indices = indices[self.rank:self.total_size:self.num_replicas]
assert len(indices) == self.num_samples
return iter(indices)
class GroupSampler(Sampler):
def __init__(self, dataset, samples_per_gpu=1):
assert hasattr(dataset, 'flag')
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.flag = dataset.flag.astype(np.int64)
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, size in enumerate(self.group_sizes):
self.num_samples += int(np.ceil(
size / self.samples_per_gpu)) * self.samples_per_gpu
def __iter__(self):
indices = []
for i, size in enumerate(self.group_sizes):
if size == 0:
continue
indice = np.where(self.flag == i)[0]
assert len(indice) == size
np.random.shuffle(indice)
num_extra = int(np.ceil(size / self.samples_per_gpu)
) * self.samples_per_gpu - len(indice)
indice = np.concatenate(
[indice, np.random.choice(indice, num_extra)])
indices.append(indice)
indices = np.concatenate(indices)
indices = [
indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
for i in np.random.permutation(
range(len(indices) // self.samples_per_gpu))
]
indices = np.concatenate(indices)
indices = indices.astype(np.int64).tolist()
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
class DistributedGroupSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self,
dataset,
samples_per_gpu=1,
num_replicas=None,
rank=None):
_rank, _num_replicas = get_dist_info()
if num_replicas is None:
num_replicas = _num_replicas
if rank is None:
rank = _rank
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
assert hasattr(self.dataset, 'flag')
self.flag = self.dataset.flag
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
for i, j in enumerate(self.group_sizes):
self.num_samples += int(
math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
self.num_replicas)) * self.samples_per_gpu
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = []
for i, size in enumerate(self.group_sizes):
if size > 0:
indice = np.where(self.flag == i)[0]
assert len(indice) == size
indice = indice[list(torch.randperm(int(size),
generator=g))].tolist()
extra = int(
math.ceil(
size * 1.0 / self.samples_per_gpu / self.num_replicas)
) * self.samples_per_gpu * self.num_replicas - len(indice)
# pad indice
tmp = indice.copy()
for _ in range(extra // size):
indice.extend(tmp)
indice.extend(tmp[:extra % size])
indices.extend(indice)
assert len(indices) == self.total_size
indices = [
indices[j] for i in list(
torch.randperm(
len(indices) // self.samples_per_gpu, generator=g))
for j in range(i * self.samples_per_gpu, (i + 1) *
self.samples_per_gpu)
]
# subsample
offset = self.num_samples * self.rank
indices = indices[offset:offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
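# Usage note: __iter__ seeds its generator with self.epoch, so the runner
# should call sampler.set_epoch(epoch) at the start of every epoch to get a
# different but rank-consistent shuffle each epoch.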
from pycocotools.coco import COCO
from mmdet3d.core.evaluation.coco_utils import getImgIds
from mmdet.datasets import DATASETS, CocoDataset
@DATASETS.register_module
class NuScenes2DDataset(CocoDataset):
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
def load_annotations(self, ann_file):
if not self.class_names:
self.class_names = self.CLASSES
self.coco = COCO(ann_file)
# send class_names into the get id
# in case we only need to train on several classes
# by default self.class_names = CLASSES
self.cat_ids = self.coco.getCatIds(catNms=self.class_names)
self.cat2label = {
cat_id: i # + 1 rm +1 here thus the 0-79 are fg, 80 is bg
for i, cat_id in enumerate(self.cat_ids)
}
# send cat ids to the get img id
# in case we only need to train on several classes
if len(self.cat_ids) < len(self.CLASSES):
self.img_ids = getImgIds(self.coco, catIds=self.cat_ids)
else:
self.img_ids = self.coco.getImgIds()
img_infos = []
for i in self.img_ids:
info = self.coco.loadImgs([i])[0]
info['filename'] = info['file_name']
img_infos.append(info)
return img_infos
import copy
import os.path as osp
import tempfile
import mmcv
import numpy as np
import pyquaternion
import torch.utils.data as torch_data
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from .pipelines import Compose
@DATASETS.register_module
class NuScenesDataset(torch_data.Dataset):
NumPointFeatures = 4 # xyz, timestamp. set 4 to use kitti pretrain
NameMapping = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck'
}
DefaultAttribute = {
'car': 'vehicle.parked',
'pedestrian': 'pedestrian.moving',
'trailer': 'vehicle.parked',
'truck': 'vehicle.parked',
'bus': 'vehicle.moving',
'motorcycle': 'cycle.without_rider',
'construction_vehicle': 'vehicle.parked',
'bicycle': 'cycle.without_rider',
'barrier': '',
'traffic_cone': '',
}
AttrMapping = {
'cycle.with_rider': 0,
'cycle.without_rider': 1,
'pedestrian.moving': 2,
'pedestrian.standing': 3,
'pedestrian.sitting_lying_down': 4,
'vehicle.moving': 5,
'vehicle.parked': 6,
'vehicle.stopped': 7,
}
AttrMapping_rev = [
'cycle.with_rider',
'cycle.without_rider',
'pedestrian.moving',
'pedestrian.standing',
'pedestrian.sitting_lying_down',
'vehicle.moving',
'vehicle.parked',
'vehicle.stopped',
]
CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
def __init__(self,
ann_file,
pipeline=None,
root_path=None,
class_names=None,
load_interval=1,
with_velocity=True,
test_mode=False,
modality=None,
eval_version='detection_cvpr_2019',
with_label=True,
max_sweeps=10,
filter_empty_gt=True):
super().__init__()
self.data_root = root_path
self.class_names = class_names if class_names else self.CLASSES
self.test_mode = test_mode
self.load_interval = load_interval
self.with_label = with_label
self.max_sweeps = max_sweeps
self.ann_file = ann_file
data = mmcv.load(ann_file)
self.infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
self.infos = self.infos[::load_interval]
self.metadata = data['metadata']
self.version = self.metadata['version']
self.with_velocity = with_velocity
self.eval_version = eval_version
from nuscenes.eval.detection.config import config_factory
self.eval_detection_configs = config_factory(self.eval_version)
if modality is None:
modality = dict(
use_camera=False,
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
)
self.modality = modality
# set group flag for the sampler
if not self.test_mode:
self._set_group_flag()
# processing pipeline
if pipeline is not None:
self.pipeline = Compose(pipeline)
# kitti map: nusc det name -> kitti eval name
self._kitti_name_mapping = {
'car': 'car',
'pedestrian': 'pedestrian',
} # we only eval these classes in kitti
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_data(idx)
while True:
data = self.prepare_train_data(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
def _set_group_flag(self):
"""Set flag according to image aspect ratio.
Images with aspect ratio greater than 1 will be set as group 1,
otherwise group 0.
        In KITTI's point clouds they are all the same, thus all flags are
        zero.
        """
self.flag = np.zeros(len(self), dtype=np.uint8)
def _rand_another(self, idx):
pool = np.where(self.flag == self.flag[idx])[0]
return np.random.choice(pool)
def __len__(self):
return len(self.infos)
def prepare_train_data(self, index):
input_dict = self.get_sensor_data(index)
input_dict = self.train_pre_pipeline(input_dict)
if input_dict is None:
return None
example = self.pipeline(input_dict)
if len(example['gt_bboxes_3d']._data) == 0:
return None
return example
def train_pre_pipeline(self, input_dict):
if len(input_dict['gt_bboxes_3d']) == 0:
return None
return input_dict
def prepare_test_data(self, index):
input_dict = self.get_sensor_data(index)
# input_dict = self.test_pre_pipeline(input_dict)
example = self.pipeline(input_dict)
return example
def test_pre_pipeline(self, input_dict):
gt_names = input_dict['gt_names']
input_dict['gt_names_3d'] = copy.deepcopy(gt_names)
return input_dict
def get_sensor_data(self, index):
info = self.infos[index]
points = np.fromfile(
info['lidar_path'], dtype=np.float32, count=-1).reshape([-1, 5])
        # standard protocol modified from SECOND.Pytorch
points[:, 3] /= 255
points[:, 4] = 0
sweep_points_list = [points]
ts = info['timestamp'] / 1e6
for idx, sweep in enumerate(info['sweeps']):
if idx >= self.max_sweeps:
break
points_sweep = np.fromfile(
sweep['data_path'], dtype=np.float32,
count=-1).reshape([-1, 5])
sweep_ts = sweep['timestamp'] / 1e6
points_sweep[:, 3] /= 255
points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
'sensor2lidar_rotation'].T
points_sweep[:, :3] += sweep['sensor2lidar_translation']
points_sweep[:, 4] = ts - sweep_ts
sweep_points_list.append(points_sweep)
points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
input_dict = dict(
points=points,
sample_idx=info['token'],
)
if self.modality['use_camera']:
# TODO support image
imgs = []
ori_shapes = []
image_paths = []
lidar2img_rts = []
for cam_type, cam_info in info['cams'].items():
image_path = cam_info['data_path']
# image_path = osp.join(self.data_root, image_path)
img = mmcv.imread(image_path)
imgs.append(img)
ori_shapes.append(img.shape)
image_paths.append(image_path)
# obtain lidar to image transformation matrix
lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation'])
lidar2cam_t = cam_info[
'sensor2lidar_translation'] @ lidar2cam_r.T
lidar2cam_rt = np.eye(4)
lidar2cam_rt[:3, :3] = lidar2cam_r.T
lidar2cam_rt[3, :3] = -lidar2cam_t
intrinsic = cam_info['cam_intrinsic']
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
lidar2img_rt = (viewpad @ lidar2cam_rt.T)
lidar2img_rts.append(lidar2img_rt)
input_dict.update(
dict(
img=imgs,
img_shape=ori_shapes,
ori_shape=ori_shapes,
filename=image_paths,
lidar2img=lidar2img_rts,
))
if self.with_label:
annos = self.get_ann_info(index)
input_dict.update(annos)
return input_dict
def get_ann_info(self, index):
info = self.infos[index]
# filter out bbox containing no points
mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask]
        # the nuscenes box center is [0.5, 0.5, 0.5]; convert it to
        # the same convention as KITTI, [0.5, 0.5, 0]
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask]
if self.with_velocity:
gt_velocity = info['gt_velocity'][mask]
nan_mask = np.isnan(gt_velocity[:, 0])
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
gt_bboxes_3d_mask = np.array(
[n in self.class_names for n in gt_names_3d], dtype=np.bool_)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_names_3d=gt_names_3d,
gt_bboxes_3d_mask=gt_bboxes_3d_mask,
)
return anns_results
def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {}
mapped_class_names = self.class_names
token2info = {}
for info in self.infos:
token2info[info['token']] = info
print('Start to convert detection format...')
for det in mmcv.track_iter_progress(results):
annos = []
boxes = output_to_nusc_box(det[0])
boxes = lidar_nusc_box_to_global(token2info[det[0]['sample_idx']],
boxes, mapped_class_names,
self.eval_detection_configs,
self.eval_version)
for i, box in enumerate(boxes):
name = mapped_class_names[box.label]
if np.sqrt(box.velocity[0]**2 + box.velocity[1]**2) > 0.2:
if name in [
'car',
'construction_vehicle',
'bus',
'truck',
'trailer',
]:
attr = 'vehicle.moving'
elif name in ['bicycle', 'motorcycle']:
attr = 'cycle.with_rider'
else:
attr = NuScenesDataset.DefaultAttribute[name]
else:
if name in ['pedestrian']:
attr = 'pedestrian.standing'
elif name in ['bus']:
attr = 'vehicle.stopped'
else:
attr = NuScenesDataset.DefaultAttribute[name]
nusc_anno = dict(
sample_token=det[0]['sample_idx'],
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
velocity=box.velocity[:2].tolist(),
detection_name=name,
detection_score=box.score,
attribute_name=attr)
annos.append(nusc_anno)
nusc_annos[det[0]['sample_idx']] = annos
nusc_submissions = {
'meta': self.modality,
'results': nusc_annos,
}
mmcv.mkdir_or_exist(jsonfile_prefix)
res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
        print('Results written to', res_path)
mmcv.dump(nusc_submissions, res_path)
return res_path
def _evaluate_single(self,
result_path,
logger=None,
metric='bbox',
result_name='pts_bbox'):
from nuscenes import NuScenes
from nuscenes.eval.detection.evaluate import NuScenesEval
output_dir = osp.join(*osp.split(result_path)[:-1])
nusc = NuScenes(
version=self.version, dataroot=self.data_root, verbose=False)
eval_set_map = {
'v1.0-mini': 'mini_train',
'v1.0-trainval': 'val',
}
nusc_eval = NuScenesEval(
nusc,
config=self.eval_detection_configs,
result_path=result_path,
eval_set=eval_set_map[self.version],
output_dir=output_dir,
verbose=False)
nusc_eval.main(render_curves=False)
# record metrics
metrics = mmcv.load(osp.join(output_dir, 'metrics_summary.json'))
detail = dict()
metric_prefix = '{}_NuScenes'.format(result_name)
for name in self.class_names:
for k, v in metrics['label_aps'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_AP_dist_{}'.format(metric_prefix, name, k)] = val
for k, v in metrics['label_tp_errors'][name].items():
val = float('{:.4f}'.format(v))
detail['{}/{}_{}'.format(metric_prefix, name, k)] = val
detail['{}/NDS'.format(metric_prefix)] = metrics['nd_score']
detail['{}/mAP'.format(metric_prefix)] = metrics['mean_ap']
return detail
def format_results(self, results, jsonfile_prefix=None):
"""Format the results to json (standard format for COCO evaluation).
Args:
results (list): Testing results of the dataset.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
            tuple: (result_files, tmp_dir), result_files is a dict containing
                the json filepaths, tmp_dir is the temporary directory created
                for saving json files when jsonfile_prefix is not specified.
"""
assert isinstance(results, list), 'results must be a list'
assert len(results) == len(self), (
'The length of results is not equal to the dataset len: {} != {}'.
format(len(results), len(self)))
if jsonfile_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
jsonfile_prefix = osp.join(tmp_dir.name, 'results')
else:
tmp_dir = None
if not isinstance(results[0], dict):
result_files = self._format_bbox(results, jsonfile_prefix)
else:
result_files = dict()
for name in results[0]:
                print('Formatting bboxes of {}'.format(name))
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
{name: self._format_bbox(results_, tmp_file_)})
return result_files, tmp_dir
def evaluate(self,
results,
metric='bbox',
logger=None,
jsonfile_prefix=None,
result_names=['pts_bbox']):
"""Evaluation in nuScenes protocol.
Args:
results (list): Testing results of the dataset.
metric (str | list[str]): Metrics to be evaluated.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
jsonfile_prefix (str | None): The prefix of json files. It includes
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
Returns:
            dict[str, float]: evaluation results.
"""
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
if isinstance(result_files, dict):
results_dict = dict()
for name in result_names:
print('Evaluating bboxes of {}'.format(name))
ret_dict = self._evaluate_single(result_files[name])
results_dict.update(ret_dict)
elif isinstance(result_files, str):
results_dict = self._evaluate_single(result_files)
if tmp_dir is not None:
tmp_dir.cleanup()
return results_dict
def output_to_nusc_box(detection):
box3d = detection['box3d_lidar'].numpy()
scores = detection['scores'].numpy()
labels = detection['label_preds'].numpy()
# TODO: check whether this is necessary
# with dir_offset & dir_limit in the head
box3d[:, 6] = -box3d[:, 6] - np.pi / 2
# the trained model is in [0.5, 0.5, 0],
# change them back to nuscenes [0.5, 0.5, 0.5]
box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
box_list = []
for i in range(box3d.shape[0]):
quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6])
velocity = (*box3d[i, 7:9], 0.0)
# velo_val = np.linalg.norm(box3d[i, 7:9])
# velo_ori = box3d[i, 6]
# velocity = (
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box = NuScenesBox(
box3d[i, :3],
box3d[i, 3:6],
quat,
label=labels[i],
score=scores[i],
velocity=velocity)
box_list.append(box)
return box_list
def lidar_nusc_box_to_global(info,
boxes,
classes,
eval_configs,
eval_version='detection_cvpr_2019'):
box_list = []
for box in boxes:
# Move box to ego vehicle coord system
box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))
box.translate(np.array(info['lidar2ego_translation']))
# filter det in ego.
cls_range_map = eval_configs.class_range
radius = np.linalg.norm(box.center[:2], 2)
det_range = cls_range_map[classes[box.label]]
if radius > det_range:
continue
# Move box to global coord system
box.rotate(pyquaternion.Quaternion(info['ego2global_rotation']))
box.translate(np.array(info['ego2global_translation']))
box_list.append(box)
return box_list
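# Coordinate chain implemented above:
#   lidar frame --(lidar2ego rotation/translation)--> ego frame,
#   where detections beyond the per-class eval range are dropped,
#   then --(ego2global rotation/translation)--> global frame.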
from mmdet.datasets.pipelines import Compose
from .formating import (Collect, Collect3D, ImageToTensor, ToDataContainer,
ToTensor, Transpose, to_tensor)
from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointShuffle, PointsRangeFilter,
RandomFlip3D)
__all__ = [
    'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
    'Transpose', 'Collect', 'ObjectSample', 'RandomFlip3D', 'ObjectNoise',
    'GlobalRotScale', 'PointShuffle', 'ObjectRangeFilter',
    'PointsRangeFilter', 'Collect3D'
]
import numba
import numpy as np
from mmdet3d.core.bbox import box_np_ops
@numba.njit
def _rotation_box2d_jit_(corners, angle, rot_mat_T):
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
corners[:] = corners @ rot_mat_T
@numba.jit(nopython=True)
def box_collision_test(boxes, qboxes, clockwise=True):
N = boxes.shape[0]
K = qboxes.shape[0]
ret = np.zeros((N, K), dtype=np.bool_)
slices = np.array([1, 2, 3, 0])
lines_boxes = np.stack((boxes, boxes[:, slices, :]),
axis=2) # [N, 4, 2(line), 2(xy)]
lines_qboxes = np.stack((qboxes, qboxes[:, slices, :]), axis=2)
# vec = np.zeros((2,), dtype=boxes.dtype)
boxes_standup = box_np_ops.corner_to_standup_nd_jit(boxes)
qboxes_standup = box_np_ops.corner_to_standup_nd_jit(qboxes)
for i in range(N):
for j in range(K):
# calculate standup first
iw = (
min(boxes_standup[i, 2], qboxes_standup[j, 2]) -
max(boxes_standup[i, 0], qboxes_standup[j, 0]))
if iw > 0:
ih = (
min(boxes_standup[i, 3], qboxes_standup[j, 3]) -
max(boxes_standup[i, 1], qboxes_standup[j, 1]))
if ih > 0:
for k in range(4):
for l in range(4):
A = lines_boxes[i, k, 0]
B = lines_boxes[i, k, 1]
C = lines_qboxes[j, l, 0]
D = lines_qboxes[j, l, 1]
                            # segments AB and CD intersect iff A and B lie
                            # on opposite sides of line CD (acd != bcd) and
                            # C and D lie on opposite sides of line AB
                            # (abc != abd)
                            acd = ((D[1] - A[1]) * (C[0] - A[0]) >
                                   (C[1] - A[1]) * (D[0] - A[0]))
                            bcd = ((D[1] - B[1]) * (C[0] - B[0]) >
                                   (C[1] - B[1]) * (D[0] - B[0]))
                            if acd != bcd:
                                abc = ((C[1] - A[1]) * (B[0] - A[0]) >
                                       (B[1] - A[1]) * (C[0] - A[0]))
                                abd = ((D[1] - A[1]) * (B[0] - A[0]) >
                                       (B[1] - A[1]) * (D[0] - A[0]))
                                if abc != abd:
                                    ret[i, j] = True  # collision.
                                    break
                        if ret[i, j]:
                            break
                    if not ret[i, j]:
# now check complete overlap.
# box overlap qbox:
box_overlap_qbox = True
for l in range(4): # point l in qboxes
for k in range(4): # corner k in boxes
vec = boxes[i, k] - boxes[i, (k + 1) % 4]
if clockwise:
vec = -vec
cross = vec[1] * (
boxes[i, k, 0] - qboxes[j, l, 0])
cross -= vec[0] * (
boxes[i, k, 1] - qboxes[j, l, 1])
if cross >= 0:
box_overlap_qbox = False
break
                            if not box_overlap_qbox:
                                break
                        if not box_overlap_qbox:
qbox_overlap_box = True
for l in range(4): # point l in boxes
for k in range(4): # corner k in qboxes
vec = qboxes[j, k] - qboxes[j, (k + 1) % 4]
if clockwise:
vec = -vec
cross = vec[1] * (
qboxes[j, k, 0] - boxes[i, l, 0])
cross -= vec[0] * (
qboxes[j, k, 1] - boxes[i, l, 1])
                                    if cross >= 0:
                                        qbox_overlap_box = False
                                        break
                                if not qbox_overlap_box:
                                    break
if qbox_overlap_box:
ret[i, j] = True # collision.
else:
ret[i, j] = True # collision.
return ret
@numba.njit
def noise_per_box(boxes, valid_mask, loc_noises, rot_noises):
# boxes: [N, 5]
# valid_mask: [N]
# loc_noises: [N, M, 3]
# rot_noises: [N, M]
num_boxes = boxes.shape[0]
num_tests = loc_noises.shape[1]
box_corners = box_np_ops.box2d_to_corner_jit(boxes)
current_corners = np.zeros((4, 2), dtype=boxes.dtype)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
success_mask = -np.ones((num_boxes, ), dtype=np.int64)
# print(valid_mask)
for i in range(num_boxes):
if valid_mask[i]:
for j in range(num_tests):
current_corners[:] = box_corners[i]
current_corners -= boxes[i, :2]
_rotation_box2d_jit_(current_corners, rot_noises[i, j],
rot_mat_T)
current_corners += boxes[i, :2] + loc_noises[i, j, :2]
coll_mat = box_collision_test(
current_corners.reshape(1, 4, 2), box_corners)
coll_mat[0, i] = False
# print(coll_mat)
if not coll_mat.any():
success_mask[i] = j
box_corners[i] = current_corners
break
return success_mask
@numba.njit
def noise_per_box_v2_(boxes, valid_mask, loc_noises, rot_noises,
global_rot_noises):
# boxes: [N, 5]
# valid_mask: [N]
# loc_noises: [N, M, 3]
# rot_noises: [N, M]
num_boxes = boxes.shape[0]
num_tests = loc_noises.shape[1]
box_corners = box_np_ops.box2d_to_corner_jit(boxes)
current_corners = np.zeros((4, 2), dtype=boxes.dtype)
current_box = np.zeros((1, 5), dtype=boxes.dtype)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
dst_pos = np.zeros((2, ), dtype=boxes.dtype)
success_mask = -np.ones((num_boxes, ), dtype=np.int64)
corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
corners_norm[1, 1] = 1.0
corners_norm[2] = 1.0
corners_norm[3, 0] = 1.0
corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
corners_norm = corners_norm.reshape(4, 2)
for i in range(num_boxes):
if valid_mask[i]:
for j in range(num_tests):
current_box[0, :] = boxes[i]
current_radius = np.sqrt(boxes[i, 0]**2 + boxes[i, 1]**2)
current_grot = np.arctan2(boxes[i, 0], boxes[i, 1])
dst_grot = current_grot + global_rot_noises[i, j]
dst_pos[0] = current_radius * np.sin(dst_grot)
dst_pos[1] = current_radius * np.cos(dst_grot)
current_box[0, :2] = dst_pos
current_box[0, -1] += (dst_grot - current_grot)
rot_sin = np.sin(current_box[0, -1])
rot_cos = np.cos(current_box[0, -1])
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
current_corners[:] = current_box[
0, 2:4] * corners_norm @ rot_mat_T + current_box[0, :2]
current_corners -= current_box[0, :2]
_rotation_box2d_jit_(current_corners, rot_noises[i, j],
rot_mat_T)
current_corners += current_box[0, :2] + loc_noises[i, j, :2]
coll_mat = box_collision_test(
current_corners.reshape(1, 4, 2), box_corners)
coll_mat[0, i] = False
if not coll_mat.any():
success_mask[i] = j
box_corners[i] = current_corners
loc_noises[i, j, :2] += (dst_pos - boxes[i, :2])
rot_noises[i, j] += (dst_grot - current_grot)
break
return success_mask
def _select_transform(transform, indices):
result = np.zeros((transform.shape[0], *transform.shape[2:]),
dtype=transform.dtype)
for i in range(transform.shape[0]):
if indices[i] != -1:
result[i] = transform[i, indices[i]]
return result
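# Sketch of how box noise selection composes (hypothetical shapes; boxes_bev
# is an [N, 5] array of x, y, w, l, yaw as consumed by noise_per_box):
#   loc_noises = np.random.normal(scale=0.25, size=(num_boxes, num_try, 3))
#   rot_noises = np.random.uniform(-0.1, 0.1, size=(num_boxes, num_try))
#   sel = noise_per_box(boxes_bev, valid_mask, loc_noises, rot_noises)
#   loc_t = _select_transform(loc_noises, sel)  # [num_boxes, 3]
#   rot_t = _select_transform(rot_noises, sel)  # [num_boxes]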
@numba.njit
def _rotation_matrix_3d_(rot_mat_T, angle, axis):
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
rot_mat_T[:] = np.eye(3)
if axis == 1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 2] = -rot_sin
rot_mat_T[2, 0] = rot_sin
rot_mat_T[2, 2] = rot_cos
elif axis == 2 or axis == -1:
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
elif axis == 0:
rot_mat_T[1, 1] = rot_cos
rot_mat_T[1, 2] = -rot_sin
rot_mat_T[2, 1] = rot_sin
rot_mat_T[2, 2] = rot_cos
@numba.njit
def points_transform_(points, centers, point_masks, loc_transform,
rot_transform, valid_mask):
num_box = centers.shape[0]
num_points = points.shape[0]
rot_mat_T = np.zeros((num_box, 3, 3), dtype=points.dtype)
for i in range(num_box):
_rotation_matrix_3d_(rot_mat_T[i], rot_transform[i], 2)
for i in range(num_points):
for j in range(num_box):
if valid_mask[j]:
if point_masks[i, j] == 1:
points[i, :3] -= centers[j, :3]
points[i:i + 1, :3] = points[i:i + 1, :3] @ rot_mat_T[j]
points[i, :3] += centers[j, :3]
points[i, :3] += loc_transform[j]
break # only apply first box's transform
@numba.njit
def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):
num_box = boxes.shape[0]
for i in range(num_box):
if valid_mask[i]:
boxes[i, :3] += loc_transform[i]
boxes[i, 6] += rot_transform[i]
def noise_per_object_v3_(gt_boxes,
points=None,
valid_mask=None,
rotation_perturb=np.pi / 4,
center_noise_std=1.0,
global_random_rot_range=np.pi / 4,
num_try=100):
"""random rotate or remove each groundtrutn independently.
use kitti viewer to test this function points_transform_
Args:
gt_boxes: [N, 7], gt box in lidar.points_transform_
points: [M, 4], point cloud in lidar.
"""
num_boxes = gt_boxes.shape[0]
if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):
rotation_perturb = [-rotation_perturb, rotation_perturb]
if not isinstance(global_random_rot_range, (list, tuple, np.ndarray)):
global_random_rot_range = [
-global_random_rot_range, global_random_rot_range
]
enable_grot = np.abs(global_random_rot_range[0] -
global_random_rot_range[1]) >= 1e-3
if not isinstance(center_noise_std, (list, tuple, np.ndarray)):
center_noise_std = [
center_noise_std, center_noise_std, center_noise_std
]
if valid_mask is None:
valid_mask = np.ones((num_boxes, ), dtype=np.bool_)
center_noise_std = np.array(center_noise_std, dtype=gt_boxes.dtype)
loc_noises = np.random.normal(
scale=center_noise_std, size=[num_boxes, num_try, 3])
rot_noises = np.random.uniform(
rotation_perturb[0], rotation_perturb[1], size=[num_boxes, num_try])
gt_grots = np.arctan2(gt_boxes[:, 0], gt_boxes[:, 1])
grot_lowers = global_random_rot_range[0] - gt_grots
grot_uppers = global_random_rot_range[1] - gt_grots
global_rot_noises = np.random.uniform(
grot_lowers[..., np.newaxis],
grot_uppers[..., np.newaxis],
size=[num_boxes, num_try])
origin = [0.5, 0.5, 0]
gt_box_corners = box_np_ops.center_to_corner_box3d(
gt_boxes[:, :3],
gt_boxes[:, 3:6],
gt_boxes[:, 6],
origin=origin,
axis=2)
# TODO: rewrite this noise box function?
if not enable_grot:
selected_noise = noise_per_box(gt_boxes[:, [0, 1, 3, 4, 6]],
valid_mask, loc_noises, rot_noises)
else:
selected_noise = noise_per_box_v2_(gt_boxes[:, [0, 1, 3, 4, 6]],
valid_mask, loc_noises, rot_noises,
global_rot_noises)
loc_transforms = _select_transform(loc_noises, selected_noise)
rot_transforms = _select_transform(rot_noises, selected_noise)
surfaces = box_np_ops.corner_to_surfaces_3d_jit(gt_box_corners)
if points is not None:
# TODO: replace this points_in_convex function by my tools?
point_masks = box_np_ops.points_in_convex_polygon_3d_jit(
points[:, :3], surfaces)
points_transform_(points, gt_boxes[:, :3], point_masks, loc_transforms,
rot_transforms, valid_mask)
box3d_transform_(gt_boxes, loc_transforms, rot_transforms, valid_mask)
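# In this repo the function is driven by the ObjectNoise pipeline further
# below; a direct call sketch with default-like values (hypothetical):
#   noise_per_object_v3_(gt_boxes, points, valid_mask,
#                        rotation_perturb=[-0.157, 0.157],
#                        center_noise_std=[0.25, 0.25, 0.25],
#                        global_random_rot_range=[0.0, 0.0],
#                        num_try=100)
# gt_boxes and points are modified in place.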
import copy
import os
import pickle
import cv2
import mmcv
import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.datasets.pipelines import data_augment_utils
from ..registry import OBJECTSAMPLERS
class BatchSampler:
def __init__(self,
sampled_list,
name=None,
epoch=None,
shuffle=True,
drop_reminder=False):
self._sampled_list = sampled_list
self._indices = np.arange(len(sampled_list))
if shuffle:
np.random.shuffle(self._indices)
self._idx = 0
self._example_num = len(sampled_list)
self._name = name
self._shuffle = shuffle
self._epoch = epoch
self._epoch_counter = 0
self._drop_reminder = drop_reminder
def _sample(self, num):
if self._idx + num >= self._example_num:
ret = self._indices[self._idx:].copy()
self._reset()
else:
ret = self._indices[self._idx:self._idx + num]
self._idx += num
return ret
def _reset(self):
assert self._name is not None
# print("reset", self._name)
if self._shuffle:
np.random.shuffle(self._indices)
self._idx = 0
def sample(self, num):
indices = self._sample(num)
return [self._sampled_list[i] for i in indices]
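# Usage sketch (hypothetical items): the sampler iterates over a list without
# replacement and reshuffles whenever it wraps around, e.g.
#   sampler = BatchSampler(list(range(5)), name='Car')
#   batch = sampler.sample(3)  # 3 distinct items from the current pass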
@OBJECTSAMPLERS.register_module
class DataBaseSampler(object):
def __init__(self, info_path, root_path, rate, prepare, object_rot_range,
sample_groups, use_road_plane):
super().__init__()
self.root_path = root_path
self.info_path = info_path
self.rate = rate
self.prepare = prepare
self.object_rot_range = object_rot_range
with open(info_path, 'rb') as f:
db_infos = pickle.load(f)
# filter database infos
from mmdet3d.apis import get_root_logger
logger = get_root_logger()
for k, v in db_infos.items():
logger.info(f'load {len(v)} {k} database infos')
for prep_func, val in prepare.items():
db_infos = getattr(self, prep_func)(db_infos, val)
logger.info('After filter database:')
for k, v in db_infos.items():
logger.info(f'load {len(v)} {k} database infos')
self.db_infos = db_infos
# load sample groups
# TODO: more elegant way to load sample groups
self.sample_groups = []
for name, num in sample_groups.items():
self.sample_groups.append({name: int(num)})
self.group_db_infos = self.db_infos # just use db_infos
self.sample_classes = []
self.sample_max_nums = []
for group_info in self.sample_groups:
self.sample_classes += list(group_info.keys())
self.sample_max_nums += list(group_info.values())
self.sampler_dict = {}
for k, v in self.group_db_infos.items():
self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)
self.object_rot_range = object_rot_range
self.object_rot_enable = np.abs(self.object_rot_range[0] -
self.object_rot_range[1]) >= 1e-3
# TODO: No group_sampling currently
@staticmethod
def filter_by_difficulty(db_infos, removed_difficulty):
new_db_infos = {}
for key, dinfos in db_infos.items():
new_db_infos[key] = [
info for info in dinfos
if info['difficulty'] not in removed_difficulty
]
return new_db_infos
@staticmethod
def filter_by_min_points(db_infos, min_gt_points_dict):
for name, min_num in min_gt_points_dict.items():
min_num = int(min_num)
if min_num > 0:
filtered_infos = []
for info in db_infos[name]:
if info['num_points_in_gt'] >= min_num:
filtered_infos.append(info)
db_infos[name] = filtered_infos
return db_infos
def sample_all(self, gt_bboxes, gt_names, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes = []
avoid_coll_boxes = gt_bboxes
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
else:
sampled_gt_box = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_bboxes += [sampled_gt_box]
avoid_coll_boxes = np.concatenate(
[avoid_coll_boxes, sampled_gt_box], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes = np.concatenate(sampled_gt_bboxes, axis=0)
# center = sampled_gt_bboxes[:, 0:3]
num_sampled = len(sampled)
s_points_list = []
count = 0
for info in sampled:
file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
s_points = np.fromfile(
file_path, dtype=np.float32).reshape([-1, 4])
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
s_points[:, :3] += info['box3d_lidar'][:3]
count += 1
s_points_list.append(s_points)
ret = {
'gt_names':
np.array([s['name'] for s in sampled]),
'difficulty':
np.array([s['difficulty'] for s in sampled]),
'gt_bboxes_3d':
sampled_gt_bboxes,
'points':
np.concatenate(s_points_list, axis=0),
'gt_masks':
np.ones((num_sampled, ), dtype=np.bool_),
'group_ids':
np.arange(gt_bboxes.shape[0],
gt_bboxes.shape[0] + len(sampled))
}
return ret
def sample_class_v2(self, name, num, gt_bboxes):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes.shape[0]
num_sampled = len(sampled)
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
valid_mask = np.zeros([gt_bboxes.shape[0]], dtype=np.bool_)
valid_mask = np.concatenate(
[valid_mask,
np.ones([sp_boxes.shape[0]], dtype=np.bool_)], axis=0)
boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()
if self.object_rot_enable:
assert False, 'This part needs to be checked'
# place samples to any place in a circle.
# TODO: rm it if not needed
data_augment_utils.noise_per_object_v3_(
boxes,
None,
valid_mask,
0,
0,
                self.object_rot_range,
num_try=100)
sp_boxes_new = boxes[gt_bboxes.shape[0]:]
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes_new[:, 0:2], sp_boxes_new[:, 3:5], sp_boxes_new[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
if self.object_rot_enable:
assert False, 'This part needs to be checked'
sampled[i - num_gt]['box3d_lidar'][:2] = boxes[i, :2]
sampled[i - num_gt]['box3d_lidar'][-1] = boxes[i, -1]
sampled[i - num_gt]['rot_transform'] = (
boxes[i, -1] - sp_boxes[i - num_gt, -1])
valid_samples.append(sampled[i - num_gt])
return valid_samples
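# Config sketch matching __init__ above (hypothetical paths and values):
#   db_sampler = dict(
#       type='DataBaseSampler',
#       info_path='data/kitti/kitti_dbinfos_train.pkl',
#       root_path='data/kitti',
#       rate=1.0,
#       prepare=dict(
#           filter_by_difficulty=[-1],
#           filter_by_min_points=dict(Car=5)),
#       object_rot_range=[0.0, 0.0],
#       sample_groups=dict(Car=15),
#       use_road_plane=False)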
@OBJECTSAMPLERS.register_module
class MMDataBaseSampler(DataBaseSampler):
def __init__(self,
info_path,
root_path,
rate,
prepare,
object_rot_range,
sample_groups,
check_2D_collision=False,
collision_thr=0,
collision_in_classes=False,
depth_consistent=False,
blending_type=None):
super(MMDataBaseSampler, self).__init__(
info_path=info_path,
root_path=root_path,
rate=rate,
prepare=prepare,
object_rot_range=object_rot_range,
sample_groups=sample_groups,
use_road_plane=False,
)
self.blending_type = blending_type
self.depth_consistent = depth_consistent
self.check_2D_collision = check_2D_collision
self.collision_thr = collision_thr
self.collision_in_classes = collision_in_classes
def sample_all(self, gt_bboxes_3d, gt_names, gt_bboxes_2d=None, img=None):
sampled_num_dict = {}
sample_num_per_class = []
for class_name, max_sample_num in zip(self.sample_classes,
self.sample_max_nums):
sampled_num = int(max_sample_num -
np.sum([n == class_name for n in gt_names]))
sampled_num = np.round(self.rate * sampled_num).astype(np.int64)
sampled_num_dict[class_name] = sampled_num
sample_num_per_class.append(sampled_num)
sampled = []
sampled_gt_bboxes_3d = []
sampled_gt_bboxes_2d = []
avoid_coll_boxes_3d = gt_bboxes_3d
avoid_coll_boxes_2d = gt_bboxes_2d
for class_name, sampled_num in zip(self.sample_classes,
sample_num_per_class):
if sampled_num > 0:
sampled_cls = self.sample_class_v2(class_name, sampled_num,
avoid_coll_boxes_3d,
avoid_coll_boxes_2d)
sampled += sampled_cls
if len(sampled_cls) > 0:
if len(sampled_cls) == 1:
sampled_gt_box_3d = sampled_cls[0]['box3d_lidar'][
np.newaxis, ...]
sampled_gt_box_2d = sampled_cls[0]['box2d_camera'][
np.newaxis, ...]
else:
sampled_gt_box_3d = np.stack(
[s['box3d_lidar'] for s in sampled_cls], axis=0)
sampled_gt_box_2d = np.stack(
[s['box2d_camera'] for s in sampled_cls], axis=0)
sampled_gt_bboxes_3d += [sampled_gt_box_3d]
sampled_gt_bboxes_2d += [sampled_gt_box_2d]
if self.collision_in_classes:
                        # TODO: check whether the collision check among
                        # classes is necessary
avoid_coll_boxes_3d = np.concatenate(
[avoid_coll_boxes_3d, sampled_gt_box_3d], axis=0)
avoid_coll_boxes_2d = np.concatenate(
[avoid_coll_boxes_2d, sampled_gt_box_2d], axis=0)
ret = None
if len(sampled) > 0:
sampled_gt_bboxes_3d = np.concatenate(sampled_gt_bboxes_3d, axis=0)
sampled_gt_bboxes_2d = np.concatenate(sampled_gt_bboxes_2d, axis=0)
num_sampled = len(sampled)
s_points_list = []
count = 0
if self.depth_consistent:
                # paste objects in order of decreasing distance so that
                # nearer objects overlay farther ones
center = sampled_gt_bboxes_3d[:, 0:3]
paste_order = np.argsort(
-np.power(np.sum(np.power(center, 2), axis=-1), 1 / 2),
axis=-1)
for idx in range(len(sampled)):
if self.depth_consistent:
inds = np.where(paste_order == idx)[0][0]
info = sampled[inds]
else:
info = sampled[idx]
pcd_file_path = os.path.join(
self.root_path,
info['path']) if self.root_path else info['path']
img_file_path = pcd_file_path + '.png'
mask_file_path = pcd_file_path + '.mask.png'
s_points = np.fromfile(
pcd_file_path, dtype=np.float32).reshape([-1, 4])
s_patch = mmcv.imread(img_file_path)
s_mask = mmcv.imread(mask_file_path, 'grayscale')
if 'rot_transform' in info:
rot = info['rot_transform']
s_points[:, :3] = box_np_ops.rotation_points_single_angle(
s_points[:, :3], rot, axis=2)
# TODO: might need to rot 2d bbox in the future
                # the object center was already subtracted from each
                # sample's points, so add the offset back here
s_points[:, :3] += info['box3d_lidar'][:3]
img = self.paste_obj(
img,
s_patch,
s_mask,
bbox_2d=info['box2d_camera'].astype(np.int32))
count += 1
s_points_list.append(s_points)
ret = dict(
img=img,
gt_names=np.array([s['name'] for s in sampled]),
difficulty=np.array([s['difficulty'] for s in sampled]),
gt_bboxes_3d=sampled_gt_bboxes_3d,
gt_bboxes_2d=sampled_gt_bboxes_2d,
points=np.concatenate(s_points_list, axis=0),
gt_masks=np.ones((num_sampled, ), dtype=np.bool_),
group_ids=np.arange(gt_bboxes_3d.shape[0],
gt_bboxes_3d.shape[0] + len(sampled)))
return ret
def paste_obj(self, img, obj_img, obj_mask, bbox_2d):
# paste the image patch back
x1, y1, x2, y2 = bbox_2d
        # the bbox may exceed the image boundary, so clip the patch size
img_h, img_w = img.shape[:2]
w = np.maximum(min(x2, img_w - 1) - x1 + 1, 1)
h = np.maximum(min(y2, img_h - 1) - y1 + 1, 1)
obj_mask = obj_mask[:h, :w]
obj_img = obj_img[:h, :w]
# choose a blend option
if not self.blending_type:
blending_op = 'none'
else:
blending_choice = np.random.randint(len(self.blending_type))
blending_op = self.blending_type[blending_choice]
if blending_op.find('poisson') != -1:
# options: cv2.NORMAL_CLONE=1, or cv2.MONOCHROME_TRANSFER=3
            # cv2.MIXED_CLONE mixes the textures, so it is not used
if blending_op == 'poisson':
mode = np.random.choice([1, 3], 1)[0]
elif blending_op == 'poisson_normal':
mode = cv2.NORMAL_CLONE
elif blending_op == 'poisson_transfer':
mode = cv2.MONOCHROME_TRANSFER
else:
raise NotImplementedError
center = (int(x1 + w / 2), int(y1 + h / 2))
img = cv2.seamlessClone(obj_img, img, obj_mask * 255, center, mode)
else:
if blending_op == 'gaussian':
obj_mask = cv2.GaussianBlur(
obj_mask.astype(np.float32), (5, 5), 2)
elif blending_op == 'box':
obj_mask = cv2.blur(obj_mask.astype(np.float32), (3, 3))
paste_mask = 1 - obj_mask
img[y1:y1 + h,
x1:x1 + w] = (img[y1:y1 + h, x1:x1 + w].astype(np.float32) *
paste_mask[..., None]).astype(np.uint8)
img[y1:y1 + h, x1:x1 + w] += (obj_img.astype(np.float32) *
obj_mask[..., None]).astype(np.uint8)
return img
def sample_class_v2(self, name, num, gt_bboxes_3d, gt_bboxes_2d):
sampled = self.sampler_dict[name].sample(num)
sampled = copy.deepcopy(sampled)
num_gt = gt_bboxes_3d.shape[0]
num_sampled = len(sampled)
# avoid collision in BEV first
gt_bboxes_bv = box_np_ops.center_to_corner_box2d(
gt_bboxes_3d[:, 0:2], gt_bboxes_3d[:, 3:5], gt_bboxes_3d[:, 6])
sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)
sp_boxes_bv = box_np_ops.center_to_corner_box2d(
sp_boxes[:, 0:2], sp_boxes[:, 3:5], sp_boxes[:, 6])
total_bv = np.concatenate([gt_bboxes_bv, sp_boxes_bv], axis=0)
coll_mat = data_augment_utils.box_collision_test(total_bv, total_bv)
# Then avoid collision in 2D space
if self.check_2D_collision:
sp_boxes_2d = np.stack([i['box2d_camera'] for i in sampled],
axis=0)
total_bbox_2d = np.concatenate([gt_bboxes_2d, sp_boxes_2d],
axis=0) # Nx4
            # randomly select a collision threshold
if isinstance(self.collision_thr, float):
collision_thr = self.collision_thr
elif isinstance(self.collision_thr, list):
collision_thr = np.random.choice(self.collision_thr)
elif isinstance(self.collision_thr, dict):
mode = self.collision_thr.get('mode', 'value')
if mode == 'value':
collision_thr = np.random.choice(
self.collision_thr['thr_range'])
elif mode == 'range':
collision_thr = np.random.uniform(
self.collision_thr['thr_range'][0],
self.collision_thr['thr_range'][1])
if collision_thr == 0:
                # use a corner-based collision test similar to the BEV one
# Nx4 (x1, y1, x2, y2) -> corners: Nx4x2
# ((x1, y1), (x2, y1), (x1, y2), (x2, y2))
x1y1 = total_bbox_2d[:, :2]
x2y2 = total_bbox_2d[:, 2:]
x1y2 = np.stack([total_bbox_2d[:, 0], total_bbox_2d[:, 3]],
axis=-1)
x2y1 = np.stack([total_bbox_2d[:, 2], total_bbox_2d[:, 1]],
axis=-1)
total_2d = np.stack([x1y1, x2y1, x1y2, x2y2], axis=1)
coll_mat_2d = data_augment_utils.box_collision_test(
total_2d, total_2d)
else:
# use iof rather than iou to protect the foreground
overlaps = box_np_ops.iou_jit(total_bbox_2d, total_bbox_2d,
'iof')
coll_mat_2d = overlaps > collision_thr
coll_mat = coll_mat + coll_mat_2d
diag = np.arange(total_bv.shape[0])
coll_mat[diag, diag] = False
valid_samples = []
for i in range(num_gt, num_gt + num_sampled):
if coll_mat[i].any():
coll_mat[i] = False
coll_mat[:, i] = False
else:
valid_samples.append(sampled[i - num_gt])
return valid_samples
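# `collision_thr` sketch: it may be a float, a list of candidate values, or a
# dict, e.g. (hypothetical values):
#   collision_thr = 0.0  # corner-based test, as in BEV
#   collision_thr = [0.0, 0.3, 0.5, 0.7]  # random choice per call
#   collision_thr = dict(mode='range', thr_range=[0.0, 0.7])  # uniform sample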
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet.datasets.pipelines import PIPELINES, to_tensor
PIPELINES._module_dict.pop('DefaultFormatBundle')
@PIPELINES.register_module
class DefaultFormatBundle(object):
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields, including "img",
"proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- proposals: (1)to tensor, (2)to DataContainer
- gt_bboxes: (1)to tensor, (2)to DataContainer
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
(3)to DataContainer (stack=True)
"""
    def __init__(self):
        pass
def __call__(self, results):
if 'img' in results:
if isinstance(results['img'], list):
# process multiple imgs in single frame
imgs = [img.transpose(2, 0, 1) for img in results['img']]
imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
results['img'] = DC(to_tensor(imgs), stack=True)
else:
img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True)
for key in [
'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore',
'gt_labels', 'gt_labels_3d'
]:
if key not in results:
continue
if isinstance(results[key], list):
results[key] = DC([to_tensor(res) for res in results[key]])
else:
results[key] = DC(to_tensor(results[key]))
if 'gt_masks' in results:
results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
if 'gt_semantic_seg' in results:
results['gt_semantic_seg'] = DC(
to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)
return results
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class Collect3D(object):
def __init__(self,
keys,
pcd_shape=[1, 1600, 1408],
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
'sample_idx', 'pcd_scale_factor', 'pcd_rotation')):
self.keys = keys
self.meta_keys = meta_keys
self.pcd_shape = pcd_shape
def __call__(self, results):
data = {}
img_meta = {}
for key in self.meta_keys:
if key in results:
img_meta[key] = results[key]
img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
data['img_meta'] = DC(img_meta, cpu_only=True)
for key in self.keys:
data[key] = results[key]
return data
def __repr__(self):
return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(
self.keys, self.meta_keys)
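# Pipeline config sketch (hypothetical keys): Collect3D packs the listed keys
# plus the meta fields found in results into the final sample, e.g.
#   dict(type='Collect3D',
#        keys=['voxels', 'coors', 'num_points', 'gt_bboxes_3d',
#              'gt_labels_3d'])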
@PIPELINES.register_module
class DefaultFormatBundle3D(DefaultFormatBundle):
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields for voxels,
including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and
"gt_semantic_seg".
These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- proposals: (1)to tensor, (2)to DataContainer
- gt_bboxes: (1)to tensor, (2)to DataContainer
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
"""
def __init__(self, class_names, with_gt=True, with_label=True):
super(DefaultFormatBundle3D, self).__init__()
self.class_names = class_names
self.with_gt = with_gt
self.with_label = with_label
def __call__(self, results):
# Format 3D data
for key in [
'voxels', 'coors', 'voxel_centers', 'num_points', 'points'
]:
if key not in results:
continue
results[key] = DC(to_tensor(results[key]), stack=False)
if self.with_gt:
            # filter GT bboxes by their masks as the final step
if 'gt_bboxes_3d_mask' in results:
gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']
results['gt_bboxes_3d'] = results['gt_bboxes_3d'][
gt_bboxes_3d_mask]
results['gt_names_3d'] = results['gt_names_3d'][
gt_bboxes_3d_mask]
if 'gt_bboxes_mask' in results:
gt_bboxes_mask = results['gt_bboxes_mask']
if 'gt_bboxes' in results:
results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]
results['gt_names'] = results['gt_names'][gt_bboxes_mask]
if self.with_label:
if 'gt_names' in results and len(results['gt_names']) == 0:
results['gt_labels'] = np.array([], dtype=np.int64)
elif 'gt_names' in results and isinstance(
results['gt_names'][0], list):
# gt_labels might be a list of list in multi-view setting
results['gt_labels'] = [
np.array([self.class_names.index(n) for n in res],
dtype=np.int64) for res in results['gt_names']
]
elif 'gt_names' in results:
results['gt_labels'] = np.array([
self.class_names.index(n) for n in results['gt_names']
],
dtype=np.int64)
            # we still assume one pipeline processes one frame of LiDAR,
            # so gt_names_3d is a flat list of strings
results['gt_labels_3d'] = np.array([
self.class_names.index(n) for n in results['gt_names_3d']
],
dtype=np.int64)
results = super(DefaultFormatBundle3D, self).__call__(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(class_names={}, '.format(self.class_names)
repr_str += 'with_gt={}, with_label={})'.format(
self.with_gt, self.with_label)
return repr_str
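# Formatting stage sketch (hypothetical class names): DefaultFormatBundle3D
# converts voxel fields and GT to tensors/DataContainers and maps names to
# labels, e.g.
#   dict(type='DefaultFormatBundle3D',
#        class_names=['Car', 'Pedestrian', 'Cyclist'])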
import os.path as osp
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
from mmdet.datasets.pipelines import PIPELINES
@PIPELINES.register_module
class LoadImageFromFile(object):
def __init__(self, to_float32=False):
self.to_float32 = to_float32
def __call__(self, results):
if results['img_prefix'] is not None:
filename = osp.join(results['img_prefix'],
results['img_info']['filename'])
else:
filename = results['img_info']['filename']
img = mmcv.imread(filename)
if self.to_float32:
img = img.astype(np.float32)
results['filename'] = filename
results['img'] = img
results['img_shape'] = img.shape
results['ori_shape'] = img.shape
return results
def __repr__(self):
return self.__class__.__name__ + '(to_float32={})'.format(
self.to_float32)
@PIPELINES.register_module
class LoadAnnotations(object):
def __init__(self,
with_bbox=True,
with_label=True,
with_mask=False,
with_seg=False,
poly2mask=True):
self.with_bbox = with_bbox
self.with_label = with_label
self.with_mask = with_mask
self.with_seg = with_seg
self.poly2mask = poly2mask
def _load_bboxes(self, results):
ann_info = results['ann_info']
results['gt_bboxes'] = ann_info['bboxes']
gt_bboxes_ignore = ann_info.get('bboxes_ignore', None)
if gt_bboxes_ignore is not None:
results['gt_bboxes_ignore'] = gt_bboxes_ignore
results['bbox_fields'].append('gt_bboxes_ignore')
results['bbox_fields'].append('gt_bboxes')
return results
def _load_labels(self, results):
results['gt_labels'] = results['ann_info']['labels']
return results
def _poly2mask(self, mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
rle = maskUtils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = maskUtils.decode(rle)
return mask
def _load_masks(self, results):
h, w = results['img_info']['height'], results['img_info']['width']
gt_masks = results['ann_info']['masks']
if self.poly2mask:
gt_masks = [self._poly2mask(mask, h, w) for mask in gt_masks]
results['gt_masks'] = gt_masks
results['mask_fields'].append('gt_masks')
return results
def _load_semantic_seg(self, results):
results['gt_semantic_seg'] = mmcv.imread(
osp.join(results['seg_prefix'], results['ann_info']['seg_map']),
flag='unchanged').squeeze()
results['seg_fields'].append('gt_semantic_seg')
return results
def __call__(self, results):
if self.with_bbox:
results = self._load_bboxes(results)
if results is None:
return None
if self.with_label:
results = self._load_labels(results)
if self.with_mask:
results = self._load_masks(results)
if self.with_seg:
results = self._load_semantic_seg(results)
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += ('(with_bbox={}, with_label={}, with_mask={},'
' with_seg={})').format(self.with_bbox, self.with_label,
self.with_mask, self.with_seg)
return repr_str
@PIPELINES.register_module
class LoadProposals(object):
def __init__(self, num_max_proposals=None):
self.num_max_proposals = num_max_proposals
def __call__(self, results):
proposals = results['proposals']
if proposals.shape[1] not in (4, 5):
raise AssertionError(
'proposals should have shapes (n, 4) or (n, 5), '
'but found {}'.format(proposals.shape))
proposals = proposals[:, :4]
if self.num_max_proposals is not None:
proposals = proposals[:self.num_max_proposals]
if len(proposals) == 0:
proposals = np.array([[0, 0, 0, 0]], dtype=np.float32)
results['proposals'] = proposals
results['bbox_fields'].append('proposals')
return results
def __repr__(self):
return self.__class__.__name__ + '(num_max_proposals={})'.format(
self.num_max_proposals)
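# Loading stage sketch for an image branch (hypothetical config):
#   dict(type='LoadImageFromFile', to_float32=True),
#   dict(type='LoadAnnotations', with_bbox=True, with_label=True),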
import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.utils import build_from_cfg
from mmdet.datasets.registry import PIPELINES
from ..registry import OBJECTSAMPLERS
from .data_augment_utils import noise_per_object_v3_
from .transforms import RandomFlip
@PIPELINES.register_module
class RandomFlip3D(RandomFlip):
"""Flip the points & bbox.
If the input dict contains the key "flip", then the flag will be used,
otherwise it will be randomly decided by a ratio specified in the init
method.
Args:
flip_ratio (float, optional): The flipping probability.
"""
def __init__(self, sync_2d=True, **kwargs):
super(RandomFlip3D, self).__init__(**kwargs)
self.sync_2d = sync_2d
def random_flip_points(self, gt_bboxes_3d, points):
gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1]
gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
points[:, 1] = -points[:, 1]
if gt_bboxes_3d.shape[1] == 9:
            # flip velocities at the same time
gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
return gt_bboxes_3d, points
def __call__(self, input_dict):
super(RandomFlip3D, self).__call__(input_dict)
if self.sync_2d:
input_dict['pcd_flip'] = input_dict['flip']
else:
            flip = bool(np.random.rand() < self.flip_ratio)
input_dict['pcd_flip'] = flip
if input_dict['pcd_flip']:
            # flip points and 3D boxes
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d, points = self.random_flip_points(
gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['points'] = points
return input_dict
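# Usage sketch: with sync_2d=True the point cloud flip simply follows the 2D
# image flip decided by the parent RandomFlip, e.g. (hypothetical ratio):
#   dict(type='RandomFlip3D', sync_2d=True, flip_ratio=0.5)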
@PIPELINES.register_module
class ObjectSample(object):
def __init__(self, db_sampler, sample_2d=False):
self.sampler_cfg = db_sampler
self.sample_2d = sample_2d
if 'type' not in db_sampler.keys():
db_sampler['type'] = 'DataBaseSampler'
self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)
@staticmethod
def remove_points_in_boxes(points, boxes):
masks = box_np_ops.points_in_rbbox(points, boxes)
points = points[np.logical_not(masks.any(-1))]
return points
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# change to float for blending operation
points = input_dict['points']
# rect = input_dict['rect']
# Trv2c = input_dict['Trv2c']
# P2 = input_dict['P2']
if self.sample_2d:
img = input_dict['img'] # .astype(np.float32)
gt_bboxes_2d = input_dict['gt_bboxes']
gt_bboxes_mask = input_dict['gt_bboxes_mask']
gt_names = input_dict['gt_names']
# Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
else:
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_names_3d, img=None)
if sampled_dict is not None:
sampled_gt_names = sampled_dict['gt_names']
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
sampled_points = sampled_dict['points']
sampled_gt_masks = sampled_dict['gt_masks']
gt_names_3d = np.concatenate([gt_names_3d, sampled_gt_names],
axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
]).astype(np.float32)
gt_bboxes_3d_mask = np.concatenate(
[gt_bboxes_3d_mask, sampled_gt_masks], axis=0)
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension
dim_inds = points.shape[-1]
points = np.concatenate([sampled_points[:, :dim_inds], points],
axis=0)
if self.sample_2d:
sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
gt_bboxes_2d = np.concatenate(
[gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
gt_bboxes_mask = np.concatenate(
[gt_bboxes_mask, sampled_gt_masks], axis=0)
gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0)
input_dict['gt_names'] = gt_names
input_dict['gt_bboxes'] = gt_bboxes_2d
input_dict['gt_bboxes_mask'] = gt_bboxes_mask
input_dict['img'] = sampled_dict['img'] # .astype(np.uint8)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_names_3d'] = gt_names_3d
input_dict['points'] = points
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class ObjectNoise(object):
def __init__(self,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267],
num_try=100):
self.loc_noise_std = loc_noise_std
self.global_rot_range = global_rot_range
self.rot_uniform_noise = rot_uniform_noise
self.num_try = num_try
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
# TODO: check this inplace function
noise_per_object_v3_(
gt_bboxes_3d,
points,
gt_bboxes_3d_mask,
rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range,
num_try=self.num_try)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['points'] = points
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_try={},'.format(self.num_try)
repr_str += ' loc_noise_std={},'.format(self.loc_noise_std)
repr_str += ' global_rot_range={},'.format(self.global_rot_range)
repr_str += ' rot_uniform_noise={})'.format(self.rot_uniform_noise)
return repr_str
@PIPELINES.register_module
class GlobalRotScale(object):
def __init__(self,
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]):
self.rot_uniform_noise = rot_uniform_noise
self.scaling_uniform_noise = scaling_uniform_noise
self.trans_normal_noise = trans_normal_noise
def _trans_bbox_points(self, gt_boxes, points):
        # sample per-axis translation noise
        noise_trans = np.random.normal(0, self.trans_normal_noise, 3)
points[:, :3] += noise_trans
gt_boxes[:, :3] += noise_trans
return gt_boxes, points, noise_trans
def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):
if not isinstance(rotation, list):
rotation = [-rotation, rotation]
noise_rotation = np.random.uniform(rotation[0], rotation[1])
points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
points[:, :3], noise_rotation, axis=2)
gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle(
gt_boxes[:, :3], noise_rotation, axis=2)
gt_boxes[:, 6] += noise_rotation
if gt_boxes.shape[1] == 9:
            # rotate the velocity vector as well
rot_cos = np.cos(noise_rotation)
rot_sin = np.sin(noise_rotation)
rot_mat_T_bev = np.array([[rot_cos, -rot_sin], [rot_sin, rot_cos]],
dtype=points.dtype)
gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
return gt_boxes, points, rot_mat_T
def _scale_bbox_points(self,
gt_boxes,
points,
min_scale=0.95,
max_scale=1.05):
noise_scale = np.random.uniform(min_scale, max_scale)
points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale
if gt_boxes.shape[1] == 9:
gt_boxes[:, 7:] *= noise_scale
return gt_boxes, points, noise_scale
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
points = input_dict['points']
gt_bboxes_3d, points, rotation_factor = self._rot_bbox_points(
gt_bboxes_3d, points, rotation=self.rot_uniform_noise)
gt_bboxes_3d, points, scale_factor = self._scale_bbox_points(
gt_bboxes_3d, points, *self.scaling_uniform_noise)
gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['points'] = points
input_dict['pcd_scale_factor'] = scale_factor
input_dict['pcd_rotation'] = rotation_factor
input_dict['pcd_trans'] = trans_factor
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(rot_uniform_noise={},'.format(self.rot_uniform_noise)
repr_str += ' scaling_uniform_noise={},'.format(
self.scaling_uniform_noise)
repr_str += ' trans_normal_noise={})'.format(self.trans_normal_noise)
return repr_str
@PIPELINES.register_module
class PointShuffle(object):
def __call__(self, input_dict):
np.random.shuffle(input_dict['points'])
return input_dict
def __repr__(self):
return self.__class__.__name__
@PIPELINES.register_module
class ObjectRangeFilter(object):
def __init__(self, point_cloud_range):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
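    # e.g. limit_period(3.5 * np.pi, offset=0.5, period=2 * np.pi)
    #   = 3.5 * np.pi - np.floor(1.75 + 0.5) * 2 * np.pi = -0.5 * np.pi,
    # i.e. yaw angles are wrapped into [-pi, pi)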
@staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range.
this function should be applied after other prep functions
Args:
gt_bboxes_3d ([type]): [description]
limit_range ([type]): [description]
"""
gt_bboxes_3d_bv = box_np_ops.center_to_corner_box2d(
            gt_bboxes_3d[:, [0, 1]], gt_bboxes_3d[:, [3, 4]],
gt_bboxes_3d[:, 6])
bounding_box = box_np_ops.minmax_to_corner_2d(
np.asarray(limit_range)[np.newaxis, ...])
ret = box_np_ops.points_in_convex_polygon_jit(
gt_bboxes_3d_bv.reshape(-1, 2), bounding_box)
return np.any(ret.reshape(-1, 4), axis=1)
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_names_3d = input_dict['gt_names_3d']
gt_bboxes_3d_mask = input_dict['gt_bboxes_3d_mask']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask]
gt_names_3d = gt_names_3d[mask]
# the mask should also be updated
gt_bboxes_3d_mask = gt_bboxes_3d_mask[mask]
        # limit yaw to [-pi, pi)
gt_bboxes_3d[:, 6] = self.limit_period(
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_names_3d'] = gt_names_3d
input_dict['gt_bboxes_3d_mask'] = gt_bboxes_3d_mask
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str
@PIPELINES.register_module
class PointsRangeFilter(object):
def __init__(self, point_cloud_range):
self.pcd_range = np.array(
point_cloud_range, dtype=np.float32)[np.newaxis, :]
def __call__(self, input_dict):
points = input_dict['points']
points_mask = ((points[:, :3] >= self.pcd_range[:, :3])
& (points[:, :3] < self.pcd_range[:, 3:]))
points_mask = points_mask[:, 0] & points_mask[:, 1] & points_mask[:, 2]
clean_points = points[points_mask, :]
input_dict['points'] = clean_points
return input_dict
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
return repr_str
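# A hedged end-to-end train-augmentation sketch combining the transforms in
# this file (hypothetical ranges for a KITTI-like setup; `db_sampler` as
# sketched earlier):
#   train_aug = [
#       dict(type='ObjectSample', db_sampler=db_sampler),
#       dict(type='ObjectNoise', num_try=100),
#       dict(type='RandomFlip3D', sync_2d=True, flip_ratio=0.5),
#       dict(type='GlobalRotScale'),
#       dict(type='PointShuffle'),
#       dict(type='ObjectRangeFilter',
#            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
#       dict(type='PointsRangeFilter',
#            point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
#   ]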