Initial commit

d1aac35d · zhangwenwei · d1aac35d · d1aac35d · d1aac35d · d1aac35d
Commit d1aac35d authored Apr 14, 2020 by zhangwenwei
14 changed files
--- a/tests/test_config.py
+++ b/tests/test_config.py
+from os.path import dirname, exists, join, relpath
+from mmdet.core import BitmapMasks, PolygonMasks
+def _get_config_directory():
+    """ Find the predefined detector config directory """
+    try:
+        # Assume we are running in the source mmdetection repo
+        repo_dpath = dirname(dirname(__file__))
+    except NameError:
+        # For IPython development when this __file__ is not defined
+        import mmdet
+        repo_dpath = dirname(dirname(mmdet.__file__))
+    config_dpath = join(repo_dpath, 'configs')
+    if not exists(config_dpath):
+        raise Exception('Cannot find config path')
+    return config_dpath
+def test_config_build_detector():
+    """
+    Test that all detection models defined in the configs can be initialized.
+    """
+    from mmcv import Config
+    from mmdet3d.models import build_detector
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+    import glob
+    config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+    print('Using {} config files'.format(len(config_names)))
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        config_mod = Config.fromfile(config_fpath)
+        config_mod.model
+        config_mod.train_cfg
+        config_mod.test_cfg
+        print('Building detector, config_fpath = {!r}'.format(config_fpath))
+        # Remove pretrained keys to allow for testing in an offline environment
+        if 'pretrained' in config_mod.model:
+            config_mod.model['pretrained'] = None
+        detector = build_detector(
+            config_mod.model,
+            train_cfg=config_mod.train_cfg,
+            test_cfg=config_mod.test_cfg)
+        assert detector is not None
+        if 'roi_head' in config_mod.model.keys():
+            # for two stage detector
+            # detectors must have bbox head
+            assert detector.roi_head.with_bbox and detector.with_bbox
+            assert detector.roi_head.with_mask == detector.with_mask
+            head_config = config_mod.model['roi_head']
+            _check_roi_head(head_config, detector.roi_head)
+        # else:
+        #     # for single stage detector
+        #     # detectors must have bbox head
+        #     # assert detector.with_bbox
+        #     head_config = config_mod.model['bbox_head']
+        #     _check_bbox_head(head_config, detector.bbox_head)
+def test_config_data_pipeline():
+    """
+    Test whether the data pipeline is valid and can process corner cases.
+    CommandLine:
+        xdoctest -m tests/test_config.py test_config_build_data_pipeline
+    """
+    from mmcv import Config
+    from mmdet.datasets.pipelines import Compose
+    import numpy as np
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+    # Only tests a representative subset of configurations
+    # TODO: test pipelines using Albu, current Albu throw None given empty GT
+    config_names = [
+        'nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py',
+        'nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py',
+        'kitti/'
+        'faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py',
+    ]
+    def dummy_masks(h, w, num_obj=3, mode='bitmap'):
+        assert mode in ('polygon', 'bitmap')
+        if mode == 'bitmap':
+            masks = np.random.randint(0, 2, (num_obj, h, w), dtype=np.uint8)
+            masks = BitmapMasks(masks, h, w)
+        else:
+            masks = []
+            for i in range(num_obj):
+                masks.append([])
+                masks[-1].append(
+                    np.random.uniform(0, min(h - 1, w - 1), (8 + 4 * i, )))
+                masks[-1].append(
+                    np.random.uniform(0, min(h - 1, w - 1), (10 + 4 * i, )))
+            masks = PolygonMasks(masks, h, w)
+        return masks
+    print('Using {} config files'.format(len(config_names)))
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        config_mod = Config.fromfile(config_fpath)
+        # remove loading pipeline
+        loading_pipeline = config_mod.train_pipeline.pop(0)
+        loading_ann_pipeline = config_mod.train_pipeline.pop(0)
+        config_mod.test_pipeline.pop(0)
+        train_pipeline = Compose(config_mod.train_pipeline)
+        test_pipeline = Compose(config_mod.test_pipeline)
+        print(
+            'Building data pipeline, config_fpath = {!r}'.format(config_fpath))
+        print('Test training data pipeline: \n{!r}'.format(train_pipeline))
+        img = np.random.randint(0, 255, size=(888, 666, 3), dtype=np.uint8)
+        if loading_pipeline.get('to_float32', False):
+            img = img.astype(np.float32)
+        mode = 'bitmap' if loading_ann_pipeline.get('poly2mask',
+                                                    True) else 'polygon'
+        results = dict(
+            filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_bboxes=np.array([[35.2, 11.7, 39.7, 15.7]], dtype=np.float32),
+            gt_labels=np.array([1], dtype=np.int64),
+            gt_masks=dummy_masks(img.shape[0], img.shape[1], mode=mode),
+        )
+        results['bbox_fields'] = ['gt_bboxes']
+        results['mask_fields'] = ['gt_masks']
+        output_results = train_pipeline(results)
+        assert output_results is not None
+        print('Test testing data pipeline: \n{!r}'.format(test_pipeline))
+        results = dict(
+            filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_bboxes=np.array([[35.2, 11.7, 39.7, 15.7]], dtype=np.float32),
+            gt_labels=np.array([1], dtype=np.int64),
+            gt_masks=dummy_masks(img.shape[0], img.shape[1], mode=mode),
+        )
+        results['bbox_fields'] = ['gt_bboxes']
+        results['mask_fields'] = ['gt_masks']
+        output_results = test_pipeline(results)
+        assert output_results is not None
+        # test empty GT
+        print('Test empty GT with training data pipeline: \n{!r}'.format(
+            train_pipeline))
+        results = dict(
+            filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_bboxes=np.zeros((0, 4), dtype=np.float32),
+            gt_labels=np.array([], dtype=np.int64),
+            gt_masks=dummy_masks(
+                img.shape[0], img.shape[1], num_obj=0, mode=mode),
+        )
+        results['bbox_fields'] = ['gt_bboxes']
+        results['mask_fields'] = ['gt_masks']
+        output_results = train_pipeline(results)
+        assert output_results is not None
+        print('Test empty GT with testing data pipeline: \n{!r}'.format(
+            test_pipeline))
+        results = dict(
+            filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_bboxes=np.zeros((0, 4), dtype=np.float32),
+            gt_labels=np.array([], dtype=np.int64),
+            gt_masks=dummy_masks(
+                img.shape[0], img.shape[1], num_obj=0, mode=mode),
+        )
+        results['bbox_fields'] = ['gt_bboxes']
+        results['mask_fields'] = ['gt_masks']
+        output_results = test_pipeline(results)
+        assert output_results is not None
+def _check_roi_head(config, head):
+    # check consistency between head_config and roi_head
+    assert config['type'] == head.__class__.__name__
+    # check roi_align
+    bbox_roi_cfg = config.bbox_roi_extractor
+    bbox_roi_extractor = head.bbox_roi_extractor
+    _check_roi_extractor(bbox_roi_cfg, bbox_roi_extractor)
+    # check bbox head infos
+    bbox_cfg = config.bbox_head
+    bbox_head = head.bbox_head
+    _check_bbox_head(bbox_cfg, bbox_head)
+    if head.with_mask:
+        # check roi_align
+        if config.mask_roi_extractor:
+            mask_roi_cfg = config.mask_roi_extractor
+            mask_roi_extractor = head.mask_roi_extractor
+            _check_roi_extractor(mask_roi_cfg, mask_roi_extractor,
+                                 bbox_roi_extractor)
+        # check mask head infos
+        mask_head = head.mask_head
+        mask_cfg = config.mask_head
+        _check_mask_head(mask_cfg, mask_head)
+def _check_roi_extractor(config, roi_extractor, prev_roi_extractor=None):
+    import torch.nn as nn
+    if isinstance(roi_extractor, nn.ModuleList):
+        if prev_roi_extractor:
+            prev_roi_extractor = prev_roi_extractor[0]
+        roi_extractor = roi_extractor[0]
+    assert (len(config.featmap_strides) == len(roi_extractor.roi_layers))
+    assert (config.out_channels == roi_extractor.out_channels)
+    from torch.nn.modules.utils import _pair
+    assert (_pair(
+        config.roi_layer.out_size) == roi_extractor.roi_layers[0].out_size)
+    if 'use_torchvision' in config.roi_layer:
+        assert (config.roi_layer.use_torchvision ==
+                roi_extractor.roi_layers[0].use_torchvision)
+    elif 'aligned' in config.roi_layer:
+        assert (
+            config.roi_layer.aligned == roi_extractor.roi_layers[0].aligned)
+    if prev_roi_extractor:
+        assert (roi_extractor.roi_layers[0].aligned ==
+                prev_roi_extractor.roi_layers[0].aligned)
+        assert (roi_extractor.roi_layers[0].use_torchvision ==
+                prev_roi_extractor.roi_layers[0].use_torchvision)
+def _check_mask_head(mask_cfg, mask_head):
+    import torch.nn as nn
+    if isinstance(mask_cfg, list):
+        for single_mask_cfg, single_mask_head in zip(mask_cfg, mask_head):
+            _check_mask_head(single_mask_cfg, single_mask_head)
+    elif isinstance(mask_head, nn.ModuleList):
+        for single_mask_head in mask_head:
+            _check_mask_head(mask_cfg, single_mask_head)
+    else:
+        assert mask_cfg['type'] == mask_head.__class__.__name__
+        assert mask_cfg.in_channels == mask_head.in_channels
+        assert (
+            mask_cfg.conv_out_channels == mask_head.conv_logits.in_channels)
+        class_agnostic = mask_cfg.get('class_agnostic', False)
+        out_dim = (1 if class_agnostic else mask_cfg.num_classes)
+        assert mask_head.conv_logits.out_channels == out_dim
+def _check_bbox_head(bbox_cfg, bbox_head):
+    import torch.nn as nn
+    if isinstance(bbox_cfg, list):
+        for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):
+            _check_bbox_head(single_bbox_cfg, single_bbox_head)
+    elif isinstance(bbox_head, nn.ModuleList):
+        for single_bbox_head in bbox_head:
+            _check_bbox_head(bbox_cfg, single_bbox_head)
+    else:
+        assert bbox_cfg['type'] == bbox_head.__class__.__name__
+        assert bbox_cfg.in_channels == bbox_head.in_channels
+        with_cls = bbox_cfg.get('with_cls', True)
+        if with_cls:
+            fc_out_channels = bbox_cfg.get('fc_out_channels', 2048)
+            assert (fc_out_channels == bbox_head.fc_cls.in_features)
+            assert bbox_cfg.num_classes + 1 == bbox_head.fc_cls.out_features
+        with_reg = bbox_cfg.get('with_reg', True)
+        if with_reg:
+            out_dim = (4 if bbox_cfg.reg_class_agnostic else 4 *
+                       bbox_cfg.num_classes)
+            assert bbox_head.fc_reg.out_features == out_dim
--- a/tools/create_data.py
+++ b/tools/create_data.py
+import argparse
+import os.path as osp
+import tools.data_converter.kitti_converter as kitti
+import tools.data_converter.nuscenes_converter as nuscenes_converter
+from tools.data_converter.create_gt_database import create_groundtruth_database
+def kitti_data_prep(root_path, info_prefix, version, out_dir):
+    kitti.create_kitti_info_file(root_path, info_prefix)
+    kitti.create_reduced_point_cloud(root_path, info_prefix)
+    create_groundtruth_database(
+        'KittiDataset',
+        root_path,
+        info_prefix,
+        '{}/{}_infos_train.pkl'.format(out_dir, info_prefix),
+        relative_path=False,
+        mask_anno_path='instances_train.json',
+        with_mask=(version == 'mask'))
+def nuscenes_data_prep(root_path,
+                       info_prefix,
+                       version,
+                       dataset_name,
+                       out_dir,
+                       max_sweeps=10):
+    nuscenes_converter.create_nuscenes_infos(
+        root_path, info_prefix, version=version, max_sweeps=max_sweeps)
+    if version == 'v1.0-test':
+        return
+    info_train_path = osp.join(root_path,
+                               '{}_infos_train.pkl'.format(info_prefix))
+    info_val_path = osp.join(root_path, '{}_infos_val.pkl'.format(info_prefix))
+    nuscenes_converter.export_2d_annotation(
+        root_path, info_train_path, version=version)
+    nuscenes_converter.export_2d_annotation(
+        root_path, info_val_path, version=version)
+    create_groundtruth_database(
+        dataset_name, root_path, info_prefix,
+        '{}/{}_infos_train.pkl'.format(out_dir, info_prefix))
+parser = argparse.ArgumentParser(description='Data converter arg parser')
+parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
+parser.add_argument(
+    '--root-path',
+    type=str,
+    default='./data/kitti',
+    help='specify the root path of dataset')
+parser.add_argument(
+    '--version',
+    type=str,
+    default='v1.0',
+    required=False,
+    help='specify the dataset version, no need for kitti')
+parser.add_argument(
+    '--max-sweeps',
+    type=int,
+    default=10,
+    required=False,
+    help='specify sweeps of lidar per example')
+parser.add_argument(
+    '--out-dir',
+    type=str,
+    default='./data/kitti',
+    required='False',
+    help='name of info pkl')
+parser.add_argument('--extra-tag', type=str, default='kitti')
+args = parser.parse_args()
+if __name__ == '__main__':
+    if args.dataset == 'kitti':
+        kitti_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            version=args.version,
+            out_dir=args.out_dir)
+    elif args.dataset == 'nuscenes' and args.version != 'v1.0-mini':
+        train_version = f'{args.version}-trainval'
+        nuscenes_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            version=train_version,
+            dataset_name='NuScenesDataset',
+            out_dir=args.out_dir,
+            max_sweeps=args.max_sweeps)
+        test_version = f'{args.version}-test'
+        nuscenes_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            version=test_version,
+            dataset_name='NuScenesDataset',
+            out_dir=args.out_dir,
+            max_sweeps=args.max_sweeps)
+    elif args.dataset == 'nuscenes' and args.version == 'v1.0-mini':
+        train_version = f'{args.version}'
+        nuscenes_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            version=train_version,
+            dataset_name='NuScenesDataset',
+            out_dir=args.out_dir,
+            max_sweeps=args.max_sweeps)
--- a/tools/create_data.sh
+++ b/tools/create_data.sh
+#!/usr/bin/env bash
+set -x
+export PYTHONPATH=`pwd`:$PYTHONPATH
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+WORK_DIR=$4
+GPUS=${GPUS:-1}
+GPUS_PER_NODE=${GPUS_PER_NODE:-1}
+SRUN_ARGS=${SRUN_ARGS:-""}
+JOB_NAME=create_data
+srun -p ${PARTITION} \
+    --job-name=${JOB_NAME} \
+    --gres=gpu:${GPUS_PER_NODE} \
+    --ntasks=${GPUS} \
+    --ntasks-per-node=${GPUS_PER_NODE} \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/create_data.py kitti \
+            --root-path ./data/kitti \
+            --out-dir ./data/kitti \
+            --extra-tag kitti
--- a/tools/data_converter/__init__.py
+++ b/tools/data_converter/__init__.py
--- a/tools/data_converter/create_gt_database.py
+++ b/tools/data_converter/create_gt_database.py
+import os.path as osp
+import pickle
+import mmcv
+import numpy as np
+import pycocotools.mask as maskUtils
+from mmcv import track_iter_progress
+from pycocotools.coco import COCO
+import mmdet3d.core.bbox.box_np_ops as box_np_ops
+from mmdet3d.core.evaluation.bbox_overlaps import bbox_overlaps
+from mmdet3d.datasets import build_dataset
+from mmdet.ops import roi_align
+def _poly2mask(mask_ann, img_h, img_w):
+    if isinstance(mask_ann, list):
+        # polygon -- a single object might consist of multiple parts
+        # we merge all parts into one mask rle code
+        rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+        rle = maskUtils.merge(rles)
+    elif isinstance(mask_ann['counts'], list):
+        # uncompressed RLE
+        rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
+    else:
+        # rle
+        rle = mask_ann
+    mask = maskUtils.decode(rle)
+    return mask
+def _parse_coco_ann_info(ann_info):
+    gt_bboxes = []
+    gt_labels = []
+    gt_bboxes_ignore = []
+    gt_masks_ann = []
+    for i, ann in enumerate(ann_info):
+        if ann.get('ignore', False):
+            continue
+        x1, y1, w, h = ann['bbox']
+        if ann['area'] <= 0:
+            continue
+        bbox = [x1, y1, x1 + w, y1 + h]
+        if ann.get('iscrowd', False):
+            gt_bboxes_ignore.append(bbox)
+        else:
+            gt_bboxes.append(bbox)
+            gt_masks_ann.append(ann['segmentation'])
+    if gt_bboxes:
+        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
+        gt_labels = np.array(gt_labels, dtype=np.int64)
+    else:
+        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
+        gt_labels = np.array([], dtype=np.int64)
+    if gt_bboxes_ignore:
+        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
+    else:
+        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
+    ann = dict(
+        bboxes=gt_bboxes, bboxes_ignore=gt_bboxes_ignore, masks=gt_masks_ann)
+    return ann
+def crop_image_patch_v2(pos_proposals, pos_assigned_gt_inds, gt_masks):
+    import torch
+    from torch.nn.modules.utils import _pair
+    device = pos_proposals.device
+    num_pos = pos_proposals.size(0)
+    fake_inds = (
+        torch.arange(num_pos,
+                     device=device).to(dtype=pos_proposals.dtype)[:, None])
+    rois = torch.cat([fake_inds, pos_proposals], dim=1)  # Nx5
+    mask_size = _pair(28)
+    rois = rois.to(device=device)
+    gt_masks_th = (
+        torch.from_numpy(gt_masks).to(device).index_select(
+            0, pos_assigned_gt_inds).to(dtype=rois.dtype))
+    # Use RoIAlign could apparently accelerate the training (~0.1s/iter)
+    targets = (
+        roi_align(gt_masks_th, rois, mask_size[::-1], 1.0, 0, True).squeeze(1))
+    return targets
+def crop_image_patch(pos_proposals, gt_masks, pos_assigned_gt_inds, org_img):
+    num_pos = pos_proposals.shape[0]
+    masks = []
+    img_patches = []
+    for i in range(num_pos):
+        gt_mask = gt_masks[pos_assigned_gt_inds[i]]
+        bbox = pos_proposals[i, :].astype(np.int32)
+        x1, y1, x2, y2 = bbox
+        w = np.maximum(x2 - x1 + 1, 1)
+        h = np.maximum(y2 - y1 + 1, 1)
+        mask_patch = gt_mask[y1:y1 + h, x1:x1 + w]
+        masked_img = gt_mask[..., None] * org_img
+        img_patch = masked_img[y1:y1 + h, x1:x1 + w]
+        img_patches.append(img_patch)
+        masks.append(mask_patch)
+    return img_patches, masks
+def create_groundtruth_database(dataset_class_name,
+                                data_path,
+                                info_prefix,
+                                info_path=None,
+                                mask_anno_path=None,
+                                used_classes=None,
+                                database_save_path=None,
+                                db_info_save_path=None,
+                                relative_path=True,
+                                add_rgb=False,
+                                lidar_only=False,
+                                bev_only=False,
+                                coors_range=None,
+                                with_mask=False):
+    print(f'Create GT Database of {dataset_class_name}')
+    dataset_cfg = dict(
+        type=dataset_class_name,
+        root_path=data_path,
+        ann_file=info_path,
+    )
+    if dataset_class_name == 'KittiDataset':
+        dataset_cfg.update(
+            training=True,
+            split='training',
+            modality=dict(
+                use_lidar=True,
+                use_depth=False,
+                use_lidar_intensity=True,
+                use_camera=with_mask,
+            ))
+    dataset = build_dataset(dataset_cfg)
+    if database_save_path is None:
+        database_save_path = osp.join(data_path,
+                                      '{}_gt_database'.format(info_prefix))
+    if db_info_save_path is None:
+        db_info_save_path = osp.join(
+            data_path, '{}_dbinfos_train.pkl'.format(info_prefix))
+    mmcv.mkdir_or_exist(database_save_path)
+    all_db_infos = dict()
+    if with_mask:
+        coco = COCO(osp.join(data_path, mask_anno_path))
+        imgIds = coco.getImgIds()
+        file2id = dict()
+        for i in imgIds:
+            info = coco.loadImgs([i])[0]
+            file2id.update({info['file_name']: i})
+    group_counter = 0
+    for j in track_iter_progress(list(range(len(dataset)))):
+        image_idx = j
+        annos = dataset.get_sensor_data(j)
+        image_idx = annos['sample_idx']
+        points = annos['points']
+        gt_boxes_3d = annos['gt_bboxes_3d']
+        names = annos['gt_names']
+        group_dict = dict()
+        group_ids = np.full([gt_boxes_3d.shape[0]], -1, dtype=np.int64)
+        if 'group_ids' in annos:
+            group_ids = annos['group_ids']
+        else:
+            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
+        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
+        if 'difficulty' in annos:
+            difficulty = annos['difficulty']
+        num_obj = gt_boxes_3d.shape[0]
+        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)
+        if with_mask:
+            # prepare masks
+            gt_boxes = annos['gt_bboxes']
+            img_path = annos['filename'].split('/')[-1]
+            if img_path not in file2id.keys():
+                print('skip image {} for empty mask'.format(img_path))
+                continue
+            img_id = file2id[img_path]
+            kins_annIds = coco.getAnnIds(imgIds=img_id)
+            kins_raw_info = coco.loadAnns(kins_annIds)
+            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
+            h, w = annos['img_shape'][:2]
+            gt_masks = [
+                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
+            ]
+            # get mask inds based on iou mapping
+            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
+            mask_inds = bbox_iou.argmax(axis=0)
+            valid_inds = (bbox_iou.max(axis=0) > 0.5)
+            # mask the image
+            # use more precise crop when it is ready
+            # object_img_patches = np.ascontiguousarray(
+            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
+            # crop image patches using roi_align
+            # object_img_patches = crop_image_patch_v2(
+            #     torch.Tensor(gt_boxes),
+            #     torch.Tensor(mask_inds).long(), object_img_patches)
+            object_img_patches, object_masks = crop_image_patch(
+                gt_boxes, gt_masks, mask_inds, annos['img'])
+        for i in range(num_obj):
+            filename = f'{image_idx}_{names[i]}_{i}.bin'
+            filepath = osp.join(database_save_path, filename)
+            # save point clouds and image patches for each object
+            gt_points = points[point_indices[:, i]]
+            gt_points[:, :3] -= gt_boxes_3d[i, :3]
+            if with_mask:
+                if object_masks[i].sum() == 0 or not valid_inds[i]:
+                    # Skip object for empty or invalid mask
+                    continue
+                img_patch_path = filepath + '.png'
+                mask_patch_path = filepath + '.mask.png'
+                mmcv.imwrite(object_img_patches[i], img_patch_path)
+                mmcv.imwrite(object_masks[i], mask_patch_path)
+            with open(filepath, 'w') as f:
+                gt_points.tofile(f)
+            if (used_classes is None) or names[i] in used_classes:
+                if relative_path:
+                    db_path = osp.join(data_path, filename)
+                else:
+                    db_path = filepath
+                db_info = {
+                    'name': names[i],
+                    'path': db_path,
+                    'image_idx': image_idx,
+                    'gt_idx': i,
+                    'box3d_lidar': gt_boxes_3d[i],
+                    'num_points_in_gt': gt_points.shape[0],
+                    'difficulty': difficulty[i],
+                }
+                local_group_id = group_ids[i]
+                # if local_group_id >= 0:
+                if local_group_id not in group_dict:
+                    group_dict[local_group_id] = group_counter
+                    group_counter += 1
+                db_info['group_id'] = group_dict[local_group_id]
+                if 'score' in annos:
+                    db_info['score'] = annos['score'][i]
+                if with_mask:
+                    db_info.update({'box2d_camera': gt_boxes[i]})
+                if names[i] in all_db_infos:
+                    all_db_infos[names[i]].append(db_info)
+                else:
+                    all_db_infos[names[i]] = [db_info]
+    for k, v in all_db_infos.items():
+        print(f'load {len(v)} {k} database infos')
+    with open(db_info_save_path, 'wb') as f:
+        pickle.dump(all_db_infos, f)
--- a/tools/data_converter/kitti_converter.py
+++ b/tools/data_converter/kitti_converter.py
+import pickle
+from pathlib import Path
+import numpy as np
+from mmcv import track_iter_progress
+from mmdet3d.core.bbox import box_np_ops
+from .kitti_data_utils import get_kitti_image_info
+def convert_to_kitti_info_version2(info):
+    """convert kitti info v1 to v2 if possible.
+    """
+    if 'image' not in info or 'calib' not in info or 'point_cloud' not in info:
+        info['image'] = {
+            'image_shape': info['img_shape'],
+            'image_idx': info['image_idx'],
+            'image_path': info['img_path'],
+        }
+        info['calib'] = {
+            'R0_rect': info['calib/R0_rect'],
+            'Tr_velo_to_cam': info['calib/Tr_velo_to_cam'],
+            'P2': info['calib/P2'],
+        }
+        info['point_cloud'] = {
+            'velodyne_path': info['velodyne_path'],
+        }
+def _read_imageset_file(path):
+    with open(path, 'r') as f:
+        lines = f.readlines()
+    return [int(line) for line in lines]
+def _calculate_num_points_in_gt(data_path,
+                                infos,
+                                relative_path,
+                                remove_outside=True,
+                                num_features=4):
+    for info in track_iter_progress(infos):
+        pc_info = info['point_cloud']
+        image_info = info['image']
+        calib = info['calib']
+        if relative_path:
+            v_path = str(Path(data_path) / pc_info['velodyne_path'])
+        else:
+            v_path = pc_info['velodyne_path']
+        points_v = np.fromfile(
+            v_path, dtype=np.float32, count=-1).reshape([-1, num_features])
+        rect = calib['R0_rect']
+        Trv2c = calib['Tr_velo_to_cam']
+        P2 = calib['P2']
+        if remove_outside:
+            points_v = box_np_ops.remove_outside_points(
+                points_v, rect, Trv2c, P2, image_info['image_shape'])
+        # points_v = points_v[points_v[:, 0] > 0]
+        annos = info['annos']
+        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
+        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
+        dims = annos['dimensions'][:num_obj]
+        loc = annos['location'][:num_obj]
+        rots = annos['rotation_y'][:num_obj]
+        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]],
+                                         axis=1)
+        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(
+            gt_boxes_camera, rect, Trv2c)
+        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
+        num_points_in_gt = indices.sum(0)
+        num_ignored = len(annos['dimensions']) - num_obj
+        num_points_in_gt = np.concatenate(
+            [num_points_in_gt, -np.ones([num_ignored])])
+        annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)
+def create_kitti_info_file(data_path,
+                           pkl_prefix='kitti_',
+                           save_path=None,
+                           relative_path=True):
+    imageset_folder = Path(data_path) / 'ImageSets'
+    train_img_ids = _read_imageset_file(
+        str(imageset_folder / 'train_6014.txt'))
+    val_img_ids = _read_imageset_file(str(imageset_folder / 'val_1467.txt'))
+    test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt'))
+    print('Generate info. this may take several minutes.')
+    if save_path is None:
+        save_path = Path(data_path)
+    else:
+        save_path = Path(save_path)
+    kitti_infos_train = get_kitti_image_info(
+        data_path,
+        training=True,
+        velodyne=True,
+        calib=True,
+        image_ids=train_img_ids,
+        relative_path=relative_path)
+    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
+    filename = save_path / f'{pkl_prefix}_infos_train.pkl'
+    print(f'Kitti info train file is saved to {filename}')
+    with open(filename, 'wb') as f:
+        pickle.dump(kitti_infos_train, f)
+    kitti_infos_val = get_kitti_image_info(
+        data_path,
+        training=True,
+        velodyne=True,
+        calib=True,
+        image_ids=val_img_ids,
+        relative_path=relative_path)
+    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
+    filename = save_path / f'{pkl_prefix}_infos_val.pkl'
+    print(f'Kitti info val file is saved to {filename}')
+    with open(filename, 'wb') as f:
+        pickle.dump(kitti_infos_val, f)
+    filename = save_path / f'{pkl_prefix}_infos_trainval.pkl'
+    print(f'Kitti info trainval file is saved to {filename}')
+    with open(filename, 'wb') as f:
+        pickle.dump(kitti_infos_train + kitti_infos_val, f)
+    kitti_infos_test = get_kitti_image_info(
+        data_path,
+        training=False,
+        label_info=False,
+        velodyne=True,
+        calib=True,
+        image_ids=test_img_ids,
+        relative_path=relative_path)
+    filename = save_path / f'{pkl_prefix}_infos_test.pkl'
+    print(f'Kitti info test file is saved to {filename}')
+    with open(filename, 'wb') as f:
+        pickle.dump(kitti_infos_test, f)
+def _create_reduced_point_cloud(data_path,
+                                info_path,
+                                save_path=None,
+                                back=False):
+    """Write copies of the velodyne scans with out-of-image points removed.
+
+    Each scan is read as float32 values reshaped to ``(-1, 4)``, filtered by
+    ``box_np_ops.remove_outside_points`` and written back out as raw binary.
+
+    Args:
+        data_path (str | Path): Root that every stored ``velodyne_path`` is
+            joined onto.
+        info_path (str | Path): Pickle file holding a list of info dicts.
+            Each dict must provide ``point_cloud['velodyne_path']``,
+            ``image['image_shape']`` and the ``R0_rect``, ``P2`` and
+            ``Tr_velo_to_cam`` entries of ``calib``.
+        save_path (str | Path, optional): Directory that receives the
+            output files. When None, a sibling directory named after the
+            scan's directory plus ``_reduced`` is created and used.
+        back (bool): Negate the first point column before filtering and
+            append ``_back`` to the output file name.
+    """
+    # NOTE(review): pickle.load runs arbitrary code from the file; only feed
+    # it info files that were generated locally.
+    with open(info_path, 'rb') as f:
+        kitti_infos = pickle.load(f)
+    for info in track_iter_progress(kitti_infos):
+        calib = info['calib']
+        v_path = Path(data_path) / info['point_cloud']['velodyne_path']
+        points_v = np.fromfile(
+            str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])
+        if back:
+            points_v[:, 0] = -points_v[:, 0]
+        points_v = box_np_ops.remove_outside_points(
+            points_v, calib['R0_rect'], calib['Tr_velo_to_cam'], calib['P2'],
+            info['image']['image_shape'])
+        if save_path is None:
+            save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced')
+            # exist_ok avoids the check-then-create race of exists()/mkdir().
+            save_dir.mkdir(exist_ok=True)
+        else:
+            save_dir = Path(save_path)
+        # Work on a str from here on: a Path does not support ``+= '_back'``.
+        save_filename = str(save_dir / v_path.name)
+        if back:
+            save_filename += '_back'
+        # The payload is raw float32 data, so the file must be opened binary.
+        with open(save_filename, 'wb') as f:
+            points_v.tofile(f)
+def create_reduced_point_cloud(data_path,
+                               pkl_prefix,
+                               train_info_path=None,
+                               val_info_path=None,
+                               test_info_path=None,
+                               save_path=None,
+                               with_back=False):
+    """Create reduced point clouds for the train, val and test info files.
+
+    Args:
+        data_path (str | Path): Dataset root, forwarded to
+            ``_create_reduced_point_cloud`` and used to locate the default
+            info files.
+        pkl_prefix (str): Prefix of the default info file names
+            (``{pkl_prefix}_infos_{train,val,test}.pkl``).
+        train_info_path (str | Path, optional): Train info file. Defaults to
+            ``data_path / f'{pkl_prefix}_infos_train.pkl'``.
+        val_info_path (str | Path, optional): Val info file, same default
+            scheme.
+        test_info_path (str | Path, optional): Test info file, same default
+            scheme.
+        save_path (str | Path, optional): Output directory forwarded to
+            ``_create_reduced_point_cloud``.
+        with_back (bool): Additionally run every split with ``back=True``.
+    """
+    if train_info_path is None:
+        train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'
+    if val_info_path is None:
+        val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl'
+    if test_info_path is None:
+        test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl'
+    splits = (
+        ('training', train_info_path),
+        ('validation', val_info_path),
+        ('testing', test_info_path),
+    )
+    for split_name, info_path in splits:
+        print(f'create reduced point cloud for {split_name} set')
+        _create_reduced_point_cloud(data_path, info_path, save_path)
+    if with_back:
+        # Second pass in the same split order, after all forward passes.
+        for _, info_path in splits:
+            _create_reduced_point_cloud(
+                data_path, info_path, save_path, back=True)
--- a/tools/data_converter/kitti_data_utils.py
+++ b/tools/data_converter/kitti_data_utils.py
+import concurrent.futures as futures
+from collections import OrderedDict
+from pathlib import Path
+import numpy as np
+from skimage import io
+def get_image_index_str(img_idx):
+    """Return ``img_idx`` as a decimal string zero-padded to six digits."""
+    return f'{img_idx:06d}'
+def get_kitti_info_path(idx,
+                        prefix,
+                        info_type='image_2',
+                        file_tail='.png',
+                        training=True,
+                        relative_path=True,
+                        exist_check=True):
+    """Build the path ``<split>/<info_type>/<six-digit idx><file_tail>``.
+
+    Args:
+        idx (int): Frame index, formatted by ``get_image_index_str``.
+        prefix (str | Path): Dataset root the relative path is joined onto.
+        info_type (str): Sub-directory under the split directory.
+        file_tail (str): Suffix appended to the formatted index.
+        training (bool): Use the ``training`` split directory when True,
+            ``testing`` otherwise.
+        relative_path (bool): Return the path without ``prefix`` when True.
+        exist_check (bool): Verify that ``prefix / path`` exists.
+
+    Returns:
+        str: The relative or prefixed path.
+
+    Raises:
+        ValueError: If ``exist_check`` is set and the file is missing.
+    """
+    split_dir = 'training' if training else 'testing'
+    rel_file = Path(split_dir) / info_type / (
+        get_image_index_str(idx) + file_tail)
+    abs_file = Path(prefix) / rel_file
+    if exist_check and not abs_file.exists():
+        raise ValueError('file not exist: {}'.format(rel_file))
+    return str(rel_file) if relative_path else str(abs_file)
+def get_image_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True):
+    """Return the ``image_2`` ``.png`` path of frame ``idx``.
+
+    All arguments are forwarded to ``get_kitti_info_path``.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='image_2',
+        file_tail='.png',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check)
+def get_label_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True):
+    """Return the ``label_2`` ``.txt`` path of frame ``idx``.
+
+    All arguments are forwarded to ``get_kitti_info_path``.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='label_2',
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check)
+def get_velodyne_path(idx,
+                      prefix,
+                      training=True,
+                      relative_path=True,
+                      exist_check=True):
+    """Return the ``velodyne`` ``.bin`` path of frame ``idx``.
+
+    All arguments are forwarded to ``get_kitti_info_path``.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='velodyne',
+        file_tail='.bin',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check)
+def get_calib_path(idx,
+                   prefix,
+                   training=True,
+                   relative_path=True,
+                   exist_check=True):
+    """Return the ``calib`` ``.txt`` path of frame ``idx``.
+
+    All arguments are forwarded to ``get_kitti_info_path``.
+    """
+    return get_kitti_info_path(
+        idx,
+        prefix,
+        info_type='calib',
+        file_tail='.txt',
+        training=training,
+        relative_path=relative_path,
+        exist_check=exist_check)
+def get_label_anno(label_path):
+    """Parse a KITTI-style label text file into a dict of per-object arrays.
+
+    Every non-blank line is split on whitespace; column 0 is the object
+    name, columns 1-14 are numeric and an optional 16th column is a score.
+
+    Args:
+        label_path (str | Path): Label file to read.
+
+    Returns:
+        dict: Arrays with one entry (or row) per label line:
+            ``name``, ``truncated``, ``occluded``, ``alpha``,
+            ``bbox`` (N, 4), ``dimensions`` (N, 3), ``location`` (N, 3),
+            ``rotation_y``, ``score``, ``index`` and ``group_ids``.
+    """
+    with open(label_path, 'r') as f:
+        # Blank lines (for instance a trailing newline at the end of the
+        # file) describe no object and would break the column indexing
+        # below, so they are dropped here.
+        content = [line.split() for line in f if line.strip()]
+    num_gt = len(content)
+    num_objects = sum(1 for x in content if x[0] != 'DontCare')
+    annotations = {
+        'name': np.array([x[0] for x in content]),
+        'truncated': np.array([float(x[1]) for x in content]),
+        'occluded': np.array([int(x[2]) for x in content]),
+        'alpha': np.array([float(x[3]) for x in content]),
+        'bbox': np.array([[float(v) for v in x[4:8]]
+                          for x in content]).reshape(-1, 4),
+        # Columns 8-10 are reordered with [2, 0, 1]; the original comment
+        # describes this as converting hwl to lhw (camera) format.
+        'dimensions': np.array([[float(v) for v in x[8:11]]
+                                for x in content]).reshape(-1,
+                                                           3)[:, [2, 0, 1]],
+        'location': np.array([[float(v) for v in x[11:14]]
+                              for x in content]).reshape(-1, 3),
+        'rotation_y': np.array([float(x[14]) for x in content]).reshape(-1),
+    }
+    if content and len(content[0]) == 16:  # have score
+        annotations['score'] = np.array([float(x[15]) for x in content])
+    else:
+        annotations['score'] = np.zeros((num_gt, ))
+    # The first ``num_objects`` entries get running indices and the
+    # remainder get -1. This presumes the DontCare rows come last in the
+    # file - TODO confirm against the label files in use.
+    index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
+    annotations['index'] = np.array(index, dtype=np.int32)
+    annotations['group_ids'] = np.arange(num_gt, dtype=np.int32)
+    return annotations
+def _extend_matrix(mat):
+    """Return ``mat`` with the row ``[0, 0, 0, 1]`` appended at the bottom."""
+    bottom_row = np.array([[0., 0., 0., 1.]])
+    return np.concatenate((mat, bottom_row), axis=0)
+def _parse_calib_row(line, shape):
+    """Read ``shape[0] * shape[1]`` floats that follow the tag of a calib line.
+
+    Args:
+        line (str): One line of a calib file. It is split on single spaces
+            and the first token (the tag) is skipped.
+        shape (tuple[int, int]): Shape of the returned matrix.
+
+    Returns:
+        np.ndarray: The parsed values reshaped to ``shape``.
+    """
+    num_values = shape[0] * shape[1]
+    values = [float(tok) for tok in line.split(' ')[1:num_values + 1]]
+    return np.array(values).reshape(shape)
+def get_kitti_image_info(path,
+                         training=True,
+                         label_info=True,
+                         velodyne=False,
+                         calib=False,
+                         image_ids=7481,
+                         extend_matrix=True,
+                         num_worker=8,
+                         relative_path=True,
+                         with_imageshape=True):
+    """Collect one info dict per frame, using a thread pool.
+
+    KITTI annotation format version 2:
+    {
+        [optional]points: [N, 3+] point cloud
+        [optional, for kitti]image: {
+            image_idx: ...
+            image_path: ...
+            image_shape: ...
+        }
+        point_cloud: {
+            num_features: 4
+            velodyne_path: ...
+        }
+        [optional, for kitti]calib: {
+            R0_rect: ...
+            Tr_velo_to_cam: ...
+            P2: ...
+        }
+        annos: {
+            location: [num_gt, 3] array
+            dimensions: [num_gt, 3] array
+            rotation_y: [num_gt] angle array
+            name: [num_gt] ground truth name array
+            [optional]difficulty: kitti difficulty
+            [optional]group_ids: used for multi-part object
+        }
+    }
+
+    Args:
+        path (str | Path): Dataset root.
+        training (bool): Read from the ``training`` split when True,
+            ``testing`` otherwise (forwarded to the path helpers).
+        label_info (bool): Parse the label file and store it under
+            ``annos`` (with ``difficulty`` added).
+        velodyne (bool): Record ``point_cloud['velodyne_path']``.
+        calib (bool): Parse the calib file and store it under ``calib``.
+        image_ids (list[int] | int): Frame indices to process. Anything
+            that is not a list is passed to ``range``.
+        extend_matrix (bool): Pad the calibration matrices to 4x4.
+        num_worker (int): Number of worker threads.
+        relative_path (bool): Store paths relative to ``path``.
+        with_imageshape (bool): Read each image to record its
+            ``(height, width)`` as an int32 array.
+
+    Returns:
+        list[dict]: One info dict per entry of ``image_ids``, in order.
+    """
+    root_path = Path(path)
+    if not isinstance(image_ids, list):
+        image_ids = list(range(image_ids))
+    def map_func(idx):
+        info = {}
+        pc_info = {'num_features': 4}
+        image_info = {'image_idx': idx}
+        annotations = None
+        if velodyne:
+            pc_info['velodyne_path'] = get_velodyne_path(
+                idx, path, training, relative_path)
+        image_info['image_path'] = get_image_path(idx, path, training,
+                                                  relative_path)
+        if with_imageshape:
+            img_path = image_info['image_path']
+            if relative_path:
+                img_path = str(root_path / img_path)
+            image_info['image_shape'] = np.array(
+                io.imread(img_path).shape[:2], dtype=np.int32)
+        if label_info:
+            label_path = get_label_path(idx, path, training, relative_path)
+            if relative_path:
+                label_path = str(root_path / label_path)
+            annotations = get_label_anno(label_path)
+        info['image'] = image_info
+        info['point_cloud'] = pc_info
+        if calib:
+            # The calib file is always opened through its prefixed path.
+            calib_path = get_calib_path(
+                idx, path, training, relative_path=False)
+            with open(calib_path, 'r') as f:
+                lines = f.readlines()
+            # Lines 0-3 hold P0..P3 (3x4), line 4 R0_rect (3x3), lines 5
+            # and 6 Tr_velo_to_cam and Tr_imu_to_velo (3x4 each).
+            projections = [_parse_calib_row(lines[i], (3, 4)) for i in range(4)]
+            R0_rect = _parse_calib_row(lines[4], (3, 3))
+            Tr_velo_to_cam = _parse_calib_row(lines[5], (3, 4))
+            Tr_imu_to_velo = _parse_calib_row(lines[6], (3, 4))
+            if extend_matrix:
+                projections = [_extend_matrix(p) for p in projections]
+                # R0_rect is 3x3, so it is embedded into a 4x4 matrix whose
+                # only other non-zero entry is the bottom-right 1.
+                rect_4x4 = np.zeros([4, 4], dtype=R0_rect.dtype)
+                rect_4x4[3, 3] = 1.
+                rect_4x4[:3, :3] = R0_rect
+                Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
+                Tr_imu_to_velo = _extend_matrix(Tr_imu_to_velo)
+            else:
+                rect_4x4 = R0_rect
+            calib_info = {}
+            for cam_idx, projection in enumerate(projections):
+                calib_info[f'P{cam_idx}'] = projection
+            calib_info['R0_rect'] = rect_4x4
+            calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
+            calib_info['Tr_imu_to_velo'] = Tr_imu_to_velo
+            info['calib'] = calib_info
+        if annotations is not None:
+            info['annos'] = annotations
+            add_difficulty_to_annos(info)
+        return info
+    with futures.ThreadPoolExecutor(num_worker) as executor:
+        image_infos = list(executor.map(map_func, image_ids))
+    return image_infos
+def kitti_anno_to_label_file(annos, folder):
+    """Write one ``<six-digit image_idx>.txt`` label file per annotation.
+
+    Args:
+        annos (list[dict]): Each dict provides ``metadata['image_idx']`` and
+            the indexable fields ``name``, ``alpha``, ``bbox``, ``location``,
+            ``dimensions``, ``rotation_y`` and ``score``. ``bbox`` must
+            expose ``.shape``.
+        folder (str | Path): Directory the label files are written into.
+    """
+    out_dir = Path(folder)
+    fields = ('name', 'alpha', 'bbox', 'location', 'dimensions',
+              'rotation_y', 'score')
+    for anno in annos:
+        image_idx = anno['metadata']['image_idx']
+        num_boxes = anno['bbox'].shape[0]
+        # One formatted text line per box, joined without a trailing newline.
+        label_lines = [
+            kitti_result_line({key: anno[key][j]
+                               for key in fields}) for j in range(num_boxes)
+        ]
+        label_file = out_dir / f'{get_image_index_str(image_idx)}.txt'
+        with open(label_file, 'w') as f:
+            f.write('\n'.join(label_lines))
+def add_difficulty_to_annos(info):
+    """Assign a difficulty level to every annotation in ``info['annos']``.
+
+    An object passes a level when its box height exceeds the level's minimum
+    and its occlusion and truncation do not exceed the level's maxima.
+    Levels are checked as easy (0), moderate (1), hard (2); objects passing
+    none get -1.
+
+    Args:
+        info (dict): Must contain ``annos`` with ``bbox`` (N, 4 array),
+            ``occluded`` and ``truncated``. ``annos['difficulty']`` is
+            written in place as an int32 array.
+
+    Returns:
+        list[int]: The difficulty of each object, same order as the input.
+    """
+    # minimum height for evaluated groundtruth/detections
+    min_height = [40, 25, 25]
+    # maximum occlusion level of the groundtruth used for evaluation
+    max_occlusion = [0, 1, 2]
+    # maximum truncation level of the groundtruth used for evaluation
+    max_trunc = [0.15, 0.3, 0.5]
+    annos = info['annos']
+    bbox = annos['bbox']
+    height = bbox[:, 3] - bbox[:, 1]
+    occlusion = np.asarray(annos['occluded'])
+    truncation = np.asarray(annos['truncated'])
+    # One boolean mask per level. Written as the negation of the rejection
+    # test so NaN inputs behave exactly like the element-wise comparisons.
+    easy_mask, moderate_mask, hard_mask = (
+        ~((occlusion > max_occ) | (height <= min_h) | (truncation > max_t))
+        for min_h, max_occ, max_t in zip(min_height, max_occlusion,
+                                         max_trunc))
+    is_moderate = np.logical_xor(easy_mask, moderate_mask)
+    is_hard = np.logical_xor(hard_mask, moderate_mask)
+    # Assign from lowest to highest priority so easy wins over moderate and
+    # moderate over hard, matching an if/elif chain in that order.
+    difficulty = np.full(height.shape[0], -1, dtype=np.int32)
+    difficulty[is_hard] = 2
+    difficulty[is_moderate] = 1
+    difficulty[easy_mask] = 0
+    annos['difficulty'] = difficulty
+    return difficulty.tolist()
+def kitti_result_line(result_dict, precision=4):
+    """Format one detection as a space-separated KITTI result line.
+
+    Fields are emitted in the fixed order name, truncated, occluded, alpha,
+    bbox, dimensions, location, rotation_y, score. Optional fields that are
+    absent or None are written with their default value.
+
+    Args:
+        result_dict (dict): Field name to value. ``name`` and ``bbox`` are
+            required; ``bbox``, ``dimensions`` and ``location`` must be
+            iterables of numbers.
+        precision (int): Number of decimals used for float fields.
+
+    Returns:
+        str: The formatted line, without a trailing newline.
+
+    Raises:
+        ValueError: If ``result_dict`` contains an unsupported key, or a
+            required field is missing or None.
+    """
+    prec_float = '{' + ':.{}f'.format(precision) + '}'
+    # A default of None marks a field the caller has to supply.
+    all_field_default = OrderedDict([
+        ('name', None),
+        ('truncated', -1),
+        ('occluded', -1),
+        ('alpha', -10),
+        ('bbox', None),
+        ('dimensions', [-1, -1, -1]),
+        ('location', [-1000, -1000, -1000]),
+        ('rotation_y', -10),
+        ('score', 0.0),
+    ])
+    # Reject unsupported keys up front with the descriptive error instead of
+    # letting a bare KeyError escape from the defaults lookup.
+    if any(key not in all_field_default for key in result_dict):
+        raise ValueError('unknown key. supported key:{}'.format(
+            all_field_default.keys()))
+    float_fields = ('truncated', 'alpha', 'rotation_y', 'score')
+    res_line = []
+    for key, default in all_field_default.items():
+        val = result_dict.get(key)
+        if val is None:
+            if default is None:
+                # Covers both an explicit None and an omitted required field.
+                raise ValueError(
+                    'you must specify a value for {}'.format(key))
+            if isinstance(default, list):
+                res_line += [str(v) for v in default]
+            else:
+                res_line.append(str(default))
+        elif key == 'name':
+            res_line.append(val)
+        elif key == 'occluded':
+            res_line.append('{}'.format(val))
+        elif key in float_fields:
+            res_line.append(prec_float.format(val))
+        else:  # bbox, dimensions, location
+            res_line += [prec_float.format(v) for v in val]
+    return ' '.join(res_line)
--- a/tools/data_converter/nuscenes_converter.py
+++ b/tools/data_converter/nuscenes_converter.py
+import os.path as osp
+from collections import OrderedDict
+from typing import List, Tuple, Union
+import mmcv
+import numpy as np
+from nuscenes.nuscenes import NuScenes
+from nuscenes.utils.geometry_utils import view_points
+from pyquaternion import Quaternion
+from shapely.geometry import MultiPoint, box
+from mmdet3d.datasets import NuScenesDataset
+# Category names used for the 2D export: export_2d_annotation takes each
+# name's position in this tuple as its category id, so the order matters.
+nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
+                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
+                  'barrier')
+def create_nuscenes_infos(root_path,
+                          info_prefix,
+                          version='v1.0-trainval',
+                          max_sweeps=10):
+    """Build the info files of a nuScenes split and dump them as pickles.
+
+    For ``v1.0-test`` a single ``{info_prefix}_infos_test.pkl`` is written;
+    otherwise ``{info_prefix}_infos_train.pkl`` and
+    ``{info_prefix}_infos_val.pkl``. All files go into ``root_path``.
+
+    Args:
+        root_path (str): Dataset root, also the output directory.
+        info_prefix (str): Prefix of the generated file names.
+        version (str): One of ``v1.0-trainval``, ``v1.0-test``,
+            ``v1.0-mini``.
+        max_sweeps (int): Forwarded to ``_fill_trainval_infos``.
+    """
+    from nuscenes.nuscenes import NuScenes
+    from nuscenes.utils import splits
+    available_vers = ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']
+    # Check the version before constructing NuScenes so that a typo fails
+    # immediately instead of after the dataset tables have been loaded.
+    assert version in available_vers
+    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
+    if version == 'v1.0-trainval':
+        train_scenes = splits.train
+        val_scenes = splits.val
+    elif version == 'v1.0-test':
+        train_scenes = splits.test
+        val_scenes = []
+    elif version == 'v1.0-mini':
+        train_scenes = splits.mini_train
+        val_scenes = splits.mini_val
+    else:
+        raise ValueError('unknown')
+    # filter existing scenes.
+    available_scenes = _get_available_scenes(nusc)
+    # Name -> token lookup; iterating in reverse lets the first scene with
+    # a given name win, like list.index would.
+    token_by_name = {
+        scene['name']: scene['token']
+        for scene in reversed(available_scenes)
+    }
+    train_scenes = {
+        token_by_name[name]
+        for name in train_scenes if name in token_by_name
+    }
+    val_scenes = {
+        token_by_name[name]
+        for name in val_scenes if name in token_by_name
+    }
+    test = 'test' in version
+    if test:
+        print('test scene: {}'.format(len(train_scenes)))
+    else:
+        print('train scene: {}, val scene: {}'.format(
+            len(train_scenes), len(val_scenes)))
+    train_nusc_infos, val_nusc_infos = _fill_trainval_infos(
+        nusc, train_scenes, val_scenes, test, max_sweeps=max_sweeps)
+    metadata = dict(version=version)
+    if test:
+        # In test mode the "train" list carries the test samples.
+        print('test sample: {}'.format(len(train_nusc_infos)))
+        data = dict(infos=train_nusc_infos, metadata=metadata)
+        info_path = osp.join(root_path,
+                             '{}_infos_test.pkl'.format(info_prefix))
+        mmcv.dump(data, info_path)
+    else:
+        print('train sample: {}, val sample: {}'.format(
+            len(train_nusc_infos), len(val_nusc_infos)))
+        train_data = dict(infos=train_nusc_infos, metadata=metadata)
+        info_path = osp.join(root_path,
+                             '{}_infos_train.pkl'.format(info_prefix))
+        mmcv.dump(train_data, info_path)
+        val_data = dict(infos=val_nusc_infos, metadata=metadata)
+        info_val_path = osp.join(root_path,
+                                 '{}_infos_val.pkl'.format(info_prefix))
+        mmcv.dump(val_data, info_val_path)
+def _get_available_scenes(nusc):
+    """Return the scenes whose first LIDAR_TOP frame passes the path check.
+
+    Only the first lidar frame of each scene is inspected.
+
+    Args:
+        nusc: Dataset object providing ``scene``, ``get`` and
+            ``get_sample_data``.
+
+    Returns:
+        list[dict]: The scene records that were kept, in dataset order.
+    """
+    available_scenes = []
+    print('total scene num: {}'.format(len(nusc.scene)))
+    for scene in nusc.scene:
+        scene_rec = nusc.get('scene', scene['token'])
+        sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
+        sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
+        lidar_path, _, _ = nusc.get_sample_data(sd_rec['token'])
+        # NOTE(review): mmcv.is_filepath appears to test the argument's type
+        # (str / Path) rather than existence on disk - confirm whether an
+        # existence check was intended here.
+        if mmcv.is_filepath(lidar_path):
+            available_scenes.append(scene)
+    print('exist scene num: {}'.format(len(available_scenes)))
+    return available_scenes
+def _fill_trainval_infos(nusc,
+                         train_scenes,
+                         val_scenes,
+                         test=False,
+                         max_sweeps=10):
+    """Build one info dict per sample and split them into two lists.
+
+    Args:
+        nusc: Dataset object providing ``sample``, ``get``,
+            ``get_sample_data`` and ``box_velocity``.
+        train_scenes (set[str]): Scene tokens whose samples go into the
+            first returned list.
+        val_scenes (set[str]): Not read by this function; every sample
+            whose scene token is not in ``train_scenes`` goes into the
+            second list.
+        test (bool): Skip the ground-truth annotation fields when True.
+        max_sweeps (int): Maximum number of previous lidar frames recorded
+            per sample.
+
+    Returns:
+        tuple[list[dict], list[dict]]: ``(train_nusc_infos, val_nusc_infos)``.
+    """
+    train_nusc_infos = []
+    val_nusc_infos = []
+    for sample in mmcv.track_iter_progress(nusc.sample):
+        lidar_token = sample['data']['LIDAR_TOP']
+        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
+        cs_record = nusc.get('calibrated_sensor',
+                             sd_rec['calibrated_sensor_token'])
+        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
+        mmcv.check_file_exist(lidar_path, msg_tmpl='file "{}" does not exist.')
+        info = {
+            'lidar_path': lidar_path,
+            'token': sample['token'],
+            'sweeps': [],
+            'cams': dict(),
+            'lidar2ego_translation': cs_record['translation'],
+            'lidar2ego_rotation': cs_record['rotation'],
+            'ego2global_translation': pose_record['translation'],
+            'ego2global_rotation': pose_record['rotation'],
+            'timestamp': sample['timestamp'],
+        }
+        # Key-frame lidar->ego and ego->global transforms; the rotations
+        # are converted from quaternions to rotation matrices.
+        l2e_r = info['lidar2ego_rotation']
+        l2e_t = info['lidar2ego_translation']
+        e2g_r = info['ego2global_rotation']
+        e2g_t = info['ego2global_translation']
+        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
+        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
+        # obtain 6 image's information per frame
+        camera_types = [
+            'CAM_FRONT',
+            'CAM_FRONT_RIGHT',
+            'CAM_FRONT_LEFT',
+            'CAM_BACK',
+            'CAM_BACK_LEFT',
+            'CAM_BACK_RIGHT',
+        ]
+        for cam in camera_types:
+            cam_token = sample['data'][cam]
+            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
+            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
+                                         e2g_t, e2g_r_mat, cam)
+            cam_info.update(cam_intrinsic=cam_intrinsic)
+            info['cams'].update({cam: cam_info})
+        # obtain sweeps for a single key-frame
+        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
+        sweeps = []
+        # Follow the 'prev' links from the key frame until max_sweeps
+        # frames are collected or the chain ends (empty 'prev').
+        while len(sweeps) < max_sweeps:
+            if not sd_rec['prev'] == '':
+                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
+                                          l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
+                sweeps.append(sweep)
+                sd_rec = nusc.get('sample_data', sd_rec['prev'])
+            else:
+                break
+        info['sweeps'] = sweeps
+        # obtain annotation
+        if not test:
+            annotations = [
+                nusc.get('sample_annotation', token)
+                for token in sample['anns']
+            ]
+            # Box attributes come from the boxes returned for the lidar
+            # sample data above.
+            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
+            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
+            rots = np.array([b.orientation.yaw_pitch_roll[0]
+                             for b in boxes]).reshape(-1, 1)
+            velocity = np.array(
+                [nusc.box_velocity(token)[:2] for token in sample['anns']])
+            # convert velo from global to lidar
+            for i in range(len(boxes)):
+                # Pad to 3D with a zero third component, rotate, then keep
+                # the first two components again.
+                velo = np.array([*velocity[i], 0.0])
+                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
+                    l2e_r_mat).T
+                velocity[i] = velo[:2]
+            names = [b.name for b in boxes]
+            # Rename categories that have an entry in the dataset's
+            # NameMapping; other names are kept unchanged.
+            for i in range(len(names)):
+                if names[i] in NuScenesDataset.NameMapping:
+                    names[i] = NuScenesDataset.NameMapping[names[i]]
+            names = np.array(names)
+            # we need to convert rot to SECOND format.
+            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
+            # The boxes and the annotation records are indexed in parallel
+            # below, so their counts must agree.
+            assert len(gt_boxes) == len(
+                annotations), f'{len(gt_boxes)}, {len(annotations)}'
+            info['gt_boxes'] = gt_boxes
+            info['gt_names'] = names
+            info['gt_velocity'] = velocity.reshape(-1, 2)
+            info['num_lidar_pts'] = np.array(
+                [a['num_lidar_pts'] for a in annotations])
+            info['num_radar_pts'] = np.array(
+                [a['num_radar_pts'] for a in annotations])
+        if sample['scene_token'] in train_scenes:
+            train_nusc_infos.append(info)
+        else:
+            val_nusc_infos.append(info)
+    return train_nusc_infos, val_nusc_infos
+def obtain_sensor2top(nusc,
+                      sensor_token,
+                      l2e_t,
+                      l2e_r_mat,
+                      e2g_t,
+                      e2g_r_mat,
+                      sensor_type='lidar'):
+    """Obtain the info with RT matrix from general sensor to Top LiDAR.
+
+    Args:
+        nusc: Dataset object providing ``get`` and ``get_sample_data_path``.
+        sensor_token (str): ``sample_data`` token of the sensor frame.
+        l2e_t: Lidar-to-ego translation of the reference frame (the caller
+            in this file passes the key frame's ``lidar2ego_translation``).
+        l2e_r_mat: Lidar-to-ego rotation matrix of the reference frame.
+        e2g_t: Ego-to-global translation of the reference frame.
+        e2g_r_mat: Ego-to-global rotation matrix of the reference frame.
+        sensor_type (str): Stored unchanged under ``type``.
+
+    Returns:
+        dict: Path, token, raw calibration/pose entries and timestamp of the
+            sensor frame, plus ``sensor2lidar_rotation`` and
+            ``sensor2lidar_translation``.
+    """
+    sd_rec = nusc.get('sample_data', sensor_token)
+    cs_record = nusc.get('calibrated_sensor',
+                         sd_rec['calibrated_sensor_token'])
+    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+    data_path = nusc.get_sample_data_path(sd_rec['token'])
+    sweep = {
+        'data_path': data_path,
+        'type': sensor_type,
+        'sample_data_token': sd_rec['token'],
+        'sensor2ego_translation': cs_record['translation'],
+        'sensor2ego_rotation': cs_record['rotation'],
+        'ego2global_translation': pose_record['translation'],
+        'ego2global_rotation': pose_record['rotation'],
+        'timestamp': sd_rec['timestamp']
+    }
+    # The *_s names hold this sensor frame's own transforms, as opposed to
+    # the reference-frame transforms passed in as arguments.
+    l2e_r_s = sweep['sensor2ego_rotation']
+    l2e_t_s = sweep['sensor2ego_translation']
+    e2g_r_s = sweep['ego2global_rotation']
+    e2g_t_s = sweep['ego2global_translation']
+    # obtain the RT from sensor to Top LiDAR
+    # sweep->ego->global->ego'->lidar
+    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
+    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
+    # All products below use the row-vector convention (points @ M), which
+    # is why every matrix appears transposed.
+    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
+        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
+        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+    # Subtract the reference frame's own translations, expressed in the
+    # reference lidar frame.
+    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
+                  ) + l2e_t @ np.linalg.inv(l2e_r_mat).T
+    sweep['sensor2lidar_rotation'] = R.T  # points @ R.T + T
+    sweep['sensor2lidar_translation'] = T
+    return sweep
+def export_2d_annotation(root_path, info_path, version):
+    """Export the 2D camera boxes of an info file as a COCO-style json.
+
+    The result is dumped next to the info file, to ``info_path`` with its
+    last four characters replaced by ``.coco.json``.
+
+    Args:
+        root_path (str): Dataset root passed to ``NuScenes``.
+        info_path (str): Info file loaded with ``mmcv.load``; its ``infos``
+            entry is iterated.
+        version (str): Dataset version passed to ``NuScenes``.
+    """
+    # get bbox annotations for camera
+    camera_types = [
+        'CAM_FRONT',
+        'CAM_FRONT_RIGHT',
+        'CAM_FRONT_LEFT',
+        'CAM_BACK',
+        'CAM_BACK_LEFT',
+        'CAM_BACK_RIGHT',
+    ]
+    nusc_infos = mmcv.load(info_path)['infos']
+    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
+    # A category's id is its position in nus_categories.
+    categories = [
+        dict(id=cat_id, name=cat_name)
+        for cat_id, cat_name in enumerate(nus_categories)
+    ]
+    images = []
+    annotations = []
+    for info in mmcv.track_iter_progress(nusc_infos):
+        for cam in camera_types:
+            cam_info = info['cams'][cam]
+            coco_infos = get_2d_boxes(
+                nusc,
+                cam_info['sample_data_token'],
+                visibilities=['', '1', '2', '3', '4'])
+            # The image is read only to obtain its height and width.
+            height, width, _ = mmcv.imread(cam_info['data_path']).shape
+            images.append(
+                dict(
+                    file_name=cam_info['data_path'],
+                    id=cam_info['sample_data_token'],
+                    width=width,
+                    height=height))
+            for coco_info in coco_infos:
+                if coco_info is None:
+                    continue
+                # add an empty key for coco format
+                coco_info['segmentation'] = []
+                # Annotation ids count up from 0 across all images.
+                coco_info['id'] = len(annotations)
+                annotations.append(coco_info)
+    coco_2d_dict = dict(
+        annotations=annotations, images=images, categories=categories)
+    mmcv.dump(coco_2d_dict, '{}.coco.json'.format(info_path[:-4]))
+def get_2d_boxes(nusc, sample_data_token: str,
+                 visibilities: List[str]) -> List[OrderedDict]:
+    """Get the 2D annotation records for a given `sample_data_token`.
+    Args:
+        nusc: Dataset object providing ``get`` and ``get_box``.
+        sample_data_token: Sample data token belonging to a camera keyframe.
+        visibilities: Visibility filter; annotations whose
+            ``visibility_token`` is not in this list are dropped.
+    Return:
+        list: List of 2D annotation record that belongs to the input
+            `sample_data_token`.
+    Raises:
+        ValueError: If the sample data is not a keyframe.
+    """
+    # Get the sample data and the sample corresponding to that sample data.
+    sd_rec = nusc.get('sample_data', sample_data_token)
+    assert sd_rec[
+        'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
+        ' for camera sample_data!'
+    if not sd_rec['is_key_frame']:
+        raise ValueError(
+            'The 2D re-projections are available only for keyframes.')
+    s_rec = nusc.get('sample', sd_rec['sample_token'])
+    # Get the calibrated sensor and ego pose
+    # record to get the transformation matrices.
+    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
+    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
+    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])
+    # Get all the annotations with the specified visibilities.
+    ann_recs = [
+        nusc.get('sample_annotation', token) for token in s_rec['anns']
+    ]
+    ann_recs = [
+        ann_rec for ann_rec in ann_recs
+        if (ann_rec['visibility_token'] in visibilities)
+    ]
+    repro_recs = []
+    for ann_rec in ann_recs:
+        # Augment sample_annotation with token information.
+        # NOTE(review): this writes into the dict returned by nusc.get; if
+        # that dict is the dataset's own record the change persists there -
+        # confirm.
+        ann_rec['sample_annotation_token'] = ann_rec['token']
+        ann_rec['sample_data_token'] = sample_data_token
+        # Get the box in global coordinates.
+        # (the local name `box` shadows the shapely `box` import inside
+        # this function only)
+        box = nusc.get_box(ann_rec['token'])
+        # Move them to the ego-pose frame.
+        box.translate(-np.array(pose_rec['translation']))
+        box.rotate(Quaternion(pose_rec['rotation']).inverse)
+        # Move them to the calibrated sensor frame.
+        box.translate(-np.array(cs_rec['translation']))
+        box.rotate(Quaternion(cs_rec['rotation']).inverse)
+        # Filter out the corners that are not in front of the calibrated
+        # sensor.
+        corners_3d = box.corners()
+        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
+        corners_3d = corners_3d[:, in_front]
+        # Project 3d box to 2d.
+        corner_coords = view_points(corners_3d, camera_intrinsic,
+                                    True).T[:, :2].tolist()
+        # Keep only corners that fall within the image.
+        final_coords = post_process_coords(corner_coords)
+        # Skip if the convex hull of the re-projected corners
+        # does not intersect the image canvas.
+        if final_coords is None:
+            continue
+        else:
+            min_x, min_y, max_x, max_y = final_coords
+        # Generate dictionary record to be included in the .json file.
+        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
+                                    sample_data_token, sd_rec['filename'])
+        repro_recs.append(repro_rec)
+    return repro_recs
+def post_process_coords(
+    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
+) -> Union[Tuple[float, float, float, float], None]:
+    """
+    Get the intersection of the convex hull of the reprojected
+    bbox corners and the image canvas, return None if there is no
+    intersection or if the intersection has no area.
+    corner_coords: Corner coordinates of reprojected bounding box.
+    imsize: Size of the image canvas; the canvas rectangle spans from
+        (0, 0) to (imsize[0], imsize[1]).
+    Return:
+        (min_x, min_y, max_x, max_y) bounds of the intersection of the
+        convex hull of the 2D box corners and the image canvas, or None.
+    """
+    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
+    img_canvas = box(0, 0, imsize[0], imsize[1])
+    if not polygon_from_2d_box.intersects(img_canvas):
+        return None
+    img_intersection = polygon_from_2d_box.intersection(img_canvas)
+    # With fewer than three corners, or a hull that only touches the canvas
+    # border, the intersection is a Point or LineString. Those have no
+    # `exterior`, so report them as "no usable box" instead of crashing.
+    if img_intersection.geom_type != 'Polygon':
+        return None
+    intersection_coords = np.array(list(img_intersection.exterior.coords))
+    min_x, min_y = intersection_coords[:, :2].min(axis=0)
+    max_x, max_y = intersection_coords[:, :2].max(axis=0)
+    return min_x, min_y, max_x, max_y
+def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
+                    sample_data_token: str,
+                    filename: str) -> Union[dict, None]:
+    """
+    Generate one COCO-style 2D annotation record from a 3d annotation
+    record and the 2D bounding box coordinates.
+    :param ann_rec: Original 3d annotation record; only its
+                    'category_name' entry is read.
+    :param x1: Minimum value of the x coordinate.
+    :param y1: Minimum value of the y coordinate.
+    :param x2: Maximum value of the x coordinate.
+    :param y2: Maximum value of the y coordinate.
+    :param sample_data_token: Sample data token, stored as 'image_id'.
+    :param filename: The corresponding image file where the annotation
+                     is present, stored as 'file_name'.
+    :return: A dict with the keys 'file_name', 'image_id', 'area',
+             'category_name', 'category_id', 'bbox' ([x, y, width, height])
+             and 'iscrowd', or None when the category of ann_rec has no
+             entry in NuScenesDataset.NameMapping.
+    """
+    # Categories without an entry in the name mapping are not exported.
+    category_name = ann_rec['category_name']
+    if category_name not in NuScenesDataset.NameMapping:
+        return None
+    cat_name = NuScenesDataset.NameMapping[category_name]
+    coco_rec = dict()
+    coco_rec['file_name'] = filename
+    coco_rec['image_id'] = sample_data_token
+    coco_rec['area'] = (y2 - y1) * (x2 - x1)
+    coco_rec['category_name'] = cat_name
+    coco_rec['category_id'] = nus_categories.index(cat_name)
+    # corner format (x1, y1, x2, y2) -> (x, y, width, height)
+    coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1]
+    coco_rec['iscrowd'] = 0
+    return coco_rec
--- a/tools/dist_train.sh
+++ b/tools/dist_train.sh
+#!/usr/bin/env bash
+# Launch train.py (located next to this script) through
+# torch.distributed.launch.
+# Usage: dist_train.sh CONFIG GPUS [extra train.py arguments...]
+# The interpreter can be overridden with the PYTHON environment variable.
+PYTHON=${PYTHON:-"python"}
+CONFIG=$1
+GPUS=$2
+# Quote the expansions so that paths and extra arguments containing spaces
+# or glob characters are passed through unchanged. $PYTHON stays unquoted
+# so that it may carry interpreter options.
+$PYTHON -m torch.distributed.launch --nproc_per_node="$GPUS" \
+    "$(dirname "$0")/train.py" "$CONFIG" --launcher pytorch "${@:3}"
--- a/tools/publish_model.py
+++ b/tools/publish_model.py
+import argparse
+import subprocess
+import torch
+def parse_args():
+    """Parse the input and output checkpoint filenames from the CLI."""
+    arg_parser = argparse.ArgumentParser(
+        description='Process a checkpoint to be published')
+    positional_args = (
+        ('in_file', 'input checkpoint filename'),
+        ('out_file', 'output checkpoint filename'),
+    )
+    for arg_name, arg_help in positional_args:
+        arg_parser.add_argument(arg_name, help=arg_help)
+    return arg_parser.parse_args()
+def process_checkpoint(in_file, out_file):
+    """Strip a checkpoint for publishing and tag its name with a hash.
+    The checkpoint is loaded from in_file on the CPU, its 'optimizer'
+    entry (if any) is removed, and the result is saved to out_file. The
+    saved file is then renamed to '<out_file without .pth>-<sha>.pth',
+    where <sha> is the first 8 hex digits of its sha256 checksum.
+    Args:
+        in_file (str): input checkpoint filename.
+        out_file (str): output checkpoint filename.
+    Raises:
+        subprocess.CalledProcessError: if `sha256sum` or `mv` fails.
+    """
+    checkpoint = torch.load(in_file, map_location='cpu')
+    # remove optimizer for smaller file size
+    if 'optimizer' in checkpoint:
+        del checkpoint['optimizer']
+    # if it is necessary to remove some sensitive data in checkpoint['meta'],
+    # add the code here.
+    torch.save(checkpoint, out_file)
+    sha = subprocess.check_output(['sha256sum', out_file]).decode()
+    # str.rstrip('.pth') would strip any trailing '.', 'p', 't' or 'h'
+    # characters (e.g. 'depth.pth' -> 'de'), so cut the suffix explicitly.
+    suffix = '.pth'
+    if out_file.endswith(suffix):
+        out_file_name = out_file[:-len(suffix)]
+    else:
+        out_file_name = out_file
+    final_file = out_file_name + '-{}.pth'.format(sha[:8])
+    # check_call waits for the move to finish and raises if it fails
+    subprocess.check_call(['mv', out_file, final_file])
+def main():
+    """Publish the checkpoint named on the command line."""
+    cli_args = parse_args()
+    process_checkpoint(in_file=cli_args.in_file, out_file=cli_args.out_file)
+# Only run when executed as a script, not on import.
+if __name__ == '__main__':
+    main()
--- a/tools/slurm_test.sh
+++ b/tools/slurm_test.sh
+#!/usr/bin/env bash
+# Run tools/test.py on a slurm partition through srun.
+# Usage: slurm_test.sh PARTITION JOB_NAME CONFIG CHECKPOINT [test.py args...]
+# Environment: GPUS (default 8), GPUS_PER_NODE (default 8), SRUN_ARGS.
+set -x
+export PYTHONPATH="$(pwd):$PYTHONPATH"
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+CHECKPOINT=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+# Keep the extra arguments in an array so that arguments containing spaces
+# reach test.py intact instead of being flattened into one string.
+PY_ARGS=("${@:5}")
+SRUN_ARGS=${SRUN_ARGS:-""}
+# SRUN_ARGS is intentionally left unquoted: it holds several srun options
+# in a single string and relies on word splitting.
+srun -p "${PARTITION}" \
+    --job-name="${JOB_NAME}" \
+    --gres=gpu:"${GPUS_PER_NODE}" \
+    --ntasks="${GPUS}" \
+    --ntasks-per-node="${GPUS_PER_NODE}" \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/test.py "${CONFIG}" "${CHECKPOINT}" --launcher="slurm" "${PY_ARGS[@]}"
--- a/tools/slurm_train.sh
+++ b/tools/slurm_train.sh
+#!/usr/bin/env bash
+# Run tools/train.py on a slurm partition through srun.
+# Usage: slurm_train.sh PARTITION JOB_NAME CONFIG WORK_DIR
+# Environment: GPUS (default 8), GPUS_PER_NODE (default 8), SRUN_ARGS,
+#              PY_ARGS (extra train.py arguments, default "--validate").
+set -x
+export PYTHONPATH="$(pwd):$PYTHONPATH"
+PARTITION=$1
+JOB_NAME=$2
+CONFIG=$3
+WORK_DIR=$4
+GPUS=${GPUS:-8}
+GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+SRUN_ARGS=${SRUN_ARGS:-""}
+PY_ARGS=${PY_ARGS:-"--validate"}
+# Positional values are quoted so that paths containing spaces or glob
+# characters survive. SRUN_ARGS and PY_ARGS are intentionally left unquoted:
+# each holds several options in one string and relies on word splitting.
+srun -p "${PARTITION}" \
+    --job-name="${JOB_NAME}" \
+    --gres=gpu:"${GPUS_PER_NODE}" \
+    --ntasks="${GPUS}" \
+    --ntasks-per-node="${GPUS_PER_NODE}" \
+    --kill-on-bad-exit=1 \
+    ${SRUN_ARGS} \
+    python -u tools/train.py "${CONFIG}" --work_dir="${WORK_DIR}" --launcher="slurm" ${PY_ARGS}
--- a/tools/test.py
+++ b/tools/test.py
+import argparse
+import os
+import mmcv
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import get_dist_info, init_dist, load_checkpoint
+from tools.fuse_conv_bn import fuse_module
+from mmdet3d.datasets import build_dataloader, build_dataset
+from mmdet3d.models import build_detector
+from mmdet.apis import multi_gpu_test, single_gpu_test
+from mmdet.core import wrap_fp16_model
+class MultipleKVAction(argparse.Action):
+    """
+    argparse action that collects KEY=VALUE arguments into a dictionary.
+    Every argument is split on its first = only. A comma separated value,
+    i.e KEY=V1,V2,V3, is stored as a list; a value without commas is stored
+    as a single item.
+    """
+    def _parse_int_float_bool(self, val):
+        """Return val as int, float or bool when it parses as one."""
+        for convert in (int, float):
+            try:
+                return convert(val)
+            except ValueError:
+                continue
+        lowered = val.lower()
+        if lowered == 'true':
+            return True
+        if lowered == 'false':
+            return False
+        # anything else is kept as the original string
+        return val
+    def __call__(self, parser, namespace, values, option_string=None):
+        """Parse all KEY=VALUE items and store the dict on the namespace."""
+        parsed_options = {}
+        for item in values:
+            name, raw_value = item.split('=', maxsplit=1)
+            parts = list(
+                map(self._parse_int_float_bool, raw_value.split(',')))
+            parsed_options[name] = parts[0] if len(parts) == 1 else parts
+        setattr(namespace, self.dest, parsed_options)
+def parse_args():
+    """Parse the command line arguments of the test script.
+    Besides parsing, this exports the parsed --local_rank value as the
+    LOCAL_RANK environment variable when that variable is not already set.
+    Returns:
+        argparse.Namespace: the parsed arguments.
+    """
+    parser = argparse.ArgumentParser(
+        description='MMDet test (and eval) a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument('--out', help='output result file in pickle format')
+    # NOTE: adjacent string literals are concatenated without a separator,
+    # so every continued help line needs its own space.
+    parser.add_argument(
+        '--fuse_conv_bn',
+        action='store_true',
+        help='Whether to fuse conv and bn, this will slightly increase '
+        'the inference speed')
+    parser.add_argument(
+        '--format_only',
+        action='store_true',
+        help='Format the output results without perform evaluation. It is '
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
+        ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--gpu_collect',
+        action='store_true',
+        help='whether to use gpu to collect results.')
+    parser.add_argument(
+        '--tmpdir',
+        help='tmp directory used for collecting results from multiple '
+        'workers, available when gpu_collect is not specified')
+    parser.add_argument(
+        '--options', nargs='+', action=MultipleKVAction, help='custom options')
+    parser.add_argument(
+        '--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    return args
+def main():
+    """Test a detector and save, format, evaluate or show its results.
+    Builds the test dataset and the detector from the config, loads the
+    checkpoint, runs single-GPU or distributed inference depending on
+    --launcher, and on rank 0 dumps the outputs to --out and/or passes
+    them to the dataset's format_results / evaluate methods.
+    Raises:
+        ValueError: if --eval and --format_only are both given, or if
+            --out does not end with '.pkl' or '.pickle'.
+    """
+    args = parse_args()
+    assert args.out or args.eval or args.format_only or args.show, \
+        ('Please specify at least one operation (save/eval/format/show the '
+         'results) with the argument "--out", "--eval", "--format_only" '
+         'or "--show"')
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format_only cannot be both specified')
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+    cfg = mmcv.Config.fromfile(args.config)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    # the weights come from the checkpoint, so no pretrained model is needed
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfg.data.workers_per_gpu,
+        dist=distributed,
+        shuffle=False)
+    # build the model and load checkpoint
+    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
+    fp16_cfg = cfg.get('fp16', None)
+    if fp16_cfg is not None:
+        wrap_fp16_model(model)
+    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
+    if args.fuse_conv_bn:
+        model = fuse_module(model)
+    # old versions did not save class info in checkpoints, this workaround is
+    # for backward compatibility. Use .get() so that checkpoints without any
+    # 'meta' entry also fall back to the dataset classes instead of raising
+    # a KeyError.
+    if 'CLASSES' in checkpoint.get('meta', {}):
+        model.CLASSES = checkpoint['meta']['CLASSES']
+    else:
+        model.CLASSES = dataset.CLASSES
+    if not distributed:
+        model = MMDataParallel(model, device_ids=[0])
+        outputs = single_gpu_test(model, data_loader, args.show)
+    else:
+        model = MMDistributedDataParallel(
+            model.cuda(),
+            device_ids=[torch.cuda.current_device()],
+            broadcast_buffers=False)
+        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
+                                 args.gpu_collect)
+    # only rank 0 writes, formats and evaluates the results
+    rank, _ = get_dist_info()
+    if rank == 0:
+        if args.out:
+            print('\nwriting results to {}'.format(args.out))
+            mmcv.dump(outputs, args.out)
+        kwargs = {} if args.options is None else args.options
+        if args.format_only:
+            dataset.format_results(outputs, **kwargs)
+        if args.eval:
+            dataset.evaluate(outputs, args.eval, **kwargs)
+if __name__ == '__main__':
+    main()
--- a/tools/train.py
+++ b/tools/train.py
+from __future__ import division
+import argparse
+import copy
+import os
+import os.path as osp
+import time
+import mmcv
+import torch
+from mmcv import Config
+from mmcv.runner import init_dist
+from mmdet3d import __version__
+from mmdet3d.apis import train_detector
+from mmdet3d.datasets import build_dataset
+from mmdet3d.models import build_detector
+from mmdet3d.utils import collect_env
+from mmdet.apis import get_root_logger, set_random_seed
+def parse_args():
+    """Parse the command line arguments of the training script.
+    The parsed --local_rank value is also exported as the LOCAL_RANK
+    environment variable unless that variable is already set.
+    """
+    arg_parser = argparse.ArgumentParser(description='Train a detector')
+    add = arg_parser.add_argument
+    add('config', help='train config file path')
+    add('--work_dir', help='the dir to save logs and models')
+    add('--resume_from', help='the checkpoint file to resume from')
+    add('--validate',
+        action='store_true',
+        help='whether to evaluate the checkpoint during training')
+    add('--gpus',
+        type=int,
+        default=1,
+        help='number of gpus to use '
+        '(only applicable to non-distributed training)')
+    add('--seed', type=int, default=0, help='random seed')
+    add('--deterministic',
+        action='store_true',
+        help='whether to set deterministic options for CUDNN backend.')
+    add('--launcher',
+        choices=['none', 'pytorch', 'slurm', 'mpi'],
+        default='none',
+        help='job launcher')
+    add('--local_rank', type=int, default=0)
+    add('--autoscale-lr',
+        action='store_true',
+        help='automatically scale lr with the number of gpus')
+    parsed = arg_parser.parse_args()
+    # keep an already exported LOCAL_RANK, otherwise take the CLI value
+    os.environ.setdefault('LOCAL_RANK', str(parsed.local_rank))
+    return parsed
+def main():
+    """Train a detector as described by the config file and CLI arguments.
+    Loads the config, applies the CLI overrides (work dir, resume
+    checkpoint, gpus, optional lr scaling), initialises the distributed
+    environment and the logger, seeds the random number generators, builds
+    the model and dataset(s) and finally calls train_detector.
+    """
+    args = parse_args()
+    cfg = Config.fromfile(args.config)
+    # set cudnn_benchmark
+    if cfg.get('cudnn_benchmark', False):
+        torch.backends.cudnn.benchmark = True
+    # work_dir is determined in this priority: CLI > segment in file > filename
+    if args.work_dir is not None:
+        # update configs according to CLI args if args.work_dir is not None
+        cfg.work_dir = args.work_dir
+    elif cfg.get('work_dir', None) is None:
+        # use config filename as default work_dir if cfg.work_dir is None
+        cfg.work_dir = osp.join('./work_dirs',
+                                osp.splitext(osp.basename(args.config))[0])
+    if args.resume_from is not None:
+        cfg.resume_from = args.resume_from
+    cfg.gpus = args.gpus
+    if args.autoscale_lr:
+        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
+        # NOTE(review): the divisor 8 presumably is the number of gpus the
+        # configured lr was tuned for - confirm against the configs.
+        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8
+    # init distributed env first, since logger depends on the dist info.
+    if args.launcher == 'none':
+        distributed = False
+    else:
+        distributed = True
+        init_dist(args.launcher, **cfg.dist_params)
+    # create work_dir
+    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
+    # init the logger before other steps
+    # the same timestamp names the log file and is passed to train_detector
+    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+    log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
+    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
+    # init the meta dict to record some important information such as
+    # environment info and seed, which will be logged
+    meta = dict()
+    # log env info
+    env_info_dict = collect_env()
+    env_info = '\n'.join([('{}: {}'.format(k, v))
+                          for k, v in env_info_dict.items()])
+    dash_line = '-' * 60 + '\n'
+    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+                dash_line)
+    meta['env_info'] = env_info
+    # log some basic info
+    logger.info('Distributed training: {}'.format(distributed))
+    logger.info('Config:\n{}'.format(cfg.text))
+    # set random seeds
+    if args.seed is not None:
+        logger.info('Set random seed to {}, deterministic: {}'.format(
+            args.seed, args.deterministic))
+        set_random_seed(args.seed, deterministic=args.deterministic)
+    cfg.seed = args.seed
+    meta['seed'] = args.seed
+    model = build_detector(
+        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
+    logger.info('Model:\n{}'.format(model))
+    datasets = [build_dataset(cfg.data.train)]
+    # a workflow with two entries additionally gets the val split, built
+    # with the training pipeline instead of its own
+    if len(cfg.workflow) == 2:
+        val_dataset = copy.deepcopy(cfg.data.val)
+        val_dataset.pipeline = cfg.data.train.pipeline
+        datasets.append(build_dataset(val_dataset))
+    if cfg.checkpoint_config is not None:
+        # save mmdet version, config file content and class names in
+        # checkpoints as meta data
+        cfg.checkpoint_config.meta = dict(
+            mmdet_version=__version__,
+            config=cfg.text,
+            CLASSES=datasets[0].CLASSES)
+    # add an attribute for visualization convenience
+    model.CLASSES = datasets[0].CLASSES
+    train_detector(
+        model,
+        datasets,
+        cfg,
+        distributed=distributed,
+        validate=args.validate,
+        timestamp=timestamp,
+        meta=meta)
+# Only run when executed as a script, not on import.
+if __name__ == '__main__':
+    main()