"tests/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "843355f89fd043e82b3344d9259e6faa640da6f9"
Commit 89bda282 authored by zhangwenwei

Merge branch 'master' into fix-train-runtime

parents ff8623e1 99db60dd
@@ -12,7 +12,7 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-yapf
-    rev: v0.29.0
+    rev: v0.30.0
     hooks:
       - id: yapf
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -116,7 +116,7 @@ input_modality = dict(
     use_lidar=True,
     use_depth=False,
     use_lidar_intensity=True,
-    use_camera=False,
+    use_camera=True,
 )
 db_sampler = dict(
     root_path=data_root,
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
),
voxel_encoder=dict(type='VoxelFeatureExtractorV3'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
)
test_cfg = dict(
rpn=dict(
nms_pre=1024,
max_per_img=100,
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.7,
score_thr=0))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True, use_depth=False, use_lidar_intensity=True, use_camera=True)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum changes during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
log_level = 'INFO'
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
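
A minimal sketch of consuming a config like the one above, using the standard mmcv/mmdet entry points this repo relies on elsewhere (the config path is taken from the tests added later in this commit):

    from mmcv import Config
    from mmdet.models import build_detector

    cfg = Config.fromfile(
        'configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
    # train_cfg/test_cfg live at the top level of the config in this codebase
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)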
 from .boxvelo_head import Anchor3DVeloHead
+from .parta2_rpn_head import PartA2RPNHead
 from .second_head import SECONDHead
-__all__ = ['Anchor3DVeloHead', 'SECONDHead']
+__all__ = ['Anchor3DVeloHead', 'SECONDHead', 'PartA2RPNHead']
from __future__ import division
import numpy as np
import torch
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS
from .second_head import SECONDHead
@HEADS.register_module
class PartA2RPNHead(SECONDHead):
"""rpn head for PartA2
Args:
class_name (list[str]): name of classes (TODO: to be removed)
in_channels (int): Number of channels in the input feature map.
train_cfg (dict): train configs
test_cfg (dict): test configs
feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier.
        encode_bg_as_zeros (bool): Whether to use sigmoid or softmax
(TODO: to be removed)
box_code_size (int): The size of box code.
anchor_generator(dict): Config dict of anchor generator.
assigner_per_size (bool): Whether to do assignment for each separate
anchor size.
assign_per_class (bool): Whether to do assignment for each class.
diff_rad_by_sin (bool): Whether to change the difference into sin
difference for box regression loss.
dir_offset (float | int): The offset of BEV rotation angles
(TODO: may be moved into box coder)
dirlimit_offset (float | int): The limited range of BEV rotation angles
(TODO: may be moved into box coder)
box_coder (dict): Config dict of box coders.
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of localization loss.
loss_dir (dict): Config of direction classifier loss.
""" # npqa:W293
def __init__(self,
class_name,
in_channels,
train_cfg,
test_cfg,
feat_channels=256,
use_direction_classifier=True,
encode_bg_as_zeros=False,
box_code_size=7,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
custom_values=[],
reshape_out=False),
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=0,
dir_limit_offset=1,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
super().__init__(class_name, in_channels, train_cfg, test_cfg,
feat_channels, use_direction_classifier,
encode_bg_as_zeros, box_code_size, anchor_generator,
assigner_per_size, assign_per_class, diff_rad_by_sin,
dir_offset, dir_limit_offset, bbox_coder, loss_cls,
loss_bbox, loss_dir)
def get_bboxes(self,
cls_scores,
bbox_preds,
dir_cls_preds,
input_metas,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
device = cls_scores[0].device
mlvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, device=device)
mlvl_anchors = [
anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors
]
result_list = []
for img_id in range(len(input_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
dir_cls_pred_list = [
dir_cls_preds[i][img_id].detach() for i in range(num_levels)
]
input_meta = input_metas[img_id]
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
dir_cls_pred_list, mlvl_anchors,
input_meta, cfg, rescale)
result_list.append(proposals)
return result_list
def get_bboxes_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
mlvl_anchors,
input_meta,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = []
mlvl_max_scores = []
mlvl_label_pred = []
mlvl_dir_scores = []
mlvl_cls_score = []
for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
if self.use_sigmoid_cls:
max_scores, pred_labels = scores.max(dim=1)
else:
max_scores, pred_labels = scores[:, :-1].max(dim=1)
# get topk
if nms_pre > 0 and scores.shape[0] > nms_pre:
topk_scores, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
max_scores = topk_scores
cls_score = cls_score[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
pred_labels = pred_labels[topk_inds]
bboxes = self.bbox_coder.decode(anchors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_max_scores.append(max_scores)
mlvl_cls_score.append(cls_score)
mlvl_label_pred.append(pred_labels)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
mlvl_max_scores = torch.cat(mlvl_max_scores)
mlvl_label_pred = torch.cat(mlvl_label_pred)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
mlvl_cls_score = torch.cat(
mlvl_cls_score) # shape [k, num_class] before sigmoid
score_thr = cfg.get('score_thr', 0)
result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred,
mlvl_cls_score, mlvl_dir_scores,
score_thr, cfg.nms_post, cfg)
result.update(dict(sample_idx=input_meta['sample_idx']))
return result
def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg):
bboxes = []
scores = []
labels = []
dir_scores = []
cls_scores = []
score_thr_inds = mlvl_max_scores > score_thr
_scores = mlvl_max_scores[score_thr_inds]
_bboxes_for_nms = mlvl_bboxes_for_nms[score_thr_inds, :]
if cfg.use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)
_mlvl_bboxes = mlvl_bboxes[score_thr_inds, :]
_mlvl_dir_scores = mlvl_dir_scores[score_thr_inds]
_mlvl_label_pred = mlvl_label_pred[score_thr_inds]
_mlvl_cls_score = mlvl_cls_score[score_thr_inds]
if len(selected) > 0:
bboxes.append(_mlvl_bboxes[selected])
scores.append(_scores[selected])
labels.append(_mlvl_label_pred[selected])
cls_scores.append(_mlvl_cls_score[selected])
dir_scores.append(_mlvl_dir_scores[selected])
dir_rot = box_torch_ops.limit_period(
bboxes[-1][..., 6] - self.dir_offset, self.dir_limit_offset,
np.pi)
bboxes[-1][..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores[-1].to(bboxes[-1].dtype))
if bboxes:
bboxes = torch.cat(bboxes, dim=0)
scores = torch.cat(scores, dim=0)
cls_scores = torch.cat(cls_scores, dim=0)
labels = torch.cat(labels, dim=0)
dir_scores = torch.cat(dir_scores, dim=0)
if bboxes.shape[0] > max_num:
_, inds = scores.sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds, :]
labels = labels[inds]
scores = scores[inds]
cls_scores = cls_scores[inds]
dir_scores = dir_scores[inds]
return dict(
box3d_lidar=bboxes.cpu(),
scores=scores.cpu(),
label_preds=labels.cpu(),
cls_preds=cls_scores.cpu(
) # raw scores with shape [max_num, cls_num]
)
else:
return dict(
box3d_lidar=mlvl_bboxes.new_zeros([0,
self.box_code_size]).cpu(),
scores=mlvl_bboxes.new_zeros([0]).cpu(),
label_preds=mlvl_bboxes.new_zeros([0]).cpu(),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]
]).cpu())
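
The direction decoding inside class_agnostic_nms above folds each regressed yaw into a half-open period and then adds pi times the direction-classifier bit back. A NumPy sketch of the same arithmetic, assuming the SECOND-style definition limit_period(val, offset, period) = val - floor(val / period + offset) * period:

    import numpy as np

    def limit_period(val, offset=0.5, period=np.pi):
        # fold val into a window of width `period` controlled by `offset`
        return val - np.floor(val / period + offset) * period

    yaw = np.array([2.0, -2.0])          # regressed yaw angles (rad)
    dir_bit = np.array([1.0, 0.0])       # argmax of the direction classifier
    dir_offset, dir_limit_offset = 0, 1  # defaults from PartA2RPNHead.__init__
    folded = limit_period(yaw - dir_offset, dir_limit_offset, np.pi)
    decoded = folded + dir_offset + np.pi * dir_bit  # final box yaw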
@@ -3,10 +3,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
                               DynamicMVXFasterRCNNV3)
 from .mvx_single_stage import MVXSingleStageDetector
 from .mvx_two_stage import MVXTwoStageDetector
+from .parta2 import PartA2
 from .voxelnet import DynamicVoxelNet, VoxelNet
 __all__ = [
     'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
     'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-    'DynamicMVXFasterRCNNV3'
+    'DynamicMVXFasterRCNNV3', 'PartA2'
 ]
import torch
import torch.nn.functional as F
from mmdet3d.ops import Voxelization
from mmdet.models import DETECTORS, TwoStageDetector
from .. import builder
@DETECTORS.register_module
class PartA2(TwoStageDetector):
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(PartA2, self).__init__(
backbone=backbone,
neck=neck,
rpn_head=rpn_head,
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)
self.middle_encoder = builder.build_middle_encoder(middle_encoder)
def extract_feat(self, points, img_meta):
voxels, num_points, coors = self.voxelize(points)
voxel_dict = dict(voxels=voxels, num_points=num_points, coors=coors)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
feats_dict = self.middle_encoder(voxel_features, coors, batch_size)
x = self.backbone(feats_dict['spatial_features'])
if self.with_neck:
neck_feats = self.neck(x)
feats_dict.update({'neck_feats': neck_feats})
return feats_dict, voxel_dict
@torch.no_grad()
def voxelize(self, points):
voxels, coors, num_points = [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
coors_batch = []
for i, coor in enumerate(coors):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
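    # Illustration of the padding above (not part of the model): for sample i,
    # each voxel coordinate row (z_idx, y_idx, x_idx) becomes
    # (i, z_idx, y_idx, x_idx), so one concatenated `coors` tensor can address
    # every sample in the batch, e.g.
    #   F.pad(torch.tensor([[3, 5, 7]]), (1, 0), value=1) -> tensor([[1, 3, 5, 7]])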
def forward_train(self,
points,
img_meta,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None,
proposals=None):
# TODO: complete it
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
losses = dict()
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d, img_meta)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals # noqa: F841
return losses
def forward_test(self, **kwargs):
return self.simple_test(**kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
proposals=None,
rescale=False):
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
# TODO: complete it
if proposals is None:
proposal_list = self.simple_test_rpn(feats_dict['neck_feats'],
img_meta, self.test_cfg.rpn)
else:
proposal_list = proposals
return self.roi_head.simple_test(
feats_dict, proposal_list, img_meta, rescale=rescale)
 from .pillar_scatter import PointPillarsScatter
 from .sparse_encoder import SparseEncoder
+from .sparse_unet import SparseUNet
-__all__ = ['PointPillarsScatter', 'SparseEncoder']
+__all__ = ['PointPillarsScatter', 'SparseEncoder', 'SparseUNet']
import torch
import torch.nn as nn
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock
from mmdet.ops import build_norm_layer
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module
class SparseUNet(nn.Module):
def __init__(self,
in_channels,
output_shape,
pre_act=False,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):
"""SparseUNet for PartA^2
        See https://arxiv.org/abs/1907.03670 for more details.
Args:
in_channels (int): the number of input channels
output_shape (list[int]): the shape of output tensor
pre_act (bool): use pre_act_block or post_act_block
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
decoder_channels (tuple[tuple[int]]):
conv channels of each decode block
decoder_paddings (tuple[tuple[int]]): paddings of each decode block
"""
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.in_channels = in_channels
self.pre_act = pre_act
self.base_channels = base_channels
self.output_channels = output_channels
self.encoder_channels = encoder_channels
self.encoder_paddings = encoder_paddings
self.decoder_channels = decoder_channels
self.decoder_paddings = decoder_paddings
self.stage_num = len(self.encoder_channels)
# Spconv init all weight on its own
if pre_act:
# TODO: use ConvModule to encapsulate
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'))
make_block = self.pre_act_block
else:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'),
build_norm_layer(norm_cfg, self.base_channels)[1], nn.ReLU())
make_block = self.post_act_block
encoder_out_channels = self.make_encoder_layers(
make_block, norm_cfg, self.base_channels)
self.make_decoder_layers(make_block, norm_cfg, encoder_out_channels)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
encoder_out_channels,
self.output_channels, (3, 1, 1),
stride=(2, 1, 1),
padding=0,
bias=False,
indice_key='spconv_down2'),
build_norm_layer(norm_cfg, self.output_channels)[1],
nn.ReLU())
def forward(self, voxel_features, coors, batch_size):
"""Forward of SparseUNet
Args:
voxel_features (torch.float32): shape [N, C]
coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx)
batch_size (int): batch size
Returns:
dict: backbone features
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
self.sparse_shape,
batch_size)
x = self.conv_input(input_sp_tensor)
encode_features = []
for encoder_layer in self.encoder_layers:
x = encoder_layer(x)
encode_features.append(x)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(encode_features[-1])
spatial_features = out.dense()
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
# for segmentation head, with output shape:
# [400, 352, 11] <- [200, 176, 5]
# [800, 704, 21] <- [400, 352, 11]
# [1600, 1408, 41] <- [800, 704, 21]
# [1600, 1408, 41] <- [1600, 1408, 41]
decode_features = []
x = encode_features[-1]
for i in range(self.stage_num, 0, -1):
x = self.decoder_layer_forward(encode_features[i - 1], x,
getattr(self, f'lateral_layer{i}'),
getattr(self, f'merge_layer{i}'),
getattr(self, f'upsample_layer{i}'))
decode_features.append(x)
seg_features = decode_features[-1].features
ret = dict(
spatial_features=spatial_features, seg_features=seg_features)
return ret
def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,
merge_layer, upsample_layer):
"""Forward of upsample and residual block.
Args:
x_lateral (SparseConvTensor): lateral tensor
x_bottom (SparseConvTensor): feature from bottom layer
lateral_layer (SparseBasicBlock): convolution for lateral tensor
merge_layer (SparseSequential): convolution for merging features
upsample_layer (SparseSequential): convolution for upsampling
Returns:
SparseConvTensor: upsampled feature
"""
x = lateral_layer(x_lateral)
x.features = torch.cat((x_bottom.features, x.features), dim=1)
x_merge = merge_layer(x)
x = self.reduce_channel(x, x_merge.features.shape[1])
x.features = x_merge.features + x.features
x = upsample_layer(x)
return x
@staticmethod
def reduce_channel(x, out_channels):
"""reduce channel for element-wise addition.
Args:
x (SparseConvTensor): x.features (N, C1)
out_channels (int): the number of channel after reduction
Returns:
SparseConvTensor: channel reduced feature
"""
features = x.features
n, in_channels = features.shape
assert (in_channels % out_channels
== 0) and (in_channels >= out_channels)
x.features = features.view(n, out_channels, -1).sum(dim=2)
return x
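    # Worked example for reduce_channel (illustrative only): with features of
    # shape (n, 128) and out_channels=64, view(n, 64, -1) groups each pair of
    # adjacent channels and sum(dim=2) adds them, yielding an (n, 64) matrix;
    # hence the assertion that in_channels is a positive multiple of
    # out_channels.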
def pre_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make pre activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict): config of normalization layer
Returns:
spconv.SparseSequential: pre activate sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key))
else:
raise NotImplementedError
return m
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make post activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict[str]): config of normalization layer
Returns:
spconv.SparseSequential: post activate sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
else:
raise NotImplementedError
return m
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
"""make encoder layers using sparse convs
Args:
            make_block (method): a bound function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
self.encoder_layers = spconv.SparseSequential()
for i, blocks in enumerate(self.encoder_channels):
blocks_list = []
for j, out_channels in enumerate(tuple(blocks)):
padding = tuple(self.encoder_paddings[i])[j]
# each stage started with a spconv layer
# except the first stage
if i != 0 and j == 0:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
stride=2,
padding=padding,
indice_key=f'spconv{i + 1}',
conv_type='spconv'))
else:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
padding=padding,
indice_key=f'subm{i + 1}'))
in_channels = out_channels
stage_name = f'encoder_layer{i + 1}'
stage_layers = spconv.SparseSequential(*blocks_list)
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
def make_decoder_layers(self, make_block, norm_cfg, in_channels):
"""make decoder layers using sparse convs
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
block_num = len(self.decoder_channels)
for i, block_channels in enumerate(self.decoder_channels):
paddings = self.decoder_paddings[i]
setattr(
self, f'lateral_layer{block_num - i}',
SparseBasicBlock(
in_channels,
block_channels[0],
conv_cfg=dict(
type='SubMConv3d', indice_key=f'subm{block_num - i}'),
norm_cfg=norm_cfg))
setattr(
self, f'merge_layer{block_num - i}',
make_block(
in_channels * 2,
block_channels[1],
3,
norm_cfg=norm_cfg,
padding=paddings[0],
indice_key=f'subm{block_num - i}'))
if block_num - i != 1:
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key=f'spconv{block_num - i}',
conv_type='inverseconv'))
else:
# use submanifold conv instead of inverse conv
# in the last block
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key='subm1',
conv_type='subm'))
in_channels = block_channels[2]
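
A rough shape trace for the default SparseUNet above on the KITTI sparse shape [41, 1600, 1408], inferred from the in-code comments and the unit test added later in this commit (treat it as a sketch):

    # encoder (z, y, x) x C: [41, 1600, 1408] x 16 -> [21, 800, 704] x 32
    #                        -> [11, 400, 352] x 64 -> [5, 200, 176] x 64
    # conv_out: [5, 200, 176] x 64 -> [2, 200, 176] x 128, densified and
    #           reshaped to spatial_features of shape (N, 256, 200, 176)
    # decoder: mirrors the encoder back to [41, 1600, 1408] x 16, giving
    #          per-voxel seg_features of width 16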
@@ -2,12 +2,29 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                        get_compiling_cuda_version, nms, roi_align,
                        sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
+                           SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
 __all__ = [
-    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
-    'get_compiling_cuda_version', 'build_conv_layer', 'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
-    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
-    'SigmoidFocalLoss'
+    'nms',
+    'soft_nms',
+    'RoIAlign',
+    'roi_align',
+    'get_compiler_version',
+    'get_compiling_cuda_version',
+    'build_conv_layer',
+    'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d',
+    'batched_nms',
+    'Voxelization',
+    'voxelization',
+    'dynamic_scatter',
+    'DynamicScatter',
+    'sigmoid_focal_loss',
+    'SigmoidFocalLoss',
+    'SparseBasicBlockV0',
+    'SparseBottleneckV0',
+    'SparseBasicBlock',
+    'SparseBottleneck',
 ]
@@ -4,12 +4,14 @@ from . import roiaware_pool3d_ext
 def points_in_boxes_gpu(points, boxes):
-    """
+    """Find points that are in boxes (CUDA)
     Args:
         points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
         boxes (torch.Tensor): [B, T, 7],
             num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
            (x, y, z) is the bottom center
     Returns:
         box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
     """
@@ -27,14 +29,20 @@ def points_in_boxes_gpu(points, boxes):
 def points_in_boxes_cpu(points, boxes):
-    """
+    """Find points that are in boxes (CPU)
+    Note: Currently, the output of this function is different from that of
+    points_in_boxes_gpu.
     Args:
         points (torch.Tensor): [npoints, 3]
         boxes (torch.Tensor): [N, 7], in LiDAR coordinate,
             (x, y, z) is the bottom center
     Returns:
         point_indices (torch.Tensor): (N, npoints)
     """
+    # TODO: Refactor this function as a CPU version of points_in_boxes_gpu
     assert boxes.shape[1] == 7
     assert points.shape[1] == 3
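
A minimal usage sketch for points_in_boxes_cpu, mirroring the unit test later in this commit (the import path is assumed, and the compiled roiaware_pool3d extension must be built):

    import torch
    from mmdet3d.ops.roiaware_pool3d.points_in_boxes import \
        points_in_boxes_cpu  # assumed path

    boxes = torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 0.3]])  # (N, 7), bottom-center
    pts = torch.tensor([[1.0, 2.0, 3.3], [-16.0, -18.0, 9.0]])   # (npoints, 3)
    point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)  # (N, npoints) indicator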
@@ -10,7 +10,8 @@ class RoIAwarePool3d(nn.Module):
     def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
         super().__init__()
-        """
+        """RoIAwarePool3d module
         Args:
             out_size (int or tuple): n or [n1, n2, n3]
             max_pts_per_voxel (int): m
@@ -23,12 +24,14 @@ class RoIAwarePool3d(nn.Module):
         self.mode = pool_method_map[mode]
     def forward(self, rois, pts, pts_feature):
-        """
+        """RoIAwarePool3d module forward
         Args:
             rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                 (x, y, z) is the bottom center of rois
             pts (torch.Tensor): [npoints, 3]
             pts_feature (torch.Tensor): [npoints, C]
         Returns:
             pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
         """
@@ -43,7 +46,8 @@ class RoIAwarePool3dFunction(Function):
     @staticmethod
     def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                 mode):
-        """
+        """RoIAwarePool3d function forward
         Args:
             rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                 (x, y, z) is the bottom center of rois
@@ -52,6 +56,7 @@ class RoIAwarePool3dFunction(Function):
             out_size (int or tuple): n or [n1, n2, n3]
             max_pts_per_voxel (int): m
             mode (int): 0 (max pool) or 1 (average pool)
         Returns:
             pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
         """
@@ -84,11 +89,12 @@ class RoIAwarePool3dFunction(Function):
     @staticmethod
     def backward(ctx, grad_out):
-        """
+        """RoIAwarePool3d function backward
         Args:
-            grad_out: [N, out_x, out_y, out_z, C]
+            grad_out (torch.Tensor): [N, out_x, out_y, out_z, C]
         Returns:
-            grad_in: [npoints, C]
+            grad_in (torch.Tensor): [npoints, C]
         """
         ret = ctx.roiaware_pool3d_for_backward
         pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from mmdet.ops import build_norm_layer
from mmdet.ops.conv import conv_cfg
conv_cfg.update({'SubMConv3d': spconv.SubMConv3d})
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
"""3x3 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 3x3 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
"""1x1 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 1x1 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=1,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
class SparseBasicBlockV0(spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
norm_name1, norm_layer1 = build_norm_layer(norm_cfg, planes)
self.bn1 = norm_layer1
self.relu = nn.ReLU()
self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
norm_name2, norm_layer2 = build_norm_layer(norm_cfg, planes)
self.bn2 = norm_layer2
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneckV0(spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_fn=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
self.bn1 = norm_fn(planes)
self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
self.bn2 = norm_fn(planes)
self.conv3 = conv1x1(
planes, planes * self.expansion, indice_key=indice_key)
self.bn3 = norm_fn(planes * self.expansion)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
Bottleneck.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBasicBlock(BasicBlock, spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
BasicBlock.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.norm1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.norm2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
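
The forward methods above apply dense BatchNorm1d/ReLU directly to x.features because a spconv SparseConvTensor stores its active voxels as a plain (n, C) matrix plus integer indices. A minimal construction sketch, with values borrowed from the tests below:

    import torch
    import mmdet3d.ops.spconv as spconv

    feats = torch.rand(4, 4)  # (n, C) features of the active voxels
    coors = torch.tensor([[0, 12, 819, 131], [0, 16, 750, 136],
                          [1, 16, 705, 232], [1, 35, 930, 469]],
                         dtype=torch.int32)  # (n, 4): batch, z, y, x
    x = spconv.SparseConvTensor(feats, coors, [41, 1600, 1408], batch_size=2)
    assert x.features.shape == (4, 4)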
import copy
from os.path import dirname, exists, join
import pytest
import torch
def _get_config_directory():
""" Find the predefined detector config directory """
try:
# Assume we are running in the source mmdetection repo
repo_dpath = dirname(dirname(__file__))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet
repo_dpath = dirname(dirname(mmdet.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""
Load a configuration as a python module
"""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_head_cfg(fname):
"""
Grab configs necessary to create a bbox_head. These are deep copied to
allow for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
bbox_head = model.bbox_head
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
return bbox_head
def _get_rpn_head_cfg(fname):
"""
Grab configs necessary to create a rpn_head. These are deep copied to allow
for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn_proposal
def test_second_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
assert isinstance(self.conv_cls, torch.nn.modules.conv.Conv2d)
assert self.conv_cls.in_channels == 512
assert self.conv_cls.out_channels == 18
assert self.conv_reg.out_channels == 42
assert self.conv_dir_cls.out_channels == 12
# test forward
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
assert cls_score[0].shape == torch.Size([2, 18, 200, 176])
assert bbox_pred[0].shape == torch.Size([2, 42, 200, 176])
assert dir_cls_preds[0].shape == torch.Size([2, 12, 200, 176])
# test loss
gt_bboxes = list(
torch.tensor(
[[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
dtype=torch.float32).cuda())
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls_3d'][0] > 0
assert losses['loss_bbox_3d'][0] > 0
assert losses['loss_dir_3d'][0] > 0
# test empty ground truth case
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls_3d'][0] > 0
assert empty_gt_losses['loss_bbox_3d'][0] == 0
assert empty_gt_losses['loss_dir_3d'][0] == 0
def test_second_head_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas)
assert (result_list[0]['scores'] > 0.3).all()
def test_parta2_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
'kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(rpn_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas, proposal_cfg)
assert result_list[0]['scores'].shape == torch.Size([512])
assert result_list[0]['label_preds'].shape == torch.Size([512])
assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
assert result_list[0]['box3d_lidar'].shape == torch.Size([512, 7])
@@ -19,23 +19,10 @@ def test_RoIAwarePool3d():
         dtype=torch.float32).cuda(
         )  # boxes (m, 7) with bottom center in lidar coordinate
     pts = torch.tensor(
-        [
-            [1, 2, 3.3],
-            [1.2, 2.5, 3.0],
-            [0.8, 2.1, 3.5],
-            [1.6, 2.6, 3.6],
-            [0.8, 1.2, 3.9],
-            [-9.2, 21.0, 18.2],
-            [3.8, 7.9, 6.3],
-            [4.7, 3.5, -12.2],
-            [3.8, 7.6, -2],
-            [-10.6, -12.9, -20],
-            [-16, -18, 9],
-            [-21.3, -52, -5],
-            [0, 0, 0],
-            [6, 7, 8],
-            [-2, -3, -4],
-        ],
+        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
+         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],
         dtype=torch.float32).cuda()  # points (n, 3) in lidar coordinate
     pts_feature = pts.clone()
@@ -83,23 +70,10 @@ def test_points_in_boxes_cpu():
         dtype=torch.float32
     )  # boxes (m, 7) with bottom center in lidar coordinate
     pts = torch.tensor(
-        [
-            [1, 2, 3.3],
-            [1.2, 2.5, 3.0],
-            [0.8, 2.1, 3.5],
-            [1.6, 2.6, 3.6],
-            [0.8, 1.2, 3.9],
-            [-9.2, 21.0, 18.2],
-            [3.8, 7.9, 6.3],
-            [4.7, 3.5, -12.2],
-            [3.8, 7.6, -2],
-            [-10.6, -12.9, -20],
-            [-16, -18, 9],
-            [-21.3, -52, -5],
-            [0, 0, 0],
-            [6, 7, 8],
-            [-2, -3, -4],
-        ],
+        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
+         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],
         dtype=torch.float32)  # points (n, 3) in lidar coordinate
     point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)
@@ -109,9 +83,3 @@ def test_points_in_boxes_cpu():
         dtype=torch.int32)
     assert point_indices.shape == torch.Size([2, 15])
     assert (point_indices == expected_point_indices).all()
-if __name__ == '__main__':
-    test_points_in_boxes_cpu()
-    test_points_in_boxes_gpu()
-    test_RoIAwarePool3d()
import torch
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock, SparseBasicBlockV0
def test_SparseUNet():
from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet
self = SparseUNet(
in_channels=4, output_shape=[41, 1600, 1408], pre_act=False)
# test encoder layers
assert len(self.encoder_layers) == 4
assert self.encoder_layers.encoder_layer1[0][0].in_channels == 16
assert self.encoder_layers.encoder_layer1[0][0].out_channels == 16
assert isinstance(self.encoder_layers.encoder_layer1[0][0],
spconv.conv.SubMConv3d)
assert isinstance(self.encoder_layers.encoder_layer1[0][1],
torch.nn.modules.batchnorm.BatchNorm1d)
assert isinstance(self.encoder_layers.encoder_layer1[0][2],
torch.nn.modules.activation.ReLU)
assert self.encoder_layers.encoder_layer4[0][0].in_channels == 64
assert self.encoder_layers.encoder_layer4[0][0].out_channels == 64
assert isinstance(self.encoder_layers.encoder_layer4[0][0],
spconv.conv.SparseConv3d)
assert isinstance(self.encoder_layers.encoder_layer4[2][0],
spconv.conv.SubMConv3d)
# test decoder layers
assert isinstance(self.lateral_layer1, SparseBasicBlock)
assert isinstance(self.merge_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer2[0], spconv.conv.SparseInverseConv3d)
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
unet_ret_dict = self.forward(voxel_features, coordinates, 2)
seg_features = unet_ret_dict['seg_features']
spatial_features = unet_ret_dict['spatial_features']
assert seg_features.shape == torch.Size([4, 16])
assert spatial_features.shape == torch.Size([2, 256, 200, 176])
def test_SparseBasicBlock():
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
# test v0
self = SparseBasicBlockV0(
4,
4,
indice_key='subm0',
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
    # test the current SparseBasicBlock
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
self = SparseBasicBlock(
4,
4,
conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
# test conv and bn layer
assert isinstance(self.conv1, spconv.conv.SubMConv3d)
assert self.conv1.in_channels == 4
assert self.conv1.out_channels == 4
assert isinstance(self.conv2, spconv.conv.SubMConv3d)
    assert self.conv2.in_channels == 4
assert self.conv2.out_channels == 4
assert self.bn1.eps == 1e-3
assert self.bn1.momentum == 0.01
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
 import argparse
 import os.path as osp
+import tools.data_converter.indoor_converter as indoor
 import tools.data_converter.kitti_converter as kitti
 import tools.data_converter.nuscenes_converter as nuscenes_converter
-import tools.data_converter.scannet_converter as scannet
-import tools.data_converter.sunrgbd_converter as sunrgbd
 from tools.data_converter.create_gt_database import create_groundtruth_database
@@ -46,11 +45,11 @@ def nuscenes_data_prep(root_path,
 def scannet_data_prep(root_path, info_prefix, out_dir):
-    scannet.create_scannet_info_file(root_path, info_prefix, out_dir)
+    indoor.create_indoor_info_file(root_path, info_prefix, out_dir)
 def sunrgbd_data_prep(root_path, info_prefix, out_dir):
-    sunrgbd.create_sunrgbd_info_file(root_path, info_prefix, out_dir)
+    indoor.create_indoor_info_file(root_path, info_prefix, out_dir)
 parser = argparse.ArgumentParser(description='Data converter arg parser')
import os
import mmcv
from tools.data_converter.scannet_data_utils import ScanNetData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_indoor_info_file(data_path,
pkl_prefix='sunrgbd',
save_path=None,
use_v1=False):
"""Create indoor information file.
Get information of the raw data and save it to the pkl file.
Args:
data_path (str): Path of the data.
pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'.
save_path (str): Path of the pkl to be saved. Default: None.
use_v1 (bool): Whether to use v1. Default: False.
"""
assert os.path.exists(data_path)
assert pkl_prefix in ['sunrgbd', 'scannet']
save_path = data_path if save_path is None else save_path
assert os.path.exists(save_path)
train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
if pkl_prefix == 'sunrgbd':
train_dataset = SUNRGBDData(
root_path=data_path, split='train', use_v1=use_v1)
val_dataset = SUNRGBDData(
root_path=data_path, split='val', use_v1=use_v1)
else:
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
infos_train = train_dataset.get_infos(has_label=True)
mmcv.dump(infos_train, train_filename, 'pkl')
print(f'{pkl_prefix} info train file is saved to {train_filename}')
infos_val = val_dataset.get_infos(has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')
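
A hypothetical invocation of the converter above, assuming the ScanNet data lives under ./data/scannet (it mirrors the __main__ entry point of the old scannet converter that this commit removes):

    create_indoor_info_file(
        data_path='./data/scannet',
        pkl_prefix='scannet',
        save_path='./data/scannet')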
import os
import pickle
from pathlib import Path
from tools.data_converter.scannet_data_utils import ScanNetData
def create_scannet_info_file(data_path, pkl_prefix='scannet', save_path=None):
assert os.path.exists(data_path)
if save_path is None:
save_path = Path(data_path)
else:
save_path = Path(save_path)
assert os.path.exists(save_path)
train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
scannet_infos_train = train_dataset.get_scannet_infos(has_label=True)
with open(train_filename, 'wb') as f:
pickle.dump(scannet_infos_train, f)
print('Scannet info train file is saved to %s' % train_filename)
scannet_infos_val = val_dataset.get_scannet_infos(has_label=True)
with open(val_filename, 'wb') as f:
pickle.dump(scannet_infos_val, f)
print('Scannet info val file is saved to %s' % val_filename)
if __name__ == '__main__':
create_scannet_info_file(
data_path='./data/scannet', save_path='./data/scannet')
+import concurrent.futures as futures
 import os
+import mmcv
 import numpy as np
 class ScanNetData(object):
-    ''' Load and parse object data '''
+    """ScanNet Data
+    Generate scannet infos for scannet_converter
+    Args:
+        root_path (str): Root path of the raw data
+        split (str): Set split type of the data. Default: 'train'.
+    """
     def __init__(self, root_path, split='train'):
         self.root_dir = root_path
@@ -25,28 +34,37 @@ class ScanNetData(object):
             for i, nyu40id in enumerate(list(self.cat_ids))
         }
         assert split in ['train', 'val', 'test']
-        split_dir = os.path.join(self.root_dir, 'meta_data',
-                                 'scannetv2_%s.txt' % split)
-        self.sample_id_list = [x.strip() for x in open(split_dir).readlines()
-                               ] if os.path.exists(split_dir) else None
+        split_file = os.path.join(self.root_dir, 'meta_data',
+                                  f'scannetv2_{split}.txt')
+        mmcv.check_file_exist(split_file)
+        self.sample_id_list = mmcv.list_from_file(split_file)
     def __len__(self):
         return len(self.sample_id_list)
     def get_box_label(self, idx):
         box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
-                                '%s_bbox.npy' % idx)
+                                f'{idx}_bbox.npy')
         assert os.path.exists(box_file)
         return np.load(box_file)
-    def get_scannet_infos(self,
-                          num_workers=4,
-                          has_label=True,
-                          sample_id_list=None):
-        import concurrent.futures as futures
+    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
+        """Get data infos.
+
+        This method gets information from the raw data.
+
+        Args:
+            num_workers (int): Number of threads to be used. Default: 4.
+            has_label (bool): Whether the data has label. Default: True.
+            sample_id_list (List[int]): Index list of the sample.
+                Default: None.
+        Returns:
+            infos (List[dict]): Information of the raw data.
+        """
         def process_single_scene(sample_idx):
-            print('%s sample_idx: %s' % (self.split, sample_idx))
+            print(f'{self.split} sample_idx: {sample_idx}')
             info = dict()
             pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
             info['point_cloud'] = pc_info