OpenDAS / mmdetection3d · Commits

Commit f27d308f, authored Jun 07, 2020 by yinchimaoliang

    merge master

Parents: c66ae813, 27ebcfac

The commit touches 80 files in total; this page shows 20 changed files with 1156 additions and 379 deletions (+1156, -379). The remaining files are on the following pages of the commit view.
mmdet3d/datasets/pipelines/loading.py  (+34, -4)
mmdet3d/datasets/pipelines/point_seg_class_mapping.py  (+36, -0)
mmdet3d/datasets/scannet_dataset.py  (+2, -1)
mmdet3d/datasets/sunrgbd_dataset.py  (+2, -1)
mmdet3d/models/__init__.py  (+1, -0)
mmdet3d/models/dense_heads/__init__.py  (+2, -1)
mmdet3d/models/dense_heads/vote_head.py  (+518, -0)
mmdet3d/models/detectors/__init__.py  (+2, -1)
mmdet3d/models/detectors/votenet.py  (+110, -0)
mmdet3d/models/losses/__init__.py  (+5, -1)
mmdet3d/models/losses/chamfer_distance.py  (+119, -0)
mmdet3d/models/model_utils/__init__.py  (+3, -0)
mmdet3d/models/model_utils/vote_module.py  (+11, -50)
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py  (+14, -74)
mmdet3d/ops/__init__.py  (+3, -4)
mmdet3d/ops/ball_query/src/ball_query.cpp  (+27, -20)
mmdet3d/ops/ball_query/src/ball_query_cuda.cu  (+55, -50)
mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp  (+20, -15)
mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu  (+155, -130)
mmdet3d/ops/gather_points/src/gather_points.cpp  (+37, -27)
mmdet3d/datasets/pipelines/loading.py

@@ -42,9 +42,40 @@ class LoadMultiViewImageFromFiles(object):
 @PIPELINES.register_module()
 class LoadPointsFromMultiSweeps(object):
-
-    def __init__(self, sweeps_num=10):
-        self.sweeps_num = sweeps_num
+    """Load points from multiple sweeps.
+
+    This is usually used for nuScenes dataset to utilize previous sweeps.
+
+    Args:
+        sweeps_num (int): number of sweeps
+        load_dim (int): dimension number of the loaded points
+        file_client_args (dict): Config dict of file clients, refer to
+            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+            for more details.
+    """
+
+    def __init__(self,
+                 sweeps_num=10,
+                 load_dim=5,
+                 file_client_args=dict(backend='disk')):
+        self.load_dim = load_dim
+        self.sweeps_num = sweeps_num
+        self.file_client_args = file_client_args.copy()
+        self.file_client = None
+
+    def _load_points(self, pts_filename):
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            pts_bytes = self.file_client.get(pts_filename)
+            points = np.frombuffer(pts_bytes, dtype=np.float32)
+        except ConnectionError:
+            mmcv.check_file_exist(pts_filename)
+            if pts_filename.endswith('.npy'):
+                points = np.load(pts_filename)
+            else:
+                points = np.fromfile(pts_filename, dtype=np.float32)
+        return points
 
     def __call__(self, results):
         points = results['points']

@@ -56,9 +87,8 @@ class LoadPointsFromMultiSweeps(object):
         for idx, sweep in enumerate(results['sweeps']):
             if idx >= self.sweeps_num:
                 break
-            points_sweep = np.fromfile(
-                sweep['data_path'], dtype=np.float32,
-                count=-1).reshape([-1, 5])
+            points_sweep = self._load_points(sweep['data_path'])
+            points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim)
             sweep_ts = sweep['timestamp'] / 1e6
             points_sweep[:, 3] /= 255
             points_sweep[:, :3] = points_sweep[:, :3] @ sweep[...
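For reference, the refactored loader plugs into a data pipeline like any other registered stage. A minimal config sketch using only the defaults introduced above; the rest of the pipeline is omitted:

# Hedged sketch: every value shown is a default from this change.
# _load_points() first reads bytes through mmcv.FileClient and falls back
# to np.load / np.fromfile if the client raises ConnectionError.
load_sweeps = dict(
    type='LoadPointsFromMultiSweeps',
    sweeps_num=10,        # keep at most 10 previous sweeps
    load_dim=5,           # dimensions per point in the raw sweep file
    file_client_args=dict(backend='disk'))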
mmdet3d/datasets/pipelines/point_seg_class_mapping.py (new file)

from mmdet.datasets.builder import PIPELINES


@PIPELINES.register_module()
class PointSegClassMapping(object):
    """Map original semantic class to valid category ids.

    Map valid classes as 0~len(valid_cat_ids)-1 and
    others as len(valid_cat_ids).

    Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
    """

    def __init__(self, valid_cat_ids):
        self.valid_cat_ids = valid_cat_ids

    def __call__(self, results):
        assert 'pts_semantic_mask' in results
        pts_semantic_mask = results['pts_semantic_mask']
        neg_cls = len(self.valid_cat_ids)

        for i in range(pts_semantic_mask.shape[0]):
            if pts_semantic_mask[i] in self.valid_cat_ids:
                converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
                pts_semantic_mask[i] = converted_id
            else:
                pts_semantic_mask[i] = neg_cls

        results['pts_semantic_mask'] = pts_semantic_mask
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
        return repr_str
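The mapping rule is easiest to see on a tiny array. A standalone numpy sketch of the same logic, with made-up category ids (it avoids the PIPELINES registry so it runs without mmdet installed):

import numpy as np

# Valid ids map to their position in valid_cat_ids; anything else maps to
# len(valid_cat_ids), which acts as the negative/ignore class.
valid_cat_ids = (3, 5, 7)
mask = np.array([3, 4, 5, 7, 9])
mapped = np.array([
    valid_cat_ids.index(c) if c in valid_cat_ids else len(valid_cat_ids)
    for c in mask
])
print(mapped)  # -> [0 3 1 2 3]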
mmdet3d/datasets/scannet_dataset.py

@@ -20,9 +20,10 @@ class ScanNetDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         test_mode)
+                         filter_empty_gt, test_mode)
 
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
mmdet3d/datasets/sunrgbd_dataset.py

@@ -16,9 +16,10 @@ class SUNRGBDDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         test_mode)
+                         filter_empty_gt, test_mode)
 
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
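Both dataset classes now accept filter_empty_gt and forward it to Custom3DDataset. A hedged config sketch exercising the new argument; the paths are placeholders, not values from this repository:

# Illustrative dataset config. filter_empty_gt defaults to True, so
# existing configs keep working unchanged.
train_dataset = dict(
    type='ScanNetDataset',
    data_root='data/scannet/',                        # placeholder path
    ann_file='data/scannet/scannet_infos_train.pkl',  # placeholder path
    pipeline=None,
    filter_empty_gt=True,   # skip training samples without gt boxes
    test_mode=False)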
mmdet3d/models/__init__.py

@@ -8,6 +8,7 @@ from .detectors import *  # noqa: F401,F403
 from .fusion_layers import *  # noqa: F401,F403
 from .losses import *  # noqa: F401,F403
 from .middle_encoders import *  # noqa: F401,F403
+from .model_utils import *  # noqa: F401,F403
 from .necks import *  # noqa: F401,F403
 from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS
 from .roi_heads import *  # noqa: F401,F403
mmdet3d/models/dense_heads/__init__.py

 from .anchor3d_head import Anchor3DHead
 from .parta2_rpn_head import PartA2RPNHead
+from .vote_head import VoteHead
 
-__all__ = ['Anchor3DHead', 'PartA2RPNHead']
+__all__ = ['Anchor3DHead', 'PartA2RPNHead', 'VoteHead']
mmdet3d/models/dense_heads/vote_head.py (new file, +518 lines)

This diff is collapsed in the commit view.
mmdet3d/models/detectors/__init__.py

@@ -4,10 +4,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
 from .mvx_single_stage import MVXSingleStageDetector
 from .mvx_two_stage import MVXTwoStageDetector
 from .parta2 import PartA2
+from .votenet import VoteNet
 from .voxelnet import DynamicVoxelNet, VoxelNet
 
 __all__ = [
     'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
     'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-    'DynamicMVXFasterRCNNV3', 'PartA2'
+    'DynamicMVXFasterRCNNV3', 'PartA2', 'VoteNet'
 ]
mmdet3d/models/detectors/votenet.py (new file)

import torch

from mmdet3d.core import bbox3d2result
from mmdet.models import DETECTORS, SingleStageDetector


@DETECTORS.register_module()
class VoteNet(SingleStageDetector):
    """VoteNet model.

    https://arxiv.org/pdf/1904.09664.pdf
    """

    def __init__(self,
                 backbone,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(VoteNet, self).__init__(
            backbone=backbone,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)

    def extract_feat(self, points):
        x = self.backbone(points)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self,
                      points,
                      img_meta,
                      gt_bboxes_3d,
                      gt_labels_3d,
                      pts_semantic_mask=None,
                      pts_instance_mask=None,
                      gt_bboxes_ignore=None):
        """Forward of training.

        Args:
            points (list[Tensor]): Points of each batch.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
            gt_labels_3d (list[Tensor]): gt class labels of each batch.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each batch.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each batch.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored.

        Returns:
            dict: Losses.
        """
        points_cat = torch.stack(points)  # tmp
        x = self.extract_feat(points_cat)
        bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
        loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
                       pts_instance_mask, img_meta)
        losses = self.bbox_head.loss(
            bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        return losses

    def forward_test(self, **kwargs):
        return self.simple_test(**kwargs)

    def forward(self, return_loss=True, **kwargs):
        if return_loss:
            return self.forward_train(**kwargs)
        else:
            return self.forward_test(**kwargs)

    def simple_test(self,
                    points,
                    img_meta,
                    gt_bboxes_3d=None,
                    gt_labels_3d=None,
                    pts_semantic_mask=None,
                    pts_instance_mask=None,
                    rescale=False):
        """Forward of testing.

        Args:
            points (list[Tensor]): Points of each sample.
            img_meta (list): Image metas.
            gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
            gt_labels_3d (list[Tensor]): gt class labels of each sample.
            pts_semantic_mask (None | list[Tensor]): point-wise semantic
                label of each sample.
            pts_instance_mask (None | list[Tensor]): point-wise instance
                label of each sample.
            rescale (bool): Whether to rescale results.

        Returns:
            list: Predicted 3d boxes.
        """
        points_cat = torch.stack(points)  # tmp
        x = self.extract_feat(points_cat)
        bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
        bbox_list = self.bbox_head.get_bboxes(
            points_cat, bbox_preds, img_meta, rescale=rescale)
        bbox_results = [
            bbox3d2result(bboxes, scores, labels)
            for bboxes, scores, labels in bbox_list
        ]
        return bbox_results[0]
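The forward() dispatch above follows the usual mmdetection convention of routing on return_loss. A hedged usage sketch; `model` and the data objects are assumed to be built elsewhere, and in practice the runner drives these calls through its data-parallel wrappers:

# Training step: forward(return_loss=True) routes to forward_train and
# returns a dict of losses from the VoteHead.
losses = model(
    return_loss=True,
    points=points,              # list[Tensor], one (N, C) cloud per sample
    img_meta=img_meta,          # list of meta dicts
    gt_bboxes_3d=gt_bboxes_3d,
    gt_labels_3d=gt_labels_3d)

# Inference: forward(return_loss=False) routes to simple_test and returns
# the bbox3d2result output for the first sample in the batch.
results = model(return_loss=False, points=points, img_meta=img_meta)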
mmdet3d/models/losses/__init__.py

 from mmdet.models.losses import FocalLoss, SmoothL1Loss, binary_cross_entropy
+from .chamfer_distance import ChamferDistance, chamfer_distance
 
-__all__ = ['FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy']
+__all__ = [
+    'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
+    'chamfer_distance'
+]
mmdet3d/models/losses/chamfer_distance.py (new file)

import torch
import torch.nn as nn
from torch.nn.functional import l1_loss, mse_loss, smooth_l1_loss

from mmdet.models.builder import LOSSES


def chamfer_distance(src,
                     dst,
                     src_weight=1.0,
                     dst_weight=1.0,
                     criterion_mode='l2',
                     reduction='mean'):
    """Calculate Chamfer Distance of two sets.

    Args:
        src (tensor): Source set with shape [B, N, C] to
            calculate Chamfer Distance.
        dst (tensor): Destination set with shape [B, M, C] to
            calculate Chamfer Distance.
        src_weight (tensor or float): Weight of source loss.
        dst_weight (tensor or float): Weight of destination loss.
        criterion_mode (str): Criterion mode to calculate distance.
            The valid modes are smooth_l1, l1 or l2.
        reduction (str): Method to reduce losses.
            The valid reduction methods are none, sum or mean.

    Returns:
        tuple: Source and Destination loss with indices.

            - loss_src (Tensor): The min distance from source to destination.
            - loss_dst (Tensor): The min distance from destination to source.
            - indices1 (Tensor): Index the min distance point for each point
                in source to destination.
            - indices2 (Tensor): Index the min distance point for each point
                in destination to source.
    """
    if criterion_mode == 'smooth_l1':
        criterion = smooth_l1_loss
    elif criterion_mode == 'l1':
        criterion = l1_loss
    elif criterion_mode == 'l2':
        criterion = mse_loss
    else:
        raise NotImplementedError

    src_expand = src.unsqueeze(2).repeat(1, 1, dst.shape[1], 1)
    dst_expand = dst.unsqueeze(1).repeat(1, src.shape[1], 1, 1)

    distance = criterion(src_expand, dst_expand, reduction='none').sum(-1)
    src2dst_distance, indices1 = torch.min(distance, dim=2)  # (B,N)
    dst2src_distance, indices2 = torch.min(distance, dim=1)  # (B,M)

    loss_src = (src2dst_distance * src_weight)
    loss_dst = (dst2src_distance * dst_weight)

    if reduction == 'sum':
        loss_src = torch.sum(loss_src)
        loss_dst = torch.sum(loss_dst)
    elif reduction == 'mean':
        loss_src = torch.mean(loss_src)
        loss_dst = torch.mean(loss_dst)
    elif reduction == 'none':
        pass
    else:
        raise NotImplementedError

    return loss_src, loss_dst, indices1, indices2


@LOSSES.register_module()
class ChamferDistance(nn.Module):
    """Calculate Chamfer Distance of two sets.

    Args:
        mode (str): Criterion mode to calculate distance.
            The valid modes are smooth_l1, l1 or l2.
        reduction (str): Method to reduce losses.
            The valid reduction methods are none, sum or mean.
        loss_src_weight (float): Weight of loss_source.
        loss_dst_weight (float): Weight of loss_target.
    """

    def __init__(self,
                 mode='l2',
                 reduction='mean',
                 loss_src_weight=1.0,
                 loss_dst_weight=1.0):
        super(ChamferDistance, self).__init__()

        assert mode in ['smooth_l1', 'l1', 'l2']
        assert reduction in ['none', 'sum', 'mean']
        self.mode = mode
        self.reduction = reduction
        self.loss_src_weight = loss_src_weight
        self.loss_dst_weight = loss_dst_weight

    def forward(self,
                source,
                target,
                src_weight=1.0,
                dst_weight=1.0,
                reduction_override=None,
                return_indices=False,
                **kwargs):
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)

        loss_source, loss_target, indices1, indices2 = chamfer_distance(
            source, target, src_weight, dst_weight, self.mode, reduction)

        loss_source *= self.loss_src_weight
        loss_target *= self.loss_dst_weight

        if return_indices:
            return loss_source, loss_target, indices1, indices2
        else:
            return loss_source, loss_target
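For intuition: with reduction='mean' the two returned losses are mean_i min_j d(src_i, dst_j) and mean_j min_i d(src_i, dst_j). Note also that the expand-and-repeat implementation materializes a (B, N, M, C) tensor, so memory grows with N*M. A small runnable sketch:

import torch
from mmdet3d.models.losses import chamfer_distance

# Two random point sets: (B, N, C) source and (B, M, C) destination.
src = torch.rand(2, 16, 3)
dst = torch.rand(2, 24, 3)

loss_src, loss_dst, idx1, idx2 = chamfer_distance(
    src, dst, criterion_mode='l2', reduction='mean')
# loss_src and loss_dst are scalars here; the indices are returned
# unreduced: idx1 has shape (2, 16), idx2 has shape (2, 24).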
mmdet3d/models/model_utils/__init__.py (new file)

from .vote_module import VoteModule

__all__ = ['VoteModule']
mmdet3d/ops/vote_module.py → mmdet3d/models/model_utils/vote_module.py

 import torch
 import torch.nn as nn
 from mmcv.cnn import ConvModule
-from torch.nn.functional import l1_loss, mse_loss, smooth_l1_loss
+
+from mmdet3d.models.builder import build_loss
 
 
 class VoteModule(nn.Module):

@@ -22,7 +23,7 @@ class VoteModule(nn.Module):
             Default: dict(type='BN1d').
         norm_feats (bool): Whether to normalize features.
             Default: True.
-        loss_weight (float): Weight of voting loss.
+        vote_loss (dict): config of vote loss.
     """
 
     def __init__(self,

@@ -33,13 +34,13 @@ class VoteModule(nn.Module):
                  conv_cfg=dict(type='Conv1d'),
                  norm_cfg=dict(type='BN1d'),
                  norm_feats=True,
-                 loss_weight=1.0):
+                 vote_loss=None):
         super().__init__()
         self.in_channels = in_channels
         self.vote_per_seed = vote_per_seed
         self.gt_per_seed = gt_per_seed
         self.norm_feats = norm_feats
-        self.loss_weight = loss_weight
+        self.vote_loss = build_loss(vote_loss)
 
         prev_channels = in_channels
         vote_conv_list = list()

@@ -118,57 +119,17 @@ class VoteModule(nn.Module):
         seed_gt_votes_mask = torch.gather(vote_targets_mask, 1,
                                           seed_indices).float()
-        pos_num = torch.sum(seed_gt_votes_mask)
         seed_indices_expand = seed_indices.unsqueeze(-1).repeat(
             1, 1, 3 * self.gt_per_seed)
         seed_gt_votes = torch.gather(vote_targets, 1, seed_indices_expand)
         seed_gt_votes += seed_points.repeat(1, 1, 3)
 
-        distance = self.nn_distance(
-            vote_points.view(batch_size * num_seed, -1, 3),
-            seed_gt_votes.view(batch_size * num_seed, -1, 3),
-            mode='l1')[2]
-        votes_distance = torch.min(distance, dim=1)[0]
-        votes_dist = votes_distance.view(batch_size, num_seed)
-        vote_loss = torch.sum(votes_dist * seed_gt_votes_mask) / (pos_num + 1e-6)
-        return self.loss_weight * vote_loss
-
-    def nn_distance(self, points1, points2, mode='smooth_l1'):
-        """Find the nearest neighbor from point1 to point2.
-
-        Args:
-            points1 (Tensor): points to find the Nearest neighbor.
-            points2 (Tensor): points to find the Nearest neighbor.
-            mode (str): Specify the function (smooth_l1, l1 or l2)
-                to calculate distance.
-
-        Returns:
-            tuple[Tensor]:
-
-                - distance1: the nearest distance from points1 to points2.
-                - index1: the index of the nearest neighbor for points1.
-                - distance2: the nearest distance from points2 to points1.
-                - index2: the index of the nearest neighbor for points2.
-        """
-        assert mode in ['smooth_l1', 'l1', 'l2']
-        N = points1.shape[1]
-        M = points2.shape[1]
-        pc1_expand_tile = points1.unsqueeze(2).repeat(1, 1, M, 1)
-        pc2_expand_tile = points2.unsqueeze(1).repeat(1, N, 1, 1)
-
-        if mode == 'smooth_l1':
-            pc_dist = torch.sum(
-                smooth_l1_loss(pc1_expand_tile, pc2_expand_tile), dim=-1)
-        elif mode == 'l1':
-            pc_dist = torch.sum(
-                l1_loss(pc1_expand_tile, pc2_expand_tile), dim=-1)  # (B,N,M)
-        elif mode == 'l2':
-            pc_dist = torch.sum(
-                mse_loss(pc1_expand_tile, pc2_expand_tile), dim=-1)  # (B,N,M)
-        else:
-            raise NotImplementedError
-
-        distance1, index1 = torch.min(pc_dist, dim=2)  # (B,N)
-        distance2, index2 = torch.min(pc_dist, dim=1)  # (B,M)
-        return distance1, index1, distance2, index2
+        weight = seed_gt_votes_mask / (torch.sum(seed_gt_votes_mask) + 1e-6)
+        distance = self.vote_loss(
+            vote_points.view(batch_size * num_seed, -1, 3),
+            seed_gt_votes.view(batch_size * num_seed, -1, 3),
+            dst_weight=weight.view(batch_size * num_seed, 1))[1]
+        vote_loss = torch.sum(torch.min(distance, dim=1)[0])
+
+        return vote_loss
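With this refactor the voting loss becomes a registered, configurable loss built via build_loss instead of the hard-coded nn_distance helper. A hedged sketch of a VoteModule config; the channel counts and loss settings are illustrative, not taken from this repository's configs:

vote_module_cfg = dict(
    in_channels=256,
    vote_per_seed=1,
    gt_per_seed=3,
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    norm_feats=True,
    # Any loss in the LOSSES registry works; ChamferDistance (added in this
    # commit) with reduction='none' returns the per-seed distances that
    # get_loss() then reduces with torch.min and torch.sum.
    vote_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='none',
        loss_dst_weight=10.0))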
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py

 import numpy as np
 import torch
 import torch.nn as nn
-from mmcv.cnn import ConvModule, build_norm_layer, normal_init, xavier_init
+from mmcv.cnn import ConvModule, normal_init, xavier_init
 
 import mmdet3d.ops.spconv as spconv
 from mmdet3d.core import build_bbox_coder, multi_apply
 from mmdet3d.core.bbox import box_torch_ops
 from mmdet3d.models.builder import build_loss
+from mmdet3d.ops import make_sparse_convmodule
 from mmdet3d.ops.iou3d.iou3d_utils import (boxes3d_to_bev_torch_lidar, nms_gpu,
                                            nms_normal_gpu)
 from mmdet.models import HEADS

@@ -78,19 +79,18 @@ class PartA2BboxHead(nn.Module):
         assert down_conv_channels[-1] == shared_fc_channels[0]
 
         # init layers
-        block = self.post_act_block
         part_channel_last = part_in_channels
         part_conv = []
         for i, channel in enumerate(part_conv_channels):
             part_conv.append(
-                block(
-                    part_channel_last,
-                    channel,
-                    3,
-                    padding=1,
-                    norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_part{i}'))
+                make_sparse_convmodule(
+                    part_channel_last,
+                    channel,
+                    3,
+                    padding=1,
+                    norm_cfg=norm_cfg,
+                    indice_key=f'rcnn_part{i}',
+                    conv_type='SubMConv3d'))
             part_channel_last = channel
         self.part_conv = spconv.SparseSequential(*part_conv)

@@ -98,13 +98,14 @@ class PartA2BboxHead(nn.Module):
         seg_conv = []
         for i, channel in enumerate(seg_conv_channels):
             seg_conv.append(
-                block(
-                    seg_channel_last,
-                    channel,
-                    3,
-                    padding=1,
-                    norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_seg{i}'))
+                make_sparse_convmodule(
+                    seg_channel_last,
+                    channel,
+                    3,
+                    padding=1,
+                    norm_cfg=norm_cfg,
+                    indice_key=f'rcnn_seg{i}',
+                    conv_type='SubMConv3d'))
             seg_channel_last = channel
         self.seg_conv = spconv.SparseSequential(*seg_conv)

@@ -114,26 +115,28 @@ class PartA2BboxHead(nn.Module):
         merge_conv = []
         for i, channel in enumerate(merge_conv_channels):
             merge_conv.append(
-                block(
-                    merge_conv_channel_last,
-                    channel,
-                    3,
-                    padding=1,
-                    norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down0'))
+                make_sparse_convmodule(
+                    merge_conv_channel_last,
+                    channel,
+                    3,
+                    padding=1,
+                    norm_cfg=norm_cfg,
+                    indice_key=f'rcnn_down0',
+                    conv_type='SubMConv3d'))
             merge_conv_channel_last = channel
 
         down_conv_channel_last = merge_conv_channel_last
         conv_down = []
         for i, channel in enumerate(down_conv_channels):
             conv_down.append(
-                block(
-                    down_conv_channel_last,
-                    channel,
-                    3,
-                    padding=1,
-                    norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down1'))
+                make_sparse_convmodule(
+                    down_conv_channel_last,
+                    channel,
+                    3,
+                    padding=1,
+                    norm_cfg=norm_cfg,
+                    indice_key=f'rcnn_down1',
+                    conv_type='SubMConv3d'))
             down_conv_channel_last = channel
 
         self.conv_down.add_module('merge_conv', ...

@@ -228,69 +231,6 @@ class PartA2BboxHead(nn.Module):
         normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)
 
-    def post_act_block(self,
-                       in_channels,
-                       out_channels,
-                       kernel_size,
-                       indice_key,
-                       stride=1,
-                       padding=0,
-                       conv_type='subm',
-                       norm_cfg=None):
-        """Make post activate sparse convolution block.
-
-        Args:
-            in_channels (int): the number of input channels
-            out_channels (int): the number of out channels
-            kernel_size (int): kernel size of convolution
-            indice_key (str): the indice key used for sparse tensor
-            stride (int): the stride of convolution
-            padding (int or list[int]): the padding number of input
-            conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
-            norm_cfg (dict[str]): config of normalization layer
-
-        Returns:
-            spconv.SparseSequential: post activate sparse convolution block.
-        """
-        # TODO: clean post_act_block by existing bottlnecks.
-        assert conv_type in ['subm', 'spconv', 'inverseconv']
-
-        if conv_type == 'subm':
-            m = spconv.SparseSequential(
-                spconv.SubMConv3d(
-                    in_channels,
-                    out_channels,
-                    kernel_size,
-                    bias=False,
-                    indice_key=indice_key),
-                build_norm_layer(norm_cfg, out_channels)[1],
-                nn.ReLU(inplace=True))
-        elif conv_type == 'spconv':
-            m = spconv.SparseSequential(
-                spconv.SparseConv3d(
-                    in_channels,
-                    out_channels,
-                    kernel_size,
-                    stride=stride,
-                    padding=padding,
-                    bias=False,
-                    indice_key=indice_key),
-                build_norm_layer(norm_cfg, out_channels)[1],
-                nn.ReLU(inplace=True))
-        elif conv_type == 'inverseconv':
-            m = spconv.SparseSequential(
-                spconv.SparseInverseConv3d(
-                    in_channels,
-                    out_channels,
-                    kernel_size,
-                    bias=False,
-                    indice_key=indice_key),
-                build_norm_layer(norm_cfg, out_channels)[1],
-                nn.ReLU(inplace=True))
-        else:
-            raise NotImplementedError
-        return m
-
     def forward(self, seg_feats, part_feats):
         # (B * N, out_x, out_y, out_z, 4)
         rcnn_batch_size = part_feats.shape[0]
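The local post_act_block helper is retired in favor of the shared make_sparse_convmodule op (re-exported from mmdet3d.ops; see the ops/__init__.py diff below). A hedged sketch of a single replacement call, using only the keyword arguments that appear in this diff:

from mmdet3d.ops import make_sparse_convmodule

# One submanifold conv + norm + activation block, equivalent in spirit to
# the removed post_act_block(..., conv_type='subm') branch. Channel counts
# are illustrative.
block = make_sparse_convmodule(
    16,                      # in_channels
    32,                      # out_channels
    3,                       # kernel_size
    padding=1,
    norm_cfg=dict(type='BN1d'),
    indice_key='rcnn_part0',
    conv_type='SubMConv3d')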
mmdet3d/ops/__init__.py

@@ -9,11 +9,10 @@ from .group_points import (GroupAll, QueryAndGroup, group_points,
 from .interpolate import three_interpolate, three_nn
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
 from .pointnet_modules import PointFPModule, PointSAModule, PointSAModuleMSG
-from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
-                              points_in_boxes_gpu)
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
+                              points_in_boxes_cpu, points_in_boxes_gpu)
 from .sparse_block import (SparseBasicBlock, SparseBottleneck,
                            make_sparse_convmodule)
-from .vote_module import VoteModule
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
 
 __all__ = [

@@ -26,5 +25,5 @@ __all__ = [
     'make_sparse_convmodule', 'ball_query', 'furthest_point_sample',
     'three_interpolate', 'three_nn', 'gather_points', 'grouping_operation',
     'group_points', 'GroupAll', 'QueryAndGroup', 'PointSAModule',
-    'PointSAModuleMSG', 'PointFPModule', 'VoteModule'
+    'PointSAModuleMSG', 'PointFPModule', 'points_in_boxes_batch'
 ]
mmdet3d/ops/ball_query/src/ball_query.cpp

-#include <torch/serialize/tensor.h>
-#include <vector>
 #include <THC/THC.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+#include <vector>
 
 extern THCState *state;
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
-#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+#define CHECK_CUDA(x) \
+  TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+  CHECK_CUDA(x);       \
+  CHECK_CONTIGUOUS(x)
 
 int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
                        at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,
                        at::Tensor idx_tensor);
 
 void ball_query_kernel_launcher(int b, int n, int m, float radius, int nsample,
                                 const float *xyz, const float *new_xyz,
                                 int *idx, cudaStream_t stream);
 
 int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
                        at::Tensor new_xyz_tensor, at::Tensor xyz_tensor,
                        at::Tensor idx_tensor) {
   CHECK_INPUT(new_xyz_tensor);
   CHECK_INPUT(xyz_tensor);
-  const float *new_xyz = new_xyz_tensor.data<float>();
-  const float *xyz = xyz_tensor.data<float>();
-  int *idx = idx_tensor.data<int>();
+  const float *new_xyz = new_xyz_tensor.data_ptr<float>();
+  const float *xyz = xyz_tensor.data_ptr<float>();
+  int *idx = idx_tensor.data_ptr<int>();
 
   cudaStream_t stream = THCState_getCurrentStream(state);
   ball_query_kernel_launcher(b, n, m, radius, nsample, new_xyz, xyz, idx,
                              stream);
   return 1;
 }
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("ball_query_wrapper", &ball_query_wrapper, "ball_query_wrapper");
 }
mmdet3d/ops/ball_query/src/ball_query_cuda.cu

@@ -3,11 +3,13 @@
 #include <stdlib.h>
 
 #define THREADS_PER_BLOCK 256
-#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
 
 __global__ void ball_query_kernel(int b, int n, int m, float radius,
                                   int nsample,
                                   const float *__restrict__ new_xyz,
                                   const float *__restrict__ xyz,
                                   int *__restrict__ idx) {
   // new_xyz: (B, M, 3)
   // xyz: (B, N, 3)
   // output:

@@ -30,9 +32,10 @@ __global__ void ball_query_kernel(int b, int n, int m, float radius, int nsample
     float x = xyz[k * 3 + 0];
     float y = xyz[k * 3 + 1];
     float z = xyz[k * 3 + 2];
-    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
-    if (d2 < radius2){
-      if (cnt == 0){
+    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
+               (new_z - z) * (new_z - z);
+    if (d2 < radius2) {
+      if (cnt == 0) {
         for (int l = 0; l < nsample; ++l) {
           idx[l] = k;
         }

@@ -44,9 +47,9 @@ __global__ void ball_query_kernel(int b, int n, int m, float radius, int nsample
   }
 }
 
 void ball_query_kernel_launcher(int b, int n, int m, float radius, int nsample,
                                 const float *new_xyz, const float *xyz,
                                 int *idx, cudaStream_t stream) {
   // new_xyz: (B, M, 3)
   // xyz: (B, N, 3)
   // output:

@@ -54,10 +57,12 @@ void ball_query_kernel_launcher(int b, int n, int m, float radius, int nsample,
   cudaError_t err;
 
   dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b);  // blockIdx.x(col), blockIdx.y(row)
   dim3 threads(THREADS_PER_BLOCK);
 
-  ball_query_kernel<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
+  ball_query_kernel<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample,
+                                                    new_xyz, xyz, idx);
   // cudaDeviceSynchronize();  // for using printf in kernel function
   err = cudaGetLastError();
   if (cudaSuccess != err) {
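The launcher's grid size comes from the DIVUP macro: ceil-division of the m query points into 256-thread blocks. A Python mirror of the macro for sanity-checking:

# Python mirror of: #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
def divup(m: int, n: int) -> int:
    return m // n + (m % n > 0)

THREADS_PER_BLOCK = 256
assert divup(1000, THREADS_PER_BLOCK) == 4   # 1000 queries -> 4 blocks
assert divup(1024, THREADS_PER_BLOCK) == 4   # exact multiples don't round up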
mmdet3d/ops/furthest_point_sample/src/furthest_point_sample.cpp

-#include <torch/serialize/tensor.h>
 #include <ATen/cuda/CUDAContext.h>
-#include <vector>
 #include <THC/THC.h>
 #include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+#include <vector>
 
 extern THCState *state;
 
 int furthest_point_sampling_wrapper(int b, int n, int m,
                                     at::Tensor points_tensor,
                                     at::Tensor temp_tensor,
                                     at::Tensor idx_tensor);
 
 void furthest_point_sampling_kernel_launcher(int b, int n, int m,
                                              const float *dataset, float *temp,
                                              int *idxs, cudaStream_t stream);
 
 int furthest_point_sampling_wrapper(int b, int n, int m,
                                     at::Tensor points_tensor,
                                     at::Tensor temp_tensor,
                                     at::Tensor idx_tensor) {
-  const float *points = points_tensor.data<float>();
-  float *temp = temp_tensor.data<float>();
-  int *idx = idx_tensor.data<int>();
+  const float *points = points_tensor.data_ptr<float>();
+  float *temp = temp_tensor.data_ptr<float>();
+  int *idx = idx_tensor.data_ptr<int>();
 
   cudaStream_t stream = THCState_getCurrentStream(state);
   furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);

@@ -26,5 +30,6 @@ int furthest_point_sampling_wrapper(int b, int n, int m,
 }
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper");
+  m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper,
+        "furthest_point_sampling_wrapper");
 }
mmdet3d/ops/furthest_point_sample/src/furthest_point_sample_cuda.cu

@@ -3,7 +3,7 @@
 #define TOTAL_THREADS 1024
 #define THREADS_PER_BLOCK 256
-#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
 
 inline int opt_n_threads(int work_size) {
   const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);

@@ -11,7 +11,8 @@ inline int opt_n_threads(int work_size) {
   return max(min(1 << pow_2, TOTAL_THREADS), 1);
 }
 
-__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
+__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
+                         int idx1, int idx2) {
   const float v1 = dists[idx1], v2 = dists[idx2];
   const int i1 = dists_i[idx1], i2 = dists_i[idx2];
   dists[idx1] = max(v1, v2);

@@ -19,8 +20,9 @@ __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, i
 }
 
 template <unsigned int block_size>
-__global__ void furthest_point_sampling_kernel(int b, int n, int m,
-                                               const float *__restrict__ dataset,
-                                               float *__restrict__ temp,
-                                               int *__restrict__ idxs) {
+__global__ void furthest_point_sampling_kernel(
+    int b, int n, int m, const float *__restrict__ dataset,
+    float *__restrict__ temp, int *__restrict__ idxs) {
   // dataset: (B, N, 3)
   // tmp: (B, N)
   // output:

@@ -39,8 +41,7 @@ __global__ void furthest_point_sampling_kernel(int b, int n, int m,
   const int stride = block_size;
 
   int old = 0;
-  if (threadIdx.x == 0)
-    idxs[0] = old;
+  if (threadIdx.x == 0) idxs[0] = old;
 
   __syncthreads();
   for (int j = 1; j < m; j++) {

@@ -58,7 +59,8 @@ __global__ void furthest_point_sampling_kernel(int b, int n, int m,
       // if (mag <= 1e-3)
       // continue;
 
-      float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
+      float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) +
+                (z2 - z1) * (z2 - z1);
 
       float d2 = min(d, temp[k]);
       temp[k] = d2;
       besti = d2 > best ? k : besti;

@@ -131,13 +133,13 @@ __global__ void furthest_point_sampling_kernel(int b, int n, int m,
     }
     old = dists_i[0];
     if (tid == 0)
       idxs[j] = old;
   }
 }
 
 void furthest_point_sampling_kernel_launcher(int b, int n, int m,
                                              const float *dataset, float *temp,
                                              int *idxs, cudaStream_t stream) {
   // dataset: (B, N, 3)
   // tmp: (B, N)
   // output:

@@ -148,29 +150,52 @@ void furthest_point_sampling_kernel_launcher(int b, int n, int m,
   switch (n_threads) {
     case 1024:
-      furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<1024>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
     case 512:
-      furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<512>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
     case 256:
-      furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<256>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
     case 128:
-      furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<128>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
     case 64:
-      furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<64>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
    case 32:
-      furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<32>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
       break;
     case 16:
-      furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<16>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 8:
-      furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<8>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 4:
-      furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<4>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 2:
-      furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<2>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 1:
-      furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<1>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    default:
-      furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
+      furthest_point_sampling_kernel<512>
+          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
   }
 
   err = cudaGetLastError();
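The block size fed into the switch above comes from opt_n_threads: round the point count down to a power of two and clamp to [1, TOTAL_THREADS]. A Python mirror of that heuristic:

import math

# Mirrors: pow_2 = log(work_size) / log(2.0); max(min(1 << pow_2, 1024), 1)
def opt_n_threads(work_size: int, total_threads: int = 1024) -> int:
    pow_2 = int(math.log(work_size) / math.log(2.0))
    return max(min(1 << pow_2, total_threads), 1)

assert opt_n_threads(300) == 256     # 2^8 is the largest power of two <= 300
assert opt_n_threads(5000) == 1024   # clamped to TOTAL_THREADS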
mmdet3d/ops/gather_points/src/gather_points.cpp

-#include <torch/serialize/tensor.h>
 #include <ATen/cuda/CUDAContext.h>
-#include <vector>
 #include <THC/THC.h>
 #include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+#include <vector>
 
 extern THCState *state;
 
 int gather_points_wrapper(int b, int c, int n, int npoints,
                           at::Tensor points_tensor, at::Tensor idx_tensor,
                           at::Tensor out_tensor);
 
 void gather_points_kernel_launcher(int b, int c, int n, int npoints,
                                    const float *points, const int *idx,
                                    float *out, cudaStream_t stream);
 
 int gather_points_grad_wrapper(int b, int c, int n, int npoints,
                                at::Tensor grad_out_tensor,
                                at::Tensor idx_tensor,
                                at::Tensor grad_points_tensor);
 
 void gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,
                                         const float *grad_out, const int *idx,
                                         float *grad_points,
                                         cudaStream_t stream);
 
 int gather_points_wrapper(int b, int c, int n, int npoints,
                           at::Tensor points_tensor, at::Tensor idx_tensor,
-                          at::Tensor out_tensor){
-  const float *points = points_tensor.data<float>();
-  const int *idx = idx_tensor.data<int>();
-  float *out = out_tensor.data<float>();
+                          at::Tensor out_tensor) {
+  const float *points = points_tensor.data_ptr<float>();
+  const int *idx = idx_tensor.data_ptr<int>();
+  float *out = out_tensor.data_ptr<float>();
 
   cudaStream_t stream = THCState_getCurrentStream(state);
   gather_points_kernel_launcher(b, c, n, npoints, points, idx, out, stream);
   return 1;
 }
 
 int gather_points_grad_wrapper(int b, int c, int n, int npoints,
                                at::Tensor grad_out_tensor,
                                at::Tensor idx_tensor,
                                at::Tensor grad_points_tensor) {
-  const float *grad_out = grad_out_tensor.data<float>();
-  const int *idx = idx_tensor.data<int>();
-  float *grad_points = grad_points_tensor.data<float>();
+  const float *grad_out = grad_out_tensor.data_ptr<float>();
+  const int *idx = idx_tensor.data_ptr<int>();
+  float *grad_points = grad_points_tensor.data_ptr<float>();
 
   cudaStream_t stream = THCState_getCurrentStream(state);
   gather_points_grad_kernel_launcher(b, c, n, npoints, grad_out, idx,
                                      grad_points, stream);
   return 1;
 }
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("gather_points_wrapper", &gather_points_wrapper, "gather_points_wrapper");
-  m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper, "gather_points_grad_wrapper");
+  m.def("gather_points_wrapper", &gather_points_wrapper,
+        "gather_points_wrapper");
+  m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper,
+        "gather_points_grad_wrapper");
 }
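Putting the ops together: a hedged Python-side sketch chaining furthest point sampling and point gathering, both of which are re-exported from mmdet3d.ops per the __init__.py diff above. The shapes follow the common PointNet++ calling convention and are an assumption, not verified against this revision; a CUDA build of the extensions is required.

import torch
from mmdet3d.ops import furthest_point_sample, gather_points

xyz = torch.rand(2, 1024, 3).cuda()      # (B, N, 3) point coordinates
feats = torch.rand(2, 64, 1024).cuda()   # (B, C, N) per-point features
idx = furthest_point_sample(xyz, 128)    # (B, 128) indices of FPS samples
sampled = gather_points(feats, idx)      # (B, 64, 128) gathered features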