Commit e0d892c7 authored by liyinhao

Merge branch 'master_temp' into indoor_loading

# Conflicts:
#	tools/data_converter/sunrgbd_data_utils.py
parents 929ebfe8 f584b970
import torch.nn as nn
from mmcv.cnn import build_norm_layer
import mmdet3d.ops.spconv as spconv
from mmdet.ops import build_norm_layer
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module
@MIDDLE_ENCODERS.register_module()
class SparseEncoder(nn.Module):
def __init__(self,
......
import torch
import torch.nn as nn
from mmcv.cnn import build_norm_layer
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module()
class SparseUNet(nn.Module):
def __init__(self,
in_channels,
output_shape,
pre_act=False,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):
"""SparseUNet for PartA^2
See https://arxiv.org/abs/1907.03670 for more details.
Args:
in_channels (int): the number of input channels
output_shape (list[int]): the shape of output tensor
pre_act (bool): use pre_act_block or post_act_block
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
decoder_channels (tuple[tuple[int]]):
conv channels of each decode block
decoder_paddings (tuple[tuple[int]]): paddings of each decode block
"""
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.in_channels = in_channels
self.pre_act = pre_act
self.base_channels = base_channels
self.output_channels = output_channels
self.encoder_channels = encoder_channels
self.encoder_paddings = encoder_paddings
self.decoder_channels = decoder_channels
self.decoder_paddings = decoder_paddings
self.stage_num = len(self.encoder_channels)
# Spconv initializes all weights on its own
if pre_act:
# TODO: use ConvModule to encapsulate
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'))
make_block = self.pre_act_block
else:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'),
build_norm_layer(norm_cfg, self.base_channels)[1], nn.ReLU())
make_block = self.post_act_block
encoder_out_channels = self.make_encoder_layers(
make_block, norm_cfg, self.base_channels)
self.make_decoder_layers(make_block, norm_cfg, encoder_out_channels)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
encoder_out_channels,
self.output_channels, (3, 1, 1),
stride=(2, 1, 1),
padding=0,
bias=False,
indice_key='spconv_down2'),
build_norm_layer(norm_cfg, self.output_channels)[1],
nn.ReLU())
def forward(self, voxel_features, coors, batch_size):
"""Forward of SparseUNet
Args:
voxel_features (torch.float32): shape [N, C]
coors (torch.int32): shape [N, 4] (batch_idx, z_idx, y_idx, x_idx)
batch_size (int): batch size
Returns:
dict: backbone features
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
self.sparse_shape,
batch_size)
x = self.conv_input(input_sp_tensor)
encode_features = []
for encoder_layer in self.encoder_layers:
x = encoder_layer(x)
encode_features.append(x)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(encode_features[-1])
spatial_features = out.dense()
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
# for segmentation head, with output shape:
# [400, 352, 11] <- [200, 176, 5]
# [800, 704, 21] <- [400, 352, 11]
# [1600, 1408, 41] <- [800, 704, 21]
# [1600, 1408, 41] <- [1600, 1408, 41]
decode_features = []
x = encode_features[-1]
for i in range(self.stage_num, 0, -1):
x = self.decoder_layer_forward(encode_features[i - 1], x,
getattr(self, f'lateral_layer{i}'),
getattr(self, f'merge_layer{i}'),
getattr(self, f'upsample_layer{i}'))
decode_features.append(x)
seg_features = decode_features[-1].features
ret = dict(
spatial_features=spatial_features, seg_features=seg_features)
return ret
def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,
merge_layer, upsample_layer):
"""Forward of upsample and residual block.
Args:
x_lateral (SparseConvTensor): lateral tensor
x_bottom (SparseConvTensor): feature from bottom layer
lateral_layer (SparseBasicBlock): convolution for lateral tensor
merge_layer (SparseSequential): convolution for merging features
upsample_layer (SparseSequential): convolution for upsampling
Returns:
SparseConvTensor: upsampled feature
"""
x = lateral_layer(x_lateral)
x.features = torch.cat((x_bottom.features, x.features), dim=1)
x_merge = merge_layer(x)
x = self.reduce_channel(x, x_merge.features.shape[1])
x.features = x_merge.features + x.features
x = upsample_layer(x)
return x
@staticmethod
def reduce_channel(x, out_channels):
"""reduce channel for element-wise addition.
Args:
x (SparseConvTensor): x.features (N, C1)
out_channels (int): the number of channel after reduction
Returns:
SparseConvTensor: channel reduced feature
"""
features = x.features
n, in_channels = features.shape
assert (in_channels % out_channels
== 0) and (in_channels >= out_channels)
x.features = features.view(n, out_channels, -1).sum(dim=2)
return x
def pre_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make pre activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict): config of normalization layer
Returns:
spconv.SparseSequential: pre-activation sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key))
else:
raise NotImplementedError
return m
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make post activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict[str]): config of normalization layer
Returns:
spconv.SparseSequential: post-activation sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
else:
raise NotImplementedError
return m
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
"""make encoder layers using sparse convs
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
self.encoder_layers = spconv.SparseSequential()
for i, blocks in enumerate(self.encoder_channels):
blocks_list = []
for j, out_channels in enumerate(tuple(blocks)):
padding = tuple(self.encoder_paddings[i])[j]
# each stage starts with a spconv layer
# except the first stage
if i != 0 and j == 0:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
stride=2,
padding=padding,
indice_key=f'spconv{i + 1}',
conv_type='spconv'))
else:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
padding=padding,
indice_key=f'subm{i + 1}'))
in_channels = out_channels
stage_name = f'encoder_layer{i + 1}'
stage_layers = spconv.SparseSequential(*blocks_list)
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
def make_decoder_layers(self, make_block, norm_cfg, in_channels):
"""make decoder layers using sparse convs
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
block_num = len(self.decoder_channels)
for i, block_channels in enumerate(self.decoder_channels):
paddings = self.decoder_paddings[i]
setattr(
self, f'lateral_layer{block_num - i}',
SparseBasicBlock(
in_channels,
block_channels[0],
conv_cfg=dict(
type='SubMConv3d', indice_key=f'subm{block_num - i}'),
norm_cfg=norm_cfg))
setattr(
self, f'merge_layer{block_num - i}',
make_block(
in_channels * 2,
block_channels[1],
3,
norm_cfg=norm_cfg,
padding=paddings[0],
indice_key=f'subm{block_num - i}'))
if block_num - i != 1:
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key=f'spconv{block_num - i}',
conv_type='inverseconv'))
else:
# use submanifold conv instead of inverse conv
# in the last block
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key='subm1',
conv_type='subm'))
in_channels = block_channels[2]
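As an aside, here is a minimal dense-tensor sketch (illustrative only, not part of this commit) of the channel-reduction trick used by reduce_channel above; it assumes in_channels is an integer multiple of out_channels, which the assertion in reduce_channel enforces:
import torch
feats = torch.randn(5, 128)                 # (n, in_channels)
reduced = feats.view(5, 64, -1).sum(dim=2)  # fold 128 channels into 64 by summing pairs
assert reduced.shape == (5, 64)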
......@@ -2,16 +2,15 @@ from functools import partial
import torch
import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
from mmcv.cnn import build_norm_layer, constant_init, kaiming_init
from torch.nn import Sequential
from torch.nn.modules.batchnorm import _BatchNorm
from mmdet.models import NECKS
from mmdet.ops import build_norm_layer
from .. import builder
@NECKS.register_module
@NECKS.register_module()
class SECONDFPN(nn.Module):
"""Compare with RPN, RPNV2 support arbitrary number of stage.
"""
......@@ -64,7 +63,7 @@ class SECONDFPN(nn.Module):
return [out]
@NECKS.register_module
@NECKS.register_module()
class SECONDFusionFPN(SECONDFPN):
"""Compare with RPN, RPNV2 support arbitrary number of stage.
"""
......
from mmdet.utils import Registry
from mmcv.utils import Registry
VOXEL_ENCODERS = Registry('voxel_encoder')
MIDDLE_ENCODERS = Registry('middle_encoder')
......
from .mask_heads import PointwiseSemanticHead
__all__ = ['PointwiseSemanticHead']
from mmdet.models.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead, Shared2FCBBoxHead,
from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead,
Shared2FCBBoxHead,
Shared4Conv1FCBBoxHead)
__all__ = [
......
from .pointwise_semantic_head import PointwiseSemanticHead
__all__ = ['PointwiseSemanticHead']
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmdet3d.core import multi_apply
from mmdet3d.core.bbox import box_torch_ops
from mmdet3d.models.builder import build_loss
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
from mmdet.models import HEADS
@HEADS.register_module()
class PointwiseSemanticHead(nn.Module):
"""Semantic segmentation head for point-wise segmentation.
Predict point-wise segmentation and part regression results for PartA2.
See https://arxiv.org/abs/1907.03670 for more details.
Args:
in_channels (int): the number of input channels.
num_classes (int): the number of classes.
extra_width (float): width by which the gt boxes are enlarged.
loss_seg (dict): Config of segmentation loss.
loss_part (dict): Config of part prediction loss.
"""
def __init__(self,
in_channels,
num_classes=3,
extra_width=0.2,
seg_score_thr=0.3,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0)):
super(PointwiseSemanticHead, self).__init__()
self.extra_width = extra_width
self.num_classes = num_classes
self.seg_score_thr = seg_score_thr
self.seg_cls_layer = nn.Linear(in_channels, 1, bias=True)
self.seg_reg_layer = nn.Linear(in_channels, 3, bias=True)
self.loss_seg = build_loss(loss_seg)
self.loss_part = build_loss(loss_part)
def forward(self, x):
seg_preds = self.seg_cls_layer(x) # (N, 1)
part_preds = self.seg_reg_layer(x) # (N, 3)
seg_scores = torch.sigmoid(seg_preds).detach()
seg_mask = (seg_scores > self.seg_score_thr)
part_offsets = torch.sigmoid(part_preds).clone().detach()
part_offsets[seg_mask.view(-1) == 0] = 0
part_feats = torch.cat((part_offsets, seg_scores),
dim=-1) # shape (npoints, 4)
return dict(
seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)
def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
"""generate segmentation and part prediction targets
Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3],
the center of voxels
gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes
gt_labels_3d (torch.Tensor): shape [box_num], class label of gt
Returns:
tuple: segmentation targets with shape [voxel_num] and
part prediction targets with shape [voxel_num, 3]
"""
enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar(
gt_bboxes_3d, extra_width=self.extra_width)
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32)
box_idx = points_in_boxes_gpu(
voxel_centers.unsqueeze(0),
gt_bboxes_3d.unsqueeze(0)).squeeze(0) # -1 ~ box_num
enlarge_box_idx = points_in_boxes_gpu(
voxel_centers.unsqueeze(0),
enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long() # -1 ~ box_num
gt_labels_pad = F.pad(
gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
seg_targets = gt_labels_pad[(box_idx.long() + 1)]
fg_pt_flag = box_idx > -1
ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
seg_targets[ignore_flag] = -1
for k in range(gt_bboxes_3d.shape[0]):
k_box_flag = box_idx == k
# no points in current box (caused by velodyne reduce)
if not k_box_flag.any():
continue
fg_voxels = voxel_centers[k_box_flag]
transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3]
transformed_voxels = box_torch_ops.rotation_3d_in_axis(
transformed_voxels.unsqueeze(0),
-gt_bboxes_3d[k, 6].view(1),
axis=2)
part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[
k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0])
part_targets = torch.clamp(part_targets, min=0)
return seg_targets, part_targets
def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
batch_size = len(gt_labels_3d)
voxel_center_list = []
for idx in range(batch_size):
coords_idx = voxels_dict['coors'][:, 0] == idx
voxel_center_list.append(voxels_dict['voxel_centers'][coords_idx])
seg_targets, part_targets = multi_apply(self.get_targets_single,
voxel_center_list,
gt_bboxes_3d, gt_labels_3d)
seg_targets = torch.cat(seg_targets, dim=0)
part_targets = torch.cat(part_targets, dim=0)
return dict(seg_targets=seg_targets, part_targets=part_targets)
def loss(self, seg_preds, part_preds, seg_targets, part_targets):
"""Calculate point-wise segmentation and part prediction losses.
Args:
seg_preds (torch.Tensor): prediction of binary
segmentation with shape [voxel_num, 1].
part_preds (torch.Tensor): prediction of part
with shape [voxel_num, 3].
seg_targets (torch.Tensor): target of segmentation
with shape [voxel_num, 1].
part_targets (torch.Tensor): target of part with
shape [voxel_num, 3].
Returns:
dict: loss of segmentation and part prediction.
"""
pos_mask = (seg_targets > -1) & (seg_targets < self.num_classes)
binary_seg_target = pos_mask.long()
pos = pos_mask.float()
neg = (seg_targets == self.num_classes).float()
seg_weights = pos + neg
pos_normalizer = pos.sum()
seg_weights = seg_weights / torch.clamp(pos_normalizer, min=1.0)
loss_seg = self.loss_seg(seg_preds, binary_seg_target, seg_weights)
if pos_normalizer > 0:
loss_part = self.loss_part(part_preds[pos_mask],
part_targets[pos_mask])
else:
# fake a part loss
loss_part = loss_seg.new_tensor(0)
return dict(loss_seg=loss_seg, loss_part=loss_part)
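A small sanity check (illustrative only, not part of this commit) of the weighting scheme in loss above: ignored points (seg_targets == -1) get zero weight, while foreground and explicit background points are both normalized by the positive count:
import torch
num_classes = 3
seg_targets = torch.tensor([0, 2, -1, num_classes, num_classes])
pos_mask = (seg_targets > -1) & (seg_targets < num_classes)
pos, neg = pos_mask.float(), (seg_targets == num_classes).float()
seg_weights = (pos + neg) / torch.clamp(pos.sum(), min=1.0)
# -> [0.5, 0.5, 0.0, 0.5, 0.5]: the ignored point is dropped,
#    the rest are scaled by 1 / num_positives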
from mmdet.models.roi_extractors.single_level import SingleRoIExtractor
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
__all__ = ['SingleRoIExtractor']
from mmdet.models.utils import ResLayer, bias_init_with_prob
__all__ = ['bias_init_with_prob', 'ResLayer']
import numpy as np
import torch.nn as nn
def xavier_init(module, gain=1, bias=0, distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
nn.init.xavier_uniform_(module.weight, gain=gain)
else:
nn.init.xavier_normal_(module.weight, gain=gain)
if hasattr(module, 'bias'):
nn.init.constant_(module.bias, bias)
def normal_init(module, mean=0, std=1, bias=0):
nn.init.normal_(module.weight, mean, std)
if hasattr(module, 'bias'):
nn.init.constant_(module.bias, bias)
def uniform_init(module, a=0, b=1, bias=0):
nn.init.uniform_(module.weight, a, b)
if hasattr(module, 'bias'):
nn.init.constant_(module.bias, bias)
def kaiming_init(module,
mode='fan_out',
nonlinearity='relu',
bias=0,
distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
nn.init.kaiming_uniform_(
module.weight, mode=mode, nonlinearity=nonlinearity)
else:
nn.init.kaiming_normal_(
module.weight, mode=mode, nonlinearity=nonlinearity)
if hasattr(module, 'bias'):
nn.init.constant_(module.bias, bias)
def bias_init_with_prob(prior_prob):
""" initialize conv/fc bias value according to giving probablity"""
bias_init = float(-np.log((1 - prior_prob) / prior_prob))
return bias_init
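A quick numeric check (illustrative, not part of this commit) that bias_init_with_prob does what its docstring says: with the bias set to -log((1 - p) / p), the sigmoid output starts near p:
import numpy as np
p = 0.01
bias = float(-np.log((1 - p) / p))   # ~ -4.595
print(1.0 / (1.0 + np.exp(-bias)))   # ~ 0.01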
import torch
from mmcv.cnn import build_norm_layer
from torch import nn
from mmdet3d.ops import DynamicScatter
from mmdet.ops import build_norm_layer
from ..registry import VOXEL_ENCODERS
from .utils import PFNLayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class PillarFeatureNet(nn.Module):
def __init__(self,
......@@ -118,7 +118,7 @@ class PillarFeatureNet(nn.Module):
return features.squeeze()
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class DynamicPillarFeatureNet(PillarFeatureNet):
def __init__(self,
......@@ -237,7 +237,7 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class AlignedPillarFeatureNet(nn.Module):
def __init__(self,
......
import torch
from mmcv.cnn import build_norm_layer
from torch import nn
from torch.nn import functional as F
from mmdet.ops import build_norm_layer
class Empty(nn.Module):
......
import torch
from mmcv.cnn import build_norm_layer
from torch import nn
from torch.nn import functional as F
from mmdet3d.ops import DynamicScatter
from mmdet.ops import build_norm_layer
from .. import builder
from ..registry import VOXEL_ENCODERS
from .utils import Empty, VFELayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractor(nn.Module):
def __init__(self,
......@@ -71,7 +71,7 @@ class VoxelFeatureExtractor(nn.Module):
return voxelwise
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
......@@ -132,7 +132,7 @@ class VoxelFeatureExtractorV2(nn.Module):
return voxelwise
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractorV3(nn.Module):
def __init__(self,
......@@ -152,7 +152,7 @@ class VoxelFeatureExtractorV3(nn.Module):
return points_mean.contiguous()
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class DynamicVFEV3(nn.Module):
def __init__(self,
......@@ -170,7 +170,7 @@ class DynamicVFEV3(nn.Module):
return features, features_coors
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class DynamicVFE(nn.Module):
def __init__(self,
......@@ -318,7 +318,7 @@ class DynamicVFE(nn.Module):
return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module
@VOXEL_ENCODERS.register_module()
class HardVFE(nn.Module):
def __init__(self,
......
......@@ -2,12 +2,28 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
get_compiling_cuda_version, nms, roi_align,
sigmoid_focal_loss)
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
SparseBottleneck, SparseBottleneckV0)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
__all__ = [
'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
'get_compiling_cuda_version', 'build_conv_layer', 'NaiveSyncBatchNorm1d',
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss'
'nms',
'soft_nms',
'RoIAlign',
'roi_align',
'get_compiler_version',
'get_compiling_cuda_version',
'NaiveSyncBatchNorm1d',
'NaiveSyncBatchNorm2d',
'batched_nms',
'Voxelization',
'voxelization',
'dynamic_scatter',
'DynamicScatter',
'sigmoid_focal_loss',
'SigmoidFocalLoss',
'SparseBasicBlockV0',
'SparseBottleneckV0',
'SparseBasicBlock',
'SparseBottleneck',
]
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#include <vector>
#define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_ERROR(ans) \
{ gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line,
bool abort = true) {
if (code != cudaSuccess) {
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
line);
if (abort) exit(code);
}
}
const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap);
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou);
void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh);
void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh);
int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
void boxesoverlapLauncher(const int num_a, const float *boxes_a,
const int num_b, const float *boxes_b,
float *ans_overlap);
void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,
const float *boxes_b, float *ans_iou);
void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,
float nms_overlap_thresh);
void nmsNormalLauncher(const float *boxes, unsigned long long *mask,
int boxes_num, float nms_overlap_thresh);
int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,
at::Tensor ans_overlap) {
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5)
// params ans_overlap: (N, M)
......@@ -40,16 +51,18 @@ int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans
int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0);
const float * boxes_a_data = boxes_a.data<float>();
const float * boxes_b_data = boxes_b.data<float>();
float * ans_overlap_data = ans_overlap.data<float>();
const float *boxes_a_data = boxes_a.data_ptr<float>();
const float *boxes_b_data = boxes_b.data_ptr<float>();
float *ans_overlap_data = ans_overlap.data_ptr<float>();
boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data);
boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data,
ans_overlap_data);
return 1;
}
int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){
int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,
at::Tensor ans_iou) {
// params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
// params boxes_b: (M, 5)
// params ans_overlap: (N, M)
......@@ -61,16 +74,16 @@ int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou
int num_a = boxes_a.size(0);
int num_b = boxes_b.size(0);
const float * boxes_a_data = boxes_a.data<float>();
const float * boxes_b_data = boxes_b.data<float>();
float * ans_iou_data = ans_iou.data<float>();
const float *boxes_a_data = boxes_a.data_ptr<float>();
const float *boxes_b_data = boxes_b.data_ptr<float>();
float *ans_iou_data = ans_iou.data_ptr<float>();
boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);
return 1;
}
int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry]
// params keep: (N)
......@@ -78,21 +91,24 @@ int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
CHECK_CONTIGUOUS(keep);
int boxes_num = boxes.size(0);
const float * boxes_data = boxes.data<float>();
long * keep_data = keep.data<long>();
const float *boxes_data = boxes.data_ptr<float>();
long *keep_data = keep.data_ptr<long>();
const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
unsigned long long *mask_data = NULL;
CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
CHECK_ERROR(cudaMalloc((void **)&mask_data,
boxes_num * col_blocks * sizeof(unsigned long long)));
nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
// unsigned long long mask_cpu[boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num *
// col_blocks];
std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,
boxes_num * col_blocks * sizeof(unsigned long long),
cudaMemcpyDeviceToHost));
cudaFree(mask_data);
......@@ -102,25 +118,25 @@ int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
int num_to_keep = 0;
for (int i = 0; i < boxes_num; i++){
for (int i = 0; i < boxes_num; i++) {
int nblock = i / THREADS_PER_BLOCK_NMS;
int inblock = i % THREADS_PER_BLOCK_NMS;
if (!(remv_cpu[nblock] & (1ULL << inblock))){
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
keep_data[num_to_keep++] = i;
unsigned long long *p = &mask_cpu[0] + i * col_blocks;
for (int j = nblock; j < col_blocks; j++){
for (int j = nblock; j < col_blocks; j++) {
remv_cpu[j] |= p[j];
}
}
}
if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" );
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
return num_to_keep;
}
int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
int nms_normal_gpu(at::Tensor boxes, at::Tensor keep,
float nms_overlap_thresh) {
// params boxes: (N, 5) [x1, y1, x2, y2, ry]
// params keep: (N)
......@@ -128,21 +144,24 @@ int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
CHECK_CONTIGUOUS(keep);
int boxes_num = boxes.size(0);
const float * boxes_data = boxes.data<float>();
long * keep_data = keep.data<long>();
const float *boxes_data = boxes.data_ptr<float>();
long *keep_data = keep.data_ptr<long>();
const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
unsigned long long *mask_data = NULL;
CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
CHECK_ERROR(cudaMalloc((void **)&mask_data,
boxes_num * col_blocks * sizeof(unsigned long long)));
nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
// unsigned long long mask_cpu[boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num *
// col_blocks];
std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data,
boxes_num * col_blocks * sizeof(unsigned long long),
cudaMemcpyDeviceToHost));
cudaFree(mask_data);
......@@ -152,27 +171,26 @@ int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
int num_to_keep = 0;
for (int i = 0; i < boxes_num; i++){
for (int i = 0; i < boxes_num; i++) {
int nblock = i / THREADS_PER_BLOCK_NMS;
int inblock = i % THREADS_PER_BLOCK_NMS;
if (!(remv_cpu[nblock] & (1ULL << inblock))){
if (!(remv_cpu[nblock] & (1ULL << inblock))) {
keep_data[num_to_keep++] = i;
unsigned long long *p = &mask_cpu[0] + i * col_blocks;
for (int j = nblock; j < col_blocks; j++){
for (int j = nblock; j < col_blocks; j++) {
remv_cpu[j] |= p[j];
}
}
}
if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" );
if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
return num_to_keep;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap");
m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu,
"oriented boxes overlap");
m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou");
m.def("nms_gpu", &nms_gpu, "oriented nms gpu");
m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu");
......
import torch
import torch.distributed as dist
import torch.nn as nn
from mmcv.cnn import NORM_LAYERS
from torch.autograd.function import Function
from mmdet.ops.norm import norm_cfg
class AllReduce(Function):
......@@ -24,6 +23,7 @@ class AllReduce(Function):
return grad_output
@NORM_LAYERS.register_module('naiveSyncBN1d')
class NaiveSyncBatchNorm1d(nn.BatchNorm1d):
"""Syncronized Batch Normalization for 3D Tensors
......@@ -68,6 +68,7 @@ class NaiveSyncBatchNorm1d(nn.BatchNorm1d):
return input * scale + bias
@NORM_LAYERS.register_module('naiveSyncBN2d')
class NaiveSyncBatchNorm2d(nn.BatchNorm2d):
"""Syncronized Batch Normalization for 4D Tensors
......@@ -110,10 +111,3 @@ class NaiveSyncBatchNorm2d(nn.BatchNorm2d):
scale = scale.reshape(1, -1, 1, 1)
bias = bias.reshape(1, -1, 1, 1)
return input * scale + bias
norm_cfg.update({
'BN1d': ('bn', nn.BatchNorm1d),
'naiveSyncBN2d': ('bn', NaiveSyncBatchNorm2d),
'naiveSyncBN1d': ('bn', NaiveSyncBatchNorm1d),
})
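For context, a hedged sketch of how the registered norm layers are meant to be consumed after this change: build_norm_layer from mmcv.cnn looks up the type in NORM_LAYERS and returns a (name, module) tuple, which is why the code in this commit indexes [1]. This assumes mmdet3d.ops.norm has been imported so the register_module decorators have run:
from mmcv.cnn import build_norm_layer
import mmdet3d.ops.norm  # noqa: F401, registers naiveSyncBN1d / naiveSyncBN2d
norm_cfg = dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)
name, norm_layer = build_norm_layer(norm_cfg, 64)  # e.g. ('bn', NaiveSyncBatchNorm1d(64, ...))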
......@@ -4,12 +4,14 @@ from . import roiaware_pool3d_ext
def points_in_boxes_gpu(points, boxes):
"""
"""Find points that are in boxes (CUDA)
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
(x, y, z) is the bottom center
Returns:
box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
"""
......@@ -27,14 +29,20 @@ def points_in_boxes_gpu(points, boxes):
def points_in_boxes_cpu(points, boxes):
"""
"""Find points that are in boxes (CPU)
Note: Currently, the output of this function is different from that of
points_in_boxes_gpu.
Args:
points (torch.Tensor): [npoints, 3]
boxes (torch.Tensor): [N, 7], in LiDAR coordinate,
(x, y, z) is the bottom center
Returns:
point_indices (torch.Tensor): (N, npoints)
"""
# TODO: Refactor this function as a CPU version of points_in_boxes_gpu
assert boxes.shape[1] == 7
assert points.shape[1] == 3
......
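A minimal shape sketch of points_in_boxes_gpu (illustrative, assumes a CUDA build of the extension is available): one batch, four points, and a single 2x2x2 box whose bottom center sits at the origin, so only points inside that box receive index 0 and the rest get -1:
import torch
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
points = torch.tensor([[[0.0, 0.0, 0.5], [5.0, 5.0, 5.0],
                        [0.5, -0.5, 1.0], [0.0, 0.0, 3.0]]]).cuda()  # (1, 4, 3)
boxes = torch.tensor([[[0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 0.0]]]).cuda()  # (1, 1, 7)
box_idx = points_in_boxes_gpu(points, boxes)  # shape (1, 4)
# expected something like tensor([[0, -1, 0, -1]])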
......@@ -10,7 +10,8 @@ class RoIAwarePool3d(nn.Module):
def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
super().__init__()
"""
"""RoIAwarePool3d module
Args:
out_size (int or tuple): n or [n1, n2, n3]
max_pts_per_voxel (int): m
......@@ -23,12 +24,14 @@ class RoIAwarePool3d(nn.Module):
self.mode = pool_method_map[mode]
def forward(self, rois, pts, pts_feature):
"""
"""RoIAwarePool3d module forward
Args:
rois (torch.Tensor): [N, 7],in LiDAR coordinate,
(x, y, z) is the bottom center of rois
pts (torch.Tensor): [npoints, 3]
pts_feature (torch.Tensor): [npoints, C]
Returns:
pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
"""
......@@ -43,7 +46,8 @@ class RoIAwarePool3dFunction(Function):
@staticmethod
def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
mode):
"""
"""RoIAwarePool3d function forward
Args:
rois (torch.Tensor): [N, 7], in LiDAR coordinate,
(x, y, z) is the bottom center of rois
......@@ -52,6 +56,7 @@ class RoIAwarePool3dFunction(Function):
out_size (int or tuple): n or [n1, n2, n3]
max_pts_per_voxel (int): m
mode (int): 0 (max pool) or 1 (average pool)
Returns:
pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
"""
......@@ -84,11 +89,12 @@ class RoIAwarePool3dFunction(Function):
@staticmethod
def backward(ctx, grad_out):
"""
"""RoIAwarePool3d function forward
Args:
grad_out: [N, out_x, out_y, out_z, C]
grad_out (torch.Tensor): [N, out_x, out_y, out_z, C]
Returns:
grad_in: [npoints, C]
grad_in (torch.Tensor): [npoints, C]
"""
ret = ctx.roiaware_pool3d_for_backward
pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret
......
//Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//Points in boxes cpu
//Written by Shaoshuai Shi
//All Rights Reserved 2019.
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Points in boxes cpu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
// #define DEBUG
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, float &local_x, float &local_y){
inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
float &local_x, float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
......@@ -24,10 +23,11 @@ inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, fl
local_y = shift_x * sina + shift_y * cosa;
}
inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, float &local_x, float &local_y){
inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
......@@ -35,15 +35,16 @@ inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, float &loc
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) & (local_y > -w / 2.0) & (local_y < w / 2.0);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
}
int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor pts_indices_tensor){
// params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_indices: (N, npoints)
int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
at::Tensor pts_indices_tensor) {
// params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the
// bottom center, each box DO NOT overlaps params pts: (npoints, 3) [x, y, z]
// in LiDAR coordinate params pts_indices: (N, npoints)
CHECK_CONTIGUOUS(boxes_tensor);
CHECK_CONTIGUOUS(pts_tensor);
......@@ -52,14 +53,15 @@ int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tens
int boxes_num = boxes_tensor.size(0);
int pts_num = pts_tensor.size(0);
const float *boxes = boxes_tensor.data<float>();
const float *pts = pts_tensor.data<float>();
int *pts_indices = pts_indices_tensor.data<int>();
const float *boxes = boxes_tensor.data_ptr<float>();
const float *pts = pts_tensor.data_ptr<float>();
int *pts_indices = pts_indices_tensor.data_ptr<int>();
float local_x = 0, local_y = 0;
for (int i = 0; i < boxes_num; i++){
for (int j = 0; j < pts_num; j++){
int cur_in_flag = check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
for (int i = 0; i < boxes_num; i++) {
for (int j = 0; j < pts_num; j++) {
int cur_in_flag =
check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
pts_indices[i * pts_num + j] = cur_in_flag;
}
}
......