Commit df7e4e30 authored by wuyuefeng's avatar wuyuefeng Committed by zhangwenwei
Browse files

Feat clean sparse block

parent 97e4ed42
......@@ -49,9 +49,8 @@ model = dict(
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=128,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
pts_backbone=dict(
type='SECOND',
in_channels=256,
......
......@@ -18,9 +18,8 @@ model = dict(
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
......
......@@ -18,9 +18,8 @@ model = dict(
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
......
......@@ -18,8 +18,8 @@ model = dict(
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
......
......@@ -18,8 +18,8 @@ model = dict(
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
......
......@@ -18,9 +18,8 @@ model = dict(
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
......
import torch.nn as nn
from mmcv.cnn import build_norm_layer
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import make_sparse_convmodule
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module()
class SparseEncoder(nn.Module):
"""Sparse encoder for Second
See https://arxiv.org/abs/1907.03670 for more detials.
Args:
in_channels (int): the number of input channels
sparse_shape (list[int]): the sparse shape of input tensor
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
"""
def __init__(self,
in_channels,
output_shape,
pre_act,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)):
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1))):
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.sparse_shape = sparse_shape
self.in_channels = in_channels
self.pre_act = pre_act
self.order = order
self.base_channels = base_channels
self.output_channels = output_channels
self.encoder_channels = encoder_channels
self.encoder_paddings = encoder_paddings
self.stage_num = len(self.encoder_channels)
# Spconv init all weight on its own
# TODO: make the network could be modified
if pre_act:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
16,
3,
padding=1,
bias=False,
indice_key='subm1'), )
block = self.pre_act_block
else:
norm_name, norm_layer = build_norm_layer(norm_cfg, 16)
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
assert isinstance(order, tuple) and len(order) == 3
assert set(order) == {'conv', 'norm', 'act'}
if self.order[0] != 'conv': # pre activate
self.conv_input = make_sparse_convmodule(
in_channels,
16,
3,
padding=1,
bias=False,
indice_key='subm1'),
norm_layer,
nn.ReLU(),
)
block = self.post_act_block
self.conv1 = spconv.SparseSequential(
block(16, 16, 3, norm_cfg=norm_cfg, padding=1,
indice_key='subm1'), )
self.conv2 = spconv.SparseSequential(
# [1600, 1408, 41] -> [800, 704, 21]
block(
16,
32,
self.base_channels,
3,
norm_cfg=norm_cfg,
stride=2,
padding=1,
indice_key='spconv2',
conv_type='spconv'),
block(32, 32, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm2'),
block(32, 32, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm2'),
)
self.conv3 = spconv.SparseSequential(
# [800, 704, 21] -> [400, 352, 11]
block(
32,
64,
indice_key='subm1',
conv_type='SubMConv3d',
order=('conv', ))
else: # post activate
self.conv_input = make_sparse_convmodule(
in_channels,
self.base_channels,
3,
norm_cfg=norm_cfg,
stride=2,
padding=1,
indice_key='spconv3',
conv_type='spconv'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm3'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm3'),
)
self.conv4 = spconv.SparseSequential(
# [400, 352, 11] -> [200, 176, 5]
block(
64,
64,
3,
norm_cfg=norm_cfg,
stride=2,
padding=(0, 1, 1),
indice_key='spconv4',
conv_type='spconv'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm4'),
block(64, 64, 3, norm_cfg=norm_cfg, padding=1, indice_key='subm4'),
)
norm_name, norm_layer = build_norm_layer(norm_cfg, 128)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
128,
128, (3, 1, 1),
indice_key='subm1',
conv_type='SubMConv3d')
encoder_out_channels = self.make_encoder_layers(
make_sparse_convmodule, norm_cfg, self.base_channels)
self.conv_out = make_sparse_convmodule(
encoder_out_channels,
self.output_channels,
kernel_size=(3, 1, 1),
stride=(2, 1, 1),
norm_cfg=norm_cfg,
padding=0,
bias=False,
indice_key='spconv_down2'),
norm_layer,
nn.ReLU(),
)
indice_key='spconv_down2',
conv_type='SparseConv3d')
def forward(self, voxel_features, coors, batch_size):
"""
:param voxel_features: (N, C)
:param coors: (N, 4) [batch_idx, z_idx, y_idx, x_idx]
:param batch_size:
:return:
"""Forward of SparseEncoder
Args:
voxel_features (torch.float32): shape [N, C]
coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx)
batch_size (int): batch size
Returns:
dict: backbone features
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
......@@ -122,14 +97,14 @@ class SparseEncoder(nn.Module):
batch_size)
x = self.conv_input(input_sp_tensor)
x_conv1 = self.conv1(x)
x_conv2 = self.conv2(x_conv1)
x_conv3 = self.conv3(x_conv2)
x_conv4 = self.conv4(x_conv3)
encode_features = []
for encoder_layer in self.encoder_layers:
x = encoder_layer(x)
encode_features.append(x)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(x_conv4)
out = self.conv_out(encode_features[-1])
spatial_features = out.dense()
N, C, D, H, W = spatial_features.shape
......@@ -137,79 +112,48 @@ class SparseEncoder(nn.Module):
return spatial_features
def pre_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
norm_name, norm_layer = build_norm_layer(norm_cfg, in_channels)
if conv_type == 'subm':
m = spconv.SparseSequential(
norm_layer,
nn.ReLU(inplace=True),
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
padding=padding,
bias=False,
indice_key=indice_key),
)
elif conv_type == 'spconv':
m = spconv.SparseSequential(
norm_layer,
nn.ReLU(inplace=True),
spconv.SparseConv3d(
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
"""make encoder layers using sparse convs
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
self.encoder_layers = spconv.SparseSequential()
for i, blocks in enumerate(self.encoder_channels):
blocks_list = []
for j, out_channels in enumerate(tuple(blocks)):
padding = tuple(self.encoder_paddings[i])[j]
# each stage started with a spconv layer
# except the first stage
if i != 0 and j == 0:
blocks_list.append(
make_block(
in_channels,
out_channels,
kernel_size,
stride=stride,
3,
norm_cfg=norm_cfg,
stride=2,
padding=padding,
bias=False,
indice_key=indice_key),
)
indice_key=f'spconv{i + 1}',
conv_type='SparseConv3d'))
else:
raise NotImplementedError
return m
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
blocks_list.append(
make_block(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
norm_layer,
nn.ReLU(inplace=True),
)
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
3,
norm_cfg=norm_cfg,
padding=padding,
bias=False,
indice_key=indice_key),
norm_layer,
nn.ReLU(inplace=True),
)
else:
raise NotImplementedError
return m
indice_key=f'subm{i + 1}',
conv_type='SubMConv3d'))
in_channels = out_channels
stage_name = f'encoder_layer{i + 1}'
stage_layers = spconv.SparseSequential(*blocks_list)
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
import torch
import torch.nn as nn
from mmcv.cnn import build_norm_layer
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock
from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module()
class SparseUNet(nn.Module):
def __init__(self,
in_channels,
output_shape,
pre_act=False,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):
"""SparseUNet for PartA^2
See https://arxiv.org/abs/1907.03670 for more detials.
Args:
in_channels (int): the number of input channels
output_shape (list[int]): the shape of output tensor
pre_act (bool): use pre_act_block or post_act_block
sparse_shape (list[int]): the sparse shape of input tensor
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
......@@ -42,11 +25,25 @@ class SparseUNet(nn.Module):
conv channels of each decode block
decoder_paddings (tuple[tuple[int]]): paddings of each decode block
"""
def __init__(self,
in_channels,
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.sparse_shape = sparse_shape
self.in_channels = in_channels
self.pre_act = pre_act
self.order = order
self.base_channels = base_channels
self.output_channels = output_channels
self.encoder_channels = encoder_channels
......@@ -56,44 +53,43 @@ class SparseUNet(nn.Module):
self.stage_num = len(self.encoder_channels)
# Spconv init all weight on its own
if pre_act:
# TODO: use ConvModule to encapsulate
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
assert isinstance(order, tuple) and len(order) == 3
assert set(order) == {'conv', 'norm', 'act'}
if self.order[0] != 'conv': # pre activate
self.conv_input = make_sparse_convmodule(
in_channels,
self.base_channels,
3,
norm_cfg=norm_cfg,
padding=1,
bias=False,
indice_key='subm1'))
make_block = self.pre_act_block
else:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
indice_key='subm1',
conv_type='SubMConv3d',
order=('conv', ))
else: # post activate
self.conv_input = make_sparse_convmodule(
in_channels,
self.base_channels,
3,
norm_cfg=norm_cfg,
padding=1,
bias=False,
indice_key='subm1'),
build_norm_layer(norm_cfg, self.base_channels)[1], nn.ReLU())
make_block = self.post_act_block
indice_key='subm1',
conv_type='SubMConv3d')
encoder_out_channels = self.make_encoder_layers(
make_block, norm_cfg, self.base_channels)
self.make_decoder_layers(make_block, norm_cfg, encoder_out_channels)
make_sparse_convmodule, norm_cfg, self.base_channels)
self.make_decoder_layers(make_sparse_convmodule, norm_cfg,
encoder_out_channels)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
self.conv_out = make_sparse_convmodule(
encoder_out_channels,
self.output_channels, (3, 1, 1),
self.output_channels,
kernel_size=(3, 1, 1),
stride=(2, 1, 1),
norm_cfg=norm_cfg,
padding=0,
bias=False,
indice_key='spconv_down2'),
build_norm_layer(norm_cfg, self.output_channels)[1],
nn.ReLU())
indice_key='spconv_down2',
conv_type='SparseConv3d')
def forward(self, voxel_features, coors, batch_size):
"""Forward of SparseUNet
......@@ -187,133 +183,6 @@ class SparseUNet(nn.Module):
x.features = features.view(n, out_channels, -1).sum(dim=2)
return x
def pre_act_block(self,
                  in_channels,
                  out_channels,
                  kernel_size,
                  indice_key=None,
                  stride=1,
                  padding=0,
                  conv_type='subm',
                  norm_cfg=None):
    """Make a pre-activation sparse convolution block (norm -> relu -> conv).

    Args:
        in_channels (int): the number of input channels
        out_channels (int): the number of out channels
        kernel_size (int): kernel size of convolution
        indice_key (str): the indice key used for sparse tensor
        stride (int): the stride of convolution
        padding (int or list[int]): the padding number of input
        conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
        norm_cfg (dict): config of normalization layer

    Returns:
        spconv.SparseSequential: pre activate sparse convolution block.
    """
    # TODO: use ConvModule to encapsulate
    assert conv_type in ['subm', 'spconv', 'inverseconv']
    # Pre-activation: normalization acts on the *input* channels, before
    # the convolution.
    norm_layer = build_norm_layer(norm_cfg, in_channels)[1]
    if conv_type == 'subm':
        conv = spconv.SubMConv3d(
            in_channels,
            out_channels,
            kernel_size,
            padding=padding,
            bias=False,
            indice_key=indice_key)
    elif conv_type == 'spconv':
        # Only the strided sparse conv honours `stride`.
        conv = spconv.SparseConv3d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            indice_key=indice_key)
    else:  # 'inverseconv' (guaranteed by the assert above)
        # Inverse conv derives its geometry from `indice_key`, so no
        # stride/padding arguments are passed.
        conv = spconv.SparseInverseConv3d(
            in_channels,
            out_channels,
            kernel_size,
            bias=False,
            indice_key=indice_key)
    return spconv.SparseSequential(norm_layer, nn.ReLU(inplace=True), conv)
def post_act_block(self,
                   in_channels,
                   out_channels,
                   kernel_size,
                   indice_key,
                   stride=1,
                   padding=0,
                   conv_type='subm',
                   norm_cfg=None):
    """Make a post-activation sparse convolution block (conv -> norm -> relu).

    Args:
        in_channels (int): the number of input channels
        out_channels (int): the number of out channels
        kernel_size (int): kernel size of convolution
        indice_key (str): the indice key used for sparse tensor
        stride (int): the stride of convolution
        padding (int or list[int]): the padding number of input
        conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
        norm_cfg (dict[str]): config of normalization layer

    Returns:
        spconv.SparseSequential: post activate sparse convolution block.
    """
    # TODO: use ConvModule to encapsulate
    assert conv_type in ['subm', 'spconv', 'inverseconv']
    if conv_type == 'subm':
        conv = spconv.SubMConv3d(
            in_channels,
            out_channels,
            kernel_size,
            bias=False,
            indice_key=indice_key)
    elif conv_type == 'spconv':
        # Only the strided sparse conv honours `stride`/`padding`.
        conv = spconv.SparseConv3d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            indice_key=indice_key)
    else:  # 'inverseconv' (guaranteed by the assert above)
        # Inverse conv derives its geometry from `indice_key`.
        conv = spconv.SparseInverseConv3d(
            in_channels,
            out_channels,
            kernel_size,
            bias=False,
            indice_key=indice_key)
    # Post-activation: normalization acts on the conv *output* channels.
    norm_layer = build_norm_layer(norm_cfg, out_channels)[1]
    return spconv.SparseSequential(conv, norm_layer, nn.ReLU(inplace=True))
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
"""make encoder layers using sparse convs
......@@ -326,6 +195,7 @@ class SparseUNet(nn.Module):
int: the number of encoder output channels
"""
self.encoder_layers = spconv.SparseSequential()
for i, blocks in enumerate(self.encoder_channels):
blocks_list = []
for j, out_channels in enumerate(tuple(blocks)):
......@@ -342,7 +212,7 @@ class SparseUNet(nn.Module):
stride=2,
padding=padding,
indice_key=f'spconv{i + 1}',
conv_type='spconv'))
conv_type='SparseConv3d'))
else:
blocks_list.append(
make_block(
......@@ -351,7 +221,8 @@ class SparseUNet(nn.Module):
3,
norm_cfg=norm_cfg,
padding=padding,
indice_key=f'subm{i + 1}'))
indice_key=f'subm{i + 1}',
conv_type='SubMConv3d'))
in_channels = out_channels
stage_name = f'encoder_layer{i + 1}'
stage_layers = spconv.SparseSequential(*blocks_list)
......@@ -388,7 +259,8 @@ class SparseUNet(nn.Module):
3,
norm_cfg=norm_cfg,
padding=paddings[0],
indice_key=f'subm{block_num - i}'))
indice_key=f'subm{block_num - i}',
conv_type='SubMConv3d'))
if block_num - i != 1:
setattr(
self, f'upsample_layer{block_num - i}',
......@@ -397,9 +269,8 @@ class SparseUNet(nn.Module):
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key=f'spconv{block_num - i}',
conv_type='inverseconv'))
conv_type='SparseInverseConv3d'))
else:
# use submanifold conv instead of inverse conv
# in the last block
......@@ -412,5 +283,5 @@ class SparseUNet(nn.Module):
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key='subm1',
conv_type='subm'))
conv_type='SubMConv3d'))
in_channels = block_channels[2]
......@@ -4,8 +4,8 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
points_in_boxes_gpu)
from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
SparseBottleneck, SparseBottleneckV0)
from .sparse_block import (SparseBasicBlock, SparseBottleneck,
make_sparse_convmodule)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
__all__ = [
......@@ -13,7 +13,7 @@ __all__ = [
'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
'points_in_boxes_gpu', 'points_in_boxes_cpu'
'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck',
'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu',
'make_sparse_convmodule'
]
from mmcv.cnn import build_norm_layer
from mmcv.cnn import build_conv_layer, build_norm_layer
from torch import nn
from mmdet3d.ops import spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
    """3x3 submanifold sparse convolution with padding.

    Args:
        in_planes (int): the number of input channels
        out_planes (int): the number of output channels
        stride (int): the stride of convolution
        indice_key (str): the indice key used for sparse tensor

    Returns:
        spconv.conv.SubMConv3d: 3x3 submanifold sparse convolution ops
    """
    # TODO: deprecate this class
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        indice_key=indice_key)
    return spconv.SubMConv3d(in_planes, out_planes, **conv_kwargs)
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
    """1x1 submanifold sparse convolution with padding.

    Args:
        in_planes (int): the number of input channels
        out_planes (int): the number of output channels
        stride (int): the stride of convolution
        indice_key (str): the indice key used for sparse tensor

    Returns:
        spconv.conv.SubMConv3d: 1x1 submanifold sparse convolution ops
    """
    # TODO: deprecate this class
    # NOTE(review): padding=1 looks odd for a 1x1 kernel — presumably
    # harmless for submanifold convs, but confirm it is intentional.
    conv_kwargs = dict(
        kernel_size=1,
        stride=stride,
        padding=1,
        bias=False,
        indice_key=indice_key)
    return spconv.SubMConv3d(in_planes, out_planes, **conv_kwargs)
class SparseBasicBlockV0(spconv.SparseModule):
    # Residual basic block built from two 3x3 submanifold sparse convs.
    # Marked for deprecation (see TODO below).

    # Output channel multiplier: out_channels = planes * expansion.
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 indice_key=None,
                 norm_cfg=None):
        """Sparse basic block for PartA^2.

        Sparse basic block implemented with submanifold sparse convolution.

        Args:
            inplanes (int): the number of input channels
            planes (int): the number of output channels
            stride (int): stride of the first 3x3 convolution
            downsample (callable, optional): transforms the input to produce
                the residual identity when shapes would otherwise mismatch
            indice_key (str): the indice key used for sparse tensor
            norm_cfg (dict): config of normalization layer
        """
        # TODO: deprecate this class
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
        norm_name1, norm_layer1 = build_norm_layer(norm_cfg, planes)
        self.bn1 = norm_layer1
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
        norm_name2, norm_layer2 = build_norm_layer(norm_cfg, planes)
        self.bn2 = norm_layer2
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """conv-bn-relu, conv-bn, residual add, relu.

        Args:
            x (spconv.SparseConvTensor): input whose ``features`` is a 2D
                (N, C) tensor.

        Returns:
            spconv.SparseConvTensor: output sparse tensor with updated
                features.
        """
        identity = x.features
        assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
        out = self.conv1(x)
        # Norm/activation run on the (N, C) feature matrix and are written
        # back onto the sparse tensor in place.
        out.features = self.bn1(out.features)
        out.features = self.relu(out.features)
        out = self.conv2(out)
        out.features = self.bn2(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        # Residual connection operates on raw feature tensors.
        out.features += identity
        out.features = self.relu(out.features)
        return out
class SparseBottleneckV0(spconv.SparseModule):
    # Residual bottleneck (1x1 -> 3x3 -> 1x1) built from submanifold
    # sparse convs. Marked for deprecation (see TODO below).

    # Output channel multiplier: out_channels = planes * expansion.
    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 indice_key=None,
                 norm_fn=None):
        """Sparse bottleneck block for PartA^2.

        Bottleneck block implemented with submanifold sparse convolution.

        Args:
            inplanes (int): the number of input channels
            planes (int): number of middle channels; the block outputs
                ``planes * expansion`` channels
            stride (int): stride of the middle 3x3 convolution
            downsample (callable, optional): transforms the input to produce
                the residual identity when shapes would otherwise mismatch
            indice_key (str): the indice key used for sparse tensor
            norm_fn (callable): factory mapping a channel count to a norm
                layer (note: a callable, not a ``norm_cfg`` dict)
        """
        # TODO: deprecate this class
        super().__init__()
        self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
        self.bn1 = norm_fn(planes)
        self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
        self.bn2 = norm_fn(planes)
        self.conv3 = conv1x1(
            planes, planes * self.expansion, indice_key=indice_key)
        self.bn3 = norm_fn(planes * self.expansion)
        self.relu = nn.ReLU()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """1x1/3x3/1x1 convs with norm+relu, residual add, final relu.

        Args:
            x (spconv.SparseConvTensor): input sparse tensor.

        Returns:
            spconv.SparseConvTensor: output sparse tensor with updated
                features.
        """
        identity = x.features
        out = self.conv1(x)
        # Norm/activation run on the feature matrix and are written back
        # onto the sparse tensor in place.
        out.features = self.bn1(out.features)
        out.features = self.relu(out.features)
        out = self.conv2(out)
        out.features = self.bn2(out.features)
        out.features = self.relu(out.features)
        out = self.conv3(out)
        out.features = self.bn3(out.features)
        if self.downsample is not None:
            identity = self.downsample(x)
        # Residual connection operates on raw feature tensors.
        out.features += identity
        out.features = self.relu(out.features)
        return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
......@@ -238,3 +95,67 @@ class SparseBasicBlock(BasicBlock, spconv.SparseModule):
out.features = self.relu(out.features)
return out
def make_sparse_convmodule(in_channels,
                           out_channels,
                           kernel_size,
                           indice_key,
                           stride=1,
                           padding=0,
                           conv_type='SubMConv3d',
                           norm_cfg=None,
                           order=('conv', 'norm', 'act')):
    """Make sparse convolution module.

    Args:
        in_channels (int): the number of input channels
        out_channels (int): the number of out channels
        kernel_size (int|tuple(int)): kernel size of convolution
        indice_key (str): the indice key used for sparse tensor
        stride (int|tuple(int)): the stride of convolution
        padding (int or list[int]): the padding number of input
        conv_type (str): sparse conv type in spconv
        norm_cfg (dict[str]): config of normalization layer
        order (tuple[str]): The order of conv/norm/activation layers. It is a
            sequence of "conv", "norm" and "act". Common examples are
            ("conv", "norm", "act") and ("act", "conv", "norm").

    Returns:
        spconv.SparseSequential: sparse convolution module.
    """
    assert isinstance(order, tuple) and len(order) <= 3
    # Reject typos/unknown entries instead of silently skipping them,
    # which previously could produce an incomplete (or empty) module.
    assert set(order) <= {'conv', 'norm', 'act'}, \
        f'invalid layer order {order}'

    conv_cfg = dict(type=conv_type, indice_key=indice_key)
    layers = list()
    for layer in order:
        if layer == 'conv':
            # Inverse convs derive their geometry from `indice_key`, so
            # stride/padding must not be passed to them.
            if conv_type not in [
                    'SparseInverseConv3d', 'SparseInverseConv2d',
                    'SparseInverseConv1d'
            ]:
                layers.append(
                    build_conv_layer(
                        conv_cfg,
                        in_channels,
                        out_channels,
                        kernel_size,
                        stride=stride,
                        padding=padding,
                        bias=False))
            else:
                layers.append(
                    build_conv_layer(
                        conv_cfg,
                        in_channels,
                        out_channels,
                        kernel_size,
                        bias=False))
        elif layer == 'norm':
            # Normalization is sized by the conv output channels.
            layers.append(build_norm_layer(norm_cfg, out_channels)[1])
        else:  # 'act' (guaranteed by the assert above)
            layers.append(nn.ReLU(inplace=True))
    return spconv.SparseSequential(*layers)
import torch
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock, SparseBasicBlockV0
from mmdet3d.ops import SparseBasicBlock
def test_SparseUNet():
from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet
self = SparseUNet(
in_channels=4, output_shape=[41, 1600, 1408], pre_act=False)
self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408])
# test encoder layers
assert len(self.encoder_layers) == 4
......@@ -61,17 +60,6 @@ def test_SparseBasicBlock():
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
# test v0
self = SparseBasicBlockV0(
4,
4,
indice_key='subm0',
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
# test
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
......@@ -92,3 +80,57 @@ def test_SparseBasicBlock():
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
def test_make_sparse_convmodule():
    """Check layer assembly and ordering of make_sparse_convmodule."""
    from mmdet3d.ops import make_sparse_convmodule

    voxel_features = torch.tensor(
        [[6.56126, 0.9648336, -1.7339306, 0.315],
         [6.8162713, -2.480431, -1.3616394, 0.36],
         [11.643568, -4.744306, -1.3580885, 0.16],
         [23.482342, 6.5036807, 0.5806964, 0.35]],
        dtype=torch.float32)  # n, point_features
    coordinates = torch.tensor(
        [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
         [1, 35, 930, 469]],
        dtype=torch.int32)  # n, 4(batch, ind_x, ind_y, ind_z)
    input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
                                              [41, 1600, 1408], 2)

    # Default post-activation order: conv -> norm -> act.
    block_post = make_sparse_convmodule(
        4,
        16,
        3,
        'test0',
        stride=1,
        padding=0,
        conv_type='SubMConv3d',
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        order=('conv', 'norm', 'act'))
    assert isinstance(block_post[0], spconv.SubMConv3d)
    assert block_post[0].in_channels == 4
    assert block_post[0].out_channels == 16
    assert isinstance(block_post[1], torch.nn.BatchNorm1d)
    assert block_post[1].eps == 0.001
    assert block_post[1].momentum == 0.01
    assert isinstance(block_post[2], torch.nn.ReLU)
    # Forward pass through the assembled module.
    out_features = block_post(input_sp_tensor)
    assert out_features.features.shape == torch.Size([4, 16])

    # Pre-activation order with an inverse conv last.
    block_pre = make_sparse_convmodule(
        4,
        16,
        3,
        'test1',
        stride=1,
        padding=0,
        conv_type='SparseInverseConv3d',
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        order=('norm', 'act', 'conv'))
    assert isinstance(block_pre[0], torch.nn.BatchNorm1d)
    assert isinstance(block_pre[1], torch.nn.ReLU)
    assert isinstance(block_pre[2], spconv.SparseInverseConv3d)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment