sparse_encoder.py 8.26 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
VVsssssk's avatar
VVsssssk committed
2

3
from mmcv.runner import auto_fp16
zhangwenwei's avatar
zhangwenwei committed
4
from torch import nn as nn
zhangwenwei's avatar
zhangwenwei committed
5

6
from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule
VVsssssk's avatar
VVsssssk committed
7
from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE
8
from ..builder import MIDDLE_ENCODERS
zhangwenwei's avatar
zhangwenwei committed
9

VVsssssk's avatar
VVsssssk committed
10
11
12
13
14
# Pick the sparse-tensor backend once at import time: use the
# ``spconv.pytorch`` implementations when ``IS_SPCONV2_AVAILABLE`` is set,
# otherwise fall back to the equivalents shipped in ``mmcv.ops``.
if IS_SPCONV2_AVAILABLE:
    from spconv.pytorch import SparseConvTensor, SparseSequential
else:
    from mmcv.ops import SparseConvTensor, SparseSequential

zhangwenwei's avatar
zhangwenwei committed
15

16
@MIDDLE_ENCODERS.register_module()
class SparseEncoder(nn.Module):
    r"""Sparse encoder for SECOND and Part-A2.

    Args:
        in_channels (int): The number of input channels.
        sparse_shape (list[int]): The sparse shape of input tensor.
        order (tuple[str], optional): Order of conv module. Must be a tuple
            holding exactly 'conv', 'norm' and 'act'.
            Defaults to ('conv', 'norm', 'act').
        norm_cfg (dict, optional): Config of normalization layer. Defaults to
            dict(type='BN1d', eps=1e-3, momentum=0.01).
        base_channels (int, optional): Out channels for conv_input layer.
            Defaults to 16.
        output_channels (int, optional): Out channels for conv_out layer.
            Defaults to 128.
        encoder_channels (tuple[tuple[int]], optional):
            Convolutional channels of each encode block.
            Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)).
        encoder_paddings (tuple[tuple[int]], optional):
            Paddings of each encode block.
            Defaults to ((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1, 1)).
        block_type (str, optional): Type of the block to use, either
            'conv_module' or 'basicblock'. Defaults to 'conv_module'.
    """

    def __init__(self,
                 in_channels,
                 sparse_shape,
                 order=('conv', 'norm', 'act'),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 base_channels=16,
                 output_channels=128,
                 encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
                                                                        64)),
                 encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
                                                                 1)),
                 block_type='conv_module'):
        super().__init__()
        assert block_type in ['conv_module', 'basicblock']
        self.sparse_shape = sparse_shape
        self.in_channels = in_channels
        self.order = order
        self.base_channels = base_channels
        self.output_channels = output_channels
        self.encoder_channels = encoder_channels
        self.encoder_paddings = encoder_paddings
        self.stage_num = len(self.encoder_channels)
        self.fp16_enabled = False
        # Spconv init all weight on its own

        assert isinstance(order, tuple) and len(order) == 3
        assert set(order) == {'conv', 'norm', 'act'}

        # The input layer is the same submanifold conv in both modes; only
        # the ``order`` argument differs.
        conv_input_kwargs = dict(
            norm_cfg=norm_cfg,
            padding=1,
            indice_key='subm1',
            conv_type='SubMConv3d')
        if self.order[0] != 'conv':  # pre activate
            # Restrict the input module to a bare convolution.
            conv_input_kwargs['order'] = ('conv', )
        # post activate: ``order`` is left to make_sparse_convmodule's default
        self.conv_input = make_sparse_convmodule(
            in_channels, self.base_channels, 3, **conv_input_kwargs)

        encoder_out_channels = self.make_encoder_layers(
            make_sparse_convmodule,
            norm_cfg,
            self.base_channels,
            block_type=block_type)

        self.conv_out = make_sparse_convmodule(
            encoder_out_channels,
            self.output_channels,
            kernel_size=(3, 1, 1),
            stride=(2, 1, 1),
            norm_cfg=norm_cfg,
            padding=0,
            indice_key='spconv_down2',
            conv_type='SparseConv3d')

    @auto_fp16(apply_to=('voxel_features', ))
    def forward(self, voxel_features, coors, batch_size):
        """Forward of SparseEncoder.

        Args:
            voxel_features (torch.float32): Voxel features in shape (N, C).
            coors (torch.int32): Coordinates in shape (N, 4),
                the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
            batch_size (int): Batch size.

        Returns:
            torch.Tensor: Dense feature map of shape (N, C * D, H, W),
                obtained by densifying the output of ``conv_out`` (shape
                (N, C, D, H, W)) and folding D into the channel axis.
        """
        coors = coors.int()
        input_sp_tensor = SparseConvTensor(voxel_features, coors,
                                           self.sparse_shape, batch_size)
        x = self.conv_input(input_sp_tensor)

        # Keep every stage output; only the last one feeds ``conv_out``.
        encode_features = []
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x)
            encode_features.append(x)

        # for detection head
        # [200, 176, 5] -> [200, 176, 2]
        out = self.conv_out(encode_features[-1])
        spatial_features = out.dense()

        # Merge the depth axis into channels.
        N, C, D, H, W = spatial_features.shape
        spatial_features = spatial_features.view(N, C * D, H, W)

        return spatial_features

    def make_encoder_layers(self,
                            make_block,
                            norm_cfg,
                            in_channels,
                            block_type='conv_module',
                            conv_cfg=dict(type='SubMConv3d')):
        """Make encoder layers using sparse convs.

        Builds one ``SparseSequential`` per entry of ``self.encoder_channels``
        and registers it on ``self.encoder_layers`` as ``encoder_layer{i + 1}``.

        Args:
            make_block (method): A bounded function to build blocks.
            norm_cfg (dict[str]): Config of normalization layer.
            in_channels (int): The number of encoder input channels.
            block_type (str, optional): Type of the block to use.
                Defaults to 'conv_module'.
            conv_cfg (dict, optional): Config of conv layer. Defaults to
                dict(type='SubMConv3d').

        Returns:
            int: The number of encoder output channels.
        """
        assert block_type in ['conv_module', 'basicblock']
        self.encoder_layers = SparseSequential()

        use_basicblock = block_type == 'basicblock'
        last_stage = len(self.encoder_channels) - 1

        for i, blocks in enumerate(self.encoder_channels):
            blocks = tuple(blocks)
            blocks_list = []
            for j, out_channels in enumerate(blocks):
                padding = tuple(self.encoder_paddings[i])[j]
                if use_basicblock:
                    # 'basicblock': the strided conv closes every stage
                    # except the last one.
                    downsample = j == len(blocks) - 1 and i != last_stage
                else:
                    # each stage started with a spconv layer
                    # except the first stage
                    downsample = (
                        i != 0 and j == 0 and block_type == 'conv_module')

                if downsample:
                    blocks_list.append(
                        make_block(
                            in_channels,
                            out_channels,
                            3,
                            norm_cfg=norm_cfg,
                            stride=2,
                            padding=padding,
                            indice_key=f'spconv{i + 1}',
                            conv_type='SparseConv3d'))
                elif use_basicblock:
                    # NOTE(review): the block is built with out_channels on
                    # both sides, so in_channels is not used here - confirm
                    # the configs keep them equal at this position.
                    blocks_list.append(
                        SparseBasicBlock(
                            out_channels,
                            out_channels,
                            norm_cfg=norm_cfg,
                            conv_cfg=conv_cfg))
                else:
                    blocks_list.append(
                        make_block(
                            in_channels,
                            out_channels,
                            3,
                            norm_cfg=norm_cfg,
                            padding=padding,
                            indice_key=f'subm{i + 1}',
                            conv_type='SubMConv3d'))
                in_channels = out_channels
            stage_name = f'encoder_layer{i + 1}'
            stage_layers = SparseSequential(*blocks_list)
            self.encoder_layers.add_module(stage_name, stage_layers)
        # ``in_channels`` tracks the last built block's out_channels; unlike
        # the loop variable it is also defined when no block was built.
        return in_channels