# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.cnn import build_norm_layer
from mmcv.runner import force_fp32
from torch import nn

from mmdet3d.ops import DynamicScatter
from ..builder import VOXEL_ENCODERS
from .utils import PFNLayer, get_paddings_indicator


@VOXEL_ENCODERS.register_module()
class PillarFeatureNet(nn.Module):
    """Pillar Feature Net.

    The network prepares the pillar features and performs forward pass
    through PFNLayers.

    Args:
        in_channels (int, optional): Number of input features,
            either x, y, z or x, y, z, r. Defaults to 4.
        feat_channels (tuple, optional): Number of features in each of the
            N PFNLayers. Defaults to (64, ).
        with_distance (bool, optional): Whether to include Euclidean distance
            to points. Defaults to False.
        with_cluster_center (bool, optional): Whether to append the offsets
            of each point to its cluster center (the mean of the points in
            its pillar) as extra features. Defaults to True.
        with_voxel_center (bool, optional): Whether to append the offsets
            of each point to its pillar (voxel) center as extra features.
            Defaults to True.
        voxel_size (tuple[float], optional): Size of voxels; only the x and
            y sizes are used. Defaults to (0.2, 0.2, 4).
        point_cloud_range (tuple[float], optional): Point cloud range; only
            the x and y minimums are used.
            Defaults to (0, -40, -3, 70.4, 40, 1).
        norm_cfg (dict, optional): Config dict of normalization layers.
            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
        mode (str, optional): The mode to gather point features. Options are
            'max' or 'avg'. Defaults to 'max'.
        legacy (bool, optional): Whether to use the original behavior (True)
            or the refactored behavior (False) when computing the voxel
            center offsets. Defaults to True.
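
    Example:
        A minimal usage sketch; the tensor shapes and values below are
        illustrative assumptions rather than settings from a real config::

            >>> import torch
            >>> pfn = PillarFeatureNet(in_channels=4, feat_channels=(64, ))
            >>> # 10 pillars, up to 32 zero-padded points of (x, y, z, r) each
            >>> features = torch.rand(10, 32, 4)
            >>> num_points = torch.randint(1, 33, (10, ))
            >>> # coors holds one (batch_idx, z, y, x) index per pillar
            >>> coors = torch.zeros(10, 4, dtype=torch.int32)
            >>> out = pfn(features, num_points, coors)  # shape (10, 64)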
    """

    def __init__(self,
                 in_channels=4,
                 feat_channels=(64, ),
                 with_distance=False,
                 with_cluster_center=True,
                 with_voxel_center=True,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 mode='max',
                 legacy=True):
        super(PillarFeatureNet, self).__init__()
        assert len(feat_channels) > 0
        self.legacy = legacy
        if with_cluster_center:
            in_channels += 3
        if with_voxel_center:
            in_channels += 3
        if with_distance:
            in_channels += 1
        self._with_distance = with_distance
        self._with_cluster_center = with_cluster_center
        self._with_voxel_center = with_voxel_center
        self.fp16_enabled = False
        # Create PillarFeatureNet layers
        self.in_channels = in_channels
        feat_channels = [in_channels] + list(feat_channels)
        pfn_layers = []
        for i in range(len(feat_channels) - 1):
            in_filters = feat_channels[i]
            out_filters = feat_channels[i + 1]
            if i < len(feat_channels) - 2:
                last_layer = False
            else:
                last_layer = True
            pfn_layers.append(
                PFNLayer(
                    in_filters,
                    out_filters,
                    norm_cfg=norm_cfg,
                    last_layer=last_layer,
                    mode=mode))
        self.pfn_layers = nn.ModuleList(pfn_layers)

        # Need pillar (voxel) size and x/y/z offsets in order to calculate
        # the offsets to the pillar centers
        self.vx = voxel_size[0]
        self.vy = voxel_size[1]
        self.vz = voxel_size[2]
        self.x_offset = self.vx / 2 + point_cloud_range[0]
        self.y_offset = self.vy / 2 + point_cloud_range[1]
        self.z_offset = self.vz / 2 + point_cloud_range[2]
        self.point_cloud_range = point_cloud_range

    @force_fp32(out_fp16=True)
    def forward(self, features, num_points, coors):
        """Forward function.

        Args:
            features (torch.Tensor): Point features or raw points in shape
                (N, M, C).
            num_points (torch.Tensor): Number of points in each pillar.
            coors (torch.Tensor): Coordinates of each voxel.

        Returns:
            torch.Tensor: Features of pillars.
        """
        features_ls = [features]
        # Find distance of x, y, and z from cluster center
        if self._with_cluster_center:
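            # Padded point entries in ``features`` are zeros, so summing over
            # the point axis and dividing by the true point count per pillar
            # yields the per-pillar mean (the cluster center)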
            points_mean = features[:, :, :3].sum(
                dim=1, keepdim=True) / num_points.type_as(features).view(
                    -1, 1, 1)
            f_cluster = features[:, :, :3] - points_mean
            features_ls.append(f_cluster)

        # Find distance of x, y, and z from pillar center
        dtype = features.dtype
        if self._with_voxel_center:
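            # Both branches compute the (x, y, z) offsets of each point to the
            # geometric center of its pillar; the non-legacy branch writes them
            # into a fresh tensor, while the legacy branch modifies the xyz
            # slice of ``features`` in place (it operates on a view)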
            if not self.legacy:
                f_center = torch.zeros_like(features[:, :, :3])
                f_center[:, :, 0] = features[:, :, 0] - (
                    coors[:, 3].to(dtype).unsqueeze(1) * self.vx +
                    self.x_offset)
                f_center[:, :, 1] = features[:, :, 1] - (
                    coors[:, 2].to(dtype).unsqueeze(1) * self.vy +
                    self.y_offset)
                f_center[:, :, 2] = features[:, :, 2] - (
                    coors[:, 1].to(dtype).unsqueeze(1) * self.vz +
                    self.z_offset)
            else:
                f_center = features[:, :, :3]
                f_center[:, :, 0] = f_center[:, :, 0] - (
                    coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
                    self.x_offset)
                f_center[:, :, 1] = f_center[:, :, 1] - (
                    coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
                    self.y_offset)
                f_center[:, :, 2] = f_center[:, :, 2] - (
                    coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
                    self.z_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
            features_ls.append(points_dist)

        # Combine together feature decorations
        features = torch.cat(features_ls, dim=-1)
        # The feature decorations were calculated without regard to whether
        # pillar was empty. Need to ensure that
        # empty pillars remain set to zeros.
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
        mask = torch.unsqueeze(mask, -1).type_as(features)
        features *= mask

        for pfn in self.pfn_layers:
            features = pfn(features, num_points)

        return features.squeeze(1)


@VOXEL_ENCODERS.register_module()
class DynamicPillarFeatureNet(PillarFeatureNet):
    """Pillar Feature Net using dynamic voxelization.

    The network prepares the pillar features and performs forward pass
    through PFNLayers. The main difference is that it operates on dynamic
    voxels, which may contain a varying, unbounded number of points.

    Args:
        in_channels (int, optional): Number of input features,
            either x, y, z or x, y, z, r. Defaults to 4.
        feat_channels (tuple, optional): Number of features in each of the
            N PFNLayers. Defaults to (64, ).
        with_distance (bool, optional): Whether to include Euclidean distance
            to points. Defaults to False.
        with_cluster_center (bool, optional): Whether to append the offsets
            of each point to its cluster center (the mean of the points in
            its pillar) as extra features. Defaults to True.
        with_voxel_center (bool, optional): Whether to append the offsets
            of each point to its pillar (voxel) center as extra features.
            Defaults to True.
        voxel_size (tuple[float], optional): Size of voxels; only the x and
            y sizes are used. Defaults to (0.2, 0.2, 4).
        point_cloud_range (tuple[float], optional): Point cloud range; only
            the x and y minimums are used.
            Defaults to (0, -40, -3, 70.4, 40, 1).
        norm_cfg (dict, optional): Config dict of normalization layers.
            Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01).
        mode (str, optional): The mode to gather point features. Options are
            'max' or 'avg'. Defaults to 'max'.
        legacy (bool, optional): Whether to use the original behavior (True)
            or the refactored behavior (False) when computing the voxel
            center offsets. Defaults to True.
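
    Example:
        An input/output sketch; the shapes below are illustrative
        assumptions, and ``DynamicScatter`` requires the compiled mmdet3d
        CUDA ops, so the module has to run on GPU::

            >>> dpfn = DynamicPillarFeatureNet(in_channels=4).cuda()
            >>> # features: (P, 4) raw points (x, y, z, r), one row per point
            >>> # coors: (P, 4) per-point (batch_idx, z, y, x) voxel indices
            >>> voxel_feats, voxel_coors = dpfn(features, coors)
            >>> # voxel_feats: (V, 64) features of the non-empty pillars
            >>> # voxel_coors: (V, 4) their (batch_idx, z, y, x) coordinates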
    """

    def __init__(self,
                 in_channels=4,
                 feat_channels=(64, ),
                 with_distance=False,
                 with_cluster_center=True,
                 with_voxel_center=True,
                 voxel_size=(0.2, 0.2, 4),
                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 mode='max',
                 legacy=True):
        super(DynamicPillarFeatureNet, self).__init__(
            in_channels,
            feat_channels,
            with_distance,
            with_cluster_center=with_cluster_center,
            with_voxel_center=with_voxel_center,
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            norm_cfg=norm_cfg,
            mode=mode,
            legacy=legacy)
        self.fp16_enabled = False
        feat_channels = [self.in_channels] + list(feat_channels)
        pfn_layers = []
        # TODO: currently only one PFNLayer is supported

        for i in range(len(feat_channels) - 1):
            in_filters = feat_channels[i]
            out_filters = feat_channels[i + 1]
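            # From the second layer on, the input is the concatenation of the
            # per-point features with the voxel features mapped back to the
            # points (see forward), hence the doubled input width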
            if i > 0:
                in_filters *= 2
            norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
            pfn_layers.append(
                nn.Sequential(
                    nn.Linear(in_filters, out_filters, bias=False), norm_layer,
                    nn.ReLU(inplace=True)))
        self.num_pfn = len(pfn_layers)
        self.pfn_layers = nn.ModuleList(pfn_layers)
        self.pfn_scatter = DynamicScatter(voxel_size, point_cloud_range,
                                          (mode != 'max'))
        self.cluster_scatter = DynamicScatter(
            voxel_size, point_cloud_range, average_points=True)

    def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
        """Map the centers of voxels to its corresponding points.

        Args:
            pts_coors (torch.Tensor): The coordinates of each point, shape
                (M, 4), where M is the number of points.
            voxel_mean (torch.Tensor): The mean or aggregated features of a
                voxel, shape (N, C), where N is the number of voxels.
            voxel_coors (torch.Tensor): The coordinates of each voxel.

        Returns:
            torch.Tensor: Corresponding voxel centers of each point, shape
                (M, C), where M is the number of points.
        """
        # Step 1: scatter voxel into canvas
        # Calculate necessary things for canvas creation
        canvas_y = int(
            (self.point_cloud_range[4] - self.point_cloud_range[1]) / self.vy)
        canvas_x = int(
            (self.point_cloud_range[3] - self.point_cloud_range[0]) / self.vx)
        canvas_channel = voxel_mean.size(1)
        batch_size = pts_coors[-1, 0] + 1
        canvas_len = canvas_y * canvas_x * batch_size
        # Create the canvas for this sample
        canvas = voxel_mean.new_zeros(canvas_channel, canvas_len)
        # Only include non-empty pillars
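        # Flatten each voxel's (batch_idx, y, x) coordinate into a single
        # index over the stacked per-sample BEV canvases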
        indices = (
            voxel_coors[:, 0] * canvas_y * canvas_x +
            voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3])
        # Scatter the blob back to the canvas
        canvas[:, indices.long()] = voxel_mean.t()

        # Step 2: get voxel mean for each point
        voxel_index = (
            pts_coors[:, 0] * canvas_y * canvas_x +
            pts_coors[:, 2] * canvas_x + pts_coors[:, 3])
        center_per_point = canvas[:, voxel_index.long()].t()
        return center_per_point

    @force_fp32(out_fp16=True)
    def forward(self, features, coors):
        """Forward function.

        Args:
            features (torch.Tensor): Point features or raw points in shape
                (N, C), where N is the number of points.
            coors (torch.Tensor): Coordinates of each voxel.

        Returns:
            tuple[torch.Tensor]: Features of the pillars and their voxel
                coordinates.
        """
        features_ls = [features]
        # Find distance of x, y, and z from cluster center
        if self._with_cluster_center:
            voxel_mean, mean_coors = self.cluster_scatter(features, coors)
            points_mean = self.map_voxel_center_to_point(
                coors, voxel_mean, mean_coors)
            # TODO: maybe also do cluster for reflectivity
            f_cluster = features[:, :3] - points_mean[:, :3]
            features_ls.append(f_cluster)

        # Find distance of x, y, and z from pillar center
        if self._with_voxel_center:
            f_center = features.new_zeros(size=(features.size(0), 3))
            f_center[:, 0] = features[:, 0] - (
                coors[:, 3].type_as(features) * self.vx + self.x_offset)
            f_center[:, 1] = features[:, 1] - (
                coors[:, 2].type_as(features) * self.vy + self.y_offset)
            f_center[:, 2] = features[:, 2] - (
                coors[:, 1].type_as(features) * self.vz + self.z_offset)
            features_ls.append(f_center)

        if self._with_distance:
            points_dist = torch.norm(features[:, :3], 2, 1, keepdim=True)
            features_ls.append(points_dist)

        # Combine together feature decorations
        features = torch.cat(features_ls, dim=-1)
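        # Alternate per-point PFN layers with per-voxel scattering; for all
        # but the last layer, the voxel features are mapped back to their
        # points and concatenated as input to the next layer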
        for i, pfn in enumerate(self.pfn_layers):
            point_feats = pfn(features)
            voxel_feats, voxel_coors = self.pfn_scatter(point_feats, coors)
            if i != len(self.pfn_layers) - 1:
                # need to concat voxel feats if it is not the last pfn
                feat_per_point = self.map_voxel_center_to_point(
                    coors, voxel_feats, voxel_coors)
                features = torch.cat([point_feats, feat_per_point], dim=1)

        return voxel_feats, voxel_coors