transforms_3d.py 16.8 KB
Newer Older
zhangwenwei's avatar
zhangwenwei committed
1
import numpy as np
2
from mmcv.utils import build_from_cfg
zhangwenwei's avatar
zhangwenwei committed
3
4

from mmdet3d.core.bbox import box_np_ops
5
from mmdet.datasets.builder import PIPELINES
zhangwenwei's avatar
zhangwenwei committed
6
from mmdet.datasets.pipelines import RandomFlip
zhangwenwei's avatar
zhangwenwei committed
7
8
9
10
from ..registry import OBJECTSAMPLERS
from .data_augment_utils import noise_per_object_v3_


11
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
12
13
14
15
16
17
18
19
class RandomFlip3D(RandomFlip):
    """Flip the points & bbox.

    If the input dict contains the key "flip", then the flag will be used,
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    Args:
        sync_2d (bool, optional): Whether to apply flip according to the 2D
            images. If True, it will apply the same flip as that to 2D images.
            If False, it will decide whether to flip randomly and independently
            to that of 2D images.
        flip_ratio_bev_horizontal (float, optional): The flipping probability
            in horizontal direction.
        flip_ratio_bev_vertical (float, optional): The flipping probability
            in vertical direction.
        **kwargs: Extra keyword arguments forwarded unchanged to the parent
            ``RandomFlip`` constructor.
    """

    def __init__(self,
                 sync_2d=True,
                 flip_ratio_bev_horizontal=0.0,
                 flip_ratio_bev_vertical=0.0,
                 **kwargs):
        super(RandomFlip3D, self).__init__(
            flip_ratio=flip_ratio_bev_horizontal, **kwargs)
        self.sync_2d = sync_2d
        # Stored explicitly so that ``__repr__`` can report it; previously
        # the value was only handed to the parent as ``flip_ratio``.
        self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal
        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
        if flip_ratio_bev_horizontal is not None:
            assert isinstance(
                flip_ratio_bev_horizontal,
                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
        if flip_ratio_bev_vertical is not None:
            assert isinstance(
                flip_ratio_bev_vertical,
                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1

    def random_flip_data_3d(self, input_dict, direction='horizontal'):
        """Flip the 3D boxes and the points along the given BEV direction.

        Args:
            input_dict (dict): Result dict. Reads ``bbox3d_fields`` and
                ``points``; every entry named in ``bbox3d_fields`` is flipped
                and ``points`` is overwritten with the value returned by the
                box object's ``flip`` call.
            direction (str): Either ``'horizontal'`` or ``'vertical'``.
        """
        assert direction in ['horizontal', 'vertical']
        # NOTE(review): the points are routed through ``flip`` once per box
        # field, so with several fields they would be flipped several times
        # and with none they stay untouched - presumably exactly one field
        # is expected here; confirm against the callers.
        for key in input_dict['bbox3d_fields']:
            input_dict['points'] = input_dict[key].flip(
                direction, points=input_dict['points'])

    def __call__(self, input_dict):
        """Run the 2D flip of the parent class, then flip the 3D data.

        Args:
            input_dict (dict): Result dict of the pipeline.

        Returns:
            dict: The same dict, with ``pcd_horizontal_flip`` and
                ``pcd_vertical_flip`` set and the points / 3D boxes flipped
                accordingly.
        """
        # flip 2D image and its annotations
        super(RandomFlip3D, self).__call__(input_dict)

        if self.sync_2d:
            # Follow the decision taken for the image; the vertical BEV flip
            # is never applied in this mode.
            input_dict['pcd_horizontal_flip'] = input_dict['flip']
            input_dict['pcd_vertical_flip'] = False
        else:
            # Flags already present in the dict take precedence over a new
            # random draw.
            if 'pcd_horizontal_flip' not in input_dict:
                input_dict['pcd_horizontal_flip'] = bool(
                    np.random.rand() < self.flip_ratio)
            if 'pcd_vertical_flip' not in input_dict:
                input_dict['pcd_vertical_flip'] = bool(
                    np.random.rand() < self.flip_ratio_bev_vertical)

        if input_dict['pcd_horizontal_flip']:
            self.random_flip_data_3d(input_dict, 'horizontal')
        if input_dict['pcd_vertical_flip']:
            self.random_flip_data_3d(input_dict, 'vertical')
        return input_dict

    def __repr__(self):
        """str: Class name followed by the configured flip options."""
        repr_str = self.__class__.__name__
        repr_str += '(sync_2d={},'.format(self.sync_2d)
        repr_str += ' flip_ratio_bev_horizontal={},'.format(
            self.flip_ratio_bev_horizontal)
        repr_str += ' flip_ratio_bev_vertical={})'.format(
            self.flip_ratio_bev_vertical)
        return repr_str
zhangwenwei's avatar
zhangwenwei committed
85

zhangwenwei's avatar
zhangwenwei committed
86

87
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
88
class ObjectSample(object):
    """Sample GT objects to the data

    Args:
        db_sampler (dict): Config dict of the database sampler. When it has
            no ``type`` key, ``'DataBaseSampler'`` is used. The dict passed in
            is not modified.
        sample_2d (bool): Whether to also paste 2D image patch to the images
            This should be true when applying multi-modality cut-and-paste.
    """

    def __init__(self, db_sampler, sample_2d=False):
        # Work on a shallow copy so that filling in the default ``type``
        # does not leak into the caller's config object.
        db_sampler = db_sampler.copy()
        db_sampler.setdefault('type', 'DataBaseSampler')
        self.sampler_cfg = db_sampler
        self.sample_2d = sample_2d
        self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)

    @staticmethod
    def remove_points_in_boxes(points, boxes):
        """Drop the points that ``points_in_rbbox`` flags for any box.

        Args:
            points (np.ndarray): Scene points.
            boxes (np.ndarray): Boxes handed to
                ``box_np_ops.points_in_rbbox``.

        Returns:
            np.ndarray: The points whose mask row contains no truthy entry.
        """
        # presumably ``masks`` is (num_points, num_boxes) - TODO confirm
        masks = box_np_ops.points_in_rbbox(points, boxes)
        points = points[np.logical_not(masks.any(-1))]
        return points

    def __call__(self, input_dict):
        """Ask the database sampler for extra objects and merge them in.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d``,
                ``gt_labels_3d`` and ``points`` (plus ``img`` and
                ``gt_bboxes`` when ``sample_2d`` is set).

        Returns:
            dict: The same dict with ``gt_bboxes_3d``, ``gt_labels_3d`` and
                ``points`` rewritten (and ``gt_bboxes`` / ``img`` as well
                when ``sample_2d`` is set and something was sampled).
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        gt_labels_3d = input_dict['gt_labels_3d']
        points = input_dict['points']

        if self.sample_2d:
            img = input_dict['img']
            gt_bboxes_2d = input_dict['gt_bboxes']
            # Assume for now 3D & 2D bboxes are the same
            sampled_dict = self.db_sampler.sample_all(
                gt_bboxes_3d.tensor.numpy(),
                gt_labels_3d,
                gt_bboxes_2d=gt_bboxes_2d,
                img=img)
        else:
            sampled_dict = self.db_sampler.sample_all(
                gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)

        # ``None`` means nothing was sampled; the inputs are written back
        # unchanged in that case.
        if sampled_dict is not None:
            sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
            sampled_points = sampled_dict['points']
            sampled_gt_labels = sampled_dict['gt_labels_3d']

            gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
                                          axis=0)
            gt_bboxes_3d = gt_bboxes_3d.new_box(
                np.concatenate(
                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))

            # Clear the scene points lying inside the pasted boxes before
            # adding the sampled points.
            points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
            # check the points dimension: sampled points are cut down to the
            # number of columns of the scene points
            dim_inds = points.shape[-1]
            points = np.concatenate([sampled_points[:, :dim_inds], points],
                                    axis=0)

            if self.sample_2d:
                sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
                gt_bboxes_2d = np.concatenate(
                    [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)

                input_dict['gt_bboxes'] = gt_bboxes_2d
                input_dict['img'] = sampled_dict['img']

        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
        input_dict['gt_labels_3d'] = gt_labels_3d
        input_dict['points'] = points

        return input_dict

    def __repr__(self):
        """str: The class name."""
        return self.__class__.__name__


164
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
165
class ObjectNoise(object):
    """Apply noise to each GT objects in the scene

    Args:
        translation_std (list, optional): Standard deviation of the
            distribution where translation noise are sampled from.
            Defaults to [0.25, 0.25, 0.25].
        global_rot_range (list, optional): Global rotation to the scene.
            Defaults to [0.0, 0.0].
        rot_range (list, optional): Object rotation range.
            Defaults to [-0.15707963267, 0.15707963267].
        num_try (int, optional): Number of times to try if the noise applied is
            invalid. Defaults to 100.
    """

    def __init__(self,
                 translation_std=None,
                 global_rot_range=None,
                 rot_range=None,
                 num_try=100):
        # ``None`` stands for the documented default. A fresh list is built
        # per instance so that instances never share (and cannot corrupt)
        # one default list object.
        if translation_std is None:
            translation_std = [0.25, 0.25, 0.25]
        if global_rot_range is None:
            global_rot_range = [0.0, 0.0]
        if rot_range is None:
            rot_range = [-0.15707963267, 0.15707963267]
        self.translation_std = translation_std
        self.global_rot_range = global_rot_range
        self.rot_range = rot_range
        self.num_try = num_try

    def __call__(self, input_dict):
        """Perturb the GT boxes and the points of the scene.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d`` and
                ``points``.

        Returns:
            dict: The same dict with ``gt_bboxes_3d`` rebuilt from the
                noised box array and ``points`` written back.
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        points = input_dict['points']

        # TODO: check this inplace function
        # presumably ``noise_per_object_v3_`` edits ``numpy_box`` and
        # ``points`` in place (its return value is not used) - confirm.
        numpy_box = gt_bboxes_3d.tensor.numpy()
        noise_per_object_v3_(
            numpy_box,
            points,
            rotation_perturb=self.rot_range,
            center_noise_std=self.translation_std,
            global_random_rot_range=self.global_rot_range,
            num_try=self.num_try)

        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
        input_dict['points'] = points
        return input_dict

    def __repr__(self):
        """str: Class name followed by the configured noise options."""
        repr_str = self.__class__.__name__
        repr_str += '(num_try={},'.format(self.num_try)
        repr_str += ' translation_std={},'.format(self.translation_std)
        repr_str += ' global_rot_range={},'.format(self.global_rot_range)
        repr_str += ' rot_range={})'.format(self.rot_range)
        return repr_str


217
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
218
219
220
221
222
223
224
225
226
227
228
229
class GlobalRotScaleTrans(object):
    """Apply global rotation, scaling and translation to a 3D scene

    Args:
        rot_range (list[float]): Range of rotation angle.
            Default to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
            A single number ``r`` is read as the range ``[-r, r]``.
        scale_ratio_range (list[float]): Range of scale ratio.
            Default to [0.95, 1.05].
        translation_std (list[float]): The standard deviation of translation
            noise. This apply random translation to a scene by a noise, which
            is sampled from a gaussian distribution whose standard deviation
            is set by ``translation_std``. Default to [0, 0, 0]. A single
            number is used for all three axes.
        shift_height (bool): whether to shift height
            (the fourth dimension of indoor points) when scaling.
    """

    def __init__(self,
                 rot_range=None,
                 scale_ratio_range=None,
                 translation_std=None,
                 shift_height=False):
        # ``None`` stands for the documented default. A fresh list is built
        # per instance so that instances never share one default list.
        if rot_range is None:
            rot_range = [-0.78539816, 0.78539816]
        if scale_ratio_range is None:
            scale_ratio_range = [0.95, 1.05]
        if translation_std is None:
            translation_std = [0, 0, 0]
        self.rot_range = rot_range
        self.scale_ratio_range = scale_ratio_range
        self.translation_std = translation_std
        self.shift_height = shift_height

    def _trans_bbox_points(self, input_dict):
        """Shift the points and every 3D box field by one random offset.

        The offset is drawn from a zero-mean normal distribution and is
        stored under ``input_dict['pcd_trans']``.
        """
        if not isinstance(self.translation_std, (list, tuple, np.ndarray)):
            # A scalar std applies to all three axes.
            translation_std = [
                self.translation_std, self.translation_std,
                self.translation_std
            ]
        else:
            translation_std = self.translation_std
        translation_std = np.array(translation_std, dtype=np.float32)
        trans_factor = np.random.normal(scale=translation_std, size=3).T

        input_dict['points'][:, :3] += trans_factor
        input_dict['pcd_trans'] = trans_factor
        for key in input_dict['bbox3d_fields']:
            input_dict[key].translate(trans_factor)

    def _rot_bbox_points(self, input_dict):
        """Rotate the 3D box fields together with the points.

        The angle is drawn uniformly from ``rot_range``. The rotated points
        and the value returned alongside them are stored under ``points``
        and ``pcd_rotation``.
        """
        rotation = self.rot_range
        # Same sequence check as for ``translation_std``: only a scalar is
        # expanded to a symmetric range (tuples and arrays used to reach the
        # negation below and fail).
        if not isinstance(rotation, (list, tuple, np.ndarray)):
            rotation = [-rotation, rotation]
        noise_rotation = np.random.uniform(rotation[0], rotation[1])

        # NOTE(review): the points are only rotated through a non-empty box
        # field, so a scene without boxes keeps its points unrotated and
        # gets no ``pcd_rotation`` entry - confirm this is intended.
        for key in input_dict['bbox3d_fields']:
            if len(input_dict[key].tensor) != 0:
                points, rot_mat_T = input_dict[key].rotate(
                    noise_rotation, input_dict['points'])
                input_dict['points'] = points
                input_dict['pcd_rotation'] = rot_mat_T

    def _scale_bbox_points(self, input_dict):
        """Scale the points and every 3D box field by ``pcd_scale_factor``."""
        scale = input_dict['pcd_scale_factor']
        input_dict['points'][:, :3] *= scale
        if self.shift_height:
            # The last column is scaled as well; per the class docstring it
            # is expected to hold the height of indoor points.
            input_dict['points'][:, -1] *= scale

        for key in input_dict['bbox3d_fields']:
            input_dict[key].scale(scale)

    def _random_scale(self, input_dict):
        """Draw a scale factor uniformly from ``scale_ratio_range``.

        The result is stored under ``input_dict['pcd_scale_factor']``.
        """
        scale_factor = np.random.uniform(self.scale_ratio_range[0],
                                         self.scale_ratio_range[1])
        input_dict['pcd_scale_factor'] = scale_factor

    def __call__(self, input_dict):
        """Apply rotation, then scaling, then translation.

        Args:
            input_dict (dict): Result dict. Reads ``points`` and
                ``bbox3d_fields``; an existing ``pcd_scale_factor`` is
                reused instead of drawing a new one.

        Returns:
            dict: The same dict after the three transforms.
        """
        self._rot_bbox_points(input_dict)

        if 'pcd_scale_factor' not in input_dict:
            self._random_scale(input_dict)
        self._scale_bbox_points(input_dict)

        self._trans_bbox_points(input_dict)
        return input_dict

    def __repr__(self):
        """str: Class name followed by the configured options."""
        repr_str = self.__class__.__name__
        repr_str += '(rot_range={},'.format(self.rot_range)
        repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)
        repr_str += ' translation_std={},'.format(self.translation_std)
        repr_str += ' shift_height={})'.format(self.shift_height)
        return repr_str


306
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
307
308
309
310
311
312
313
314
315
316
class PointShuffle(object):
    """Shuffle the order of the points of a scene."""

    def __call__(self, input_dict):
        """Shuffle ``input_dict['points']`` in place.

        Args:
            input_dict (dict): Result dict holding the ``points`` entry.

        Returns:
            dict: The same dict; its ``points`` entry has been reordered
                along the first axis by ``np.random.shuffle``.
        """
        cloud = input_dict['points']
        # In-place reordering: no new array is stored back into the dict.
        np.random.shuffle(cloud)
        return input_dict

    def __repr__(self):
        """str: The class name."""
        return type(self).__name__


317
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
318
319
320
321
322
323
324
325
class ObjectRangeFilter(object):
    """Filter GT objects by a bird's-eye-view range.

    Args:
        point_cloud_range (list[float]): Point cloud range. Entries 0, 1, 3
            and 4 are used as the BEV range handed to ``in_range_bev``.
    """

    def __init__(self, point_cloud_range):
        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
        self.bev_range = self.pcd_range[[0, 1, 3, 4]]

    def __call__(self, input_dict):
        """Drop the GT boxes (and their labels) outside the BEV range.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d`` and
                ``gt_labels_3d``.

        Returns:
            dict: The same dict with both entries filtered and the box yaw
                passed through ``limit_yaw``.
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        gt_labels_3d = input_dict['gt_labels_3d']
        mask = gt_bboxes_3d.in_range_bev(self.bev_range)
        gt_bboxes_3d = gt_bboxes_3d[mask]
        # mask is a torch tensor but gt_labels_3d is still numpy array
        # using mask to index gt_labels_3d will cause bug when
        # len(gt_labels_3d) == 1, where mask=1 will be interpreted
        # as gt_labels_3d[1] and cause out of index error
        # ``np.bool_`` is used because the ``np.bool`` alias no longer
        # exists in current NumPy releases.
        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool_)]

        # limit rad to [-pi, pi]
        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
        input_dict['gt_labels_3d'] = gt_labels_3d

        return input_dict

    def __repr__(self):
        """str: Class name followed by the configured range."""
        repr_str = self.__class__.__name__
        repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
        return repr_str


348
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
class PointsRangeFilter(object):
    """Keep only the points lying inside a 3D range.

    Args:
        point_cloud_range (list[float]): Range values; the first three
            entries are the inclusive lower bounds and the remaining ones
            the exclusive upper bounds of the first three point columns.
    """

    def __init__(self, point_cloud_range):
        bounds = np.array(point_cloud_range, dtype=np.float32)
        # Stored with a leading axis of length one so that it broadcasts
        # against the rows of the point array.
        self.pcd_range = bounds[np.newaxis, :]

    def __call__(self, input_dict):
        """Filter ``input_dict['points']`` by the configured range.

        Args:
            input_dict (dict): Result dict holding the ``points`` entry.

        Returns:
            dict: The same dict with ``points`` replaced by the rows whose
                first three columns all satisfy ``lower <= value < upper``.
        """
        cloud = input_dict['points']
        coords = cloud[:, :3]
        lower = self.pcd_range[:, :3]
        upper = self.pcd_range[:, 3:]
        # A point survives only if every one of its three coordinates is
        # inside the half-open interval.
        inside = np.logical_and(coords >= lower, coords < upper).all(axis=1)
        input_dict['points'] = cloud[inside, :]
        return input_dict

    def __repr__(self):
        """str: Class name followed by the stored range."""
        return '{}(point_cloud_range={})'.format(
            type(self).__name__, self.pcd_range.tolist())
zhangwenwei's avatar
zhangwenwei committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394


@PIPELINES.register_module()
class ObjectNameFilter(object):
    """Keep only the GT objects whose label belongs to the given classes.

    Args:
        classes (list[str]): list of class names to be kept for training
    """

    def __init__(self, classes):
        self.classes = classes
        # Accepted label ids are the positions 0 .. len(classes) - 1.
        self.labels = [idx for idx in range(len(classes))]

    def __call__(self, input_dict):
        """Filter ``gt_bboxes_3d`` and ``gt_labels_3d`` by label id.

        Args:
            input_dict (dict): Result dict holding both entries.

        Returns:
            dict: The same dict with both entries indexed by a boolean mask
                that is True where the label is one of ``self.labels``.
        """
        accepted = self.labels
        keep = np.fromiter(
            (label in accepted for label in input_dict['gt_labels_3d']),
            dtype=np.bool_)
        # Boxes first, labels second; both use the very same mask.
        for field in ('gt_bboxes_3d', 'gt_labels_3d'):
            input_dict[field] = input_dict[field][keep]
        return input_dict

    def __repr__(self):
        """str: Class name followed by the kept class names."""
        return '{}(classes={})'.format(type(self).__name__, self.classes)
wuyuefeng's avatar
wuyuefeng committed
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458


@PIPELINES.register_module()
class IndoorPointSample(object):
    """Indoor point sample.

    Sampling data to a certain number.

    Args:
        num_points (int): Number of points to be sampled.
    """

    def __init__(self, num_points):
        self.num_points = num_points

    def points_random_sampling(self,
                               points,
                               num_samples,
                               replace=None,
                               return_choices=False):
        """Points random sampling.

        Sample points to a certain number.

        Args:
            points (ndarray): 3D Points.
            num_samples (int): Number of samples to be sampled.
            replace (bool): Whether the sample is with or without replacement.
                When None, replacement is used only if there are fewer
                points than ``num_samples``.
            return_choices (bool): Whether return choice.

        Returns:
            points (ndarray): 3D Points.
            choices (ndarray): The generated random samples. Only returned
                when ``return_choices`` is True.
        """
        if replace is None:
            replace = (points.shape[0] < num_samples)
        choices = np.random.choice(
            points.shape[0], num_samples, replace=replace)
        if return_choices:
            return points[choices], choices
        return points[choices]

    def __call__(self, results):
        """Sample ``num_points`` points and the matching mask entries.

        Args:
            results (dict): Result dict. Reads ``points`` and, when present,
                ``pts_instance_mask`` and ``pts_semantic_mask``.

        Returns:
            dict: The same dict with ``points`` and every available mask
                indexed by the same random choices.
        """
        points, choices = self.points_random_sampling(
            results['points'], self.num_points, return_choices=True)
        results['points'] = points

        # Each mask is re-indexed on its own: a mask that is present must
        # stay aligned with the sampled points even when the other mask is
        # missing.
        for mask_key in ('pts_instance_mask', 'pts_semantic_mask'):
            mask = results.get(mask_key, None)
            if mask is not None:
                results[mask_key] = mask[choices]

        return results

    def __repr__(self):
        """str: Class name followed by the configured number of points."""
        repr_str = self.__class__.__name__
        repr_str += '(num_points={})'.format(self.num_points)
        return repr_str