transforms_3d.py 17 KB
Newer Older
zhangwenwei's avatar
zhangwenwei committed
1
import numpy as np
2
from mmcv.utils import build_from_cfg
zhangwenwei's avatar
zhangwenwei committed
3
4

from mmdet3d.core.bbox import box_np_ops
5
from mmdet.datasets.builder import PIPELINES
zhangwenwei's avatar
zhangwenwei committed
6
from mmdet.datasets.pipelines import RandomFlip
zhangwenwei's avatar
zhangwenwei committed
7
8
9
10
from ..registry import OBJECTSAMPLERS
from .data_augment_utils import noise_per_object_v3_


11
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
12
13
14
15
16
17
18
19
class RandomFlip3D(RandomFlip):
    """Flip the points & bbox.

    If the input dict contains the key "flip", then the flag will be used,
    otherwise it will be randomly decided by a ratio specified in the init
    method.

    Args:
        sync_2d (bool, optional): Whether to apply flip according to the 2D
            images. If True, it will apply the same flip as that to 2D images.
            If False, it will decide whether to flip randomly and independently
            to that of 2D images.
        flip_ratio_bev_horizontal (float, optional): The flipping probability
            in horizontal direction.
        flip_ratio_bev_vertical (float, optional): The flipping probability
            in vertical direction.
        **kwargs: Extra keyword arguments forwarded unchanged to the parent
            ``RandomFlip`` constructor.
    """

    def __init__(self,
                 sync_2d=True,
                 flip_ratio_bev_horizontal=0.0,
                 flip_ratio_bev_vertical=0.0,
                 **kwargs):
        super(RandomFlip3D, self).__init__(
            flip_ratio=flip_ratio_bev_horizontal, **kwargs)
        self.sync_2d = sync_2d
        # Stored under its own name as well so that ``__repr__`` can report it.
        self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal
        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
        if flip_ratio_bev_horizontal is not None:
            assert isinstance(
                flip_ratio_bev_horizontal,
                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
        if flip_ratio_bev_vertical is not None:
            assert isinstance(
                flip_ratio_bev_vertical,
                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1

    @staticmethod
    def _draw_flip(ratio):
        """Draw one random flip decision for the probability ``ratio``.

        Args:
            ratio (float | None): Flip probability. ``None`` is accepted by
                the constructor and is treated here as "never flip".

        Returns:
            bool: Whether to flip.
        """
        draw = np.random.rand()
        return ratio is not None and bool(draw < ratio)

    def random_flip_data_3d(self, input_dict, direction='horizontal'):
        """Flip the points together with the single 3D box field.

        Args:
            input_dict (dict): Result dict. Reads ``bbox3d_fields``,
                ``points`` and, when no box field is registered,
                ``box_type_3d``. ``points`` is replaced by the value returned
                from the box object's ``flip``.
            direction (str): Either ``'horizontal'`` or ``'vertical'``.
        """
        assert direction in ['horizontal', 'vertical']
        if len(input_dict['bbox3d_fields']) == 0:  # test mode
            # No boxes are registered: add an empty box container so that
            # the points can still be flipped through its ``flip`` method.
            input_dict['bbox3d_fields'].append('empty_box3d')
            input_dict['empty_box3d'] = input_dict['box_type_3d'](
                np.array([], dtype=np.float32))
        assert len(input_dict['bbox3d_fields']) == 1
        for key in input_dict['bbox3d_fields']:
            input_dict['points'] = input_dict[key].flip(
                direction, points=input_dict['points'])

    def __call__(self, input_dict):
        """Flip the 2D data through the parent class, then the 3D data.

        Args:
            input_dict (dict): Result dict of the pipeline.

        Returns:
            dict: The same dict, with ``pcd_horizontal_flip`` and
                ``pcd_vertical_flip`` set and the points/boxes flipped
                accordingly.
        """
        # flip 2D image and its annotations
        super(RandomFlip3D, self).__call__(input_dict)

        if self.sync_2d:
            # Follow the 2D decision; no vertical flip in this mode.
            input_dict['pcd_horizontal_flip'] = input_dict['flip']
            input_dict['pcd_vertical_flip'] = False
        else:
            # Flags already present in the dict take precedence.
            if 'pcd_horizontal_flip' not in input_dict:
                input_dict['pcd_horizontal_flip'] = self._draw_flip(
                    self.flip_ratio)
            if 'pcd_vertical_flip' not in input_dict:
                input_dict['pcd_vertical_flip'] = self._draw_flip(
                    self.flip_ratio_bev_vertical)

        if input_dict['pcd_horizontal_flip']:
            self.random_flip_data_3d(input_dict, 'horizontal')
        if input_dict['pcd_vertical_flip']:
            self.random_flip_data_3d(input_dict, 'vertical')
        return input_dict

    def __repr__(self):
        """Return the class name followed by its flip settings."""
        repr_str = self.__class__.__name__
        repr_str += '(sync_2d={},'.format(self.sync_2d)
        repr_str += ' flip_ratio_bev_horizontal={},'.format(
            self.flip_ratio_bev_horizontal)
        repr_str += ' flip_ratio_bev_vertical={})'.format(
            self.flip_ratio_bev_vertical)
        return repr_str
zhangwenwei's avatar
zhangwenwei committed
90

zhangwenwei's avatar
zhangwenwei committed
91

92
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
93
class ObjectSample(object):
    """Sample GT objects to the data.

    Args:
        db_sampler (dict): Config dict of the database sampler.
        sample_2d (bool): Whether to also paste 2D image patch to the images
            This should be true when applying multi-modality cut-and-paste.
    """

    def __init__(self, db_sampler, sample_2d=False):
        # Fill in the default sampler type on a shallow copy so that the
        # caller's config object is left untouched.
        db_sampler = db_sampler.copy()
        if 'type' not in db_sampler:
            db_sampler['type'] = 'DataBaseSampler'
        self.sampler_cfg = db_sampler
        self.sample_2d = sample_2d
        self.db_sampler = build_from_cfg(db_sampler, OBJECTSAMPLERS)

    @staticmethod
    def remove_points_in_boxes(points, boxes):
        """Drop every point that lies inside at least one of ``boxes``.

        Args:
            points (np.ndarray): Points of the scene, one per row.
            boxes (np.ndarray): Boxes passed to
                ``box_np_ops.points_in_rbbox``.

        Returns:
            np.ndarray: The points that are in none of the boxes.
        """
        masks = box_np_ops.points_in_rbbox(points, boxes)
        points = points[np.logical_not(masks.any(-1))]
        return points

    def __call__(self, input_dict):
        """Paste sampled objects into the scene.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d``,
                ``gt_labels_3d`` and ``points``, plus ``img`` and
                ``gt_bboxes`` when ``sample_2d`` is set.

        Returns:
            dict: The same dict with boxes, labels and points (and, for
                ``sample_2d``, the image and 2D boxes) updated.
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        gt_labels_3d = input_dict['gt_labels_3d']
        points = input_dict['points']

        if self.sample_2d:
            img = input_dict['img']
            gt_bboxes_2d = input_dict['gt_bboxes']
            # Assume for now 3D & 2D bboxes are the same
            sampled_dict = self.db_sampler.sample_all(
                gt_bboxes_3d.tensor.numpy(),
                gt_labels_3d,
                gt_bboxes_2d=gt_bboxes_2d,
                img=img)
        else:
            sampled_dict = self.db_sampler.sample_all(
                gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)

        # The sampler returns None when there is nothing to add.
        if sampled_dict is not None:
            sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
            sampled_points = sampled_dict['points']
            sampled_gt_labels = sampled_dict['gt_labels_3d']

            gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
                                          axis=0)
            gt_bboxes_3d = gt_bboxes_3d.new_box(
                np.concatenate(
                    [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))

            # Clear the scene points inside the pasted boxes before adding
            # the points that belong to the sampled objects.
            points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
            # check the points dimension
            dim_inds = points.shape[-1]
            points = np.concatenate([sampled_points[:, :dim_inds], points],
                                    axis=0)

            if self.sample_2d:
                sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d']
                gt_bboxes_2d = np.concatenate(
                    [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)

                input_dict['gt_bboxes'] = gt_bboxes_2d
                input_dict['img'] = sampled_dict['img']

        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
        input_dict['gt_labels_3d'] = gt_labels_3d
        input_dict['points'] = points

        return input_dict

    def __repr__(self):
        """Return the class name."""
        return self.__class__.__name__


169
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
170
class ObjectNoise(object):
    """Apply noise to each GT objects in the scene.

    Args:
        translation_std (list, optional): Standard deviation of the
            distribution where translation noise are sampled from.
            Defaults to [0.25, 0.25, 0.25].
        global_rot_range (list, optional): Global rotation to the scene.
            Defaults to [0.0, 0.0].
        rot_range (list, optional): Object rotation range.
            Defaults to [-0.15707963267, 0.15707963267].
        num_try (int, optional): Number of times to try if the noise applied is
            invalid. Defaults to 100.
    """

    def __init__(self,
                 translation_std=None,
                 global_rot_range=None,
                 rot_range=None,
                 num_try=100):
        # ``None`` stands for the documented default. A fresh list is built
        # per instance so that instances never share one mutable default.
        if translation_std is None:
            translation_std = [0.25, 0.25, 0.25]
        if global_rot_range is None:
            global_rot_range = [0.0, 0.0]
        if rot_range is None:
            rot_range = [-0.15707963267, 0.15707963267]
        self.translation_std = translation_std
        self.global_rot_range = global_rot_range
        self.rot_range = rot_range
        self.num_try = num_try

    def __call__(self, input_dict):
        """Apply per-object noise to the GT boxes and the points.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d`` and
                ``points``.

        Returns:
            dict: The same dict with ``gt_bboxes_3d`` rebuilt from the
                noised box array and ``points`` written back.
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        points = input_dict['points']

        # TODO: check this inplace function
        # ``noise_per_object_v3_`` returns nothing that is used here; the
        # box array and the points are expected to be modified in place.
        numpy_box = gt_bboxes_3d.tensor.numpy()
        noise_per_object_v3_(
            numpy_box,
            points,
            rotation_perturb=self.rot_range,
            center_noise_std=self.translation_std,
            global_random_rot_range=self.global_rot_range,
            num_try=self.num_try)

        input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
        input_dict['points'] = points
        return input_dict

    def __repr__(self):
        """Return the class name followed by its noise settings."""
        repr_str = self.__class__.__name__
        repr_str += '(num_try={},'.format(self.num_try)
        repr_str += ' translation_std={},'.format(self.translation_std)
        repr_str += ' global_rot_range={},'.format(self.global_rot_range)
        repr_str += ' rot_range={})'.format(self.rot_range)
        return repr_str


222
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
223
class GlobalRotScaleTrans(object):
    """Apply global rotation, scaling and translation to a 3D scene.

    Args:
        rot_range (list[float]): Range of rotation angle.
            Default to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
        scale_ratio_range (list[float]): Range of scale ratio.
            Default to [0.95, 1.05].
        translation_std (list[float]): The standard deviation of translation
            noise. This apply random translation to a scene by a noise, which
            is sampled from a gaussian distribution whose standard deviation
            is set by ``translation_std``. Default to [0, 0, 0]
        shift_height (bool): whether to shift height
            (the fourth dimension of indoor points) when scaling.
    """

    def __init__(self,
                 rot_range=None,
                 scale_ratio_range=None,
                 translation_std=None,
                 shift_height=False):
        # ``None`` stands for the documented default. A fresh list is built
        # per instance so that instances never share one mutable default.
        if rot_range is None:
            rot_range = [-0.78539816, 0.78539816]
        if scale_ratio_range is None:
            scale_ratio_range = [0.95, 1.05]
        if translation_std is None:
            translation_std = [0, 0, 0]
        self.rot_range = rot_range
        self.scale_ratio_range = scale_ratio_range
        self.translation_std = translation_std
        self.shift_height = shift_height

    def _trans_bbox_points(self, input_dict):
        """Translate the points and all 3D box fields by one random offset.

        The offset is drawn from a zero-mean gaussian and recorded in
        ``input_dict['pcd_trans']``.
        """
        if not isinstance(self.translation_std, (list, tuple, np.ndarray)):
            # A scalar std is used for all three axes.
            translation_std = [
                self.translation_std, self.translation_std,
                self.translation_std
            ]
        else:
            translation_std = self.translation_std
        translation_std = np.array(translation_std, dtype=np.float32)
        trans_factor = np.random.normal(scale=translation_std, size=3).T

        input_dict['points'][:, :3] += trans_factor
        input_dict['pcd_trans'] = trans_factor
        for key in input_dict['bbox3d_fields']:
            input_dict[key].translate(trans_factor)

    def _rot_bbox_points(self, input_dict):
        """Rotate the boxes and the points by one random angle.

        The rotation is delegated to each non-empty box field, which also
        rotates the points; the returned matrix is recorded in
        ``input_dict['pcd_rotation']``.
        """
        rotation = self.rot_range
        # Accept the same sequence types as ``_trans_bbox_points``; a scalar
        # ``r`` is expanded to the symmetric range ``[-r, r]``.
        if not isinstance(rotation, (list, tuple, np.ndarray)):
            rotation = [-rotation, rotation]
        noise_rotation = np.random.uniform(rotation[0], rotation[1])

        # NOTE(review): with no box field, or only empty ones, the points are
        # left unrotated and 'pcd_rotation' is not set -- confirm intended.
        for key in input_dict['bbox3d_fields']:
            if len(input_dict[key].tensor) != 0:
                points, rot_mat_T = input_dict[key].rotate(
                    noise_rotation, input_dict['points'])
                input_dict['points'] = points
                input_dict['pcd_rotation'] = rot_mat_T

    def _scale_bbox_points(self, input_dict):
        """Scale the points and all 3D box fields by ``pcd_scale_factor``."""
        scale = input_dict['pcd_scale_factor']
        input_dict['points'][:, :3] *= scale
        if self.shift_height:
            # The last point column is scaled as well (the height, per the
            # class docstring).
            input_dict['points'][:, -1] *= scale

        for key in input_dict['bbox3d_fields']:
            input_dict[key].scale(scale)

    def _random_scale(self, input_dict):
        """Draw a uniform scale factor and store it as ``pcd_scale_factor``."""
        scale_factor = np.random.uniform(self.scale_ratio_range[0],
                                         self.scale_ratio_range[1])
        input_dict['pcd_scale_factor'] = scale_factor

    def __call__(self, input_dict):
        """Rotate, scale and translate the scene, in that order.

        Args:
            input_dict (dict): Result dict. Reads ``points`` and
                ``bbox3d_fields``; a ``pcd_scale_factor`` that is already
                present is reused instead of drawing a new one.

        Returns:
            dict: The same dict after the three transformations.
        """
        self._rot_bbox_points(input_dict)

        if 'pcd_scale_factor' not in input_dict:
            self._random_scale(input_dict)
        self._scale_bbox_points(input_dict)

        self._trans_bbox_points(input_dict)
        return input_dict

    def __repr__(self):
        """Return the class name followed by its settings."""
        repr_str = self.__class__.__name__
        repr_str += '(rot_range={},'.format(self.rot_range)
        repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)
        repr_str += ' translation_std={},'.format(self.translation_std)
        repr_str += ' shift_height={})'.format(self.shift_height)
        return repr_str


311
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
312
313
314
315
316
317
318
319
320
321
class PointShuffle(object):
    """Shuffle the points of a scene."""

    def __call__(self, input_dict):
        """Shuffle ``input_dict['points']`` in place.

        Args:
            input_dict (dict): Result dict holding the ``points`` array.

        Returns:
            dict: The same dict, with its points reordered.
        """
        pts = input_dict['points']
        np.random.shuffle(pts)
        return input_dict

    def __repr__(self):
        """Return the class name."""
        return type(self).__name__


322
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
323
324
325
326
327
328
329
330
class ObjectRangeFilter(object):
    """Filter GT objects by a bird's-eye-view range.

    Args:
        point_cloud_range (list[float]): Point cloud range. Elements 0, 1,
            3 and 4 are used as the BEV range.
    """

    def __init__(self, point_cloud_range):
        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
        self.bev_range = self.pcd_range[[0, 1, 3, 4]]

    def __call__(self, input_dict):
        """Keep only the GT boxes, and their labels, inside the BEV range.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d`` and
                ``gt_labels_3d``.

        Returns:
            dict: The same dict with both entries filtered.
        """
        gt_bboxes_3d = input_dict['gt_bboxes_3d']
        gt_labels_3d = input_dict['gt_labels_3d']
        mask = gt_bboxes_3d.in_range_bev(self.bev_range)
        gt_bboxes_3d = gt_bboxes_3d[mask]
        # mask is a torch tensor but gt_labels_3d is still numpy array
        # using mask to index gt_labels_3d will cause bug when
        # len(gt_labels_3d) == 1, where mask=1 will be interpreted
        # as gt_labels_3d[1] and cause out of index error
        # ``np.bool_`` is used because the ``np.bool`` alias no longer exists
        # in recent NumPy releases.
        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool_)]

        # limit rad to [-pi, pi]
        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
        input_dict['gt_labels_3d'] = gt_labels_3d

        return input_dict

    def __repr__(self):
        """Return the class name followed by the point cloud range."""
        repr_str = self.__class__.__name__
        repr_str += '(point_cloud_range={})'.format(self.pcd_range.tolist())
        return repr_str


353
@PIPELINES.register_module()
zhangwenwei's avatar
zhangwenwei committed
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
class PointsRangeFilter(object):
    """Filter points by a 3D range.

    Args:
        point_cloud_range (list[float]): Point cloud range. The first three
            values are the inclusive lower bounds and the remaining values
            the exclusive upper bounds of the first three point columns.
    """

    def __init__(self, point_cloud_range):
        bounds = np.array(point_cloud_range, dtype=np.float32)
        # Kept as a single row so that it broadcasts against (N, 3) points.
        self.pcd_range = bounds[np.newaxis, :]

    def __call__(self, input_dict):
        """Drop the points that fall outside the configured range.

        Args:
            input_dict (dict): Result dict holding the ``points`` array.

        Returns:
            dict: The same dict with ``points`` reduced to the in-range rows.
        """
        pts = input_dict['points']
        xyz = pts[:, :3]
        lower = self.pcd_range[:, :3]
        upper = self.pcd_range[:, 3:]
        inside = np.logical_and(xyz >= lower, xyz < upper)
        # A point is kept only when all three coordinates are in range.
        keep = inside.all(axis=1)
        input_dict['points'] = pts[keep, :]
        return input_dict

    def __repr__(self):
        """Return the class name followed by the point cloud range."""
        return '{}(point_cloud_range={})'.format(self.__class__.__name__,
                                                 self.pcd_range.tolist())
zhangwenwei's avatar
zhangwenwei committed
373
374
375
376


@PIPELINES.register_module()
class ObjectNameFilter(object):
    """Filter GT objects by their names.

    Args:
        classes (list[str]): list of class names to be kept for training
    """

    def __init__(self, classes):
        self.classes = classes
        # Labels that are kept: one index per entry of ``classes``.
        self.labels = list(range(len(self.classes)))

    def __call__(self, input_dict):
        """Keep only the GT boxes whose label is one of the kept labels.

        Args:
            input_dict (dict): Result dict. Reads ``gt_bboxes_3d`` and
                ``gt_labels_3d``.

        Returns:
            dict: The same dict with both entries filtered by one mask.
        """
        gt_labels_3d = input_dict['gt_labels_3d']
        # Any label outside ``self.labels`` is dropped.
        gt_bboxes_mask = np.isin(gt_labels_3d, self.labels)
        input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask]
        input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask]

        return input_dict

    def __repr__(self):
        """Return the class name followed by the kept class names."""
        repr_str = self.__class__.__name__
        repr_str += f'(classes={self.classes})'
        return repr_str
wuyuefeng's avatar
wuyuefeng committed
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463


@PIPELINES.register_module()
class IndoorPointSample(object):
    """Indoor point sample.

    Sampling data to a certain number.

    Args:
        num_points (int): Number of points to be sampled.
    """

    def __init__(self, num_points):
        self.num_points = num_points

    def points_random_sampling(self,
                               points,
                               num_samples,
                               replace=None,
                               return_choices=False):
        """Points random sampling.

        Sample points to a certain number.

        Args:
            points (ndarray): 3D Points.
            num_samples (int): Number of samples to be sampled.
            replace (bool): Whether the sample is with or without replacement.
                When None, replacement is used only if there are fewer
                points than ``num_samples``.
            return_choices (bool): Whether return choice.

        Returns:
            points (ndarray): 3D Points.
            choices (ndarray): The generated random samples. Only returned
                when ``return_choices`` is True.
        """
        if replace is None:
            replace = (points.shape[0] < num_samples)
        choices = np.random.choice(
            points.shape[0], num_samples, replace=replace)
        if return_choices:
            return points[choices], choices
        return points[choices]

    def __call__(self, results):
        """Sample the points, and any per-point mask, to ``num_points``.

        Args:
            results (dict): Result dict. Reads ``points`` and, when present,
                ``pts_instance_mask`` and ``pts_semantic_mask``.

        Returns:
            dict: The same dict with the points and every present mask
                indexed by the same random choices.
        """
        points, choices = self.points_random_sampling(
            results['points'], self.num_points, return_choices=True)
        results['points'] = points

        # Each mask is handled on its own, so a dict that carries only one
        # of the two masks still ends up aligned with the sampled points.
        for mask_key in ('pts_instance_mask', 'pts_semantic_mask'):
            mask = results.get(mask_key, None)
            if mask is not None:
                results[mask_key] = mask[choices]

        return results

    def __repr__(self):
        """Return the class name followed by the number of points."""
        repr_str = self.__class__.__name__
        repr_str += '(num_points={})'.format(self.num_points)
        return repr_str