Commit 92ae69fb authored by wuyuefeng, committed by zhangwenwei

Merge rot scale

parent 5a1575a0
 from mmdet.datasets.pipelines import Compose
 from .dbsampler import DataBaseSampler
-from .formating import DefaultFormatBundle, DefaultFormatBundle3D
-from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScaleTrans,
-                             IndoorPointsColorJitter)
-from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
-                             NormalizePointsColor)
-from .indoor_sample import IndoorPointSample
-from .loading import LoadMultiViewImageFromFiles
-from .point_seg_class_mapping import PointSegClassMapping
+from .formating import Collect3D, DefaultFormatBundle, DefaultFormatBundle3D
+from .loading import (LoadAnnotations3D, LoadMultiViewImageFromFiles,
+                      LoadPointsFromFile, NormalizePointsColor,
+                      PointSegClassMapping)
 from .test_time_aug import MultiScaleFlipAug3D
-from .transforms_3d import (GlobalRotScaleTrans, ObjectNoise,
-                            ObjectRangeFilter, ObjectSample, PointShuffle,
-                            PointsRangeFilter, RandomFlip3D)
+from .transforms_3d import (GlobalRotScaleTrans, IndoorPointSample,
+                            ObjectNoise, ObjectRangeFilter, ObjectSample,
+                            PointShuffle, PointsRangeFilter, RandomFlip3D)

 __all__ = [
     'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
     'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
-    'IndoorGlobalRotScaleTrans', 'IndoorPointsColorJitter', 'IndoorFlipData',
     'NormalizePointsColor', 'LoadAnnotations3D', 'IndoorPointSample',
     'PointSegClassMapping', 'MultiScaleFlipAug3D'
 ]
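After the merge, one set of transforms serves both indoor and outdoor data. A minimal sketch (not part of this commit; transform names come from the merged __init__ above, other values borrowed from the updated ScanNet test further down, num_points is a placeholder):

# Hypothetical indoor training pipeline built from the merged transforms.
train_pipeline = [
    dict(type='LoadPointsFromFile', load_dim=6, use_dim=[0, 1, 2],
         shift_height=True),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='IndoorPointSample', num_points=40000),  # placeholder value
    dict(type='RandomFlip3D', sync_2d=False,
         flip_ratio_bev_horizontal=0.5, flip_ratio_bev_vertical=0.5),
    dict(type='GlobalRotScaleTrans', rot_range=[-0.087266, 0.087266],
         scale_ratio_range=[1.0, 1.0], shift_height=True),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]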
@@ -58,7 +58,6 @@ class DataBaseSampler(object):
                  data_root,
                  rate,
                  prepare,
-                 object_rot_range,
                  sample_groups,
                  classes=None):
         super().__init__()
@@ -66,7 +65,6 @@ class DataBaseSampler(object):
         self.info_path = info_path
         self.rate = rate
         self.prepare = prepare
-        self.object_rot_range = object_rot_range
         self.classes = classes
         self.cat2label = {name: i for i, name in enumerate(classes)}
         self.label2cat = {i: name for i, name in enumerate(classes)}
@@ -103,11 +101,6 @@ class DataBaseSampler(object):
         self.sampler_dict = {}
         for k, v in self.group_db_infos.items():
             self.sampler_dict[k] = BatchSampler(v, k, shuffle=True)
-        self.object_rot_range = object_rot_range
-        self.object_rot_enable = np.abs(self.object_rot_range[0] -
-                                        self.object_rot_range[1]) >= 1e-3
         # TODO: No group_sampling currently

     @staticmethod
@@ -183,11 +176,6 @@ class DataBaseSampler(object):
                     info['path']) if self.data_root else info['path']
                 s_points = np.fromfile(
                     file_path, dtype=np.float32).reshape([-1, 4])
-                if 'rot_transform' in info:
-                    rot = info['rot_transform']
-                    s_points[:, :3] = box_np_ops.rotation_points_single_angle(
-                        s_points[:, :3], rot, axis=2)
                 s_points[:, :3] += info['box3d_lidar'][:3]

                 count += 1
@@ -219,24 +207,7 @@ class DataBaseSampler(object):
             gt_bboxes[:, 0:2], gt_bboxes[:, 3:5], gt_bboxes[:, 6])
         sp_boxes = np.stack([i['box3d_lidar'] for i in sampled], axis=0)

-        valid_mask = np.zeros([gt_bboxes.shape[0]], dtype=np.bool_)
-        valid_mask = np.concatenate(
-            [valid_mask,
-             np.ones([sp_boxes.shape[0]], dtype=np.bool_)], axis=0)
         boxes = np.concatenate([gt_bboxes, sp_boxes], axis=0).copy()
-        if self.object_rot_enable:
-            assert False, 'This part needs to be checked'
-            # place samples to any place in a circle.
-            # TODO: rm it if not needed
-            data_augment_utils.noise_per_object_v3_(
-                boxes,
-                None,
-                valid_mask,
-                0,
-                0,
-                self._global_rot_range,
-                num_try=100)

         sp_boxes_new = boxes[gt_bboxes.shape[0]:]
         sp_boxes_bv = box_np_ops.center_to_corner_box2d(
@@ -253,11 +224,5 @@ class DataBaseSampler(object):
                     coll_mat[i] = False
                     coll_mat[:, i] = False
                 else:
-                    if self.object_rot_enable:
-                        assert False, 'This part needs to be checked'
-                        sampled[i - num_gt]['box3d_lidar'][:2] = boxes[i, :2]
-                        sampled[i - num_gt]['box3d_lidar'][-1] = boxes[i, -1]
-                        sampled[i - num_gt]['rot_transform'] = (
-                            boxes[i, -1] - sp_boxes[i - num_gt, -1])
                     valid_samples.append(sampled[i - num_gt])

         return valid_samples
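With object_rot_range and the per-object rotation branch gone, the sampler is configured without rotation noise. A hedged sketch of a post-merge constructor config (the paths, prepare filter and sample groups are illustrative, not from this commit):

# Hypothetical DataBaseSampler config after this change.
db_sampler = dict(
    data_root='data/kitti/',                         # assumed dataset root
    info_path='data/kitti/kitti_dbinfos_train.pkl',  # assumed info file
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1]),
    sample_groups=dict(Car=15),  # note: no object_rot_range anymore
    classes=['Car'])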
@@ -74,7 +74,8 @@ class Collect3D(object):
     def __init__(self,
                  keys,
                  meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
-                            'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
+                            'pad_shape', 'scale_factor', 'flip',
+                            'pcd_horizontal_flip', 'pcd_vertical_flip',
                             'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                             'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
                             'pcd_scale_factor', 'pcd_rotation')):
...
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorFlipData(object):
"""Indoor flip data.
Flip point cloud and ground truth boxes.
The point cloud will be flipped along the yz plane
and the xz plane with a certain probability.
Args:
flip_ratio_yz (float): Probability of being flipped along yz plane.
Default: 0.5.
flip_ratio_xz (float): Probability of being flipped along xz plane.
Default: 0.5.
"""
def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
self.flip_ratio_yz = flip_ratio_yz
self.flip_ratio_xz = flip_ratio_xz
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
results['flip_yz'] = False
results['flip_xz'] = False
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d.flip('horizontal')
results['flip_yz'] = True
if not gt_bboxes_3d.with_yaw and np.random.random(
) < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d.flip('vertical')
results['flip_xz'] = True
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(flip_ratio_yz={})'.format(self.flip_ratio_yz)
repr_str += '(flip_ratio_xz={})'.format(self.flip_ratio_xz)
return repr_str
@PIPELINES.register_module()
class IndoorPointsColorJitter(object):
"""Indoor points color jitter.
Randomly change the brightness and color of the point cloud, and
drop out the points' colors with a certain range and probability.
Args:
color_mean (list[float]): Mean color of the point cloud.
Default: [0.5, 0.5, 0.5].
bright_range (list[float]): Range of brightness.
Default: [0.8, 1.2].
color_shift_range (list[float]): Range of color shift.
Default: [0.95, 1.05].
jitter_range (list[float]): Range of jittering.
Default: [-0.025, 0.025].
drop_prob (float): Probability to drop out points' color.
Default: 0.3
"""
def __init__(self,
color_mean=[0.5, 0.5, 0.5],
bright_range=[0.8, 1.2],
color_shift_range=[0.95, 1.05],
jitter_range=[-0.025, 0.025],
drop_prob=0.3):
self.color_mean = color_mean
self.bright_range = bright_range
self.color_shift_range = color_shift_range
self.jitter_range = jitter_range
self.drop_prob = drop_prob
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points have channel >=6, got {points.shape[1]}.'
rgb_color = points[:, 3:6] + self.color_mean
# brightness change for each channel
rgb_color *= np.random.uniform(self.bright_range[0],
self.bright_range[1], 3)
# color shift for each channel
rgb_color += np.random.uniform(self.color_shift_range[0],
self.color_shift_range[1], 3)
# jittering on each pixel
rgb_color += np.expand_dims(
np.random.uniform(self.jitter_range[0], self.jitter_range[1]), -1)
rgb_color = np.clip(rgb_color, 0, 1)
# randomly drop out points' colors
rgb_color *= np.expand_dims(
np.random.random(points.shape[0]) > self.drop_prob, -1)
points[:, 3:6] = rgb_color - self.color_mean
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
repr_str += '(bright_range={})'.format(self.bright_range)
repr_str += '(color_shift_range={})'.format(self.color_shift_range)
repr_str += '(jitter_range={})'.format(self.jitter_range)
        repr_str += '(drop_prob={})'.format(self.drop_prob)
        return repr_str
# TODO: merge outdoor indoor transform.
# TODO: try transform noise.
@PIPELINES.register_module()
class IndoorGlobalRotScaleTrans(object):
"""Indoor global rotate and scale.
Augment sunrgbd and scannet data with global rotating and scaling.
Args:
shift_height (bool): Whether to use height.
Default: True.
rot_range (list[float]): Range of rotation.
Default: None.
scale_range (list[float]): Range of scale.
Default: None.
"""
def __init__(self, shift_height=True, rot_range=None, scale_range=None):
self.shift_height = shift_height
        self.rot_range = np.pi * np.array(rot_range) \
            if rot_range is not None else None
self.scale_range = scale_range
def _rotz(self, t):
"""Rotate About Z.
Rotation about the z-axis.
Args:
t (float): Angle of rotation.
Returns:
rot_mat (ndarray): Matrix of rotation.
"""
c = np.cos(t)
s = np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expect length of rot range =2, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
if gt_bboxes_3d.tensor.shape[0] != 0:
gt_bboxes_3d.rotate(rot_angle)
points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
results['rot_angle'] = rot_angle
if self.scale_range is not None:
assert len(self.scale_range) == 2, \
f'Expect length of scale range =2, ' \
f'got {len(self.scale_range)}.'
# Augment point cloud scale
scale_ratio = np.random.uniform(self.scale_range[0],
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d.scale(scale_ratio)
if self.shift_height:
points[:, -1] *= scale_ratio
results['scale_ratio'] = scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(rot_range={})'.format(self.rot_range)
repr_str += '(scale_range={})'.format(self.scale_range)
return repr_str
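The deleted IndoorGlobalRotScaleTrans rotated points with the _rotz matrix above; a quick numpy check (illustrative only, not in the diff) of that convention — rotating (1, 0, 0) by pi/2 about z yields (0, 1, 0):

import numpy as np

t = np.pi / 2
c, s = np.cos(t), np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
point = np.array([[1.0, 0.0, 0.0]])
# same convention as points[:, :3] = np.dot(points[:, :3], rot_mat.T)
rotated = np.dot(point, rot_mat.T)
assert np.allclose(rotated, [[0.0, 1.0, 0.0]])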
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import LoadAnnotations
@PIPELINES.register_module()
class NormalizePointsColor(object):
"""Normalize color of points
Normalize color of the points.
Args:
color_mean (list[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expect points have channel >=6, got {points.shape[1]}'
points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class LoadPointsFromFile(object):
"""Load Points From File.
Load sunrgbd and scannet points from file.
Args:
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
load_dim=6,
use_dim=[0, 1, 2],
shift_height=False,
file_client_args=dict(backend='disk')):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
self.file_client_args = file_client_args.copy()
self.file_client = None
def _load_points(self, pts_filename):
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
pts_bytes = self.file_client.get(pts_filename)
points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
        with_mask_3d (bool, optional): Whether to load 3D instance masks
            for points. Defaults to False.
        with_seg_3d (bool, optional): Whether to load 3D semantic masks
            for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True,
file_client_args=dict(backend='disk')):
super().__init__(
with_bbox,
with_label,
with_mask,
with_seg,
poly2mask,
file_client_args=file_client_args)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append('gt_bboxes_3d')
return results
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_instance_mask_path)
pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
except ConnectionError:
mmcv.check_file_exist(pts_instance_mask_path)
pts_instance_mask = np.fromfile(
pts_instance_mask_path, dtype=np.long)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append('pts_instance_mask')
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
pts_semantic_mask_path, dtype=np.long)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append('pts_semantic_mask')
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Indoor point sample.
Sampling data to a certain number.
Args:
num_points (int): Number of points to be sampled.
"""
def __init__(self, num_points):
self.num_points = num_points
def points_random_sampling(self,
points,
num_samples,
replace=None,
return_choices=False):
"""Points random sampling.
Sample points to a certain number.
Args:
points (ndarray): 3D Points.
num_samples (int): Number of samples to be sampled.
replace (bool): Whether the sample is with or without replacement.
return_choices (bool): Whether return choice.
Returns:
points (ndarray): 3D Points.
choices (ndarray): The generated random samples.
"""
if replace is None:
replace = (points.shape[0] < num_samples)
choices = np.random.choice(
points.shape[0], num_samples, replace=replace)
if return_choices:
return points[choices], choices
else:
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
pts_semantic_mask = results.get('pts_semantic_mask', None)
results['points'] = points
if pts_instance_mask is not None and pts_semantic_mask is not None:
pts_instance_mask = pts_instance_mask[choices]
pts_semantic_mask = pts_semantic_mask[choices]
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_points={})'.format(self.num_points)
return repr_str
@@ -2,6 +2,7 @@ import mmcv
 import numpy as np

 from mmdet.datasets.builder import PIPELINES
+from mmdet.datasets.pipelines import LoadAnnotations


 @PIPELINES.register_module()
@@ -103,3 +104,263 @@ class LoadPointsFromMultiSweeps(object):
     def __repr__(self):
         return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})'
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
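A small illustrative check (not in the diff; assumes the class above is importable): with valid_cat_ids=(3, 5, 7), valid labels map to contiguous ids 0..2 and anything else maps to the ignore index len(valid_cat_ids):

import numpy as np

mapping = PointSegClassMapping(valid_cat_ids=(3, 5, 7))
results = dict(pts_semantic_mask=np.array([3, 5, 7, 40]))
results = mapping(results)
assert results['pts_semantic_mask'].tolist() == [0, 1, 2, 3]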
@PIPELINES.register_module()
class NormalizePointsColor(object):
"""Normalize color of points
Normalize color of the points.
Args:
color_mean (list[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expect points have channel >=6, got {points.shape[1]}'
points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class LoadPointsFromFile(object):
"""Load Points From File.
Load sunrgbd and scannet points from file.
Args:
shift_height (bool): Whether to use shifted height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
load_dim=6,
use_dim=[0, 1, 2],
shift_height=False,
file_client_args=dict(backend='disk')):
self.shift_height = shift_height
if isinstance(use_dim, int):
use_dim = list(range(use_dim))
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
self.file_client_args = file_client_args.copy()
self.file_client = None
def _load_points(self, pts_filename):
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
pts_bytes = self.file_client.get(pts_filename)
points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
mmcv.check_file_exist(pts_filename)
if pts_filename.endswith('.npy'):
points = np.load(pts_filename)
else:
points = np.fromfile(pts_filename, dtype=np.float32)
return points
def __call__(self, results):
pts_filename = results['pts_filename']
points = self._load_points(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.shift_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(shift_height={})'.format(self.shift_height)
repr_str += '(load_dim={})'.format(self.load_dim)
repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
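What shift_height=True computes, as a standalone numpy sketch (values are made up): the floor height is taken as the 0.99-percentile of the z-values and the height above it is appended as an extra channel:

import numpy as np

points = np.array([[0., 0., 0.0], [1., 1., 0.1], [2., 2., 2.0]])
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
# points is now (3, 4): x, y, z plus the shifted height channel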
@PIPELINES.register_module()
class LoadAnnotations3D(LoadAnnotations):
"""Load Annotations3D.
Load instance mask and semantic mask of points and
encapsulate the items into related fields.
Args:
with_bbox_3d (bool, optional): Whether to load 3D boxes.
Defaults to True.
with_label_3d (bool, optional): Whether to load 3D labels.
Defaults to True.
        with_mask_3d (bool, optional): Whether to load 3D instance masks
            for points. Defaults to False.
        with_seg_3d (bool, optional): Whether to load 3D semantic masks
            for points. Defaults to False.
with_bbox (bool, optional): Whether to load 2D boxes.
Defaults to False.
with_label (bool, optional): Whether to load 2D labels.
Defaults to False.
with_mask (bool, optional): Whether to load 2D instance masks.
Defaults to False.
with_seg (bool, optional): Whether to load 2D semantic masks.
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def __init__(self,
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=False,
with_seg_3d=False,
with_bbox=False,
with_label=False,
with_mask=False,
with_seg=False,
poly2mask=True,
file_client_args=dict(backend='disk')):
super().__init__(
with_bbox,
with_label,
with_mask,
with_seg,
poly2mask,
file_client_args=file_client_args)
self.with_bbox_3d = with_bbox_3d
self.with_label_3d = with_label_3d
self.with_mask_3d = with_mask_3d
self.with_seg_3d = with_seg_3d
def _load_bboxes_3d(self, results):
results['gt_bboxes_3d'] = results['ann_info']['gt_bboxes_3d']
results['bbox3d_fields'].append('gt_bboxes_3d')
return results
def _load_labels_3d(self, results):
results['gt_labels_3d'] = results['ann_info']['gt_labels_3d']
return results
def _load_masks_3d(self, results):
pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_instance_mask_path)
pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
except ConnectionError:
mmcv.check_file_exist(pts_instance_mask_path)
pts_instance_mask = np.fromfile(
pts_instance_mask_path, dtype=np.long)
results['pts_instance_mask'] = pts_instance_mask
results['pts_mask_fields'].append('pts_instance_mask')
return results
def _load_semantic_seg_3d(self, results):
pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
if self.file_client is None:
self.file_client = mmcv.FileClient(**self.file_client_args)
try:
mask_bytes = self.file_client.get(pts_semantic_mask_path)
# add .copy() to fix read-only bug
pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
except ConnectionError:
mmcv.check_file_exist(pts_semantic_mask_path)
pts_semantic_mask = np.fromfile(
pts_semantic_mask_path, dtype=np.long)
results['pts_semantic_mask'] = pts_semantic_mask
results['pts_seg_fields'].append('pts_semantic_mask')
return results
def __call__(self, results):
results = super().__call__(results)
if self.with_bbox_3d:
results = self._load_bboxes_3d(results)
if results is None:
return None
if self.with_label_3d:
results = self._load_labels_3d(results)
if self.with_mask_3d:
results = self._load_masks_3d(results)
if self.with_seg_3d:
results = self._load_semantic_seg_3d(results)
return results
def __repr__(self):
indent_str = ' '
repr_str = self.__class__.__name__ + '(\n'
repr_str += f'{indent_str}with_bbox_3d={self.with_bbox_3d},\n'
repr_str += f'{indent_str}with_label_3d={self.with_label_3d},\n'
repr_str += f'{indent_str}with_mask_3d={self.with_mask_3d},\n'
repr_str += f'{indent_str}with_seg_3d={self.with_seg_3d},\n'
repr_str += f'{indent_str}with_bbox={self.with_bbox},\n'
repr_str += f'{indent_str}with_label={self.with_label},\n'
repr_str += f'{indent_str}with_mask={self.with_mask},\n'
repr_str += f'{indent_str}with_seg={self.with_seg},\n'
repr_str += f'{indent_str}poly2mask={self.poly2mask})'
return repr_str
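A hedged usage sketch mirroring the updated ScanNet test below: point-wise instance and semantic masks are switched on alongside the default 3D boxes and labels:

load_ann = dict(
    type='LoadAnnotations3D',
    with_bbox_3d=True,
    with_label_3d=True,
    with_mask_3d=True,
    with_seg_3d=True)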
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class PointSegClassMapping(object):
"""Map original semantic class to valid category ids.
Map valid classes as 0~len(valid_cat_ids)-1 and
others as len(valid_cat_ids).
Args:
        valid_cat_ids (tuple[int]): A tuple of valid category ids.
"""
def __init__(self, valid_cat_ids):
self.valid_cat_ids = valid_cat_ids
def __call__(self, results):
assert 'pts_semantic_mask' in results
pts_semantic_mask = results['pts_semantic_mask']
neg_cls = len(self.valid_cat_ids)
for i in range(pts_semantic_mask.shape[0]):
if pts_semantic_mask[i] in self.valid_cat_ids:
converted_id = self.valid_cat_ids.index(pts_semantic_mask[i])
pts_semantic_mask[i] = converted_id
else:
pts_semantic_mask[i] = neg_cls
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(valid_cat_ids={})'.format(self.valid_cat_ids)
return repr_str
@@ -21,35 +21,67 @@ class RandomFlip3D(RandomFlip):
         images. If True, it will apply the same flip as that to 2D images.
         If False, it will decide whether to flip randomly and independently
         to that of 2D images.
-        flip_ratio (float, optional): The flipping probability.
+        flip_ratio_bev_horizontal (float, optional): The flipping probability
+            in horizontal direction.
+        flip_ratio_bev_vertical (float, optional): The flipping probability
+            in vertical direction.
     """

-    def __init__(self, sync_2d=True, **kwargs):
-        super(RandomFlip3D, self).__init__(**kwargs)
+    def __init__(self,
+                 sync_2d=True,
+                 flip_ratio_bev_horizontal=0.0,
+                 flip_ratio_bev_vertical=0.0,
+                 **kwargs):
+        super(RandomFlip3D, self).__init__(
+            flip_ratio=flip_ratio_bev_horizontal, **kwargs)
         self.sync_2d = sync_2d
+        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
+        if flip_ratio_bev_horizontal is not None:
+            assert isinstance(
+                flip_ratio_bev_horizontal,
+                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
+        if flip_ratio_bev_vertical is not None:
+            assert isinstance(
+                flip_ratio_bev_vertical,
+                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1

-    def random_flip_data_3d(self, input_dict):
-        input_dict['points'][:, 1] = -input_dict['points'][:, 1]
+    def random_flip_data_3d(self, input_dict, direction='horizontal'):
+        assert direction in ['horizontal', 'vertical']
         for key in input_dict['bbox3d_fields']:
-            input_dict[key].flip()
+            input_dict['points'] = input_dict[key].flip(
+                direction, points=input_dict['points'])

     def __call__(self, input_dict):
         # flip 2D image and its annotations
         super(RandomFlip3D, self).__call__(input_dict)

         if self.sync_2d:
-            input_dict['pcd_flip'] = input_dict['flip']
+            input_dict['pcd_horizontal_flip'] = input_dict['flip']
+            input_dict['pcd_vertical_flip'] = False
         else:
-            flip = True if np.random.rand() < self.flip_ratio else False
-            input_dict['pcd_flip'] = flip
+            if 'pcd_horizontal_flip' not in input_dict:
+                flip_horizontal = True if np.random.rand(
+                ) < self.flip_ratio else False
+                input_dict['pcd_horizontal_flip'] = flip_horizontal
+            if 'pcd_vertical_flip' not in input_dict:
+                flip_vertical = True if np.random.rand(
+                ) < self.flip_ratio_bev_vertical else False
+                input_dict['pcd_vertical_flip'] = flip_vertical

-        if input_dict['pcd_flip']:
-            self.random_flip_data_3d(input_dict)
+        if input_dict['pcd_horizontal_flip']:
+            self.random_flip_data_3d(input_dict, 'horizontal')
+        if input_dict['pcd_vertical_flip']:
+            self.random_flip_data_3d(input_dict, 'vertical')
         return input_dict

     def __repr__(self):
-        return self.__class__.__name__ + '(flip_ratio={}, sync_2d={})'.format(
-            self.flip_ratio, self.sync_2d)
+        repr_str = self.__class__.__name__
+        repr_str += '(sync_2d={},'.format(self.sync_2d)
+        repr_str += ' flip_ratio_bev_horizontal={},'.format(self.flip_ratio)
+        repr_str += ' flip_ratio_bev_vertical={})'.format(
+            self.flip_ratio_bev_vertical)
+        return repr_str
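The single pcd_flip flag is replaced by independent horizontal and vertical BEV flips. A minimal sketch of the new interface (the probabilities are illustrative):

flip_transform = dict(
    type='RandomFlip3D',
    sync_2d=False,  # decouple the point-cloud flip from the 2D image flip
    flip_ratio_bev_horizontal=0.5,
    flip_ratio_bev_vertical=0.5)
# After __call__, results carry 'pcd_horizontal_flip' and
# 'pcd_vertical_flip' instead of the old single 'pcd_flip'.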
 @PIPELINES.register_module()
@@ -195,15 +227,19 @@ class GlobalRotScaleTrans(object):
         noise. This applies random translation to a scene by a noise, which
         is sampled from a gaussian distribution whose standard deviation
         is set by ``translation_std``. Default to [0, 0, 0].
+        shift_height (bool): Whether to shift height
+            (the fourth dimension of indoor points) when scaling.
     """

     def __init__(self,
                  rot_range=[-0.78539816, 0.78539816],
                  scale_ratio_range=[0.95, 1.05],
-                 translation_std=[0, 0, 0]):
+                 translation_std=[0, 0, 0],
+                 shift_height=False):
         self.rot_range = rot_range
         self.scale_ratio_range = scale_ratio_range
         self.translation_std = translation_std
+        self.shift_height = shift_height

     def _trans_bbox_points(self, input_dict):
         if not isinstance(self.translation_std, (list, tuple, np.ndarray)):
@@ -227,18 +263,19 @@ class GlobalRotScaleTrans(object):
             rotation = [-rotation, rotation]
         noise_rotation = np.random.uniform(rotation[0], rotation[1])

-        points = input_dict['points']
-        points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
-            points[:, :3], noise_rotation, axis=2)
-        input_dict['points'] = points
-        input_dict['pcd_rotation'] = rot_mat_T
         for key in input_dict['bbox3d_fields']:
-            input_dict[key].rotate(noise_rotation)
+            if len(input_dict[key].tensor) != 0:
+                points, rot_mat_T = input_dict[key].rotate(
+                    noise_rotation, input_dict['points'])
+                input_dict['points'] = points
+                input_dict['pcd_rotation'] = rot_mat_T

     def _scale_bbox_points(self, input_dict):
         scale = input_dict['pcd_scale_factor']
         input_dict['points'][:, :3] *= scale
+        if self.shift_height:
+            input_dict['points'][:, -1] *= scale

         for key in input_dict['bbox3d_fields']:
             input_dict[key].scale(scale)
@@ -262,6 +299,7 @@ class GlobalRotScaleTrans(object):
         repr_str += '(rot_range={},'.format(self.rot_range)
         repr_str += ' scale_ratio_range={},'.format(self.scale_ratio_range)
-        repr_str += ' translation_std={})'.format(self.translation_std)
+        repr_str += ' translation_std={},'.format(self.translation_std)
+        repr_str += ' shift_height={})'.format(self.shift_height)
         return repr_str
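With shift_height merged in, this one transform now covers the indoor case that IndoorGlobalRotScaleTrans used to handle. A sketch using the SUNRGBD test values further down (the rot_range is roughly +/- pi/6):

rot_scale = dict(
    type='GlobalRotScaleTrans',
    rot_range=[-0.523599, 0.523599],
    scale_ratio_range=[0.85, 1.15],
    shift_height=True)  # also scales the appended height channel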
@@ -283,23 +321,6 @@ class ObjectRangeFilter(object):
         self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
         self.bev_range = self.pcd_range[[0, 1, 3, 4]]

-    @staticmethod
-    def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
-        """remove gtbox outside training range.
-
-        this function should be applied after other prep functions
-
-        Args:
-            gt_bboxes_3d ([type]): [description]
-            limit_range ([type]): [description]
-        """
-        gt_bboxes_3d_bv = box_np_ops.center_to_corner_box2d(
-            gt_bboxes_3d[:, [0, 1]], gt_bboxes_3d[:, [3, 3 + 1]],
-            gt_bboxes_3d[:, 6])
-        bounding_box = box_np_ops.minmax_to_corner_2d(
-            np.asarray(limit_range)[np.newaxis, ...])
-        ret = box_np_ops.points_in_convex_polygon_jit(
-            gt_bboxes_3d_bv.reshape(-1, 2), bounding_box)
-        return np.any(ret.reshape(-1, 4), axis=1)

     def __call__(self, input_dict):
         gt_bboxes_3d = input_dict['gt_bboxes_3d']
         gt_labels_3d = input_dict['gt_labels_3d']
@@ -371,3 +392,67 @@ class ObjectNameFilter(object):
         repr_str = self.__class__.__name__
         repr_str += f'(classes={self.classes})'
         return repr_str
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Indoor point sample.
Sampling data to a certain number.
Args:
num_points (int): Number of points to be sampled.
"""
def __init__(self, num_points):
self.num_points = num_points
def points_random_sampling(self,
points,
num_samples,
replace=None,
return_choices=False):
"""Points random sampling.
Sample points to a certain number.
Args:
points (ndarray): 3D Points.
num_samples (int): Number of samples to be sampled.
replace (bool): Whether the sample is with or without replacement.
return_choices (bool): Whether return choice.
Returns:
points (ndarray): 3D Points.
choices (ndarray): The generated random samples.
"""
if replace is None:
replace = (points.shape[0] < num_samples)
choices = np.random.choice(
points.shape[0], num_samples, replace=replace)
if return_choices:
return points[choices], choices
else:
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
pts_semantic_mask = results.get('pts_semantic_mask', None)
results['points'] = points
if pts_instance_mask is not None and pts_semantic_mask is not None:
pts_instance_mask = pts_instance_mask[choices]
pts_semantic_mask = pts_semantic_mask[choices]
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(num_points={})'.format(self.num_points)
return repr_str
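An illustrative self-check (not in the diff; assumes the class above): sampling 5 of 100 points draws without replacement, and with replacement only when fewer points exist than requested; masks, when present, are subsampled with the same choices:

import numpy as np

sampler = IndoorPointSample(num_points=5)
results = dict(points=np.random.rand(100, 4))
results = sampler(results)
assert results['points'].shape == (5, 4)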
@@ -56,9 +56,22 @@ class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
                       gt_bboxes,
                       gt_labels,
                       gt_bboxes_ignore=None,
+                      gt_masks=None,
                       **kwargs):
-        """Forward function during training"""
+        """Forward function during training.
+
+        Args:
+            x (dict): Contains features from the first stage.
+            img_metas (list[dict]): Meta info of each image.
+            proposal_list (list[dict]): Proposal information from rpn.
+            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]):
+                GT bboxes of each sample. The bboxes are encapsulated
+                by 3D box structures.
+            gt_labels (list[LongTensor]): GT labels of each sample.
+            gt_bboxes_ignore (list[Tensor], optional): Specify which
+                bounding boxes can be ignored.
+
+        Returns:
+            dict: Losses from each head.
+        """
         pass

     def simple_test(self,
...
@@ -231,6 +231,15 @@ class PartA2BboxHead(nn.Module):
         normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)

     def forward(self, seg_feats, part_feats):
+        """Forward pass.
+
+        Args:
+            seg_feats (torch.Tensor): Point-wise semantic features.
+            part_feats (torch.Tensor): Point-wise part prediction features.
+
+        Returns:
+            tuple[torch.Tensor]: Score of class and bbox predictions.
+        """
         # (B * N, out_x, out_y, out_z, 4)
         rcnn_batch_size = part_feats.shape[0]
@@ -273,6 +282,22 @@ class PartA2BboxHead(nn.Module):

     def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
              pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
+        """Compute losses.
+
+        Args:
+            cls_score (torch.Tensor): Scores of each RoI.
+            bbox_pred (torch.Tensor): Predictions of bboxes.
+            rois (torch.Tensor): RoI bboxes.
+            labels (torch.Tensor): Labels of class.
+            bbox_targets (torch.Tensor): Targets of positive bboxes.
+            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
+            reg_mask (torch.Tensor): Mask for positive bboxes.
+            label_weights (torch.Tensor): Weights of class loss.
+            bbox_weights (torch.Tensor): Weights of bbox loss.
+
+        Returns:
+            dict: Computed losses.
+        """
         losses = dict()
         rcnn_batch_size = cls_score.shape[0]
@@ -325,6 +350,17 @@ class PartA2BboxHead(nn.Module):
         return losses

     def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
+        """Generate targets.
+
+        Args:
+            sampling_results (list[:obj:`SamplingResult`]):
+                Sampled results from rois.
+            rcnn_train_cfg (ConfigDict): Training config of rcnn.
+            concat (bool): Whether to concatenate targets between batches.
+
+        Returns:
+            tuple: Targets of boxes and class prediction.
+        """
         pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
         pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
         iou_list = [res.iou for res in sampling_results]
@@ -444,6 +480,20 @@ class PartA2BboxHead(nn.Module):
                    class_pred,
                    img_metas,
                    cfg=None):
+        """Generate bboxes from bbox head predictions.
+
+        Args:
+            rois (torch.Tensor): RoI bboxes.
+            cls_score (torch.Tensor): Scores of bboxes.
+            bbox_pred (torch.Tensor): Bbox predictions.
+            class_labels (torch.Tensor): Labels of classes.
+            class_pred (torch.Tensor): Score for NMS.
+            img_metas (list[dict]): Contain pcd and img's meta info.
+            cfg (ConfigDict): Testing config.
+
+        Returns:
+            list[tuple]: Decoded bbox, scores and labels after nms.
+        """
         roi_batch_id = rois[..., 0]
         roi_boxes = rois[..., 1:]  # boxes without batch id
         batch_size = int(roi_batch_id.max().item() + 1)
...
@@ -50,6 +50,15 @@ class PointwiseSemanticHead(nn.Module):
         self.loss_part = build_loss(loss_part)

     def forward(self, x):
+        """Forward pass.
+
+        Args:
+            x (torch.Tensor): Features from the first stage.
+
+        Returns:
+            dict: Part features, segmentation and part predictions.
+        """
         seg_preds = self.seg_cls_layer(x)  # (N, 1)
         part_preds = self.seg_reg_layer(x)  # (N, 3)
...
@@ -10,7 +10,17 @@ from .base_3droi_head import Base3DRoIHead

 @HEADS.register_module()
 class PartAggregationROIHead(Base3DRoIHead):
-    """Part aggregation roi head for PartA2"""
+    """Part aggregation roi head for PartA2.
+
+    Args:
+        semantic_head (ConfigDict): Config of semantic head.
+        num_classes (int): The number of classes.
+        seg_roi_extractor (ConfigDict): Config of seg_roi_extractor.
+        part_roi_extractor (ConfigDict): Config of part_roi_extractor.
+        bbox_head (ConfigDict): Config of bbox_head.
+        train_cfg (ConfigDict): Training config.
+        test_cfg (ConfigDict): Testing config.
+    """

     def __init__(self,
                  semantic_head,
@@ -156,6 +166,18 @@ class PartAggregationROIHead(Base3DRoIHead):
         return bbox_results

     def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
+        """Forward function of roi_extractor and bbox_head.
+
+        Args:
+            seg_feats (torch.Tensor): Point-wise semantic features.
+            part_feats (torch.Tensor): Point-wise part prediction features.
+            voxels_dict (dict): Contains information of voxels.
+            rois (Tensor): RoI boxes.
+
+        Returns:
+            dict: Contains predictions of bbox_head and
+                features of roi_extractor.
+        """
         pooled_seg_feats = self.seg_roi_extractor(seg_feats,
                                                   voxels_dict['voxel_centers'],
                                                   voxels_dict['coors'][..., 0],
...
@@ -25,12 +25,16 @@ def test_getitem():
             with_mask_3d=True,
             with_seg_3d=True),
         dict(type='IndoorPointSample', num_points=5),
-        dict(type='IndoorFlipData', flip_ratio_yz=1.0, flip_ratio_xz=1.0),
         dict(
-            type='IndoorGlobalRotScaleTrans',
-            shift_height=True,
-            rot_range=[-1 / 36, 1 / 36],
-            scale_range=None),
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=1.0,
+            flip_ratio_bev_vertical=1.0),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.087266, 0.087266],
+            scale_ratio_range=[1.0, 1.0],
+            shift_height=True),
         dict(type='DefaultFormatBundle3D', class_names=class_names),
         dict(
            type='Collect3D',
@@ -38,9 +42,7 @@ def test_getitem():
                 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
                 'pts_instance_mask'
             ],
-            meta_keys=[
-                'file_name', 'flip_xz', 'flip_yz', 'sample_idx', 'rot_angle'
-            ]),
+            meta_keys=['file_name', 'sample_idx', 'pcd_rotation']),
     ]

     scannet_dataset = ScanNetDataset(root_path, ann_file, pipelines)
@@ -51,28 +53,24 @@ def test_getitem():
     pts_semantic_mask = data['pts_semantic_mask']._data
     pts_instance_mask = data['pts_instance_mask']._data
     file_name = data['img_metas']._data['file_name']
-    flip_xz = data['img_metas']._data['flip_xz']
-    flip_yz = data['img_metas']._data['flip_yz']
-    rot_angle = data['img_metas']._data['rot_angle']
+    pcd_rotation = data['img_metas']._data['pcd_rotation']
     sample_idx = data['img_metas']._data['sample_idx']
-    assert file_name == './tests/data/scannet/' \
-        'points/scene0000_00.bin'
-    assert flip_xz is True
-    assert flip_yz is True
-    assert abs(rot_angle - (-0.005471397477913809)) < 1e-5
+    expected_rotation = np.array([[0.99654, 0.08311407, 0.],
+                                  [-0.08311407, 0.99654, 0.], [0., 0., 1.]])
+    assert file_name == './tests/data/scannet/points/scene0000_00.bin'
+    assert np.allclose(pcd_rotation, expected_rotation, 1e-3)
     assert sample_idx == 'scene0000_00'
-    expected_points = np.array(
-        [[-2.9078157, -1.9569951, 2.3543026, 2.389488],
-         [-0.71360034, -3.4359822, 2.1330001, 2.1681855],
-         [-1.332374, 1.474838, -0.04405887, -0.00887359],
-         [2.1336637, -1.3265059, -0.02880373, 0.00638155],
-         [0.43895668, -3.0259454, 1.5560012, 1.5911865]])
+    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
+                                    [-0.4065, -3.4857, 2.1330, 2.1682],
+                                    [-1.4578, 1.3510, -0.0441, -0.0089],
+                                    [2.2428, -1.1323, -0.0288, 0.0064],
+                                    [0.7052, -2.9752, 1.5560, 1.5912]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[-1.5005, -3.5126, 1.5704, 1.7457, 0.2415, 0.5724, 0.0000],
-         [-2.8849, 3.4962, 1.1911, 0.6617, 0.1743, 0.6715, 0.0000],
-         [-1.1586, -2.1924, 0.0093, 0.5557, 2.5376, 1.2145, 0.0000],
-         [-2.9305, -2.4856, 0.8288, 0.6270, 1.8462, 0.2870, 0.0000],
-         [3.3115, -0.0048, -0.0090, 0.4619, 3.8605, 2.1603, 0.0000]])
+        [[-1.1835, -3.6317, 1.5704, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.1911, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.0093, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.8288, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, -0.0090, 0.7600, 3.8814, 2.1603, 0.0000]])
     expected_gt_labels = np.array([
         6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
         0, 0, 0, 5, 5, 5
@@ -82,7 +80,7 @@ def test_getitem():
     original_classes = scannet_dataset.CLASSES

     assert scannet_dataset.CLASSES == class_names
-    assert np.allclose(points, expected_points)
+    assert torch.allclose(points, expected_points, 1e-2)
     assert gt_bboxes_3d.tensor[:5].shape == (5, 7)
     assert torch.allclose(gt_bboxes_3d.tensor[:5], expected_gt_bboxes_3d, 1e-2)
     assert np.all(gt_labels.numpy() == expected_gt_labels)
...
@@ -17,20 +17,24 @@ def test_getitem():
             load_dim=6,
             use_dim=[0, 1, 2]),
         dict(type='LoadAnnotations3D'),
-        dict(type='IndoorFlipData', flip_ratio_yz=1.0),
         dict(
-            type='IndoorGlobalRotScaleTrans',
-            shift_height=True,
-            rot_range=[-1 / 6, 1 / 6],
-            scale_range=[0.85, 1.15]),
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=0.5,
+        ),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.523599, 0.523599],
+            scale_ratio_range=[0.85, 1.15],
+            shift_height=True),
         dict(type='IndoorPointSample', num_points=5),
         dict(type='DefaultFormatBundle3D', class_names=class_names),
         dict(
             type='Collect3D',
             keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
             meta_keys=[
-                'file_name', 'flip_xz', 'flip_yz', 'sample_idx', 'scale_ratio',
-                'rot_angle'
+                'file_name', 'pcd_horizontal_flip', 'sample_idx',
+                'pcd_scale_factor', 'pcd_rotation'
             ]),
     ]
@@ -40,32 +44,32 @@ def test_getitem():
     gt_bboxes_3d = data['gt_bboxes_3d']._data
     gt_labels_3d = data['gt_labels_3d']._data
     file_name = data['img_metas']._data['file_name']
-    flip_xz = data['img_metas']._data['flip_xz']
-    flip_yz = data['img_metas']._data['flip_yz']
-    scale_ratio = data['img_metas']._data['scale_ratio']
-    rot_angle = data['img_metas']._data['rot_angle']
+    pcd_horizontal_flip = data['img_metas']._data['pcd_horizontal_flip']
+    pcd_scale_factor = data['img_metas']._data['pcd_scale_factor']
+    pcd_rotation = data['img_metas']._data['pcd_rotation']
     sample_idx = data['img_metas']._data['sample_idx']
-    assert file_name == './tests/data/sunrgbd' \
-        '/points/000001.bin'
-    assert flip_xz is False
-    assert flip_yz is True
-    assert abs(scale_ratio - 1.0308290128214932) < 1e-5
-    assert abs(rot_angle - 0.22534577750874518) < 1e-5
+    pcd_rotation_expected = np.array([[0.99889565, 0.04698427, 0.],
+                                      [-0.04698427, 0.99889565, 0.],
+                                      [0., 0., 1.]])
+    assert file_name == './tests/data/sunrgbd/points/000001.bin'
+    assert pcd_horizontal_flip is False
+    assert abs(pcd_scale_factor - 0.9770964398016714) < 1e-5
+    assert np.allclose(pcd_rotation, pcd_rotation_expected, 1e-3)
     assert sample_idx == 1
-    expected_points = np.array([[0.6512, 1.5781, 0.0710, 0.0499],
-                                [0.6473, 1.5701, 0.0657, 0.0447],
-                                [0.6464, 1.5635, 0.0826, 0.0616],
-                                [0.6453, 1.5603, 0.0849, 0.0638],
-                                [0.6488, 1.5786, 0.0461, 0.0251]])
+    expected_points = torch.tensor([[-0.9904, 1.2596, 0.1105, 0.0905],
+                                    [-0.9948, 1.2758, 0.0437, 0.0238],
+                                    [-0.9866, 1.2641, 0.0504, 0.0304],
+                                    [-0.9915, 1.2586, 0.1265, 0.1065],
+                                    [-0.9890, 1.2561, 0.1216, 0.1017]])
     expected_gt_bboxes_3d = torch.tensor(
-        [[-2.0125, 3.9473, -1.2696, 2.3730, 1.9458, 2.0303, 1.2206],
-         [-3.7037, 4.2396, -1.3126, 0.6032, 0.9104, 1.0033, 1.2663],
-         [0.6529, 2.1638, -1.2370, 0.7348, 1.6113, 2.1694, 2.8140]])
+        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
+         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
+         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
     expected_gt_labels = np.array([0, 7, 6])
     original_classes = sunrgbd_dataset.CLASSES

-    assert np.allclose(points, expected_points, 1e-2)
-    assert np.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
+    assert torch.allclose(points, expected_points, 1e-2)
+    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
     assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
     assert original_classes == class_names
...
import numpy as np
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.datasets.pipelines import (IndoorFlipData,
IndoorGlobalRotScaleTrans)
def test_indoor_flip_data():
np.random.seed(0)
sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
[0.39597902, 1.05465031, -0.74920434, 0.673096]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[-0.2137, 1.0364, -0.9823, 0.6154, 0.5726, 0.8727, 0.0713],
[0.4500, 1.3955, -1.0278, 1.5010, 1.6373, 0.6364, 4.7240]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_indoor_flip_data = IndoorFlipData(1, 1)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_indoor_flip_data(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
[-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[-0.5590, -0.4820, 0.6569, 0.6537, 0.6003, 0.5163, 0.0000],
[0.0323, -1.7039, 0.6035, 0.6517, 0.7208, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-2)


def test_global_rot_scale():
np.random.seed(0)
sunrgbd_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 / 6, 1 / 6], scale_range=[0.85, 1.15])
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
3.07028526
],
[
-0.449953, 1.395455, -1.027778, 1.500956, 1.637298,
0.636364, -1.58242359
]]))
sunrgbd_results = sunrgbd_augment(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[0.89427376, 3.94489646, 0.21003141, 1.72415094],
[-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
expected_sunrgbd_gt_bboxes_3d = torch.tensor(
[[0.1708, 1.1135, -1.0457, 0.6551, 0.6095, 0.9291, 3.0192],
[-0.5543, 1.4591, -1.0941, 1.5979, 1.7430, 0.6774, -1.6335]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert torch.allclose(sunrgbd_gt_bboxes_3d.tensor,
expected_sunrgbd_gt_bboxes_3d, 1e-3)
np.random.seed(0)
scannet_augment = IndoorGlobalRotScaleTrans(
True, rot_range=[-1 * 1 / 36, 1 / 36], scale_range=None)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = DepthInstance3DBoxes(
np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864,
0.5163464
],
[
-0.03226406, 1.70392646, 0.60348618, 0.65165804,
0.72084366, 0.64667457
]]),
box_dim=6,
with_yaw=False)
scannet_results = scannet_augment(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[1.61240576, -0.15530836, 0.5811581, 0.5989725],
[1.39417555, 0.43225122, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = torch.tensor(
[[0.5549, 0.4868, 0.6569, 0.6588, 0.6058, 0.5163, 0.0000],
[-0.0468, 1.7036, 0.6035, 0.6578, 0.7264, 0.6467, 0.0000]])
assert np.allclose(scannet_points, expected_scannet_points)
assert torch.allclose(scannet_gt_bboxes_3d.tensor,
expected_scannet_gt_bboxes_3d, 1e-3)
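
The two deleted indoor-only transforms above are superseded by the unified RandomFlip3D and GlobalRotScaleTrans transforms (see the updated pipeline hunks below). As a rough migration sketch, assuming only the constructor arguments that appear in those hunks (the list name is illustrative), an equivalent indoor augmentation config would look like this:

# Migration sketch: IndoorFlipData + IndoorGlobalRotScaleTrans expressed
# with the unified transforms; argument values taken from the ScanNet hunk.
indoor_aug_pipeline = [
    dict(
        type='RandomFlip3D',  # replaces IndoorFlipData
        sync_2d=False,  # do not couple 3D flips to 2D image flips
        flip_ratio_bev_horizontal=1.0,
        flip_ratio_bev_vertical=1.0),
    dict(
        type='GlobalRotScaleTrans',  # replaces IndoorGlobalRotScaleTrans
        rot_range=[-0.087266, 0.087266],  # radians, roughly +/- pi / 36
        scale_ratio_range=[1.0, 1.0],  # scale_range=None becomes a no-op range
        shift_height=True),
]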
@@ -28,12 +28,16 @@ def test_scannet_pipeline():
            with_mask_3d=True,
            with_seg_3d=True),
        dict(type='IndoorPointSample', num_points=5),
-        dict(type='IndoorFlipData', flip_ratio_yz=1.0, flip_ratio_xz=1.0),
        dict(
-            type='IndoorGlobalRotScaleTrans',
-            shift_height=True,
-            rot_range=[-1 / 36, 1 / 36],
-            scale_range=None),
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=1.0,
+            flip_ratio_bev_vertical=1.0),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.087266, 0.087266],
+            scale_ratio_range=[1.0, 1.0],
+            shift_height=True),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
            type='Collect3D',
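
Note the unit change hiding in this hunk: judging by the matching values, IndoorGlobalRotScaleTrans took rot_range as a fraction of pi ([-1 / 36, 1 / 36]), while GlobalRotScaleTrans takes plain radians, so the equivalent bound is pi / 36, about 0.087266 (and pi / 6, about 0.523599, in the SUN RGB-D hunk below). A quick standalone check:

import math

# The old fraction-of-pi bounds and the new radian bounds should agree.
assert math.isclose(math.pi / 36, 0.087266, rel_tol=1e-4)
assert math.isclose(math.pi / 6, 0.523599, rel_tol=1e-5)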
@@ -63,6 +67,7 @@ def test_scannet_pipeline():
        scannet_gt_bboxes_3d, box_dim=6, with_yaw=False)
    results['ann_info']['gt_labels_3d'] = scannet_gt_labels_3d
+    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []
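
The new img_fields entry mirrors the other *_fields lists: RandomFlip3D is shared with the 2D pipeline and presumably walks results['img_fields'] even when no images are loaded, so the test scaffolding now initializes it. A minimal sketch of the failure this guards against, under that assumption:

import numpy as np

results = dict(img_fields=[])  # the newly required scaffolding key
# A flip-style transform typically iterates over the registered image
# fields; with an empty list this loop is a no-op, but a missing key
# would raise KeyError before the 3D fields are ever reached.
for key in results['img_fields']:
    results[key] = np.flip(results[key], axis=1)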
@@ -74,25 +79,24 @@ def test_scannet_pipeline():
    gt_labels_3d = results['gt_labels_3d']._data
    pts_semantic_mask = results['pts_semantic_mask']._data
    pts_instance_mask = results['pts_instance_mask']._data
-    expected_points = np.array(
-        [[-2.9078157, -1.9569951, 2.3543026, 2.389488],
-         [-0.71360034, -3.4359822, 2.1330001, 2.1681855],
-         [-1.332374, 1.474838, -0.04405887, -0.00887359],
-         [2.1336637, -1.3265059, -0.02880373, 0.00638155],
-         [0.43895668, -3.0259454, 1.5560012, 1.5911865]])
+    expected_points = torch.tensor([[-2.7231, -2.2068, 2.3543, 2.3895],
+                                    [-0.4065, -3.4857, 2.1330, 2.1682],
+                                    [-1.4578, 1.3510, -0.0441, -0.0089],
+                                    [2.2428, -1.1323, -0.0288, 0.0064],
+                                    [0.7052, -2.9752, 1.5560, 1.5912]])
    expected_gt_bboxes_3d = torch.tensor(
-        [[-1.5005, -3.5126, 1.8565, 1.7457, 0.2415, 0.5724, 0.0000],
-         [-2.8849, 3.4962, 1.5268, 0.6617, 0.1743, 0.6715, 0.0000],
-         [-1.1586, -2.1924, 0.6165, 0.5557, 2.5376, 1.2145, 0.0000],
-         [-2.9305, -2.4856, 0.9722, 0.6270, 1.8462, 0.2870, 0.0000],
-         [3.3115, -0.0048, 1.0712, 0.4619, 3.8605, 2.1603, 0.0000]])
+        [[-1.1835, -3.6317, 1.8565, 1.7577, 0.3761, 0.5724, 0.0000],
+         [-3.1832, 3.2269, 1.5268, 0.6727, 0.2251, 0.6715, 0.0000],
+         [-0.9598, -2.2864, 0.6165, 0.7506, 2.5709, 1.2145, 0.0000],
+         [-2.6988, -2.7354, 0.9722, 0.7680, 1.8877, 0.2870, 0.0000],
+         [3.2989, 0.2885, 1.0712, 0.7600, 3.8814, 2.1603, 0.0000]])
    expected_gt_labels_3d = np.array([
        6, 6, 4, 9, 11, 11, 10, 0, 15, 17, 17, 17, 3, 12, 4, 4, 14, 1, 0, 0, 0,
        0, 0, 0, 5, 5, 5
    ])
    expected_pts_semantic_mask = np.array([3, 1, 2, 2, 15])
    expected_pts_instance_mask = np.array([44, 22, 10, 10, 57])
-    assert np.allclose(points, expected_points)
+    assert torch.allclose(points, expected_points, 1e-2)
    assert torch.allclose(gt_bboxes_3d.tensor[:5, :], expected_gt_bboxes_3d,
                          1e-2)
    assert np.all(gt_labels_3d.numpy() == expected_gt_labels_3d)
@@ -111,12 +115,16 @@ def test_sunrgbd_pipeline():
            load_dim=6,
            use_dim=[0, 1, 2]),
        dict(type='LoadAnnotations3D'),
-        dict(type='IndoorFlipData', flip_ratio_yz=1.0),
        dict(
-            type='IndoorGlobalRotScaleTrans',
-            shift_height=True,
-            rot_range=[-1 / 6, 1 / 6],
-            scale_range=[0.85, 1.15]),
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=1.0,
+        ),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.523599, 0.523599],
+            scale_ratio_range=[0.85, 1.15],
+            shift_height=True),
        dict(type='IndoorPointSample', num_points=5),
        dict(type='DefaultFormatBundle3D', class_names=class_names),
        dict(
@@ -140,6 +148,7 @@ def test_sunrgbd_pipeline():
    results['ann_info'] = dict()
    results['ann_info']['gt_bboxes_3d'] = DepthInstance3DBoxes(gt_bboxes_3d)
    results['ann_info']['gt_labels_3d'] = gt_labels_3d
+    results['img_fields'] = []
    results['bbox3d_fields'] = []
    results['pts_mask_fields'] = []
    results['pts_seg_fields'] = []
@@ -148,16 +157,16 @@ def test_sunrgbd_pipeline():
    points = results['points']._data
    gt_bboxes_3d = results['gt_bboxes_3d']._data
    gt_labels_3d = results['gt_labels_3d']._data
-    expected_points = np.array([[0.6512, 1.5781, 0.0710, 0.0499],
-                                [0.6473, 1.5701, 0.0657, 0.0447],
-                                [0.6464, 1.5635, 0.0826, 0.0616],
-                                [0.6453, 1.5603, 0.0849, 0.0638],
-                                [0.6488, 1.5786, 0.0461, 0.0251]])
+    expected_points = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905],
+                                    [0.8707, 1.3635, 0.0437, 0.0238],
+                                    [0.8636, 1.3511, 0.0504, 0.0304],
+                                    [0.8690, 1.3461, 0.1265, 0.1065],
+                                    [0.8668, 1.3434, 0.1216, 0.1017]])
    expected_gt_bboxes_3d = torch.tensor(
-        [[-2.0125, 3.9473, -0.2545, 2.3730, 1.9458, 2.0303, 1.2206],
-         [-3.7037, 4.2396, -0.8109, 0.6032, 0.9104, 1.0033, 1.2663],
-         [0.6529, 2.1638, -0.1523, 0.7348, 1.6113, 2.1694, 2.8140]])
+        [[-1.2136, 4.0206, -0.2412, 2.2493, 1.8444, 1.9245, 1.3989],
+         [-2.7420, 4.5777, -0.7686, 0.5718, 0.8629, 0.9510, 1.4446],
+         [0.9729, 1.9087, -0.1443, 0.6965, 1.5273, 2.0563, 2.9924]])
    expected_gt_labels_3d = np.array([0, 7, 6])
    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
    assert np.allclose(gt_labels_3d.flatten(), expected_gt_labels_3d)
-    assert np.allclose(points, expected_points, 1e-2)
+    assert torch.allclose(points, expected_points, 1e-2)
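
The assertions switch from np.allclose to torch.allclose because expected_points is now built as a torch.Tensor and the points produced by the pipeline are tensors as well; comparing like with like avoids an implicit conversion. A standalone illustration, with values borrowed from the hunk and an illustrative variable name:

import numpy as np
import torch

out = torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905]])
# torch.allclose(input, other, rtol): the third positional argument is
# rtol, matching the 1e-2 tolerance used in the test above.
assert torch.allclose(out, torch.tensor([[0.8678, 1.3470, 0.1105, 0.0905]]),
                      1e-2)
# np.allclose would require dropping to numpy first:
assert np.allclose(out.numpy(), np.array([[0.8678, 1.3470, 0.1105, 0.0905]]))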
@@ -19,7 +19,7 @@ def test_outdoor_aug_pipeline():
            translation_std=[1.0, 1.0, 0.5],
            global_rot_range=[0.0, 0.0],
            rot_range=[-0.78539816, 0.78539816]),
-        dict(type='RandomFlip3D', flip_ratio=0.5),
+        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
        dict(
            type='GlobalRotScaleTrans',
            rot_range=[-0.78539816, 0.78539816],
@@ -137,7 +137,7 @@ def test_outdoor_velocity_aug_pipeline():
            rot_range=[-0.3925, 0.3925],
            scale_ratio_range=[0.95, 1.05],
            translation_std=[0, 0, 0]),
-        dict(type='RandomFlip3D', flip_ratio=0.5),
+        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
        dict(type='PointShuffle'),
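
Both outdoor hunks are the same one-argument rename: RandomFlip3D now distinguishes horizontal from vertical BEV flipping, so the old flip_ratio becomes flip_ratio_bev_horizontal. A minimal usage sketch, assuming the Compose re-export from mmdet3d.datasets.pipelines:

from mmdet3d.datasets.pipelines import Compose

# Only horizontal BEV flipping is exercised by the outdoor tests;
# vertical flipping keeps its default probability.
flip_pipeline = Compose([
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
])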
...