Merge branch 'master_temp' into scannet_dataset

49121b64 · yinchimaoliang · f8f05baf · 868c5fab · 49121b64 · 49121b64
Commit 49121b64 authored May 12, 2020 by yinchimaoliang
20 changed files
--- a/mmdet3d/datasets/pipelines/indoor_augment.py
+++ b/mmdet3d/datasets/pipelines/indoor_augment.py
+import numpy as np
+
+from mmdet.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class IndoorFlipData(object):
+    """Randomly mirror an indoor point cloud together with its 3D boxes.
+
+    A mirror across the yz plane (negating x) is attempted first. When the
+    boxes have exactly 6 columns, a second, independent mirror across the
+    xz plane (negating y) may follow. Boxes with any other column count are
+    only ever mirrored across the yz plane, in which case column 6 of the
+    boxes is replaced by ``pi - value``.
+
+    Args:
+        flip_ratio_yz (float): Probability of mirroring across the yz plane.
+            Default: 0.5.
+        flip_ratio_xz (float): Probability of mirroring across the xz plane.
+            Default: 0.5.
+    """
+
+    def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
+        self.flip_ratio_xz = flip_ratio_xz
+        self.flip_ratio_yz = flip_ratio_yz
+
+    def __call__(self, results):
+        """Flip ``results['points']`` and ``results['gt_bboxes_3d']``.
+
+        Both arrays are modified in place. ``results['flip_yz']`` and
+        ``results['flip_xz']`` are set to True only when the corresponding
+        flip was actually applied.
+
+        Args:
+            results (dict): Must contain 'points' and 'gt_bboxes_3d'.
+
+        Returns:
+            dict: The same ``results`` dict, updated.
+        """
+        pts = results['points']
+        boxes = results['gt_bboxes_3d']
+        is_aligned = boxes.shape[1] == 6
+
+        # Mirror across the YZ plane: negate the x coordinates.
+        if np.random.random() < self.flip_ratio_yz:
+            pts[:, 0] = -1 * pts[:, 0]
+            boxes[:, 0] = -1 * boxes[:, 0]
+            if not is_aligned:
+                boxes[:, 6] = np.pi - boxes[:, 6]
+            results['flip_yz'] = True
+
+        # Mirror across the XZ plane: negate the y coordinates. The random
+        # draw only happens for 6-column boxes (short-circuit evaluation).
+        if is_aligned and np.random.random() < self.flip_ratio_xz:
+            pts[:, 1] = -1 * pts[:, 1]
+            boxes[:, 1] = -1 * boxes[:, 1]
+            results['flip_xz'] = True
+
+        results['gt_bboxes_3d'] = boxes
+        results['points'] = pts
+        return results
+
+    def __repr__(self):
+        return (f'{self.__class__.__name__}'
+                f'(flip_ratio_yz={self.flip_ratio_yz})'
+                f'(flip_ratio_xz={self.flip_ratio_xz})')
+
+
+@PIPELINES.register_module()
+class IndoorPointsColorJitter(object):
+    """Indoor Points Color Jitter.
+
+    Randomly change the brightness and color of the point cloud, and
+    drop out the points' colors with a certain probability.
+
+    Columns 3:6 of the points are treated as mean-subtracted colors:
+    ``color_mean`` is added back before augmenting, the result is clipped
+    to [0, 1], and ``color_mean`` is subtracted again before writing back.
+
+    Args:
+        color_mean (List[float]): Mean color of the point cloud.
+            Default: [0.5, 0.5, 0.5].
+        bright_range (List[float]): Range of the per-channel multiplicative
+            brightness factor. Default: [0.8, 1.2].
+        color_shift_range (List[float]): Range of the per-channel additive
+            color shift. Default: [0.95, 1.05].
+        jitter_range (List[float]): Range of the per-point additive jitter.
+            Default: [-0.025, 0.025].
+        drop_prob (float): Probability to drop out points' color.
+            Default: 0.3
+    """
+
+    def __init__(self,
+                 color_mean=[0.5, 0.5, 0.5],
+                 bright_range=[0.8, 1.2],
+                 color_shift_range=[0.95, 1.05],
+                 jitter_range=[-0.025, 0.025],
+                 drop_prob=0.3):
+        self.color_mean = color_mean
+        self.bright_range = bright_range
+        self.color_shift_range = color_shift_range
+        self.jitter_range = jitter_range
+        self.drop_prob = drop_prob
+
+    def __call__(self, results):
+        """Jitter the colors stored in columns 3:6 of ``results['points']``.
+
+        Args:
+            results (dict): Must contain 'points' with at least 6 columns.
+
+        Returns:
+            dict: The same ``results`` dict; 'points' is modified in place.
+        """
+        points = results['points']
+        assert points.shape[1] >= 6, \
+            f'Expect points have channel >=6, got {points.shape[1]}.'
+        num_points = points.shape[0]
+        rgb_color = points[:, 3:6] + self.color_mean
+        # brightness change for each channel
+        rgb_color *= np.random.uniform(self.bright_range[0],
+                                       self.bright_range[1], 3)
+        # color shift for each channel
+        # NOTE(review): the shift is additive, so the default range
+        # [0.95, 1.05] pushes every channel up by about 1 before the clip
+        # to [0, 1] below -- confirm the intended default.
+        rgb_color += np.random.uniform(self.color_shift_range[0],
+                                       self.color_shift_range[1], 3)
+        # jittering on each point: draw one offset per point (a scalar draw
+        # would add the same offset to every point) and broadcast it over
+        # the three color channels.
+        rgb_color += np.expand_dims(
+            np.random.uniform(self.jitter_range[0], self.jitter_range[1],
+                              num_points), -1)
+        rgb_color = np.clip(rgb_color, 0, 1)
+        # randomly drop out points' colors (zero all channels of a point)
+        rgb_color *= np.expand_dims(
+            np.random.random(num_points) > self.drop_prob, -1)
+        points[:, 3:6] = rgb_color - self.color_mean
+        results['points'] = points
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        repr_str += '(color_mean={})'.format(self.color_mean)
+        repr_str += '(bright_range={})'.format(self.bright_range)
+        repr_str += '(color_shift_range={})'.format(self.color_shift_range)
+        repr_str += '(jitter_range={})'.format(self.jitter_range)
+        repr_str += '(drop_prob={})'.format(self.drop_prob)
+        # __repr__ must return a str; falling off the end returns None and
+        # makes repr(obj) raise TypeError.
+        return repr_str
+
+
+# TODO: merge outdoor indoor transform.
+# TODO: try transform noise.
+@PIPELINES.register_module()
+class IndoorGlobalRotScale(object):
+    """Globally rotate (about z) and scale indoor points and 3D boxes.
+
+    Used to augment sunrgbd and scannet data.
+
+    Args:
+        use_height (bool): Whether the last point column holds a height
+            value that has to be scaled along with the coordinates.
+            Default: True.
+        rot_range (List[float]): [min, max] of the rotation angle; None
+            disables rotation. Default: None.
+        scale_range (List[float]): [min, max] of the scale factor; None
+            disables scaling. Default: None.
+    """
+
+    def __init__(self, use_height=True, rot_range=None, scale_range=None):
+        self.scale_range = scale_range
+        self.rot_range = rot_range
+        self.use_height = use_height
+
+    def _rotz(self, t):
+        """Build the 3x3 matrix of a rotation about the z-axis.
+
+        Args:
+            t (float): Angle of rotation.
+
+        Returns:
+            rot_mat (ndarray): Matrix of rotation.
+        """
+        cos_t, sin_t = np.cos(t), np.sin(t)
+        return np.array([[cos_t, -sin_t, 0], [sin_t, cos_t, 0], [0, 0, 1]])
+
+    def _rotate_aligned_boxes(self, input_boxes, rot_mat):
+        """Rotate 6-column boxes and re-fit axis-aligned extents.
+
+        The centers are rotated directly. The new x/y sizes are twice the
+        largest x/y offset among the four rotated footprint corners; the
+        z size is kept.
+
+        Args:
+            input_boxes (ndarray): 3D boxes, columns 0:3 are centers and
+                columns 3:6 are sizes.
+            rot_mat (ndarray): Rotation matrix.
+
+        Returns:
+            rotated_boxes (ndarray): 3D boxes after rotation.
+        """
+        box_centers = input_boxes[:, 0:3]
+        box_sizes = input_boxes[:, 3:6]
+        rotated_centers = np.dot(box_centers, rot_mat.T)
+
+        half_dx = box_sizes[:, 0] / 2.0
+        half_dy = box_sizes[:, 1] / 2.0
+        num_boxes = half_dx.shape[0]
+        corner_x = np.zeros((num_boxes, 4))
+        corner_y = np.zeros((num_boxes, 4))
+
+        footprint_signs = [(-1, -1), (1, -1), (1, 1), (-1, 1)]
+        for col, (sign_x, sign_y) in enumerate(footprint_signs):
+            offsets = np.zeros((num_boxes, 3))
+            offsets[:, 0] = sign_x * half_dx
+            offsets[:, 1] = sign_y * half_dy
+            offsets = np.dot(offsets, rot_mat.T)
+            corner_x[:, col] = offsets[:, 0]
+            corner_y[:, col] = offsets[:, 1]
+
+        rotated_sizes = np.stack(
+            (2.0 * np.max(corner_x, 1), 2.0 * np.max(corner_y, 1),
+             box_sizes[:, 2]),
+            axis=1)
+
+        return np.concatenate([rotated_centers, rotated_sizes], axis=1)
+
+    def __call__(self, results):
+        """Apply the random rotation and then the random scaling.
+
+        Args:
+            results (dict): Must contain 'points' and 'gt_bboxes_3d'.
+
+        Returns:
+            dict: The same ``results`` dict with both entries updated.
+        """
+        pts = results['points']
+        boxes = results['gt_bboxes_3d']
+        is_aligned = boxes.shape[1] == 6
+
+        if self.rot_range is not None:
+            assert len(self.rot_range) == 2, \
+                f'Expect length of rot range =2, got {len(self.rot_range)}.'
+            angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
+            rot_mat = self._rotz(angle)
+            pts[:, :3] = np.dot(pts[:, :3], rot_mat.T)
+            if not is_aligned:
+                # Boxes with a 7th column: rotate centers, shift column 6.
+                boxes[:, :3] = np.dot(boxes[:, :3], rot_mat.T)
+                boxes[:, 6] -= angle
+            else:
+                boxes = self._rotate_aligned_boxes(boxes, rot_mat)
+
+        if self.scale_range is not None:
+            assert len(self.scale_range) == 2, \
+                f'Expect length of scale range =2, ' \
+                f'got {len(self.scale_range)}.'
+            # One factor scales coordinates, box centers and box sizes.
+            factor = np.random.uniform(self.scale_range[0],
+                                       self.scale_range[1])
+
+            pts[:, :3] *= factor
+            boxes[:, :6] *= factor
+            if self.use_height:
+                # The last point column is scaled as well.
+                pts[:, -1] *= factor
+
+        results['gt_bboxes_3d'] = boxes
+        results['points'] = pts
+        return results
+
+    def __repr__(self):
+        return (f'{self.__class__.__name__}'
+                f'(use_height={self.use_height})'
+                f'(rot_range={self.rot_range})'
+                f'(scale_range={self.scale_range})')
--- a/mmdet3d/datasets/pipelines/indoor_loading.py
+++ b/mmdet3d/datasets/pipelines/indoor_loading.py
+import mmcv
+import numpy as np
+
+from mmdet.datasets.builder import PIPELINES
+
+
+@PIPELINES.register_module()
+class IndoorPointsColorNormalize(object):
+    """Subtract a mean color from the color channels of the points.
+
+    Columns 3:6 of the points are shifted by ``color_mean / 256``.
+
+    Args:
+        color_mean (List[float]): Mean color of the point cloud.
+    """
+
+    def __init__(self, color_mean):
+        self.color_mean = color_mean
+
+    def __call__(self, results):
+        """Normalize ``results['points'][:, 3:6]`` in place.
+
+        Args:
+            results (dict): Must contain 'points' with at least 6 columns.
+
+        Returns:
+            dict: The same ``results`` dict.
+        """
+        pts = results['points']
+        assert pts.shape[1] >= 6,\
+            f'Expect points have channel >=6, got {pts.shape[1]}'
+        mean_offset = np.array(self.color_mean) / 256.0
+        pts[:, 3:6] = pts[:, 3:6] - mean_offset
+        results['points'] = pts
+        return results
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}(color_mean={self.color_mean})'
+
+
+@PIPELINES.register_module()
+class IndoorLoadPointsFromFile(object):
+    """Indoor Load Points From File.
+
+    Load sunrgbd and scannet points from a ``.npy`` file.
+
+    Args:
+        use_height (bool): Whether to append a height column (z minus an
+            estimated floor height) to the loaded points.
+        load_dim (int): The dimension of the loaded points.
+            Default: 6.
+        use_dim (List[int]): Which dimensions of the points to be used.
+            Default: [0, 1, 2].
+    """
+
+    def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
+        self.use_height = use_height
+        assert max(use_dim) < load_dim, \
+            f'Expect all used dimensions < {load_dim}, got {use_dim}'
+
+        self.load_dim = load_dim
+        self.use_dim = use_dim
+
+    def __call__(self, results):
+        """Load the points named by ``results['pts_filename']``.
+
+        Args:
+            results (dict): Must contain 'pts_filename'.
+
+        Returns:
+            dict: ``results`` with 'points' set to the selected columns,
+                plus one trailing height column when ``use_height`` is set.
+        """
+        pts_filename = results['pts_filename']
+        mmcv.check_file_exist(pts_filename)
+        points = np.load(pts_filename)
+        points = points.reshape(-1, self.load_dim)
+        points = points[:, self.use_dim]
+
+        if self.use_height:
+            # np.percentile takes q in [0, 100], so this is the 0.99th
+            # percentile of column 2: a robust near-minimum used as floor.
+            floor_height = np.percentile(points[:, 2], 0.99)
+            height = points[:, 2] - floor_height
+            points = np.concatenate([points, np.expand_dims(height, 1)], 1)
+        results['points'] = points
+        return results
+
+    def __repr__(self):
+        # Only report attributes this class actually defines; it has no
+        # ``color_mean``, so reading it here raised AttributeError.
+        repr_str = self.__class__.__name__
+        repr_str += '(use_height={})'.format(self.use_height)
+        repr_str += '(load_dim={})'.format(self.load_dim)
+        repr_str += '(use_dim={})'.format(self.use_dim)
+        return repr_str
+
+
+@PIPELINES.register_module()
+class IndoorLoadAnnotations3D(object):
+    """Indoor Load Annotations3D.
+
+    Load instance mask and semantic mask of points from ``.npy`` files.
+    """
+
+    def __init__(self):
+        pass
+
+    def __call__(self, results):
+        """Load both mask files and store them in ``results``.
+
+        Args:
+            results (dict): Must contain 'pts_instance_mask_path' and
+                'pts_semantic_mask_path'.
+
+        Returns:
+            dict: ``results`` with 'pts_instance_mask' and
+                'pts_semantic_mask' added.
+        """
+        pts_instance_mask_path = results['pts_instance_mask_path']
+        pts_semantic_mask_path = results['pts_semantic_mask_path']
+
+        # Check both paths before loading either file.
+        mmcv.check_file_exist(pts_instance_mask_path)
+        mmcv.check_file_exist(pts_semantic_mask_path)
+        pts_instance_mask = np.load(pts_instance_mask_path)
+        pts_semantic_mask = np.load(pts_semantic_mask_path)
+        results['pts_instance_mask'] = pts_instance_mask
+        results['pts_semantic_mask'] = pts_semantic_mask
+
+        return results
+
+    def __repr__(self):
+        repr_str = self.__class__.__name__
+        return repr_str
--- a/mmdet3d/datasets/pipelines/indoor_sample.py
+++ b/mmdet3d/datasets/pipelines/indoor_sample.py
@@ -4,7 +4,7 @@ from mmdet.datasets.builder import PIPELINES


 @PIPELINES.register_module()
-class PointSample(object):
+class IndoorPointSample(object):
    """Point Sample.

    Sampling data to a certain number.
@@ -46,7 +46,7 @@ class PointSample(object):
            return points[choices]

    def __call__(self, results):
-        points = results.get('points', None)
+        points = results['points']
        points, choices = self.points_random_sampling(
            points, self.num_points, return_choices=True)
        pts_instance_mask = results.get('pts_instance_mask', None)

--- a/mmdet3d/models/roi_heads/roi_extractors/__init__.py
+++ b/mmdet3d/models/roi_heads/roi_extractors/__init__.py
 from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
+from .single_roiaware_extractor import Single3DRoIAwareExtractor

-__all__ = ['SingleRoIExtractor']
+__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
--- a/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
+++ b/mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py
+import torch
+import torch.nn as nn
+
+from mmdet3d import ops
+from mmdet.models.builder import ROI_EXTRACTORS
+
+
+@ROI_EXTRACTORS.register_module()
+class Single3DRoIAwareExtractor(nn.Module):
+    """Point-wise roi-aware Extractor
+
+    Extract Point-wise roi features.
+
+    Args:
+        roi_layer (dict): the config of roi layer. It must contain a
+            'type' key naming a class exported by ``mmdet3d.ops``; the
+            remaining keys are passed to that class as keyword arguments.
+    """
+
+    def __init__(self, roi_layer=None):
+        super(Single3DRoIAwareExtractor, self).__init__()
+        self.roi_layer = self.build_roi_layers(roi_layer)
+
+    def build_roi_layers(self, layer_cfg):
+        """Instantiate the roi layer described by ``layer_cfg``.
+
+        Args:
+            layer_cfg (dict): Config with a 'type' key; it is copied, so
+                the caller's dict is left untouched.
+
+        Returns:
+            The constructed layer object.
+        """
+        cfg = layer_cfg.copy()
+        layer_type = cfg.pop('type')
+        assert hasattr(ops, layer_type), \
+            f'{layer_type} is not defined in mmdet3d.ops'
+        layer_cls = getattr(ops, layer_type)
+        roi_layers = layer_cls(**cfg)
+        return roi_layers
+
+    def forward(self, feats, coordinate, batch_inds, rois):
+        """Extract point-wise roi features
+
+        Args:
+            feats (FloatTensor): point-wise features for pooling.
+                NOTE(review): indexed with a per-point mask below, so the
+                first dimension presumably enumerates points -- confirm.
+            coordinate (FloatTensor): coordinate of each point
+            batch_inds (longTensor): indicate the batch of each point
+            rois (FloatTensor): roi boxes with batch indices in the first
+                entry of the last dimension
+
+        Returns:
+            FloatTensor: pooled features of all samples, concatenated
+                along dim 0 in batch-index order
+        """
+        pooled_roi_feats = []
+        # Pool every sample separately: pick its rois and its points.
+        for batch_idx in range(int(batch_inds.max()) + 1):
+            roi_inds = (rois[..., 0].int() == batch_idx)
+            coors_inds = (batch_inds.int() == batch_idx)
+            pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
+                                             coordinate[coors_inds],
+                                             feats[coors_inds])
+            pooled_roi_feats.append(pooled_roi_feat)
+        pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
+        return pooled_roi_feats
--- a/mmdet3d/ops/__init__.py
+++ b/mmdet3d/ops/__init__.py
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                       get_compiling_cuda_version, nms, roi_align,
                       sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
+                              points_in_boxes_gpu)
 from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
                           SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization

 __all__ = [
-    'nms',
-    'soft_nms',
-    'RoIAlign',
-    'roi_align',
-    'get_compiler_version',
-    'get_compiling_cuda_version',
-    'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d',
-    'batched_nms',
-    'Voxelization',
-    'voxelization',
-    'dynamic_scatter',
-    'DynamicScatter',
-    'sigmoid_focal_loss',
-    'SigmoidFocalLoss',
-    'SparseBasicBlockV0',
-    'SparseBottleneckV0',
-    'SparseBasicBlock',
-    'SparseBottleneck',
+    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
+    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
+    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
+    'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
+    'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
+    'points_in_boxes_gpu', 'points_in_boxes_cpu'
 ]
--- a/mmdet3d/ops/iou3d/__init__.py
+++ b/mmdet3d/ops/iou3d/__init__.py
-from .iou3d_utils import (boxes_iou3d_gpu, boxes_iou_bev, nms_gpu,
-                          nms_normal_gpu)
+from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
+                          boxes_iou_bev, nms_gpu, nms_normal_gpu)

-__all__ = ['boxes_iou_bev', 'boxes_iou3d_gpu', 'nms_gpu', 'nms_normal_gpu']
+__all__ = [
+    'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
+    'boxes_iou3d_gpu_lidar'
+]
--- a/mmdet3d/ops/iou3d/iou3d_utils.py
+++ b/mmdet3d/ops/iou3d/iou3d_utils.py
@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
    return ans_iou


-def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
-    """
-    :param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
-    :param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
-    :param mode  "iou" (intersection over union) or iof (intersection over
+def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
+    """Calculate 3d iou of boxes in camera coordinate
+
+    Args:
+        boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
+            in camera coordinate
+        boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
+        mode (str): "iou" (intersection over union) or iof (intersection over
            foreground).
-    :return:
-        ans_iou: (M, N)
+
+    Returns:
+        FloatTensor: (N, M)
    """
-    boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
-    boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
+
+    boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
+    boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)

    # bev overlap
    overlaps_bev = torch.cuda.FloatTensor(
@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
    # 3d iou
    overlaps_3d = overlaps_bev * overlaps_h

-    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
-    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
+    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
+    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)

    if mode == 'iou':
        # the clamp func is used to avoid division of 0
        iou3d = overlaps_3d / torch.clamp(
-            vol_a + vol_b - overlaps_3d, min=1e-8)
+            volume_a + volume_b - overlaps_3d, min=1e-8)
    else:
-        iou3d = overlaps_3d / torch.clamp(vol_a, min=1e-8)
+        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
+
+    return iou3d
+
+
+def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
+    """Calculate 3D IoU of boxes in LiDAR coordinates.
+
+    Args:
+        boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
+            in LiDAR coordinates.
+        boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
+        mode (str): "iou" (intersection over union); any other value gives
+            intersection over the volume of ``boxes_a``.
+
+    Returns:
+        FloatTensor: (N, M)
+    """
+    boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
+    boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
+    # height overlap: every box spans [z, z + h] along the z axis
+    boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
+    boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
+    boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
+    boxes_b_height_min = boxes_b[:, 2].view(1, -1)
+
+    # bev overlap, written into overlaps_bev by the extension call below
+    overlaps_bev = boxes_a.new_zeros(
+        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))  # (N, M)
+    iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
+                                     boxes_b_bev.contiguous(), overlaps_bev)
+
+    max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
+    min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
+    overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
+
+    # 3d overlap volume = bev overlap area * height overlap
+    overlaps_3d = overlaps_bev * overlaps_h
+
+    volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
+    volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
+
+    if mode == 'iou':
+        # the clamp func is used to avoid division by 0
+        iou3d = overlaps_3d / torch.clamp(
+            volume_a + volume_b - overlaps_3d, min=1e-8)
+    else:
+        iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)

    return iou3d

@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
    return order[keep[:num_out].cuda()].contiguous()


-def boxes3d_to_bev_torch(boxes3d):
-    """
-    :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
-    :return:
-        boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
+def boxes3d_to_bev_torch_camera(boxes3d):
+    """Convert boxes3d to BEV boxes in camera coords.
+
+    Args:
+        boxes3d (FloatTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
+
+    Returns:
+        FloatTensor: (N, 5) [x1, y1, x2, y2, ry]
    """
    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
+
    cu, cv = boxes3d[:, 0], boxes3d[:, 2]
    half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
    boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev
+
+
+def boxes3d_to_bev_torch_lidar(boxes3d):
+    """Convert boxes3d to BEV boxes in LiDAR coords.
+
+    Args:
+        boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
+
+    Returns:
+        FloatTensor: (N, 5) [x1, y1, x2, y2, ry]
+    """
+    boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
+
+    x, y = boxes3d[:, 0], boxes3d[:, 1]
+    half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
+    boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
+    boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
+    boxes_bev[:, 4] = boxes3d[:, 6]
+    return boxes_bev
--- a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
+++ b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
@@ -7,8 +7,8 @@
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
-#include <torch/extension.h>
 #include <torch/serialize/tensor.h>
+#include <torch/types.h>

 #define THREADS_PER_BLOCK 256
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

--- a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
+++ b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
--- a/mmdet3d/ops/sparse_block.py
+++ b/mmdet3d/ops/sparse_block.py
 from mmcv.cnn import build_norm_layer
 from torch import nn

-import mmdet3d.ops.spconv as spconv
 from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
+from . import spconv


 def conv3x3(in_planes, out_planes, stride=1, indice_key=None):

--- a/tests/data/scannet/scannet_infos.pkl
+++ b/tests/data/scannet/scannet_infos.pkl
--- a/tests/data/scannet/scannet_train_instance_data/scene0000_00_ins_label.npy
+++ b/tests/data/scannet/scannet_train_instance_data/scene0000_00_ins_label.npy
--- a/tests/data/scannet/scannet_train_instance_data/scene0000_00_sem_label.npy
+++ b/tests/data/scannet/scannet_train_instance_data/scene0000_00_sem_label.npy
--- a/tests/data/sunrgbd/sunrgbd_infos.pkl
+++ b/tests/data/sunrgbd/sunrgbd_infos.pkl
--- a/tests/data/sunrgbd/sunrgbd_trainval/lidar/000001.npy
+++ b/tests/data/sunrgbd/sunrgbd_trainval/lidar/000001.npy
--- a/tests/test_box3d.py
+++ b/tests/test_box3d.py
--- a/tests/test_indoor_augment.py
+++ b/tests/test_indoor_augment.py
+import numpy as np
+
+from mmdet3d.datasets.pipelines import IndoorFlipData, IndoorGlobalRotScale
+
+
+def test_indoor_flip_data():
+    """Check IndoorFlipData on 7-column and 6-column ground truth boxes.
+
+    Both flip probabilities are 1, so every flip the transform is allowed
+    to apply is applied.
+    """
+    np.random.seed(0)
+    sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
+    sunrgbd_results = dict()
+    sunrgbd_results['points'] = np.array(
+        [[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
+         [-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
+    # 7-column boxes: only the yz flip applies (x negated, column 6 becomes
+    # pi - value); y stays untouched.
+    sunrgbd_results['gt_bboxes_3d'] = np.array([[
+        0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 3.07028526
+    ],
+                                                [
+                                                    -0.449953, 1.395455,
+                                                    -1.027778, 1.500956,
+                                                    1.637298, 0.636364,
+                                                    -1.58242359
+                                                ]])
+    sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
+    sunrgbd_points = sunrgbd_results['points']
+    sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
+
+    expected_sunrgbd_points = np.array(
+        [[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
+         [0.39597902, 1.05465031, -0.74920434, 0.673096]])
+    expected_sunrgbd_gt_bboxes_3d = np.array([[
+        -0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 0.07130739
+    ], [
+        0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364, 4.72401624
+    ]])
+    assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
+    assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
+
+    np.random.seed(0)
+    scannet_indoor_flip_data = IndoorFlipData(1, 1)
+    scannet_results = dict()
+    scannet_results['points'] = np.array(
+        [[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
+         [1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
+    # 6-column boxes: both the yz and the xz flip apply (x and y negated).
+    scannet_results['gt_bboxes_3d'] = np.array([[
+        0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
+    ], [
+        -0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
+    ]])
+    scannet_results = scannet_indoor_flip_data(scannet_results)
+    scannet_points = scannet_results['points']
+    scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
+
+    expected_scannet_points = np.array(
+        [[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
+         [-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
+    expected_scannet_gt_bboxes_3d = np.array([[
+        -0.55903838, -0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
+    ], [
+        0.03226406, -1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
+    ]])
+    assert np.allclose(scannet_points, expected_scannet_points)
+    assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
+
+
+def test_global_rot_scale():
+    """Check IndoorGlobalRotScale against fixed, seeded expectations.
+
+    The expected arrays depend on the random draws made after
+    ``np.random.seed(0)``, so the order of random calls inside the
+    transform must not change.
+    """
+    np.random.seed(0)
+    # 7-column boxes, rotation and scaling both enabled.
+    sunrgbd_augment = IndoorGlobalRotScale(
+        True, rot_range=[-np.pi / 6, np.pi / 6], scale_range=[0.85, 1.15])
+    sunrgbd_results = dict()
+    sunrgbd_results['points'] = np.array(
+        [[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
+         [-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
+    sunrgbd_results['gt_bboxes_3d'] = np.array([[
+        0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 3.07028526
+    ],
+                                                [
+                                                    -0.449953, 1.395455,
+                                                    -1.027778, 1.500956,
+                                                    1.637298, 0.636364,
+                                                    -1.58242359
+                                                ]])
+
+    sunrgbd_results = sunrgbd_augment(sunrgbd_results)
+    sunrgbd_points = sunrgbd_results['points']
+    sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
+
+    expected_sunrgbd_points = np.array(
+        [[0.89427376, 3.94489646, 0.21003141, 1.72415094],
+         [-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
+    expected_sunrgbd_gt_bboxes_3d = np.array([[
+        0.17080999, 1.11345031, -1.04573864, 0.65513891, 0.60953755,
+        0.92906854, 3.01916788
+    ],
+                                              [
+                                                  -0.55427876, 1.45912611,
+                                                  -1.09412807, 1.59785293,
+                                                  1.74299674, 0.67744563,
+                                                  -1.63354097
+                                              ]])
+    assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
+    assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
+
+    np.random.seed(0)
+    # 6-column boxes, rotation only (scale_range=None disables scaling).
+    scannet_augment = IndoorGlobalRotScale(
+        True, rot_range=[-np.pi * 1 / 36, np.pi * 1 / 36], scale_range=None)
+    scannet_results = dict()
+    scannet_results['points'] = np.array(
+        [[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
+         [1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
+    scannet_results['gt_bboxes_3d'] = np.array([[
+        0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
+    ], [
+        -0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
+    ]])
+    scannet_results = scannet_augment(scannet_results)
+    scannet_points = scannet_results['points']
+    scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
+
+    expected_scannet_points = np.array(
+        [[1.61240576, -0.15530836, 0.5811581, 0.5989725],
+         [1.39417555, 0.43225122, 0.38729519, 0.40510958]])
+    expected_scannet_gt_bboxes_3d = np.array([[
+        0.55491157, 0.48676213, 0.65688646, 0.65879754, 0.60584609, 0.5163464
+    ], [
+        -0.04677942, 1.70358975, 0.60348618, 0.65777559, 0.72636927, 0.64667457
+    ]])
+    assert np.allclose(scannet_points, expected_scannet_points)
+    assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
--- a/tests/test_indoor_loading.py
+++ b/tests/test_indoor_loading.py
+import os.path as osp
+
+import mmcv
+import numpy as np
+
+from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
+                                        IndoorLoadPointsFromFile)
+
+
+def test_indoor_load_points_from_file():
+    """Load the sunrgbd and scannet fixtures and check the point shapes."""
+    # sunrgbd: the file name is the zero-padded lidar index.
+    sunrgbd_root = './tests/data/sunrgbd/sunrgbd_trainval'
+    sunrgbd_first = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
+    sunrgbd_loader = IndoorLoadPointsFromFile(True, 6)
+    sunrgbd_idx = sunrgbd_first['point_cloud']['lidar_idx']
+    sunrgbd_input = {
+        'pts_filename': osp.join(sunrgbd_root, 'lidar',
+                                 f'{sunrgbd_idx:06d}.npy')
+    }
+    sunrgbd_output = sunrgbd_loader(sunrgbd_input)
+    assert sunrgbd_output['points'].shape == (100, 4)
+
+    # scannet: the file name is '<scan name>_vert.npy'.
+    scannet_root = './tests/data/scannet/scannet_train_instance_data'
+    scannet_first = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
+    scannet_loader = IndoorLoadPointsFromFile(True)
+    scannet_name = scannet_first['point_cloud']['lidar_idx']
+    scannet_input = {
+        'data_path': scannet_root,
+        'pts_filename': osp.join(scannet_root, f'{scannet_name}_vert.npy')
+    }
+    scannet_output = scannet_loader(scannet_input)
+    assert scannet_output['points'].shape == (100, 4)
+
+
+def test_load_annotations3D():
+    """Check fixture annotation shapes and IndoorLoadAnnotations3D output."""
+    # sunrgbd: only the shapes stored in the info file are checked.
+    sunrgbd_annos = mmcv.load(
+        './tests/data/sunrgbd/sunrgbd_infos.pkl')[0]['annos']
+    if sunrgbd_annos['gt_num'] == 0:
+        sunrgbd_boxes = np.zeros((1, 6), dtype=np.float32)
+        sunrgbd_labels = np.zeros((1, 1))
+        sunrgbd_box_mask = np.zeros((1, 1))
+    else:
+        sunrgbd_boxes = sunrgbd_annos['gt_boxes_upright_depth']
+        sunrgbd_labels = sunrgbd_annos['class'].reshape(-1, 1)
+        sunrgbd_box_mask = np.ones_like(sunrgbd_labels)
+    assert sunrgbd_boxes.shape == (3, 7)
+    assert sunrgbd_labels.shape == (3, 1)
+    assert sunrgbd_box_mask.shape == (3, 1)
+
+    # scannet: run the loader and check the loaded point masks as well.
+    scannet_first = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
+    scannet_loader = IndoorLoadAnnotations3D()
+    scannet_root = './tests/data/scannet/scannet_train_instance_data'
+    scannet_annos = scannet_first['annos']
+    if scannet_annos['gt_num'] == 0:
+        scannet_boxes = np.zeros((1, 6), dtype=np.float32)
+        scannet_labels = np.zeros((1, 1))
+        scannet_box_mask = np.zeros((1, 1))
+    else:
+        scannet_boxes = scannet_annos['gt_boxes_upright_depth']
+        scannet_labels = scannet_annos['class'].reshape(-1, 1)
+        scannet_box_mask = np.ones_like(scannet_labels)
+    scannet_name = scannet_first['point_cloud']['lidar_idx']
+    scannet_input = {
+        'pts_instance_mask_path':
+        osp.join(scannet_root, f'{scannet_name}_ins_label.npy'),
+        'pts_semantic_mask_path':
+        osp.join(scannet_root, f'{scannet_name}_sem_label.npy'),
+        'info': scannet_first,
+        'gt_bboxes_3d': scannet_boxes,
+        'gt_labels': scannet_labels,
+        'gt_bboxes_3d_mask': scannet_box_mask
+    }
+    scannet_output = scannet_loader(scannet_input)
+    assert scannet_output['gt_bboxes_3d'].shape == (27, 6)
+    assert scannet_output['gt_labels'].shape == (27, 1)
+    assert scannet_output['gt_bboxes_3d_mask'].shape == (27, 1)
+    assert scannet_output['pts_instance_mask'].shape == (100, )
+    assert scannet_output['pts_semantic_mask'].shape == (100, )
--- a/tests/test_indoor_sample.py
+++ b/tests/test_indoor_sample.py
 import numpy as np

-from mmdet3d.datasets.pipelines.indoor_sample import PointSample
+from mmdet3d.datasets.pipelines import IndoorPointSample


 def test_indoor_sample():
    np.random.seed(0)
-    scannet_sample_points = PointSample(5)
+    scannet_sample_points = IndoorPointSample(5)
    scannet_results = dict()
    scannet_points = np.array([[1.0719866, -0.7870435, 0.8408122, 0.9196809],
                               [1.103661, 0.81065744, 2.6616862, 2.7405548],
@@ -24,11 +24,9 @@ def test_indoor_sample():
    scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])
    scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
    scannet_results = scannet_sample_points(scannet_results)
-    scannet_points_result = scannet_results.get('points', None)
-    scannet_instance_labels_result = scannet_results.get(
-        'pts_instance_mask', None)
-    scannet_semantic_labels_result = scannet_results.get(
-        'pts_semantic_mask', None)
+    scannet_points_result = scannet_results['points']
+    scannet_instance_labels_result = scannet_results['pts_instance_mask']
+    scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
    scannet_choices = np.array([2, 8, 4, 9, 1])
    assert np.allclose(scannet_points[scannet_choices], scannet_points_result)
    assert np.all(scannet_pts_instance_mask[scannet_choices] ==
@@ -37,7 +35,7 @@ def test_indoor_sample():
                  scannet_semantic_labels_result)

    np.random.seed(0)
-    sunrgbd_sample_points = PointSample(5)
+    sunrgbd_sample_points = IndoorPointSample(5)
    sunrgbd_results = dict()
    sunrgbd_point_cloud = np.array(
        [[-1.8135729e-01, 1.4695230e+00, -1.2780589e+00, 7.8938007e-03],
@@ -53,6 +51,6 @@ def test_indoor_sample():
    sunrgbd_results['points'] = sunrgbd_point_cloud
    sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)
    sunrgbd_choices = np.array([2, 8, 4, 9, 1])
-    sunrgbd_points_result = sunrgbd_results.get('points', None)
+    sunrgbd_points_result = sunrgbd_results['points']
    assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],
                       sunrgbd_points_result)