Commit 49121b64 authored by yinchimaoliang's avatar yinchimaoliang

Merge branch 'master_temp' into scannet_dataset

parents f8f05baf 868c5fab
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorFlipData(object):
"""Indoor Flip Data.
Flip point cloud and ground truth boxes.
The point cloud will be flipped along the yz plane
and the xz plane, each with a certain probability.
Args:
flip_ratio_yz (float): Probability of being flipped along yz plane.
Default: 0.5.
flip_ratio_xz (float): Probability of being flipped along xz plane.
Default: 0.5.
"""
def __init__(self, flip_ratio_yz=0.5, flip_ratio_xz=0.5):
self.flip_ratio_yz = flip_ratio_yz
self.flip_ratio_xz = flip_ratio_xz
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
# axis-aligned boxes have 6 values (no yaw channel)
aligned = gt_bboxes_3d.shape[1] == 6
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d[:, 0] = -1 * gt_bboxes_3d[:, 0]
if not aligned:
gt_bboxes_3d[:, 6] = np.pi - gt_bboxes_3d[:, 6]
results['flip_yz'] = True
if aligned and np.random.random() < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d[:, 1] = -1 * gt_bboxes_3d[:, 1]
results['flip_xz'] = True
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(flip_ratio_yz={}, '.format(self.flip_ratio_yz)
repr_str += 'flip_ratio_xz={})'.format(self.flip_ratio_xz)
return repr_str
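As a quick sanity check, a minimal sketch of the flip transform (both ratios set to 1 so the flip always fires; the import path follows the tests added in this commit):

import numpy as np
from mmdet3d.datasets.pipelines import IndoorFlipData

flip = IndoorFlipData(flip_ratio_yz=1, flip_ratio_xz=1)
results = dict(
    points=np.array([[1.0, 2.0, 0.5, 0.9]]),
    # a 7-dim box (x, y, z, dx, dy, dz, yaw) is treated as unaligned
    gt_bboxes_3d=np.array([[0.2, 1.0, -0.9, 0.6, 0.5, 0.8, 0.3]]))
out = flip(results)
# the yz flip negates x and maps yaw to pi - yaw; the xz branch only
# runs for aligned (6-dim) boxes, so it is skipped here
assert out['points'][0, 0] == -1.0
assert np.isclose(out['gt_bboxes_3d'][0, 6], np.pi - 0.3)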
@PIPELINES.register_module()
class IndoorPointsColorJitter(object):
"""Indoor Points Color Jitter.
Randomly change the brightness and color of the point cloud, and
randomly drop the points' colors with a certain probability.
Args:
color_mean (List[float]): Mean color of the point cloud.
Default: [0.5, 0.5, 0.5].
bright_range (List[float]): Range of brightness.
Default: [0.8, 1.2].
color_shift_range (List[float]): Range of color shift.
Default: [0.95, 1.05].
jitter_range (List[float]): Range of jittering.
Default: [-0.025, 0.025].
drop_prob (float): Probability to drop out points' color.
Default: 0.3
"""
def __init__(self,
color_mean=[0.5, 0.5, 0.5],
bright_range=[0.8, 1.2],
color_shift_range=[0.95, 1.05],
jitter_range=[-0.025, 0.025],
drop_prob=0.3):
self.color_mean = color_mean
self.bright_range = bright_range
self.color_shift_range = color_shift_range
self.jitter_range = jitter_range
self.drop_prob = drop_prob
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points to have at least 6 channels, got {points.shape[1]}.'
rgb_color = points[:, 3:6] + self.color_mean
# brightness change for each channel
rgb_color *= np.random.uniform(self.bright_range[0],
self.bright_range[1], 3)
# per-channel color shift; the range is centered at 1, so it is
# applied multiplicatively (adding ~1.0 would saturate the colors)
rgb_color *= np.random.uniform(self.color_shift_range[0],
self.color_shift_range[1], 3)
# jittering on each point (one random offset per point)
rgb_color += np.expand_dims(
np.random.uniform(self.jitter_range[0], self.jitter_range[1],
points.shape[0]), -1)
rgb_color = np.clip(rgb_color, 0, 1)
# randomly drop out points' colors
rgb_color *= np.expand_dims(
np.random.random(points.shape[0]) > self.drop_prob, -1)
points[:, 3:6] = rgb_color - self.color_mean
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={}, '.format(self.color_mean)
repr_str += 'bright_range={}, '.format(self.bright_range)
repr_str += 'color_shift_range={}, '.format(self.color_shift_range)
repr_str += 'jitter_range={}, '.format(self.jitter_range)
repr_str += 'drop_prob={})'.format(self.drop_prob)
return repr_str
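For intuition, a minimal numpy sketch of the color drop-out step in isolation (toy values, not the full pipeline): stored colors are centered around color_mean, shifted back into [0, 1], masked, then re-centered.

import numpy as np

color_mean = np.array([0.5, 0.5, 0.5])
stored = np.array([[0.1, -0.2, 0.3], [0.0, 0.4, -0.1]])  # centered colors
rgb = stored + color_mean                     # back to the [0, 1] range
keep = np.random.random(rgb.shape[0]) > 0.3   # drop_prob = 0.3
rgb *= np.expand_dims(keep, -1)               # dropped points turn black
stored = rgb - color_mean                     # re-center before storing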
# TODO: merge outdoor indoor transform.
# TODO: try transform noise.
@PIPELINES.register_module()
class IndoorGlobalRotScale(object):
"""Indoor Global Rotate Scale.
Augment SUN RGB-D and ScanNet data with global rotation and scaling.
Args:
use_height (bool): Whether to use height.
Default: True.
rot_range (List[float]): Range of rotation.
Default: None.
scale_range (List[float]): Range of scale.
Default: None.
"""
def __init__(self, use_height=True, rot_range=None, scale_range=None):
self.use_height = use_height
self.rot_range = rot_range
self.scale_range = scale_range
def _rotz(self, t):
"""Rotate About Z.
Rotation about the z-axis.
Args:
t (float): Angle of rotation.
Returns:
rot_mat (ndarray): Matrix of rotation.
"""
c = np.cos(t)
s = np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def _rotate_aligned_boxes(self, input_boxes, rot_mat):
"""Rotate Aligned Boxes.
Rotate function for the aligned boxes.
Args:
input_boxes (ndarray): 3D boxes.
rot_mat (ndarray): Rotation matrix.
Returns:
rotated_boxes (ndarray): 3D boxes after rotation.
"""
centers, lengths = input_boxes[:, 0:3], input_boxes[:, 3:6]
new_centers = np.dot(centers, rot_mat.T)
dx, dy = lengths[:, 0] / 2.0, lengths[:, 1] / 2.0
new_x = np.zeros((dx.shape[0], 4))
new_y = np.zeros((dx.shape[0], 4))
for i, corner in enumerate([(-1, -1), (1, -1), (1, 1), (-1, 1)]):
corners = np.zeros((dx.shape[0], 3))
corners[:, 0] = corner[0] * dx
corners[:, 1] = corner[1] * dy
corners = np.dot(corners, rot_mat.T)
new_x[:, i] = corners[:, 0]
new_y[:, i] = corners[:, 1]
new_dx = 2.0 * np.max(new_x, 1)
new_dy = 2.0 * np.max(new_y, 1)
new_lengths = np.stack((new_dx, new_dy, lengths[:, 2]), axis=1)
return np.concatenate([new_centers, new_lengths], axis=1)
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
aligned = gt_bboxes_3d.shape[1] == 6
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expect rot_range to have 2 elements, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
rot_mat = self._rotz(rot_angle)
points[:, :3] = np.dot(points[:, :3], rot_mat.T)
if aligned:
gt_bboxes_3d = self._rotate_aligned_boxes(
gt_bboxes_3d, rot_mat)
else:
gt_bboxes_3d[:, :3] = np.dot(gt_bboxes_3d[:, :3], rot_mat.T)
gt_bboxes_3d[:, 6] -= rot_angle
if self.scale_range is not None:
assert len(self.scale_range) == 2, \
f'Expect scale_range to have 2 elements, ' \
f'got {len(self.scale_range)}.'
# Augment point cloud scale
scale_ratio = np.random.uniform(self.scale_range[0],
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d[:, :3] *= scale_ratio
gt_bboxes_3d[:, 3:6] *= scale_ratio
if self.use_height:
points[:, -1] *= scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={}, '.format(self.use_height)
repr_str += 'rot_range={}, '.format(self.rot_range)
repr_str += 'scale_range={})'.format(self.scale_range)
return repr_str
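The rotation helper above is a plain right-handed rotation about the z-axis; a small check (points are row vectors, so the code multiplies by the transposed matrix):

import numpy as np

t = np.pi / 2
c, s = np.cos(t), np.sin(t)
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
pt = np.array([[1.0, 0.0, 0.0]])
assert np.allclose(pt @ rot_mat.T, [[0.0, 1.0, 0.0]])  # x-axis -> y-axis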
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointsColorNormalize(object):
"""Indoor Points Color Normalize
Normalize color of the points.
Args:
color_mean (List[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6, \
f'Expect points to have at least 6 channels, got {points.shape[1]}.'
# subtract the mean first, then rescale to [0, 1] (0-255 color space)
points[:, 3:6] = (points[:, 3:6] - np.array(self.color_mean)) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
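A worked one-liner for the normalization, assuming raw 0-255 colors and a 0-255 color_mean:

import numpy as np

color_mean = [127.0, 127.0, 127.0]
points = np.array([[0.0, 0.0, 0.0, 130.0, 120.0, 125.0]])
points[:, 3:6] = (points[:, 3:6] - np.array(color_mean)) / 256.0
# colors are now centered near zero: [[0.0117, -0.0273, -0.0078]]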
@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
"""Indoor Load Points From File.
Load SUN RGB-D and ScanNet points from file.
Args:
use_height (bool): Whether to use height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (List[int]): Which dimensions of the points to be used.
Default: [0, 1, 2].
"""
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
self.use_height = use_height
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
def __call__(self, results):
pts_filename = results['pts_filename']
mmcv.check_file_exist(pts_filename)
points = np.load(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
if self.use_height:
floor_height = np.percentile(points[:, 2], 0.99)
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(use_height={}, '.format(self.use_height)
repr_str += 'load_dim={}, '.format(self.load_dim)
repr_str += 'use_dim={})'.format(self.use_dim)
return repr_str
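A short sketch of the height channel computed above. Note that np.percentile takes a percentage, so 0.99 is the 0.99th percentile (very close to the minimum z), not the 99th:

import numpy as np

points = np.array([[0.0, 0.0, 0.02],
                   [0.0, 0.0, 0.05],
                   [0.0, 0.0, 1.50]])
floor_height = np.percentile(points[:, 2], 0.99)  # ~0.02, near the floor
height = points[:, 2] - floor_height
points = np.concatenate([points, np.expand_dims(height, 1)], 1)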
@PIPELINES.register_module()
class IndoorLoadAnnotations3D(object):
"""Indoor Load Annotations3D.
Load instance mask and semantic mask of points.
"""
def __init__(self):
pass
def __call__(self, results):
pts_instance_mask_path = results['pts_instance_mask_path']
pts_semantic_mask_path = results['pts_semantic_mask_path']
mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path)
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
@@ -4,7 +4,7 @@ from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointSample(object):
"""Point Sample.
Sampling data to a certain number.
@@ -46,7 +46,7 @@ class PointSample(object):
return points[choices]
def __call__(self, results):
points = results['points']
points, choices = self.points_random_sampling(
points, self.num_points, return_choices=True)
pts_instance_mask = results.get('pts_instance_mask', None)
......
from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
from .single_roiaware_extractor import Single3DRoIAwareExtractor
__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
import torch
import torch.nn as nn
from mmdet3d import ops
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIAwareExtractor(nn.Module):
"""Point-wise roi-aware Extractor
Extract Point-wise roi features.
Args:
roi_layer (dict): the config of roi layer
"""
def __init__(self, roi_layer=None):
super(Single3DRoIAwareExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features
Args:
feats (FloatTensor): point-wise features with
shape (batch, npoints, channels) for pooling
coordinate (FloatTensor): coordinate of each point
batch_inds (longTensor): indicate the batch of each point
rois (FloatTensor): roi boxes with batch indices
Returns:
FloatTensor: pooled features
"""
pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1):
roi_inds = (rois[..., 0].int() == batch_idx)
coors_inds = (batch_inds.int() == batch_idx)
pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
coordinate[coors_inds],
feats[coors_inds])
pooled_roi_feats.append(pooled_roi_feat)
pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
return pooled_roi_feats
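A hypothetical wiring sketch for the extractor; the out_size / max_pts_per_voxel / mode argument names for ops.RoIAwarePool3d are assumptions, not confirmed by this diff:

extractor = Single3DRoIAwareExtractor(
    roi_layer=dict(type='RoIAwarePool3d', out_size=4,
                   max_pts_per_voxel=128, mode='max'))  # assumed args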
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
get_compiling_cuda_version, nms, roi_align,
sigmoid_focal_loss)
from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
points_in_boxes_gpu)
from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
SparseBottleneck, SparseBottleneckV0)
from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
__all__ = [
'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
'points_in_boxes_gpu', 'points_in_boxes_cpu'
]
from .iou3d_utils import (boxes_iou3d_gpu_camera, boxes_iou3d_gpu_lidar,
boxes_iou_bev, nms_gpu, nms_normal_gpu)
__all__ = [
'boxes_iou_bev', 'boxes_iou3d_gpu_camera', 'nms_gpu', 'nms_normal_gpu',
'boxes_iou3d_gpu_lidar'
]
@@ -20,17 +20,22 @@ def boxes_iou_bev(boxes_a, boxes_b):
return ans_iou
def boxes_iou3d_gpu_camera(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in camera coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, h, w, l, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, h, w, l, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_camera(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_camera(boxes_b)
# bev overlap
overlaps_bev = torch.cuda.FloatTensor(
@@ -51,15 +56,62 @@ def boxes_iou3d_gpu(boxes_a, boxes_b, mode='iou'):
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
def boxes_iou3d_gpu_lidar(boxes_a, boxes_b, mode='iou'):
"""Calculate 3d iou of boxes in lidar coordinate
Args:
boxes_a (FloatTensor): (N, 7) [x, y, z, w, l, h, ry]
in LiDAR coordinate
boxes_b (FloatTensor): (M, 7) [x, y, z, w, l, h, ry]
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
FloatTensor: (M, N)
"""
boxes_a_bev = boxes3d_to_bev_torch_lidar(boxes_a)
boxes_b_bev = boxes3d_to_bev_torch_lidar(boxes_b)
# height overlap
boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
boxes_b_height_min = boxes_b[:, 2].view(1, -1)
# bev overlap
overlaps_bev = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) # (N, M)
iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(),
boxes_b_bev.contiguous(), overlaps_bev)
max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
# 3d iou
overlaps_3d = overlaps_bev * overlaps_h
volume_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
volume_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
if mode == 'iou':
# the clamp func is used to avoid division of 0
iou3d = overlaps_3d / torch.clamp(
volume_a + volume_b - overlaps_3d, min=1e-8)
else:
iou3d = overlaps_3d / torch.clamp(volume_a, min=1e-8)
return iou3d
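The decomposition is worth spelling out: the 3D overlap is the BEV polygon overlap times the height overlap, and the IoU divides by the union volume. A toy check of the final arithmetic (pure torch, no CUDA op needed):

import torch

overlaps_bev = torch.tensor([[0.5]])  # as produced by the BEV-overlap op
overlaps_h = torch.tensor([[1.0]])
overlaps_3d = overlaps_bev * overlaps_h
volume_a = torch.tensor([[1.0]])
volume_b = torch.tensor([[1.0]])
iou3d = overlaps_3d / torch.clamp(volume_a + volume_b - overlaps_3d, min=1e-8)
assert torch.allclose(iou3d, torch.tensor([[1.0 / 3.0]]))  # 0.5 / 1.5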
@@ -98,16 +150,39 @@ def nms_normal_gpu(boxes, scores, thresh):
return order[keep[:num_out].cuda()].contiguous()
def boxes3d_to_bev_torch_camera(boxes3d):
"""covert boxes3d to bev in in camera coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, h, w, l, ry] in camera coords
Return:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""covert boxes3d to bev in in LiDAR coords
Args:
boxes3d (FloartTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
Returns:
FloartTensor: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
x, y = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = x - half_w, y - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = x + half_w, y + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
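A quick check of the LiDAR BEV conversion above (half the width spans x, half the length spans y):

import torch

box = torch.tensor([[2.0, 3.0, -1.0, 1.0, 4.0, 1.5, 0.0]])  # x, y, z, w, l, h, ry
bev = boxes3d_to_bev_torch_lidar(box)
# half_w = 0.5 along x, half_l = 2.0 along y
assert torch.allclose(bev, torch.tensor([[1.5, 1.0, 2.5, 5.0, 0.0]]))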
@@ -7,8 +7,8 @@
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
......
// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/serialize/tensor.h>
#include <torch/types.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
float rz, float &local_x,
float &local_y) {
// should rotate pi/2 + alpha to translate LiDAR to local
float rot_angle = rz + M_PI / 2;
float cosa = cos(rot_angle), sina = sin(rot_angle);
@@ -26,10 +25,11 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float
local_y = shift_x * sina + shift_y * cosa;
}
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
float &local_x, float &local_y) {
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float x = pt[0], y = pt[1], z = pt[2];
float cx = box3d[0], cy = box3d[1], cz = box3d[2];
float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
@@ -37,16 +37,19 @@ __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, flo
if (fabsf(z - cz) > h / 2.0) return 0;
lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
(local_y > -w / 2.0) & (local_y < w / 2.0);
return in_flag;
}
__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
int out_x, int out_y, int out_z,
const float *rois, const float *pts,
int *pts_mask) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means the point is not in this box,
// otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
int box_idx = blockIdx.y;
if (pt_idx >= pts_num || box_idx >= boxes_num) return;
@@ -59,7 +62,7 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
pts_mask[0] = -1;
if (cur_in_flag > 0) {
float local_z = pts[2] - rois[2];
float w = rois[3], l = rois[4], h = rois[5];
@@ -77,17 +80,22 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_
unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
#ifdef DEBUG
printf("mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, %d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx, z_idx, x_res, y_res, z_res, idx_encoding);
printf(
"mask: pts_%d(%.3f, %.3f, %.3f), local(%.3f, %.3f, %.3f), idx(%d, %d, "
"%d), res(%.3f, %.3f, %.3f), idx_encoding=%x\n",
pt_idx, pts[0], pts[1], pts[2], local_x, local_y, local_z, x_idx, y_idx,
z_idx, x_res, y_res, z_res, idx_encoding);
#endif
pts_mask[0] = idx_encoding;
}
}
__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num,
int max_pts_each_voxel, int out_x,
int out_y, int out_z,
const int *pts_mask,
int *pts_idx_of_voxels) {
// params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
@@ -97,33 +105,36 @@ __global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max
int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
for (int k = 0; k < pts_num; k++) {
if (pts_mask[box_idx * pts_num + k] != -1) {
unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
unsigned int z_idx = idx_encoding & 0xFF;
unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel +
y_idx * out_z * max_pts_each_voxel +
z_idx * max_pts_each_voxel;
unsigned int cnt = pts_idx_of_voxels[base_offset];
if (cnt < max_num_pts) {
pts_idx_of_voxels[base_offset + cnt + 1] = k;
pts_idx_of_voxels[base_offset]++;
}
#ifdef DEBUG
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n",
k, x_idx, y_idx, z_idx, idx_encoding);
printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n", k, x_idx,
y_idx, z_idx, idx_encoding);
#endif
}
}
}
__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features, int *argmax) {
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
@@ -133,46 +144,57 @@ __global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
#ifdef DEBUG
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax);
printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels,
argmax);
#endif
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int argmax_idx = -1;
float max_val = -1e50;
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++) {
if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) {
max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
argmax_idx = pts_idx_of_voxels[k];
}
}
if (argmax_idx != -1) {
pooled_features[0] = max_val;
}
argmax[0] = argmax_idx;
#ifdef DEBUG
printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx);
printf(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after "
"pts_idx: %p, argmax: (%p, %d)\n",
channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts,
pts_idx_of_voxels, argmax, argmax_idx);
#endif
}
__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *pts_feature,
const int *pts_idx_of_voxels,
float *pooled_features) {
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel),
// index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int box_idx = blockIdx.z;
@@ -182,28 +204,34 @@ __global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
pooled_features += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
float sum_val = 0;
int total_pts = pts_idx_of_voxels[0];
for (int k = 1; k <= total_pts; k++) {
sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
}
if (total_pts > 0) {
pooled_features[0] = sum_val / total_pts;
}
}
void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const float *rois, const float *pts,
const float *pts_feature, int *argmax,
int *pts_idx_of_voxels, float *pooled_features,
int pool_method) {
// params rois: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
@@ -218,25 +246,28 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
dim3 threads(THREADS_PER_BLOCK);
generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(
boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
// TODO: Merge the collect and pool functions, SS
dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(
boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, out_z, pts_mask,
pts_idx_of_voxels);
dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
boxes_num);
if (pool_method == 0) {
roiaware_maxpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features, argmax);
} else if (pool_method == 1) {
roiaware_avgpool3d<<<blocks_pool, threads>>>(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
pts_feature, pts_idx_of_voxels, pooled_features);
}
cudaFree(pts_mask);
#ifdef DEBUG
@@ -244,9 +275,11 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_
#endif
}
__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels,
int out_x, int out_y, int out_z,
const int *argmax,
const float *grad_out,
float *grad_in) {
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
@@ -258,20 +291,27 @@ __global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
argmax += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
if (argmax[0] == -1) return;
atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
}
__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels,
int out_x, int out_y, int out_z,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const float *grad_out,
float *grad_in) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
@@ -283,41 +323,45 @@ __global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out
int x_idx = voxel_idx_flat / (out_y * out_z);
int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
int z_idx = voxel_idx_flat % out_z;
if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x ||
y_idx >= out_y || z_idx >= out_z)
return;
int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel +
offset_base * max_pts_each_voxel;
grad_out += box_idx * out_x * out_y * out_z * channels +
offset_base * channels + channel_idx;
int total_pts = pts_idx_of_voxels[0];
float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
for (int k = 1; k <= total_pts; k++) {
atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx,
grad_out[0] * cur_grad);
}
}
void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const int *pts_idx_of_voxels,
const int *argmax, const float *grad_out,
float *grad_in, int pool_method) {
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool, 1: avg_pool
dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels,
boxes_num);
dim3 threads(THREADS_PER_BLOCK);
if (pool_method == 0) {
roiaware_maxpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in);
} else if (pool_method == 1) {
roiaware_avgpool3d_backward<<<blocks, threads>>>(
boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel,
pts_idx_of_voxels, grad_out, grad_in);
}
}
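To make the kernel geometry easier to follow, here is a minimal numpy sketch of the point-in-box test from check_pt_in_box3d; the local_x line elided in the collapsed hunk above is assumed to be the standard counterpart of the shown local_y rotation:

import numpy as np

def check_pt_in_box3d_py(pt, box3d):
    # box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinates,
    # mirroring the CUDA kernel's checks
    x, y, z = pt
    cx, cy, cz, w, l, h, rz = box3d
    if abs(z - cz) > h / 2.0:
        return False
    # rotate by pi/2 + rz to go from LiDAR to local box coordinates
    rot = rz + np.pi / 2
    cosa, sina = np.cos(rot), np.sin(rot)
    local_x = (x - cx) * cosa + (y - cy) * (-sina)  # assumed elided line
    local_y = (x - cx) * sina + (y - cy) * cosa
    return abs(local_x) < l / 2.0 and abs(local_y) < w / 2.0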
from mmcv.cnn import build_norm_layer
from torch import nn
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from . import spconv
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
......
import numpy as np
import pytest
import torch
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
LiDARInstance3DBoxes)
def test_lidar_boxes3d():
# test empty initialization
empty_boxes = []
boxes = LiDARInstance3DBoxes(empty_boxes)
assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
dtype=np.float32)
boxes_1 = LiDARInstance3DBoxes(np_boxes)
assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
# test properties
assert boxes_1.volume.size(0) == 2
assert (boxes_1.center == boxes_1.bottom_center).all()
assert repr(boxes) == (
'LiDARInstance3DBoxes(\n tensor([], size=(0, 7)))')
# test init with torch.Tensor
th_boxes = torch.tensor(
[[
28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
1.48000002, -1.57000005
],
[
26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
1.39999998, -1.69000006
],
[
31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
1.48000002, 2.78999996
]],
dtype=torch.float32)
boxes_2 = LiDARInstance3DBoxes(th_boxes)
assert torch.allclose(boxes_2.tensor, th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# concatenate empty list
empty_boxes = LiDARInstance3DBoxes.cat([])
assert empty_boxes.tensor.shape[0] == 0
assert empty_boxes.tensor.shape[-1] == 7
# test box flip
expected_tensor = torch.tensor(
[[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
[8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
boxes.flip()
assert torch.allclose(boxes.tensor, expected_tensor)
# test box rotation
expected_tensor = torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
[7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
[27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
[19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
[27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]])
boxes.rotate(0.27207362796436096)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box scaling
expected_tensor = torch.tensor([[
1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.014273, -4.8007393, -1.6448704,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.558605, -7.1084175, -1.310622,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
19.934517, -28.344835, -1.7457767,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.130915, -16.369587, -1.6308585,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.scale(1.00559866335275)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box translation
expected_tensor = torch.tensor([[
1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.098079, -4.9332013, -1.8018866,
1.5486219, 4.0324507, 1.57879,
1.7936664
],
[
27.64241, -7.2408795, -1.4676381,
1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
boxes.translate([0.0838056, -0.13246193, -0.15701613])
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
# test bbox in_range
expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
mask = boxes.in_range_3d([0, -20, -2, 22, 2, 5])
assert (mask == expected_tensor).all()
# test bbox indexing
index_boxes = boxes[2:5]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
20.018322, -28.477297, -1.9027928,
1.5687338, 3.4994833, 1.4078381,
5.1036663
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 3
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[2]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
]])
assert len(index_boxes) == 1
assert torch.allclose(index_boxes.tensor, expected_tensor)
index_boxes = boxes[[2, 4]]
expected_tensor = torch.tensor([[
27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485, 1.488286,
4.9836664
],
[
28.21472, -16.502048, -1.7878747,
1.7497417, 3.791107, 1.488286,
0.6236664
]])
assert len(index_boxes) == 2
assert torch.allclose(index_boxes.tensor, expected_tensor)
# test iteration
for i, box in enumerate(index_boxes):
assert torch.allclose(box, expected_tensor[i])
# test properties
assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
expected_tensor = (
boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
(torch.tensor([0.5, 0.5, 0]) - torch.tensor([0.5, 0.5, 0.5])))
assert torch.allclose(boxes.gravity_center, expected_tensor)
boxes.limit_yaw()
assert (boxes.tensor[:, 6] <= np.pi / 2).all()
assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
expected_tensor = boxes.tensor.clone()
assert torch.allclose(expected_tensor, boxes.tensor)
boxes.flip()
boxes.flip()
boxes.limit_yaw()
assert torch.allclose(expected_tensor, boxes.tensor)
# test nearest_bev
expected_tensor = torch.tensor([[-0.5763, -3.9307, 2.8326, -2.1709],
[6.0819, -5.7075, 10.1143, -4.1589],
[26.5212, -7.9800, 28.7637, -6.5018],
[18.2686, -29.2617, 21.7681, -27.6929],
[27.3398, -18.3976, 29.0896, -14.6065]])
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],
[2.4093e+00, -4.4784e+00, -2.5769e-01],
[-7.7767e-01, -3.2684e+00, -2.5769e-01],
[-7.7767e-01, -3.2684e+00, -1.9169e+00],
[3.0340e+00, -2.8332e+00, -1.9169e+00],
[3.0340e+00, -2.8332e+00, -2.5769e-01],
[-1.5301e-01, -1.6232e+00, -2.5769e-01],
[-1.5301e-01, -1.6232e+00, -1.9169e+00]],
[[9.8933e+00, -6.1340e+00, -1.8019e+00],
[9.8933e+00, -6.1340e+00, -2.2310e-01],
[5.9606e+00, -5.2427e+00, -2.2310e-01],
[5.9606e+00, -5.2427e+00, -1.8019e+00],
[1.0236e+01, -4.6237e+00, -1.8019e+00],
[1.0236e+01, -4.6237e+00, -2.2310e-01],
[6.3029e+00, -3.7324e+00, -2.2310e-01],
[6.3029e+00, -3.7324e+00, -1.8019e+00]],
[[2.8525e+01, -8.2534e+00, -1.4676e+00],
[2.8525e+01, -8.2534e+00, 2.0648e-02],
[2.6364e+01, -7.6525e+00, 2.0648e-02],
[2.6364e+01, -7.6525e+00, -1.4676e+00],
[2.8921e+01, -6.8292e+00, -1.4676e+00],
[2.8921e+01, -6.8292e+00, 2.0648e-02],
[2.6760e+01, -6.2283e+00, 2.0648e-02],
[2.6760e+01, -6.2283e+00, -1.4676e+00]],
[[2.1337e+01, -2.9870e+01, -1.9028e+00],
[2.1337e+01, -2.9870e+01, -4.9495e-01],
[1.8102e+01, -2.8535e+01, -4.9495e-01],
[1.8102e+01, -2.8535e+01, -1.9028e+00],
[2.1935e+01, -2.8420e+01, -1.9028e+00],
[2.1935e+01, -2.8420e+01, -4.9495e-01],
[1.8700e+01, -2.7085e+01, -4.9495e-01],
[1.8700e+01, -2.7085e+01, -1.9028e+00]],
[[2.6398e+01, -1.7530e+01, -1.7879e+00],
[2.6398e+01, -1.7530e+01, -2.9959e-01],
[2.8612e+01, -1.4452e+01, -2.9959e-01],
[2.8612e+01, -1.4452e+01, -1.7879e+00],
[2.7818e+01, -1.8552e+01, -1.7879e+00],
[2.7818e+01, -1.8552e+01, -2.9959e-01],
[3.0032e+01, -1.5474e+01, -2.9959e-01],
[3.0032e+01, -1.5474e+01, -1.7879e+00]]])
# the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
def test_boxes_conversion():
"""Test the conversion of boxes between different modes.
ComandLine:
xdoctest tests/test_box3d.py::test_boxes_conversion zero
"""
lidar_boxes = LiDARInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
Box3DMode.CAM)
lidar_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
Box3DMode.LIDAR)
expected_tensor = torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
assert torch.allclose(expected_tensor, lidar_box_tensor)
assert torch.allclose(lidar_boxes.tensor, lidar_box_tensor)
depth_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
Box3DMode.DEPTH)
depth_to_cam_box_tensor = Box3DMode.convert(depth_box_tensor,
Box3DMode.DEPTH, Box3DMode.CAM)
assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)
# test error raise with not supported conversion
with pytest.raises(NotImplementedError):
Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
with pytest.raises(NotImplementedError):
Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
# test similar mode conversion
same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
Box3DMode.DEPTH)
assert (same_results == depth_box_tensor).all()
# test conversion with a given rt_mat
camera_boxes = CameraInstance3DBoxes(
[[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54],
[6.59, 1.53, 6.76, 12.78, 3.66, 2.28, 1.55],
[6.71, 1.59, 22.18, 14.73, 3.64, 2.32, 1.59],
[7.11, 1.58, 34.54, 10.04, 3.61, 2.32, 1.61],
[7.78, 1.65, 45.95, 12.83, 3.63, 2.34, 1.64]])
rect = torch.tensor(
[[0.9999239, 0.00983776, -0.00744505, 0.],
[-0.0098698, 0.9999421, -0.00427846, 0.],
[0.00740253, 0.00435161, 0.9999631, 0.], [0., 0., 0., 1.]],
dtype=torch.float32)
Trv2c = torch.tensor(
[[7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],
[1.480249e-02, 7.280733e-04, -9.998902e-01, -7.631618e-02],
[9.998621e-01, 7.523790e-03, 1.480755e-02, -2.717806e-01],
[0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00]],
dtype=torch.float32)
expected_tensor = torch.tensor(
[[
2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00,
3.20000005e+00, 1.61000001e+00, -1.53999996e+00
],
[
7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00,
1.27799997e+01, 3.66000009e+00, 1.54999995e+00
],
[
2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00,
1.47299995e+01, 3.64000010e+00, 1.59000003e+00
],
[
3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00,
1.00400000e+01, 3.60999990e+00, 1.61000001e+00
],
[
4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00,
1.28299999e+01, 3.63000011e+00, 1.63999999e+00
]],
dtype=torch.float32)
rt_mat = rect @ Trv2c
# test conversion with Box type
cam_to_lidar_box = Box3DMode.convert(camera_boxes, Box3DMode.CAM,
Box3DMode.LIDAR, rt_mat.inverse())
assert torch.allclose(cam_to_lidar_box.tensor, expected_tensor)
lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box.tensor,
Box3DMode.LIDAR, Box3DMode.CAM,
rt_mat)
assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)
# test numpy convert
cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor.numpy(),
Box3DMode.CAM, Box3DMode.LIDAR,
rt_mat.inverse().numpy())
assert np.allclose(cam_to_lidar_box, expected_tensor.numpy())
# test list convert
cam_to_lidar_box = Box3DMode.convert(
camera_boxes.tensor[0].numpy().tolist(), Box3DMode.CAM,
Box3DMode.LIDAR,
rt_mat.inverse().numpy())
assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())
def test_camera_boxes3d():
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
dtype=np.float32)
boxes_1 = Box3DMode.convert(
LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM)
assert isinstance(boxes_1, CameraInstance3DBoxes)
cam_np_boxes = Box3DMode.convert(np_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
assert torch.allclose(boxes_1.tensor,
boxes_1.tensor.new_tensor(cam_np_boxes))
# test init with torch.Tensor
th_boxes = torch.tensor(
[[
28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
1.48000002, -1.57000005
],
[
26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
1.39999998, -1.69000006
],
[
31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
1.48000002, 2.78999996
]],
dtype=torch.float32)
cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
boxes_2 = CameraInstance3DBoxes(cam_th_boxes)
assert torch.allclose(boxes_2.tensor, cam_th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# test box flip
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
[8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes.flip()
assert torch.allclose(boxes.tensor, expected_tensor)
# test box rotation
expected_tensor = Box3DMode.convert(
torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
[7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
[27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
[19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
[27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes.rotate(torch.tensor(0.27207362796436096))
assert torch.allclose(boxes.tensor, expected_tensor)
# test box scaling
expected_tensor = Box3DMode.convert(
torch.tensor([[
1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.014273, -4.8007393, -1.6448704, 1.5486219,
4.0324507, 1.57879, 1.7936664
],
[
27.558605, -7.1084175, -1.310622, 1.4782301,
2.242485, 1.488286, 4.9836664
],
[
19.934517, -28.344835, -1.7457767, 1.5687338,
3.4994833, 1.4078381, 5.1036663
],
[
28.130915, -16.369587, -1.6308585, 1.7497417,
3.791107, 1.488286, 0.6236664
]]), Box3DMode.LIDAR, Box3DMode.CAM)
boxes.scale(1.00559866335275)
assert torch.allclose(boxes.tensor, expected_tensor)
# test box translation
expected_tensor = Box3DMode.convert(
torch.tensor([[
1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797, 1.6592377,
1.9336663
],
[
8.098079, -4.9332013, -1.8018866, 1.5486219,
4.0324507, 1.57879, 1.7936664
],
[
27.64241, -7.2408795, -1.4676381, 1.4782301,
2.242485, 1.488286, 4.9836664
],
[
20.018322, -28.477297, -1.9027928, 1.5687338,
3.4994833, 1.4078381, 5.1036663
],
[
28.21472, -16.502048, -1.7878747, 1.7497417,
3.791107, 1.488286, 0.6236664
]]), Box3DMode.LIDAR, Box3DMode.CAM)
boxes.translate(torch.tensor([0.13246193, 0.15701613, 0.0838056]))
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
# test bbox in_range
expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])
assert (mask == expected_tensor).all()
# test properties
assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
expected_tensor = (
boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
(torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])))
assert torch.allclose(boxes.gravity_center, expected_tensor)
boxes.limit_yaw()
assert (boxes.tensor[:, 6] <= np.pi / 2).all()
assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
expected_tensor = boxes.tensor.clone()
assert torch.allclose(expected_tensor, boxes.tensor)
boxes.flip()
boxes.flip()
boxes.limit_yaw()
assert torch.allclose(expected_tensor, boxes.tensor)
# test nearest_bev
# BEV box in lidar coordinates (x, y)
lidar_expected_tensor = torch.tensor(
[[-0.5763, -3.9307, 2.8326, -2.1709],
[6.0819, -5.7075, 10.1143, -4.1589],
[26.5212, -7.9800, 28.7637, -6.5018],
[18.2686, -29.2617, 21.7681, -27.6929],
[27.3398, -18.3976, 29.0896, -14.6065]])
# BEV box in camera coordinate (-y, x)
expected_tensor = lidar_expected_tensor.clone()
expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]]
expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
[1.6232e+00, 2.5769e-01, -1.5301e-01],
[1.6232e+00, 1.9169e+00, -1.5301e-01],
[3.2684e+00, 1.9169e+00, -7.7767e-01],
[4.4784e+00, 2.5769e-01, 2.4093e+00],
[2.8332e+00, 2.5769e-01, 3.0340e+00],
[2.8332e+00, 1.9169e+00, 3.0340e+00],
[4.4784e+00, 1.9169e+00, 2.4093e+00]],
[[5.2427e+00, 2.2310e-01, 5.9606e+00],
[3.7324e+00, 2.2310e-01, 6.3029e+00],
[3.7324e+00, 1.8019e+00, 6.3029e+00],
[5.2427e+00, 1.8019e+00, 5.9606e+00],
[6.1340e+00, 2.2310e-01, 9.8933e+00],
[4.6237e+00, 2.2310e-01, 1.0236e+01],
[4.6237e+00, 1.8019e+00, 1.0236e+01],
[6.1340e+00, 1.8019e+00, 9.8933e+00]],
[[7.6525e+00, -2.0648e-02, 2.6364e+01],
[6.2283e+00, -2.0648e-02, 2.6760e+01],
[6.2283e+00, 1.4676e+00, 2.6760e+01],
[7.6525e+00, 1.4676e+00, 2.6364e+01],
[8.2534e+00, -2.0648e-02, 2.8525e+01],
[6.8292e+00, -2.0648e-02, 2.8921e+01],
[6.8292e+00, 1.4676e+00, 2.8921e+01],
[8.2534e+00, 1.4676e+00, 2.8525e+01]],
[[2.8535e+01, 4.9495e-01, 1.8102e+01],
[2.7085e+01, 4.9495e-01, 1.8700e+01],
[2.7085e+01, 1.9028e+00, 1.8700e+01],
[2.8535e+01, 1.9028e+00, 1.8102e+01],
[2.9870e+01, 4.9495e-01, 2.1337e+01],
[2.8420e+01, 4.9495e-01, 2.1935e+01],
[2.8420e+01, 1.9028e+00, 2.1935e+01],
[2.9870e+01, 1.9028e+00, 2.1337e+01]],
[[1.4452e+01, 2.9959e-01, 2.8612e+01],
[1.5474e+01, 2.9959e-01, 3.0032e+01],
[1.5474e+01, 1.7879e+00, 3.0032e+01],
[1.4452e+01, 1.7879e+00, 2.8612e+01],
[1.7530e+01, 2.9959e-01, 2.6398e+01],
[1.8552e+01, 2.9959e-01, 2.7818e+01],
[1.8552e+01, 1.7879e+00, 2.7818e+01],
[1.7530e+01, 1.7879e+00, 2.6398e+01]]])
# the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
import numpy as np
from mmdet3d.datasets.pipelines import IndoorFlipData, IndoorGlobalRotScale
def test_indoor_flip_data():
np.random.seed(0)
sunrgbd_indoor_flip_data = IndoorFlipData(1, 1)
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = np.array(
    [[0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
      3.07028526],
     [-0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364,
      -1.58242359]])
sunrgbd_results = sunrgbd_indoor_flip_data(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
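# the yz-plane flip negates x and maps yaw to pi - yaw; SUNRGBD boxes carry
# 7 values (with yaw), so the xz-plane flip is skipped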
expected_sunrgbd_points = np.array(
[[-1.02828765, 3.65790772, 0.1972947, 1.61959505],
[0.39597902, 1.05465031, -0.74920434, 0.673096]])
expected_sunrgbd_gt_bboxes_3d = np.array([[
-0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728, 0.07130739
], [
0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364, 4.72401624
]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
np.random.seed(0)
scannet_indoor_flip_data = IndoorFlipData(1, 1)
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
-0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
scannet_results = scannet_indoor_flip_data(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
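# axis-aligned ScanNet boxes (6 values) get both flips:
# x and y are negated and there is no yaw to adjust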
expected_scannet_points = np.array(
[[-1.6110241, 0.16903955, 0.5811581, 0.5989725],
[-1.3978075, -0.42035791, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = np.array([[
-0.55903838, -0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
0.03226406, -1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
assert np.allclose(scannet_points, expected_scannet_points)
assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
def test_global_rot_scale():
np.random.seed(0)
sunrgbd_augment = IndoorGlobalRotScale(
True, rot_range=[-np.pi / 6, np.pi / 6], scale_range=[0.85, 1.15])
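# IndoorGlobalRotScale is assumed to rotate the scene about the upright axis
# by an angle drawn from rot_range and scale it by a factor from scale_range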
sunrgbd_results = dict()
sunrgbd_results['points'] = np.array(
[[1.02828765e+00, 3.65790772e+00, 1.97294697e-01, 1.61959505e+00],
[-3.95979017e-01, 1.05465031e+00, -7.49204338e-01, 6.73096001e-01]])
sunrgbd_results['gt_bboxes_3d'] = np.array(
    [[0.213684, 1.036364, -0.982323, 0.61541, 0.572574, 0.872728,
      3.07028526],
     [-0.449953, 1.395455, -1.027778, 1.500956, 1.637298, 0.636364,
      -1.58242359]])
sunrgbd_results = sunrgbd_augment(sunrgbd_results)
sunrgbd_points = sunrgbd_results['points']
sunrgbd_gt_bboxes_3d = sunrgbd_results['gt_bboxes_3d']
expected_sunrgbd_points = np.array(
[[0.89427376, 3.94489646, 0.21003141, 1.72415094],
[-0.47835783, 1.09972989, -0.79757058, 0.71654893]])
expected_sunrgbd_gt_bboxes_3d = np.array(
    [[0.17080999, 1.11345031, -1.04573864, 0.65513891, 0.60953755,
      0.92906854, 3.01916788],
     [-0.55427876, 1.45912611, -1.09412807, 1.59785293, 1.74299674,
      0.67744563, -1.63354097]])
assert np.allclose(sunrgbd_points, expected_sunrgbd_points)
assert np.allclose(sunrgbd_gt_bboxes_3d, expected_sunrgbd_gt_bboxes_3d)
np.random.seed(0)
scannet_augment = IndoorGlobalRotScale(
True, rot_range=[-np.pi * 1 / 36, np.pi * 1 / 36], scale_range=None)
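# scale_range=None is assumed to disable scaling (z stays fixed below);
# rotating the axis-aligned boxes slightly enlarges their re-fitted extents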
scannet_results = dict()
scannet_results['points'] = np.array(
[[1.6110241e+00, -1.6903955e-01, 5.8115810e-01, 5.9897250e-01],
[1.3978075e+00, 4.2035791e-01, 3.8729519e-01, 4.0510958e-01]])
scannet_results['gt_bboxes_3d'] = np.array([[
0.55903838, 0.48201692, 0.65688646, 0.65370704, 0.60029864, 0.5163464
], [
-0.03226406, 1.70392646, 0.60348618, 0.65165804, 0.72084366, 0.64667457
]])
scannet_results = scannet_augment(scannet_results)
scannet_points = scannet_results['points']
scannet_gt_bboxes_3d = scannet_results['gt_bboxes_3d']
expected_scannet_points = np.array(
[[1.61240576, -0.15530836, 0.5811581, 0.5989725],
[1.39417555, 0.43225122, 0.38729519, 0.40510958]])
expected_scannet_gt_bboxes_3d = np.array([[
0.55491157, 0.48676213, 0.65688646, 0.65879754, 0.60584609, 0.5163464
], [
-0.04677942, 1.70358975, 0.60348618, 0.65777559, 0.72636927, 0.64667457
]])
assert np.allclose(scannet_points, expected_scannet_points)
assert np.allclose(scannet_gt_bboxes_3d, expected_scannet_gt_bboxes_3d)
import os.path as osp
import mmcv
import numpy as np
from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
IndoorLoadPointsFromFile)
def test_indoor_load_points_from_file():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
sunrgbd_load_points_from_file = IndoorLoadPointsFromFile(True, 6)
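# the positional arguments (True, 6) are assumed to toggle height usage and
# set the number of channels loaded from the .npy file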
sunrgbd_results = dict()
data_path = './tests/data/sunrgbd/sunrgbd_trainval'
sunrgbd_info = sunrgbd_info[0]
scan_name = sunrgbd_info['point_cloud']['lidar_idx']
sunrgbd_results['pts_filename'] = osp.join(data_path, 'lidar',
f'{scan_name:06d}.npy')
sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
sunrgbd_point_cloud = sunrgbd_results['points']
assert sunrgbd_point_cloud.shape == (100, 4)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
scannet_load_data = IndoorLoadPointsFromFile(True)
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
scannet_results['data_path'] = data_path
scannet_info = scannet_info[0]
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_filename'] = osp.join(data_path,
f'{scan_name}_vert.npy')
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.shape == (100, 4)
def test_load_annotations3D():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
if sunrgbd_info['annos']['gt_num'] != 0:
sunrgbd_gt_bboxes_3d = sunrgbd_info['annos']['gt_boxes_upright_depth']
sunrgbd_gt_labels = sunrgbd_info['annos']['class'].reshape(-1, 1)
sunrgbd_gt_bboxes_3d_mask = np.ones_like(sunrgbd_gt_labels)
else:
sunrgbd_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
sunrgbd_gt_labels = np.zeros((1, 1))
sunrgbd_gt_bboxes_3d_mask = np.zeros((1, 1))
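# the placeholder boxes, labels and masks above keep downstream shapes valid
# when a scene has no annotations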
assert sunrgbd_gt_bboxes_3d.shape == (3, 7)
assert sunrgbd_gt_labels.shape == (3, 1)
assert sunrgbd_gt_bboxes_3d_mask.shape == (3, 1)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = IndoorLoadAnnotations3D()
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
if scannet_info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
scannet_gt_labels = scannet_info['annos']['class'].reshape(-1, 1)
scannet_gt_bboxes_3d_mask = np.ones_like(scannet_gt_labels)
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels = np.zeros((1, 1))
scannet_gt_bboxes_3d_mask = np.zeros((1, 1))
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_instance_mask_path'] = osp.join(
data_path, f'{scan_name}_ins_label.npy')
scannet_results['pts_semantic_mask_path'] = osp.join(
data_path, f'{scan_name}_sem_label.npy')
scannet_results['info'] = scannet_info
scannet_results['gt_bboxes_3d'] = scannet_gt_bboxes_3d
scannet_results['gt_labels'] = scannet_gt_labels
scannet_results['gt_bboxes_3d_mask'] = scannet_gt_bboxes_3d_mask
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_gt_boxes = scannet_results['gt_bboxes_3d']
scannet_gt_labels = scannet_results['gt_labels']
scannet_gt_boxes_mask = scannet_results['gt_bboxes_3d_mask']
scannet_pts_instance_mask = scannet_results['pts_instance_mask']
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert scannet_gt_boxes.shape == (27, 6)
assert scannet_gt_labels.shape == (27, 1)
assert scannet_gt_boxes_mask.shape == (27, 1)
assert scannet_pts_instance_mask.shape == (100, )
assert scannet_pts_semantic_mask.shape == (100, )
import numpy as np
from mmdet3d.datasets.pipelines import IndoorPointSample
def test_indoor_sample():
np.random.seed(0)
scannet_sample_points = IndoorPointSample(5)
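# with seed 0 the 5 sampled indices are deterministic; the same choices
# must be applied to the points and to both mask arrays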
scannet_results = dict()
scannet_points = np.array([[1.0719866, -0.7870435, 0.8408122, 0.9196809],
[1.103661, 0.81065744, 2.6616862, 2.7405548],
@@ -24,11 +24,9 @@ def test_indoor_sample():
scannet_pts_semantic_mask = np.array([38, 1, 1, 40, 0, 40, 1, 1, 1, 0])
scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask
scannet_results = scannet_sample_points(scannet_results)
scannet_points_result = scannet_results['points']
scannet_instance_labels_result = scannet_results['pts_instance_mask']
scannet_semantic_labels_result = scannet_results['pts_semantic_mask']
scannet_choices = np.array([2, 8, 4, 9, 1])
assert np.allclose(scannet_points[scannet_choices], scannet_points_result)
assert np.all(scannet_pts_instance_mask[scannet_choices] ==
              scannet_instance_labels_result)
assert np.all(scannet_pts_semantic_mask[scannet_choices] ==
              scannet_semantic_labels_result)
np.random.seed(0)
sunrgbd_sample_points = IndoorPointSample(5)
sunrgbd_results = dict()
sunrgbd_point_cloud = np.array(
[[-1.8135729e-01, 1.4695230e+00, -1.2780589e+00, 7.8938007e-03],
@@ -53,6 +51,6 @@ def test_indoor_sample():
sunrgbd_results['points'] = sunrgbd_point_cloud
sunrgbd_results = sunrgbd_sample_points(sunrgbd_results)
sunrgbd_choices = np.array([2, 8, 4, 9, 1])
sunrgbd_points_result = sunrgbd_results['points']
assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],
sunrgbd_points_result)