OpenDAS / mmdetection3d

Commit 94bbd751, authored May 10, 2020 by liyinhao

    merge master

Parents: f201ba68 and 84569a41

Showing 20 changed files with 1681 additions and 27 deletions (+1681, -27)
Changed files:

- docs/INSTALL.md (+5, -1)
- mmdet3d/core/bbox/__init__.py (+3, -1)
- mmdet3d/core/bbox/structures/__init__.py (+5, -0)
- mmdet3d/core/bbox/structures/base_box3d.py (+277, -0)
- mmdet3d/core/bbox/structures/box_3d_mode.py (+150, -0)
- mmdet3d/core/bbox/structures/cam_box3d.py (+160, -0)
- mmdet3d/core/bbox/structures/lidar_box3d.py (+155, -0)
- mmdet3d/core/bbox/structures/utils.py (+61, -0)
- mmdet3d/datasets/__init__.py (+8, -4)
- mmdet3d/datasets/kitti_dataset.py (+2, -0)
- mmdet3d/datasets/pipelines/__init__.py (+5, -1)
- mmdet3d/datasets/pipelines/indoor_loading.py (+104, -0)
- mmdet3d/models/roi_heads/roi_extractors/__init__.py (+2, -1)
- mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py (+52, -0)
- mmdet3d/ops/__init__.py (+9, -19)
- tests/data/scannet/scannet_train_instance_data/scene0000_00_ins_label.npy (binary, new)
- tests/data/scannet/scannet_train_instance_data/scene0000_00_sem_label.npy (binary, new)
- tests/data/sunrgbd/sunrgbd_trainval/lidar/000001.npy (binary, new)
- tests/test_box3d.py (+600, -0)
- tests/test_indoor_loading.py (+83, -0)
docs/INSTALL.md (+5, -1)

````diff
@@ -110,7 +110,11 @@ mmdetection
 │   ├── VOCdevkit
 │   │   ├── VOC2007
 │   │   ├── VOC2012
+│   ├── ScanNet
+│   │   ├── meta_data
+│   │   ├── scannet_train_instance_data
+│   ├── SUNRGBD
+│   │   ├── sunrgbd_trainval
 ```
 The cityscapes annotations have to be converted into the coco format
 using `tools/convert_datasets/cityscapes.py`:
 ```shell
 ...
````
mmdet3d/core/bbox/__init__.py (+3, -1)

```diff
@@ -7,6 +7,7 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
 from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
+from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from .transforms import boxes3d_to_bev_torch_lidar
 from .assign_sampling import (  # isort:skip, avoid recursive imports
@@ -20,5 +21,6 @@ __all__ = [
     'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
-    'bbox_overlaps_3d'
+    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
+    'CameraInstance3DBoxes'
 ]
```
mmdet3d/core/bbox/structures/__init__.py (new file, +5)

```python
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes

__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
```
mmdet3d/core/bbox/structures/base_box3d.py (new file, +277)

```python
from abc import abstractmethod

import numpy as np
import torch

from .utils import limit_period


class BaseInstance3DBoxes(object):
    """Base class for 3D boxes.

    Args:
        tensor (torch.Tensor | np.ndarray): an N x box_dim matrix.
        box_dim (int): number of dimensions of a box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw).
    """

    def __init__(self, tensor, box_dim=7):
        if isinstance(tensor, torch.Tensor):
            device = tensor.device
        else:
            device = torch.device('cpu')
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
        if tensor.numel() == 0:
            # Use reshape, so we don't end up creating a new tensor that
            # does not depend on the inputs (and consequently confuses jit)
            tensor = tensor.reshape((0, box_dim)).to(
                dtype=torch.float32, device=device)
        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()

        self.box_dim = box_dim
        self.tensor = tensor

    @property
    def volume(self):
        """Compute the volume of all the boxes.

        Returns:
            torch.Tensor: a vector with the volume of each box.
        """
        return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]

    @property
    def dims(self):
        """Calculate the length in each dimension of all the boxes,
        in the form of (x_size, y_size, z_size).

        Returns:
            torch.Tensor: sizes of each box with shape (N, 3).
        """
        return self.tensor[:, 3:6]

    @property
    def center(self):
        """Calculate the center of all the boxes.

        Note:
            In MMDetection3D's convention, the bottom center is
            usually taken as the default center.

            The relative position of the centers in different kinds of
            boxes are different, e.g., the relative center of a box is
            [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
            It is recommended to use `bottom_center` or `gravity_center`
            for clearer usage.

        Returns:
            torch.Tensor: a tensor with the center of each box.
        """
        return self.bottom_center

    @property
    def bottom_center(self):
        """Calculate the bottom center of all the boxes.

        Returns:
            torch.Tensor: a tensor with the center of each box.
        """
        return self.tensor[:, :3]

    @property
    def gravity_center(self):
        """Calculate the gravity center of all the boxes.

        Returns:
            torch.Tensor: a tensor with the center of each box.
        """
        pass

    @property
    def corners(self):
        """Calculate the coordinates of the corners of all the boxes.

        Returns:
            torch.Tensor: a tensor with the 8 corners of each box.
        """
        pass

    @abstractmethod
    def rotate(self, angles, axis=0):
        """Rotate the boxes.

        Args:
            angles (float): rotation angles.
            axis (int): the axis to rotate the boxes around.
        """
        pass

    @abstractmethod
    def flip(self):
        """Flip the boxes in the horizontal direction."""
        pass

    def translate(self, trans_vector):
        """Translate the boxes.

        Args:
            trans_vector (torch.Tensor): translation vector of size 1x3.
        """
        if not isinstance(trans_vector, torch.Tensor):
            trans_vector = self.tensor.new_tensor(trans_vector)
        self.tensor[:, :3] += trans_vector

    def in_range_3d(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): the range of boxes
                (x_min, y_min, z_min, x_max, y_max, z_max)

        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether the points are in a convex
            polygon; we try to reduce the burden for simpler cases.
            TODO: check whether this will affect the performance

        Returns:
            a binary vector indicating whether each box is inside
            the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 1] > box_range[1])
                          & (self.tensor[:, 2] > box_range[2])
                          & (self.tensor[:, 0] < box_range[3])
                          & (self.tensor[:, 1] < box_range[4])
                          & (self.tensor[:, 2] < box_range[5]))
        return in_range_flags

    @abstractmethod
    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): the range of boxes
                (x_min, y_min, x_max, y_max)

        Returns:
            a binary vector indicating whether each box is inside
            the reference range.
        """
        pass

    def scale(self, scale_factor):
        """Scale the boxes with horizontal and vertical scaling factors.

        Args:
            scale_factor (float): scale factor to scale the boxes.
        """
        self.tensor[:, :6] *= scale_factor
        self.tensor[:, 7:] *= scale_factor

    def limit_yaw(self, offset=0.5, period=np.pi):
        """Limit the yaw to a given period and offset.

        Args:
            offset (float): the offset of the yaw.
            period (float): the expected period.
        """
        self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)

    def nonempty(self, threshold: float = 0.0):
        """Find boxes that are non-empty.

        A box is considered empty if any of its sides
        is no larger than the threshold.

        Args:
            threshold (float): the threshold of minimal sizes.

        Returns:
            Tensor: a binary vector which represents whether each box
                is empty (False) or non-empty (True).
        """
        box = self.tensor
        size_x = box[..., 3]
        size_y = box[..., 4]
        size_z = box[..., 5]
        keep = ((size_x > threshold)
                & (size_y > threshold) & (size_z > threshold))
        return keep

    def __getitem__(self, item):
        """
        Note:
            The following usages are allowed:
            1. `new_boxes = boxes[3]`:
                return a `Boxes` that contains only one box.
            2. `new_boxes = boxes[2:10]`:
                return a slice of boxes.
            3. `new_boxes = boxes[vector]`:
                where vector is a torch.BoolTensor with `length = len(boxes)`.
                Nonzero elements in the vector will be selected.
            Note that the returned Boxes might share storage with this Boxes,
            subject to PyTorch's indexing semantics.

        Returns:
            Boxes: a new :class:`Boxes` created by indexing.
        """
        original_type = type(self)
        if isinstance(item, int):
            return original_type(self.tensor[item].view(1, -1))
        b = self.tensor[item]
        assert b.dim() == 2, \
            f'Indexing on Boxes with {item} failed to return a matrix!'
        return original_type(b)

    def __len__(self):
        return self.tensor.shape[0]

    def __repr__(self):
        return self.__class__.__name__ + '(\n' + str(self.tensor) + ')'

    @classmethod
    def cat(cls, boxes_list):
        """Concatenate a list of Boxes into a single Boxes.

        Arguments:
            boxes_list (list[Boxes])

        Returns:
            Boxes: the concatenated Boxes.
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0))
        assert all(isinstance(box, cls) for box in boxes_list)

        # use torch.cat (v.s. layers.cat)
        # so the returned boxes never share storage with input
        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
        return cat_boxes

    def to(self, device):
        original_type = type(self)
        return original_type(self.tensor.to(device))

    def clone(self):
        """Clone the Boxes.

        Returns:
            Boxes
        """
        original_type = type(self)
        return original_type(self.tensor.clone())

    @property
    def device(self):
        return self.tensor.device

    def __iter__(self):
        """Yield a box as a Tensor of shape (box_dim,) at a time."""
        yield from self.tensor
```
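Since every box class in this commit inherits the interface above, a minimal sketch may help show how its indexing, concatenation, and range-filter semantics compose. It uses the `LiDARInstance3DBoxes` subclass added later in this commit, and all numbers are invented for illustration:

```python
import torch

from mmdet3d.core.bbox import LiDARInstance3DBoxes

# Two invented boxes: (x, y, z, x_size, y_size, z_size, yaw).
boxes = LiDARInstance3DBoxes(
    torch.tensor([[0.0, 0.0, -1.0, 2.0, 4.0, 1.5, 0.0],
                  [50.0, 0.0, -1.0, 2.0, 4.0, 1.5, 0.0]]))

single = boxes[0]  # integer indexing returns a one-box container
both = LiDARInstance3DBoxes.cat([single, boxes[1]])
assert len(both) == 2

# in_range_3d gives a BoolTensor mask that can index the container again.
mask = boxes.in_range_3d([-10, -10, -3, 10, 10, 1])
assert len(boxes[mask]) == 1  # only the first box is inside the range
```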
mmdet3d/core/bbox/structures/box_3d_mode.py (new file, +150)

```python
from enum import IntEnum, unique

import numpy as np
import torch

from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes


@unique
class Box3DMode(IntEnum):
    r"""Enum of different ways to represent a box.

    Coordinates in LiDAR:

    .. code-block:: none

                    up z
                       ^   x front
                       |  /
                       | /
        left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
    and the yaw is around the z axis, thus the rotation axis=2.

    Coordinates in camera:

    .. code-block:: none

                z front
               /
              /
             0 ------> x right
             |
             |
             v
        down y

    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
    and the yaw is around the y axis, thus the rotation axis=1.

    Coordinates in Depth mode:

    .. code-block:: none

        up z
           ^   y front
           |  /
           | /
           0 ------> x right

    The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
    and the yaw is around the z axis, thus the rotation axis=2.
    """

    LIDAR = 0
    CAM = 1
    DEPTH = 2

    @staticmethod
    def convert(box, src, dst, rt_mat=None):
        """Convert boxes from `src` mode to `dst` mode.

        Args:
            box (tuple | list | np.ndarray | torch.Tensor):
                can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
            src (BoxMode): the source box mode.
            dst (BoxMode): the target box mode.
            rt_mat (np.ndarray | torch.Tensor): the rotation and translation
                matrix between different coordinates. Defaults to None.
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.

        Returns:
            (tuple | list | np.ndarray | torch.Tensor):
                the converted box of the same type.
        """
        if src == dst:
            return box

        is_numpy = isinstance(box, np.ndarray)
        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
        single_box = isinstance(box, (list, tuple))
        if single_box:
            assert len(box) >= 7, (
                'BoxMode.convert takes either a k-tuple/list or '
                'an Nxk array/tensor, where k >= 7')
            arr = torch.tensor(box)[None, :]
        else:
            # avoid modifying the input box
            if is_numpy:
                arr = torch.from_numpy(np.asarray(box)).clone()
            elif is_Instance3DBoxes:
                arr = box.tensor.clone()
            else:
                arr = box.clone()

        # convert box from `src` mode to `dst` mode.
        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
            xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
            xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
        else:
            raise NotImplementedError(
                f'Conversion from Box3DMode {src} to {dst} '
                'is not supported yet')

        if not isinstance(rt_mat, torch.Tensor):
            rt_mat = arr.new_tensor(rt_mat)
        if rt_mat.size(1) == 4:
            extended_xyz = torch.cat(
                [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
            xyz = extended_xyz @ rt_mat.t()
        else:
            xyz = arr[:, :3] @ rt_mat.t()

        remains = arr[..., 6:]
        arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)

        # convert arr to the original type
        original_type = type(box)
        if single_box:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        elif is_Instance3DBoxes:
            if dst == Box3DMode.CAM:
                target_type = CameraInstance3DBoxes
            elif dst == Box3DMode.LIDAR:
                target_type = LiDARInstance3DBoxes
            else:
                raise NotImplementedError(
                    f'Conversion to {dst} through {original_type} '
                    'is not supported yet')
            return target_type(arr, box_dim=arr.size(-1))
        else:
            return arr
```
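To make the default axis permutations concrete, here is a small sketch of `Box3DMode.convert` on a plain tensor (the box values are invented). With `rt_mat=None`, a LiDAR point `(x, y, z)` maps to camera `(-y, -z, x)` and the sizes are permuted to match:

```python
import torch

from mmdet3d.core.bbox import Box3DMode

# One invented LiDAR box: (x, y, z, x_size, y_size, z_size, yaw).
lidar_box = torch.tensor([[10.0, 2.0, -1.0, 1.6, 3.9, 1.5, 0.3]])

cam_box = Box3DMode.convert(lidar_box, Box3DMode.LIDAR, Box3DMode.CAM)
# (x, y, z) -> (-y, -z, x); sizes (1.6, 3.9, 1.5) -> (3.9, 1.5, 1.6).
assert torch.allclose(
    cam_box, torch.tensor([[-2.0, 1.0, 10.0, 3.9, 1.5, 1.6, 0.3]]))

# The round trip through the inverse default matrix is lossless.
back = Box3DMode.convert(cam_box, Box3DMode.CAM, Box3DMode.LIDAR)
assert torch.allclose(back, lidar_box)
```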
mmdet3d/core/bbox/structures/cam_box3d.py (new file, +160)

```python
import numpy as np
import torch

from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis


class CameraInstance3DBoxes(BaseInstance3DBoxes):
    """3D boxes of instances in CAM coordinates.

    Coordinates in camera:

    .. code-block:: none

                z front
               /
              /
             0 ------> x right
             |
             |
             v
        down y

    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
    and the yaw is around the y axis, thus the rotation axis=1.

    Attributes:
        tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): integer indicating the dimension of a box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
    """

    @property
    def gravity_center(self):
        """Calculate the gravity center of all the boxes.

        Returns:
            torch.Tensor: a tensor with the center of each box.
        """
        bottom_center = self.bottom_center
        gravity_center = torch.zeros_like(bottom_center)
        gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
        return gravity_center

    @property
    def corners(self):
        """Calculate the coordinates of the corners of all the boxes.

        Convert the boxes to corners in clockwise order, in the form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)

        .. code-block:: none

                          front z
                               /
                              /
               (x0, y0, z1) + ----------- + (x1, y0, z1)
                           /|            /|
                          / |           / |
           (x0, y0, z0) +  ----------- +  + (x1, y1, z0)
                        |  /      .    | /
                        | / origin     |/
           (x0, y1, z0) + ----------- + -------> x right
                        |  (x1, y1, z0)
                        |
                        v
                   down y

        Returns:
            torch.Tensor: corners of each box with size (N, 8, 3).
        """
        dims = self.dims
        corners_norm = torch.from_numpy(
            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
                device=dims.device, dtype=dims.dtype)

        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
        # use relative origin [0.5, 1, 0.5]
        corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])

        # rotate around y axis
        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=1)
        corners += self.tensor[:, :3].view(-1, 1, 3)
        return corners

    @property
    def nearset_bev(self):
        """Calculate the 2D bounding boxes in BEV without rotation.

        Returns:
            torch.Tensor: a tensor of 2D BEV boxes, one for each box.
        """
        # Obtain BEV boxes with rotation in XZWHR format
        bev_rotated_boxes = self.tensor[:, [0, 2, 3, 5, 6]]
        # convert the rotation to a valid range
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))

        # find the center of boxes
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions,
                                  bev_rotated_boxes[:, [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])

        centers = bboxes_xywh[:, :2]
        dims = bboxes_xywh[:, 2:]
        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2],
                              dim=-1)
        return bev_boxes

    def rotate(self, angle):
        """Rotate the boxes around the y axis.

        Args:
            angle (float | torch.Tensor): rotation angle.
        """
        if not isinstance(angle, torch.Tensor):
            angle = self.tensor.new_tensor(angle)
        rot_sin = torch.sin(angle)
        rot_cos = torch.cos(angle)
        rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin],
                                            [0, 1, 0],
                                            [rot_sin, 0, rot_cos]])
        self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
        self.tensor[:, 6] += angle

    def flip(self):
        """Flip the boxes in the horizontal direction.

        In CAM coordinates, it flips the x axis.
        """
        self.tensor[:, 0::7] = -self.tensor[:, 0::7]
        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi

    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): the range of boxes
                (x_min, z_min, x_max, z_max)

        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether the points are in a convex
            polygon; we try to reduce the burden for simpler cases.
            TODO: check whether this will affect the performance

        Returns:
            a binary vector indicating whether each box is inside
            the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 2] > box_range[1])
                          & (self.tensor[:, 0] < box_range[2])
                          & (self.tensor[:, 2] < box_range[3]))
        return in_range_flags
```
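Because the camera frame points y downwards, `gravity_center` for a camera box sits at a *smaller* y than `bottom_center`; a quick sketch with invented numbers:

```python
import torch

from mmdet3d.core.bbox import CameraInstance3DBoxes

# Invented camera-frame box: bottom center at y=1.6, height (y_size) 1.5.
box = CameraInstance3DBoxes(
    torch.tensor([[0.0, 1.6, 10.0, 1.8, 1.5, 4.0, 0.0]]))

# y points down, so the gravity center is y_size / 2 above the bottom:
# 1.6 - 1.5 / 2 = 0.85; x and z are unchanged.
assert torch.allclose(box.gravity_center,
                      torch.tensor([[0.0, 0.85, 10.0]]))
```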
mmdet3d/core/bbox/structures/lidar_box3d.py (new file, +155)

```python
import numpy as np
import torch

from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis


class LiDARInstance3DBoxes(BaseInstance3DBoxes):
    """3D boxes of instances in LIDAR coordinates.

    Coordinates in LiDAR:

    .. code-block:: none

                up z    x front
                   ^   ^
                   |  /
                   | /
        left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
    and the yaw is around the z axis, thus the rotation axis=2.

    Attributes:
        tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): integer indicating the dimension of a box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
    """

    @property
    def gravity_center(self):
        """Calculate the gravity center of all the boxes.

        Returns:
            torch.Tensor: a tensor with the center of each box.
        """
        bottom_center = self.bottom_center
        gravity_center = torch.zeros_like(bottom_center)
        gravity_center[:, :2] = bottom_center[:, :2]
        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
        return gravity_center

    @property
    def corners(self):
        """Calculate the coordinates of the corners of all the boxes.

        Convert the boxes to corners in clockwise order, in the form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)

        .. code-block:: none

                                         up z
                          front x           ^
                               /            |
                              /             |
               (x1, y0, z1) + ----------- + (x1, y1, z1)
                           /|            /|
                          / |           / |
           (x0, y0, z1) +  ----------- +  + (x1, y1, z0)
                        |  /      .    | /
                        | / origin     |/
        left y <------- + ----------- + (x0, y1, z0)
             (x0, y0, z0)

        Returns:
            torch.Tensor: corners of each box with size (N, 8, 3).
        """
        dims = self.dims
        corners_norm = torch.from_numpy(
            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
                device=dims.device, dtype=dims.dtype)

        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
        # use relative origin [0.5, 0.5, 0]
        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])

        # rotate around z axis
        corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
        corners += self.tensor[:, :3].view(-1, 1, 3)
        return corners

    @property
    def nearset_bev(self):
        """Calculate the 2D bounding boxes in BEV without rotation.

        Returns:
            torch.Tensor: a tensor of 2D BEV boxes, one for each box.
        """
        # Obtain BEV boxes with rotation in XYWHR format
        bev_rotated_boxes = self.tensor[:, [0, 1, 3, 4, 6]]
        # convert the rotation to a valid range
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))

        # find the center of boxes
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions,
                                  bev_rotated_boxes[:, [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])

        centers = bboxes_xywh[:, :2]
        dims = bboxes_xywh[:, 2:]
        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2],
                              dim=-1)
        return bev_boxes

    def rotate(self, angle):
        """Rotate the boxes around the z axis.

        Args:
            angle (float | torch.Tensor): rotation angle.
        """
        if not isinstance(angle, torch.Tensor):
            angle = self.tensor.new_tensor(angle)
        rot_sin = torch.sin(angle)
        rot_cos = torch.cos(angle)
        rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
                                            [rot_sin, rot_cos, 0],
                                            [0, 0, 1]])
        self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
        self.tensor[:, 6] += angle

    def flip(self):
        """Flip the boxes in the horizontal direction.

        In LIDAR coordinates, it flips the y axis.
        """
        self.tensor[:, 1::7] = -self.tensor[:, 1::7]
        self.tensor[:, 6] = -self.tensor[:, 6] + np.pi

    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): the range of boxes
                (x_min, y_min, x_max, y_max)

        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether the points are in a convex
            polygon; we try to reduce the burden for simpler cases.
            TODO: check whether this will affect the performance

        Returns:
            a binary vector indicating whether each box is inside
            the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 1] > box_range[1])
                          & (self.tensor[:, 0] < box_range[2])
                          & (self.tensor[:, 1] < box_range[3]))
        return in_range_flags
```
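A quick sketch of the LiDAR flip convention defined above (y is negated and yaw becomes pi - yaw, so two flips restore the box); the values are invented:

```python
import numpy as np
import torch

from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[5.0, -2.0, -1.0, 1.6, 3.9, 1.5, 0.3]]))

boxes.flip()  # y -> -y, yaw -> pi - yaw
assert torch.allclose(
    boxes.tensor,
    torch.tensor([[5.0, 2.0, -1.0, 1.6, 3.9, 1.5, np.pi - 0.3]]))

boxes.flip()  # a second flip restores the original box
assert torch.allclose(
    boxes.tensor, torch.tensor([[5.0, -2.0, -1.0, 1.6, 3.9, 1.5, 0.3]]))
```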
mmdet3d/core/bbox/structures/utils.py (new file, +61)

```python
import numpy as np
import torch


def limit_period(val, offset=0.5, period=np.pi):
    """Limit the value into a period for periodic functions.

    Args:
        val (torch.Tensor): the value to be converted.
        offset (float, optional): offset to set the value range.
            Defaults to 0.5.
        period (float, optional): period of the value. Defaults to np.pi.

    Returns:
        torch.Tensor: value in the range of
            [-offset * period, (1 - offset) * period]
    """
    return val - torch.floor(val / period + offset) * period


def rotation_3d_in_axis(points, angles, axis=0):
    """Rotate points by angles according to axis.

    Args:
        points (torch.Tensor): points of shape (N, M, 3).
        angles (torch.Tensor): vector of angles of shape (N,).
        axis (int, optional): the axis to rotate around. Defaults to 0.

    Raises:
        ValueError: when the axis is not in [0, 1, 2], a ValueError
            is raised.

    Returns:
        torch.Tensor: rotated points of shape (N, M, 3).
    """
    rot_sin = torch.sin(angles)
    rot_cos = torch.cos(angles)
    ones = torch.ones_like(rot_cos)
    zeros = torch.zeros_like(rot_cos)
    if axis == 1:
        rot_mat_T = torch.stack([
            torch.stack([rot_cos, zeros, -rot_sin]),
            torch.stack([zeros, ones, zeros]),
            torch.stack([rot_sin, zeros, rot_cos])
        ])
    elif axis == 2 or axis == -1:
        rot_mat_T = torch.stack([
            torch.stack([rot_cos, -rot_sin, zeros]),
            torch.stack([rot_sin, rot_cos, zeros]),
            torch.stack([zeros, zeros, ones])
        ])
    elif axis == 0:
        rot_mat_T = torch.stack([
            torch.stack([zeros, rot_cos, -rot_sin]),
            torch.stack([zeros, rot_sin, rot_cos]),
            torch.stack([ones, zeros, zeros])
        ])
    else:
        raise ValueError(f'axis should be in range [0, 1, 2], got {axis}')

    return torch.einsum('aij,jka->aik', (points, rot_mat_T))
```
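A worked instance of `limit_period` may clarify the formula: with `offset=0.5` and `period=2*pi`, any angle is wrapped into `[-pi, pi)`:

```python
import numpy as np
import torch

from mmdet3d.core.bbox.structures.utils import limit_period

# 1.5*pi - floor(1.5*pi / (2*pi) + 0.5) * 2*pi
#   = 1.5*pi - floor(1.25) * 2*pi = -0.5*pi
val = torch.tensor([1.5 * np.pi])
out = limit_period(val, offset=0.5, period=2 * np.pi)
assert torch.allclose(out, torch.tensor([-0.5 * np.pi]))
```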
mmdet3d/datasets/__init__.py (+8, -4)

```diff
@@ -6,9 +6,11 @@ from .kitti_dataset import KittiDataset
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .nuscenes_dataset import NuScenesDataset
 from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
-                        IndoorPointsColorJitter, ObjectNoise,
-                        ObjectRangeFilter, ObjectSample, PointShuffle,
-                        PointsRangeFilter, RandomFlip3D)
+                        IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                        IndoorPointsColorJitter, IndoorPointsColorNormalize,
+                        ObjectNoise, ObjectRangeFilter, ObjectSample,
+                        PointSample, PointShuffle, PointsRangeFilter,
+                        RandomFlip3D)

 __all__ = [
     'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
@@ -16,5 +18,7 @@ __all__ = [
     'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
     'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
     'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'IndoorPointsColorJitter', 'IndoorGlobalRotScale', 'IndoorFlipData'
+    'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize', 'PointSample',
+    'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter',
+    'IndoorGlobalRotScale', 'IndoorFlipData'
 ]
```
mmdet3d/datasets/kitti_dataset.py (+2, -0)

```diff
@@ -238,6 +238,8 @@ class KittiDataset(torch_data.Dataset):
                                  axis=1).astype(np.float32)
         difficulty = annos['difficulty']
         # this change gt_bboxes_3d to velodyne coordinates
+        import pdb
+        pdb.set_trace()
         gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
                                                       Trv2c)
         # only center format is allowed. so we need to convert
```
mmdet3d/datasets/pipelines/__init__.py (+5, -1)

```diff
@@ -3,6 +3,9 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
                              IndoorPointsColorJitter)
+from .indoor_loading import (IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                             IndoorPointsColorNormalize)
+from .indoor_sample import PointSample
 from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
@@ -14,5 +17,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler'
+    'MMDataBaseSampler', 'IndoorLoadPointsFromFile',
+    'IndoorPointsColorNormalize', 'IndoorLoadAnnotations3D', 'PointSample'
 ]
```
mmdet3d/datasets/pipelines/indoor_loading.py (new file, +104)

```python
import mmcv
import numpy as np

from mmdet.datasets.builder import PIPELINES


@PIPELINES.register_module()
class IndoorPointsColorNormalize(object):
    """Indoor Points Color Normalize.

    Normalize the color of the points.

    Args:
        color_mean (List[float]): mean color of the point cloud.
    """

    def __init__(self, color_mean):
        self.color_mean = color_mean

    def __call__(self, results):
        points = results['points']
        assert points.shape[1] >= 6, \
            f'Expect points to have channel >= 6, got {points.shape[1]}'
        points[:, 3:6] = points[:, 3:6] - np.array(self.color_mean) / 256.0
        results['points'] = points
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(color_mean={})'.format(self.color_mean)
        return repr_str


@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
    """Indoor Load Points From File.

    Load sunrgbd and scannet points from file.

    Args:
        use_height (bool): whether to use height.
        load_dim (int): the dimension of the loaded points. Default: 6.
        use_dim (List[int]): which dimensions of the points to use.
            Default: [0, 1, 2].
    """

    def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
        self.use_height = use_height
        assert max(use_dim) < load_dim, \
            f'Expect all used dimensions < {load_dim}, got {use_dim}'
        self.load_dim = load_dim
        self.use_dim = use_dim

    def __call__(self, results):
        pts_filename = results['pts_filename']
        mmcv.check_file_exist(pts_filename)
        points = np.load(pts_filename)
        points = points.reshape(-1, self.load_dim)
        points = points[:, self.use_dim]
        if self.use_height:
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            points = np.concatenate([points, np.expand_dims(height, 1)], 1)
        results['points'] = points
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(use_height={})'.format(self.use_height)
        repr_str += '(load_dim={})'.format(self.load_dim)
        repr_str += '(use_dim={})'.format(self.use_dim)
        return repr_str


@PIPELINES.register_module
class IndoorLoadAnnotations3D(object):
    """Indoor Load Annotations3D.

    Load the instance mask and semantic mask of points.
    """

    def __init__(self):
        pass

    def __call__(self, results):
        pts_instance_mask_path = results['pts_instance_mask_path']
        pts_semantic_mask_path = results['pts_semantic_mask_path']
        mmcv.check_file_exist(pts_instance_mask_path)
        mmcv.check_file_exist(pts_semantic_mask_path)
        pts_instance_mask = np.load(pts_instance_mask_path)
        pts_semantic_mask = np.load(pts_semantic_mask_path)
        results['pts_instance_mask'] = pts_instance_mask
        results['pts_semantic_mask'] = pts_semantic_mask
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
```
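For context, a hedged sketch of how these transforms could be chained in an mmdet-style pipeline config; the dict keys mirror the `results` keys read above, and the `color_mean` value is a made-up placeholder:

```python
# Hypothetical indoor pipeline fragment. The dataset is expected to supply
# 'pts_filename', 'pts_instance_mask_path' and 'pts_semantic_mask_path'.
train_pipeline = [
    dict(
        type='IndoorLoadPointsFromFile',
        use_height=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='IndoorLoadAnnotations3D'),
    # Placeholder mean color; the real value depends on the dataset.
    dict(type='IndoorPointsColorNormalize',
         color_mean=[121.87, 109.73, 95.61]),
]
```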
mmdet3d/models/roi_heads/roi_extractors/__init__.py (+2, -1)

```diff
 from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
+from .single_roiaware_extractor import Single3DRoIAwareExtractor

-__all__ = ['SingleRoIExtractor']
+__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
```
mmdet3d/models/roi_heads/roi_extractors/single_roiaware_extractor.py (new file, +52)

```python
import torch
import torch.nn as nn

from mmdet3d import ops
from mmdet.models.builder import ROI_EXTRACTORS


@ROI_EXTRACTORS.register_module
class Single3DRoIAwareExtractor(nn.Module):
    """Point-wise roi-aware Extractor.

    Extract point-wise roi features.

    Args:
        roi_layer (dict): the config of the roi layer.
    """

    def __init__(self, roi_layer=None):
        super(Single3DRoIAwareExtractor, self).__init__()
        self.roi_layer = self.build_roi_layers(roi_layer)

    def build_roi_layers(self, layer_cfg):
        cfg = layer_cfg.copy()
        layer_type = cfg.pop('type')
        assert hasattr(ops, layer_type)
        layer_cls = getattr(ops, layer_type)
        roi_layers = layer_cls(**cfg)
        return roi_layers

    def forward(self, feats, coordinate, batch_inds, rois):
        """Extract point-wise roi features.

        Args:
            feats (FloatTensor): point-wise features with
                shape (batch, npoints, channels) for pooling.
            coordinate (FloatTensor): coordinate of each point.
            batch_inds (LongTensor): indicates the batch of each point.
            rois (FloatTensor): roi boxes with batch indices.

        Returns:
            FloatTensor: pooled features.
        """
        pooled_roi_feats = []
        for batch_idx in range(int(batch_inds.max()) + 1):
            roi_inds = (rois[..., 0].int() == batch_idx)
            coors_inds = (batch_inds.int() == batch_idx)
            pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
                                             coordinate[coors_inds],
                                             feats[coors_inds])
            pooled_roi_feats.append(pooled_roi_feat)
        pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
        return pooled_roi_feats
```
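The extractor looks its roi layer up by name in `mmdet3d.ops`, so it is configured with a nested dict. `RoIAwarePool3d` is exported from `mmdet3d.ops` in this same commit; the constructor arguments shown here are assumptions for illustration, not confirmed by this diff:

```python
# Sketch of an extractor config: 'type' in roi_layer must name a class in
# mmdet3d.ops; the remaining keys (assumed here) go to its constructor.
roi_extractor = dict(
    type='Single3DRoIAwareExtractor',
    roi_layer=dict(type='RoIAwarePool3d', out_size=14, max_pts_per_voxel=128))
```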
mmdet3d/ops/__init__.py (+9, -19)

```diff
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                        get_compiling_cuda_version, nms, roi_align,
                        sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
+                              points_in_boxes_gpu)
 from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
                            SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization

 __all__ = [
-    'nms',
-    'soft_nms',
-    'RoIAlign',
-    'roi_align',
-    'get_compiler_version',
-    'get_compiling_cuda_version',
-    'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d',
-    'batched_nms',
-    'Voxelization',
-    'voxelization',
-    'dynamic_scatter',
-    'DynamicScatter',
-    'sigmoid_focal_loss',
-    'SigmoidFocalLoss',
-    'SparseBasicBlockV0',
-    'SparseBottleneckV0',
-    'SparseBasicBlock',
-    'SparseBottleneck',
+    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
+    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
+    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
+    'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
+    'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
+    'points_in_boxes_gpu', 'points_in_boxes_cpu'
 ]
```
New binary test data files added:

- tests/data/scannet/scannet_train_instance_data/scene0000_00_ins_label.npy
- tests/data/scannet/scannet_train_instance_data/scene0000_00_sem_label.npy
- tests/data/sunrgbd/sunrgbd_trainval/lidar/000001.npy
tests/test_box3d.py (new file, +600)

```python
import numpy as np
import pytest
import torch

from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
                               LiDARInstance3DBoxes)


def test_lidar_boxes3d():
    # test empty initialization
    empty_boxes = []
    boxes = LiDARInstance3DBoxes(empty_boxes)
    assert boxes.tensor.shape[0] == 0
    assert boxes.tensor.shape[1] == 7

    # Test init with numpy array
    np_boxes = np.array(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
        dtype=np.float32)
    boxes_1 = LiDARInstance3DBoxes(np_boxes)
    assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))

    # test properties
    assert boxes_1.volume.size(0) == 2
    assert (boxes_1.center == boxes_1.bottom_center).all()
    assert repr(boxes) == (
        'LiDARInstance3DBoxes(\ntensor([], size=(0, 7)))')

    # test init with torch.Tensor
    th_boxes = torch.tensor(
        [[28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
          1.48000002, -1.57000005],
         [26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
          1.39999998, -1.69000006],
         [31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
          1.48000002, 2.78999996]],
        dtype=torch.float32)
    boxes_2 = LiDARInstance3DBoxes(th_boxes)
    assert torch.allclose(boxes_2.tensor, th_boxes)

    # test clone/to/device
    boxes_2 = boxes_2.clone()
    boxes_1 = boxes_1.to(boxes_2.device)

    # test box concatenation
    expected_tensor = torch.tensor(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    boxes = LiDARInstance3DBoxes.cat([boxes_1, boxes_2])
    assert torch.allclose(boxes.tensor, expected_tensor)
    # concatenate empty list
    empty_boxes = LiDARInstance3DBoxes.cat([])
    assert empty_boxes.tensor.shape[0] == 0
    assert empty_boxes.tensor.shape[-1] == 7

    # test box flip
    expected_tensor = torch.tensor(
        [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
         [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
         [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
         [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
         [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
    boxes.flip()
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box rotation
    expected_tensor = torch.tensor(
        [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
         [7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
         [27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
         [19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
         [27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]])
    boxes.rotate(0.27207362796436096)
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box scaling
    expected_tensor = torch.tensor(
        [[1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797,
          1.6592377, 1.9336663],
         [8.014273, -4.8007393, -1.6448704, 1.5486219, 4.0324507,
          1.57879, 1.7936664],
         [27.558605, -7.1084175, -1.310622, 1.4782301, 2.242485,
          1.488286, 4.9836664],
         [19.934517, -28.344835, -1.7457767, 1.5687338, 3.4994833,
          1.4078381, 5.1036663],
         [28.130915, -16.369587, -1.6308585, 1.7497417, 3.791107,
          1.488286, 0.6236664]])
    boxes.scale(1.00559866335275)
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box translation
    expected_tensor = torch.tensor(
        [[1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797,
          1.6592377, 1.9336663],
         [8.098079, -4.9332013, -1.8018866, 1.5486219, 4.0324507,
          1.57879, 1.7936664],
         [27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485,
          1.488286, 4.9836664],
         [20.018322, -28.477297, -1.9027928, 1.5687338, 3.4994833,
          1.4078381, 5.1036663],
         [28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107,
          1.488286, 0.6236664]])
    boxes.translate([0.0838056, -0.13246193, -0.15701613])
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test bbox in_range_bev
    expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
    mask = boxes.in_range_bev([0., -40., 70.4, 40.])
    assert (mask == expected_tensor).all()
    mask = boxes.nonempty()
    assert (mask == expected_tensor).all()

    # test bbox in_range
    expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
    mask = boxes.in_range_3d([0, -20, -2, 22, 2, 5])
    assert (mask == expected_tensor).all()

    # test bbox indexing
    index_boxes = boxes[2:5]
    expected_tensor = torch.tensor(
        [[27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485,
          1.488286, 4.9836664],
         [20.018322, -28.477297, -1.9027928, 1.5687338, 3.4994833,
          1.4078381, 5.1036663],
         [28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107,
          1.488286, 0.6236664]])
    assert len(index_boxes) == 3
    assert torch.allclose(index_boxes.tensor, expected_tensor)

    index_boxes = boxes[2]
    expected_tensor = torch.tensor(
        [[27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485,
          1.488286, 4.9836664]])
    assert len(index_boxes) == 1
    assert torch.allclose(index_boxes.tensor, expected_tensor)

    index_boxes = boxes[[2, 4]]
    expected_tensor = torch.tensor(
        [[27.64241, -7.2408795, -1.4676381, 1.4782301, 2.242485,
          1.488286, 4.9836664],
         [28.21472, -16.502048, -1.7878747, 1.7497417, 3.791107,
          1.488286, 0.6236664]])
    assert len(index_boxes) == 2
    assert torch.allclose(index_boxes.tensor, expected_tensor)

    # test iteration
    for i, box in enumerate(index_boxes):
        torch.allclose(box, expected_tensor[i])

    # test properties
    assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
    expected_tensor = (
        boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
        (torch.tensor([0.5, 0.5, 0]) - torch.tensor([0.5, 0.5, 0.5])))
    assert torch.allclose(boxes.gravity_center, expected_tensor)

    boxes.limit_yaw()
    assert (boxes.tensor[:, 6] <= np.pi / 2).all()
    assert (boxes.tensor[:, 6] >= -np.pi / 2).all()

    Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
    expected_tensor = boxes.tensor.clone()
    assert torch.allclose(expected_tensor, boxes.tensor)

    boxes.flip()
    boxes.flip()
    boxes.limit_yaw()
    assert torch.allclose(expected_tensor, boxes.tensor)

    # test nearest_bev
    expected_tensor = torch.tensor([[-0.5763, -3.9307, 2.8326, -2.1709],
                                    [6.0819, -5.7075, 10.1143, -4.1589],
                                    [26.5212, -7.9800, 28.7637, -6.5018],
                                    [18.2686, -29.2617, 21.7681, -27.6929],
                                    [27.3398, -18.3976, 29.0896, -14.6065]])
    # the pytorch print loses some precision
    assert torch.allclose(
        boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7)

    # obtained by the print of the original implementation
    expected_tensor = torch.tensor(
        [[[2.4093e+00, -4.4784e+00, -1.9169e+00],
          [2.4093e+00, -4.4784e+00, -2.5769e-01],
          [-7.7767e-01, -3.2684e+00, -2.5769e-01],
          [-7.7767e-01, -3.2684e+00, -1.9169e+00],
          [3.0340e+00, -2.8332e+00, -1.9169e+00],
          [3.0340e+00, -2.8332e+00, -2.5769e-01],
          [-1.5301e-01, -1.6232e+00, -2.5769e-01],
          [-1.5301e-01, -1.6232e+00, -1.9169e+00]],
         [[9.8933e+00, -6.1340e+00, -1.8019e+00],
          [9.8933e+00, -6.1340e+00, -2.2310e-01],
          [5.9606e+00, -5.2427e+00, -2.2310e-01],
          [5.9606e+00, -5.2427e+00, -1.8019e+00],
          [1.0236e+01, -4.6237e+00, -1.8019e+00],
          [1.0236e+01, -4.6237e+00, -2.2310e-01],
          [6.3029e+00, -3.7324e+00, -2.2310e-01],
          [6.3029e+00, -3.7324e+00, -1.8019e+00]],
         [[2.8525e+01, -8.2534e+00, -1.4676e+00],
          [2.8525e+01, -8.2534e+00, 2.0648e-02],
          [2.6364e+01, -7.6525e+00, 2.0648e-02],
          [2.6364e+01, -7.6525e+00, -1.4676e+00],
          [2.8921e+01, -6.8292e+00, -1.4676e+00],
          [2.8921e+01, -6.8292e+00, 2.0648e-02],
          [2.6760e+01, -6.2283e+00, 2.0648e-02],
          [2.6760e+01, -6.2283e+00, -1.4676e+00]],
         [[2.1337e+01, -2.9870e+01, -1.9028e+00],
          [2.1337e+01, -2.9870e+01, -4.9495e-01],
          [1.8102e+01, -2.8535e+01, -4.9495e-01],
          [1.8102e+01, -2.8535e+01, -1.9028e+00],
          [2.1935e+01, -2.8420e+01, -1.9028e+00],
          [2.1935e+01, -2.8420e+01, -4.9495e-01],
          [1.8700e+01, -2.7085e+01, -4.9495e-01],
          [1.8700e+01, -2.7085e+01, -1.9028e+00]],
         [[2.6398e+01, -1.7530e+01, -1.7879e+00],
          [2.6398e+01, -1.7530e+01, -2.9959e-01],
          [2.8612e+01, -1.4452e+01, -2.9959e-01],
          [2.8612e+01, -1.4452e+01, -1.7879e+00],
          [2.7818e+01, -1.8552e+01, -1.7879e+00],
          [2.7818e+01, -1.8552e+01, -2.9959e-01],
          [3.0032e+01, -1.5474e+01, -2.9959e-01],
          [3.0032e+01, -1.5474e+01, -1.7879e+00]]])
    # the pytorch print loses some precision
    assert torch.allclose(
        boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)


def test_boxes_conversion():
    """Test the conversion of boxes between different modes.

    CommandLine:
        xdoctest tests/test_box3d.py::test_boxes_conversion zero
    """
    lidar_boxes = LiDARInstance3DBoxes(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
    cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
                                       Box3DMode.CAM)
    lidar_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
                                         Box3DMode.LIDAR)
    expected_tensor = torch.tensor(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])

    assert torch.allclose(expected_tensor, lidar_box_tensor)
    assert torch.allclose(lidar_boxes.tensor, lidar_box_tensor)

    depth_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
                                         Box3DMode.DEPTH)
    depth_to_cam_box_tensor = Box3DMode.convert(depth_box_tensor,
                                                Box3DMode.DEPTH,
                                                Box3DMode.CAM)
    assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)

    # test error raised by an unsupported conversion
    with pytest.raises(NotImplementedError):
        Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
    with pytest.raises(NotImplementedError):
        Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)

    # test same-mode conversion
    same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
                                     Box3DMode.DEPTH)
    assert (same_results == depth_box_tensor).all()

    # test conversion with a given rt_mat
    camera_boxes = CameraInstance3DBoxes(
        [[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54],
         [6.59, 1.53, 6.76, 12.78, 3.66, 2.28, 1.55],
         [6.71, 1.59, 22.18, 14.73, 3.64, 2.32, 1.59],
         [7.11, 1.58, 34.54, 10.04, 3.61, 2.32, 1.61],
         [7.78, 1.65, 45.95, 12.83, 3.63, 2.34, 1.64]])

    rect = torch.tensor(
        [[0.9999239, 0.00983776, -0.00744505, 0.],
         [-0.0098698, 0.9999421, -0.00427846, 0.],
         [0.00740253, 0.00435161, 0.9999631, 0.],
         [0., 0., 0., 1.]],
        dtype=torch.float32)

    Trv2c = torch.tensor(
        [[7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],
         [1.480249e-02, 7.280733e-04, -9.998902e-01, -7.631618e-02],
         [9.998621e-01, 7.523790e-03, 1.480755e-02, -2.717806e-01],
         [0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00]],
        dtype=torch.float32)

    expected_tensor = torch.tensor(
        [[2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00,
          3.20000005e+00, 1.61000001e+00, -1.53999996e+00],
         [7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00,
          1.27799997e+01, 3.66000009e+00, 1.54999995e+00],
         [2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00,
          1.47299995e+01, 3.64000010e+00, 1.59000003e+00],
         [3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00,
          1.00400000e+01, 3.60999990e+00, 1.61000001e+00],
         [4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00,
          1.28299999e+01, 3.63000011e+00, 1.63999999e+00]],
        dtype=torch.float32)

    rt_mat = rect @ Trv2c
    # test conversion with Box type
    cam_to_lidar_box = Box3DMode.convert(camera_boxes, Box3DMode.CAM,
                                         Box3DMode.LIDAR, rt_mat.inverse())
    assert torch.allclose(cam_to_lidar_box.tensor, expected_tensor)

    lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box.tensor,
                                         Box3DMode.LIDAR, Box3DMode.CAM,
                                         rt_mat)
    assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)

    # test numpy convert
    cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor.numpy(),
                                         Box3DMode.CAM, Box3DMode.LIDAR,
                                         rt_mat.inverse().numpy())
    assert np.allclose(cam_to_lidar_box, expected_tensor.numpy())

    # test list convert
    cam_to_lidar_box = Box3DMode.convert(
        camera_boxes.tensor[0].numpy().tolist(), Box3DMode.CAM,
        Box3DMode.LIDAR,
        rt_mat.inverse().numpy())
    assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())


def test_camera_boxes3d():
    # Test init with numpy array
    np_boxes = np.array(
        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
        dtype=np.float32)
    boxes_1 = Box3DMode.convert(
        LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM)
    assert isinstance(boxes_1, CameraInstance3DBoxes)

    cam_np_boxes = Box3DMode.convert(np_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
    assert torch.allclose(boxes_1.tensor,
                          boxes_1.tensor.new_tensor(cam_np_boxes))

    # test init with torch.Tensor
    th_boxes = torch.tensor(
        [[28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
          1.48000002, -1.57000005],
         [26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
          1.39999998, -1.69000006],
         [31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
          1.48000002, 2.78999996]],
        dtype=torch.float32)
    cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
    boxes_2 = CameraInstance3DBoxes(cam_th_boxes)
    assert torch.allclose(boxes_2.tensor, cam_th_boxes)

    # test clone/to/device
    boxes_2 = boxes_2.clone()
    boxes_1 = boxes_1.to(boxes_2.device)

    # test box concatenation
    expected_tensor = Box3DMode.convert(
        torch.tensor(
            [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
             [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
             [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
             [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
             [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]),
        Box3DMode.LIDAR, Box3DMode.CAM)
    boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2])
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box flip
    expected_tensor = Box3DMode.convert(
        torch.tensor(
            [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
             [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
             [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
             [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
             [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48,
              0.35159278]]), Box3DMode.LIDAR, Box3DMode.CAM)
    boxes.flip()
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box rotation
    expected_tensor = Box3DMode.convert(
        torch.tensor(
            [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
             [7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
             [27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
             [19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
             [27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48,
              0.6236664]]), Box3DMode.LIDAR, Box3DMode.CAM)
    boxes.rotate(torch.tensor(0.27207362796436096))
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box scaling
    expected_tensor = Box3DMode.convert(
        torch.tensor([[1.0443488, -2.9183323, -1.7599131, 1.7597977,
                       3.4089797, 1.6592377, 1.9336663],
                      [8.014273, -4.8007393, -1.6448704, 1.5486219,
                       4.0324507, 1.57879, 1.7936664],
                      [27.558605, -7.1084175, -1.310622, 1.4782301,
                       2.242485, 1.488286, 4.9836664],
                      [19.934517, -28.344835, -1.7457767, 1.5687338,
                       3.4994833, 1.4078381, 5.1036663],
                      [28.130915, -16.369587, -1.6308585, 1.7497417,
                       3.791107, 1.488286, 0.6236664]]),
        Box3DMode.LIDAR, Box3DMode.CAM)
    boxes.scale(1.00559866335275)
    assert torch.allclose(boxes.tensor, expected_tensor)

    # test box translation
    expected_tensor = Box3DMode.convert(
        torch.tensor([[1.1281544, -3.0507944, -1.9169292, 1.7597977,
                       3.4089797, 1.6592377, 1.9336663],
                      [8.098079, -4.9332013, -1.8018866, 1.5486219,
                       4.0324507, 1.57879, 1.7936664],
                      [27.64241, -7.2408795, -1.4676381, 1.4782301,
                       2.242485, 1.488286, 4.9836664],
                      [20.018322, -28.477297, -1.9027928, 1.5687338,
                       3.4994833, 1.4078381, 5.1036663],
                      [28.21472, -16.502048, -1.7878747, 1.7497417,
                       3.791107, 1.488286, 0.6236664]]),
        Box3DMode.LIDAR, Box3DMode.CAM)
    boxes.translate(torch.tensor([0.13246193, 0.15701613, 0.0838056]))
    assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor
=
torch
.
tensor
([
1
,
1
,
1
,
1
,
1
],
dtype
=
torch
.
bool
)
mask
=
boxes
.
in_range_bev
([
0.
,
-
40.
,
70.4
,
40.
])
assert
(
mask
==
expected_tensor
).
all
()
mask
=
boxes
.
nonempty
()
assert
(
mask
==
expected_tensor
).
all
()
    # test bbox in_range
    expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
    mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])
    assert (mask == expected_tensor).all()
    # test properties
    assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
    expected_tensor = (
        boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
        (torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])))
    assert torch.allclose(boxes.gravity_center, expected_tensor)

    boxes.limit_yaw()
    assert (boxes.tensor[:, 6] <= np.pi / 2).all()
    assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
    Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
    expected_tensor = boxes.tensor.clone()
    assert torch.allclose(expected_tensor, boxes.tensor)

    boxes.flip()
    boxes.flip()
    boxes.limit_yaw()
    assert torch.allclose(expected_tensor, boxes.tensor)
    # test nearest_bev
    # BEV box in lidar coordinates (x, y)
    lidar_expected_tensor = torch.tensor(
        [[-0.5763, -3.9307, 2.8326, -2.1709],
         [6.0819, -5.7075, 10.1143, -4.1589],
         [26.5212, -7.9800, 28.7637, -6.5018],
         [18.2686, -29.2617, 21.7681, -27.6929],
         [27.3398, -18.3976, 29.0896, -14.6065]])
    # BEV box in camera coordinates (-y, x)
    expected_tensor = lidar_expected_tensor.clone()
    expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]]
    expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
    # the pytorch print loses some precision
    assert torch.allclose(
        boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
    # obtained by the print of the original implementation
    expected_tensor = torch.tensor(
        [[[3.2684e+00, 2.5769e-01, -7.7767e-01],
          [1.6232e+00, 2.5769e-01, -1.5301e-01],
          [1.6232e+00, 1.9169e+00, -1.5301e-01],
          [3.2684e+00, 1.9169e+00, -7.7767e-01],
          [4.4784e+00, 2.5769e-01, 2.4093e+00],
          [2.8332e+00, 2.5769e-01, 3.0340e+00],
          [2.8332e+00, 1.9169e+00, 3.0340e+00],
          [4.4784e+00, 1.9169e+00, 2.4093e+00]],
         [[5.2427e+00, 2.2310e-01, 5.9606e+00],
          [3.7324e+00, 2.2310e-01, 6.3029e+00],
          [3.7324e+00, 1.8019e+00, 6.3029e+00],
          [5.2427e+00, 1.8019e+00, 5.9606e+00],
          [6.1340e+00, 2.2310e-01, 9.8933e+00],
          [4.6237e+00, 2.2310e-01, 1.0236e+01],
          [4.6237e+00, 1.8019e+00, 1.0236e+01],
          [6.1340e+00, 1.8019e+00, 9.8933e+00]],
         [[7.6525e+00, -2.0648e-02, 2.6364e+01],
          [6.2283e+00, -2.0648e-02, 2.6760e+01],
          [6.2283e+00, 1.4676e+00, 2.6760e+01],
          [7.6525e+00, 1.4676e+00, 2.6364e+01],
          [8.2534e+00, -2.0648e-02, 2.8525e+01],
          [6.8292e+00, -2.0648e-02, 2.8921e+01],
          [6.8292e+00, 1.4676e+00, 2.8921e+01],
          [8.2534e+00, 1.4676e+00, 2.8525e+01]],
         [[2.8535e+01, 4.9495e-01, 1.8102e+01],
          [2.7085e+01, 4.9495e-01, 1.8700e+01],
          [2.7085e+01, 1.9028e+00, 1.8700e+01],
          [2.8535e+01, 1.9028e+00, 1.8102e+01],
          [2.9870e+01, 4.9495e-01, 2.1337e+01],
          [2.8420e+01, 4.9495e-01, 2.1935e+01],
          [2.8420e+01, 1.9028e+00, 2.1935e+01],
          [2.9870e+01, 1.9028e+00, 2.1337e+01]],
         [[1.4452e+01, 2.9959e-01, 2.8612e+01],
          [1.5474e+01, 2.9959e-01, 3.0032e+01],
          [1.5474e+01, 1.7879e+00, 3.0032e+01],
          [1.4452e+01, 1.7879e+00, 2.8612e+01],
          [1.7530e+01, 2.9959e-01, 2.6398e+01],
          [1.8552e+01, 2.9959e-01, 2.7818e+01],
          [1.8552e+01, 1.7879e+00, 2.7818e+01],
          [1.7530e+01, 1.7879e+00, 2.6398e+01]]])
    # the pytorch print loses some precision
    assert torch.allclose(
        boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
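The `gravity_center` check above pins down the camera-frame convention used throughout this test: a box's stored (x, y, z) is the centre of its bottom face, the y-axis points down, and the second entry of the dims is taken as the vertical extent, so the gravity centre is the bottom centre shifted by half that extent against y. A minimal standalone sketch of the same arithmetic in plain PyTorch (the box value below is illustrative, not a fixture from this test, and the dims ordering is an assumption matching the check above):

```python
import torch

# One camera-frame box: (x, y, z, dim0, dim1, dim2, yaw); dim1 is assumed
# to be the vertical (y) size, as in the gravity_center check in the test.
cam_box = torch.tensor([[1.0, 1.5, 10.0, 1.6, 1.5, 3.9, 0.3]])

bottom_center = cam_box[:, :3]
dims = cam_box[:, 3:6]
# Offset from bottom centre to gravity centre: zero in x and z, half the
# vertical size in y (subtracted, because y points down in camera frame).
offset = torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])
gravity_center = bottom_center - dims * offset

assert torch.allclose(gravity_center[:, 0], bottom_center[:, 0])
assert torch.allclose(gravity_center[:, 2], bottom_center[:, 2])
assert torch.allclose(gravity_center[:, 1],
                      bottom_center[:, 1] - 0.5 * dims[:, 1])
```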
tests/test_indoor_loading.py
0 → 100644
View file @ 94bbd751
import os.path as osp

import mmcv
import numpy as np

from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
                                        IndoorLoadPointsFromFile)


def test_indoor_load_points_from_file():
    sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
    sunrgbd_load_points_from_file = IndoorLoadPointsFromFile(True, 6)
    sunrgbd_results = dict()
    data_path = './tests/data/sunrgbd/sunrgbd_trainval'
    sunrgbd_info = sunrgbd_info[0]
    scan_name = sunrgbd_info['point_cloud']['lidar_idx']
    sunrgbd_results['pts_filename'] = osp.join(data_path, 'lidar',
                                               f'{scan_name:06d}.npy')
    sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
    sunrgbd_point_cloud = sunrgbd_results['points']
    assert sunrgbd_point_cloud.shape == (100, 4)

    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
    scannet_load_data = IndoorLoadPointsFromFile(True)
    scannet_results = dict()
    data_path = './tests/data/scannet/scannet_train_instance_data'
    scannet_results['data_path'] = data_path
    scannet_info = scannet_info[0]
    scan_name = scannet_info['point_cloud']['lidar_idx']
    scannet_results['pts_filename'] = osp.join(data_path,
                                               f'{scan_name}_vert.npy')
    scannet_results = scannet_load_data(scannet_results)
    scannet_point_cloud = scannet_results['points']
    assert scannet_point_cloud.shape == (100, 4)
def test_load_annotations3D():
    sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
    if sunrgbd_info['annos']['gt_num'] != 0:
        sunrgbd_gt_bboxes_3d = sunrgbd_info['annos']['gt_boxes_upright_depth']
        sunrgbd_gt_labels = sunrgbd_info['annos']['class'].reshape(-1, 1)
        sunrgbd_gt_bboxes_3d_mask = np.ones_like(sunrgbd_gt_labels)
    else:
        sunrgbd_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        sunrgbd_gt_labels = np.zeros((1, 1))
        sunrgbd_gt_bboxes_3d_mask = np.zeros((1, 1))
    assert sunrgbd_gt_bboxes_3d.shape == (3, 7)
    assert sunrgbd_gt_labels.shape == (3, 1)
    assert sunrgbd_gt_bboxes_3d_mask.shape == (3, 1)

    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
    scannet_load_annotations3D = IndoorLoadAnnotations3D()
    scannet_results = dict()
    data_path = './tests/data/scannet/scannet_train_instance_data'
    if scannet_info['annos']['gt_num'] != 0:
        scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
        scannet_gt_labels = scannet_info['annos']['class'].reshape(-1, 1)
        scannet_gt_bboxes_3d_mask = np.ones_like(scannet_gt_labels)
    else:
        scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
        scannet_gt_labels = np.zeros((1, 1))
        scannet_gt_bboxes_3d_mask = np.zeros((1, 1))
    scan_name = scannet_info['point_cloud']['lidar_idx']
    scannet_results['pts_instance_mask_path'] = osp.join(
        data_path, f'{scan_name}_ins_label.npy')
    scannet_results['pts_semantic_mask_path'] = osp.join(
        data_path, f'{scan_name}_sem_label.npy')
    scannet_results['info'] = scannet_info
    scannet_results['gt_bboxes_3d'] = scannet_gt_bboxes_3d
    scannet_results['gt_labels'] = scannet_gt_labels
    scannet_results['gt_bboxes_3d_mask'] = scannet_gt_bboxes_3d_mask
    scannet_results = scannet_load_annotations3D(scannet_results)
    scannet_gt_boxes = scannet_results['gt_bboxes_3d']
    scannet_gt_labels = scannet_results['gt_labels']
    scannet_gt_boxes_mask = scannet_results['gt_bboxes_3d_mask']
    scannet_pts_instance_mask = scannet_results['pts_instance_mask']
    scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
    assert scannet_gt_boxes.shape == (27, 6)
    assert scannet_gt_labels.shape == (27, 1)
    assert scannet_gt_boxes_mask.shape == (27, 1)
    assert scannet_pts_instance_mask.shape == (100, )
    assert scannet_pts_semantic_mask.shape == (100, )
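Both tests drive the transforms the same way a dataset pipeline would: a `results` dict is seeded with file paths (plus, for `IndoorLoadAnnotations3D`, the ground-truth arrays), and each transform fills in its outputs in place. A hedged sketch of that composition follows; `Compose` comes from mmdet and accepts callables, the positional constructor argument simply mirrors the ScanNet test above, and the seeded keys are exactly the ones the tests seed. This is a usage sketch under those assumptions, not part of the commit:

```python
import os.path as osp

import mmcv
import numpy as np
from mmdet.datasets.pipelines import Compose

from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
                                        IndoorLoadPointsFromFile)

# Chain the two transforms; each reads the keys it needs from `results`
# and writes its outputs ('points', 'pts_instance_mask', ...) back in place.
pipeline = Compose([
    IndoorLoadPointsFromFile(True),  # positional arg as in the ScanNet test
    IndoorLoadAnnotations3D(),
])

data_path = './tests/data/scannet/scannet_train_instance_data'
info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scan_name = info['point_cloud']['lidar_idx']
gt_labels = info['annos']['class'].reshape(-1, 1)  # assumes gt_num != 0

# Seed the same keys the tests above seed before calling the transforms.
results = dict(
    data_path=data_path,
    pts_filename=osp.join(data_path, f'{scan_name}_vert.npy'),
    pts_instance_mask_path=osp.join(data_path, f'{scan_name}_ins_label.npy'),
    pts_semantic_mask_path=osp.join(data_path, f'{scan_name}_sem_label.npy'),
    info=info,
    gt_bboxes_3d=info['annos']['gt_boxes_upright_depth'],
    gt_labels=gt_labels,
    gt_bboxes_3d_mask=np.ones_like(gt_labels))

results = pipeline(results)
print(results['points'].shape)  # (100, 4) for the bundled fixture
```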