"docs/en/tutorials/customize_models.md" did not exist on "bd44491f6743783cd84a0bd0a989191fd4bf2907"
Commit 94bbd751 authored by liyinhao

merge master

parents f201ba68 84569a41
@@ -110,7 +110,11 @@ mmdetection
 │   ├── VOCdevkit
 │   │   ├── VOC2007
 │   │   ├── VOC2012
+│   ├── ScanNet
+│   │   ├── meta_data
+│   │   ├── scannet_train_instance_data
+│   ├── SUNRGBD
+│   │   ├── sunrgbd_trainval
 ```
 The Cityscapes annotations have to be converted into the COCO format using `tools/convert_datasets/cityscapes.py`:
 ```shell
...
@@ -7,6 +7,7 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
 from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
+from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from .transforms import boxes3d_to_bev_torch_lidar

 from .assign_sampling import (  # isort:skip, avoid recursive imports
@@ -20,5 +21,6 @@ __all__ = [
     'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
-    'bbox_overlaps_3d'
+    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
+    'CameraInstance3DBoxes'
 ]
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
from abc import abstractmethod
import numpy as np
import torch
from .utils import limit_period
class BaseInstance3DBoxes(object):
"""Base class for 3D Boxes
Args:
tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
box_dim (int): number of the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw).
"""
def __init__(self, tensor, box_dim=7):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
self.box_dim = box_dim
self.tensor = tensor
@property
def volume(self):
"""Computes the volume of all the boxes.
Returns:
torch.Tensor: a vector with volume of each box.
"""
return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
@property
def dims(self):
"""Calculate the length in each dimension of all the boxes.
Convert the boxes to the form of (x_size, y_size, z_size)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
return self.tensor[:, 3:6]
@property
def center(self):
"""Calculate the center of all the boxes.
        Note:
            In MMDetection3D's convention, the bottom center is
            usually taken as the default center.

            The relative position of the center differs between box
            types, e.g., the relative center of a box is
            [0.5, 1.0, 0.5] in camera coordinates and [0.5, 0.5, 0]
            in LiDAR coordinates. It is recommended to use
            `bottom_center` or `gravity_center` for clearer usage.
Returns:
torch.Tensor: a tensor with center of each box.
"""
return self.bottom_center
@property
def bottom_center(self):
"""Calculate the bottom center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
return self.tensor[:, :3]
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
pass
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
Returns:
torch.Tensor: a tensor with 8 corners of each box.
"""
pass
@abstractmethod
def rotate(self, angles, axis=0):
"""Calculate whether the points is in any of the boxes
Args:
angles (float): rotation angles
axis (int): the axis to rotate the boxes
"""
pass
@abstractmethod
def flip(self):
"""Flip the boxes in horizontal direction
"""
pass
def translate(self, trans_vector):
"""Calculate whether the points is in any of the boxes
Args:
trans_vector (torch.Tensor): translation vector of size 1x3
"""
if not isinstance(trans_vector, torch.Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
self.tensor[:, :3] += trans_vector
def in_range_3d(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, z_min, x_max, y_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 2] > box_range[2])
& (self.tensor[:, 0] < box_range[3])
& (self.tensor[:, 1] < box_range[4])
& (self.tensor[:, 2] < box_range[5]))
return in_range_flags
@abstractmethod
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
pass
def scale(self, scale_factor):
"""Scale the box with horizontal and vertical scaling factors
Args:
scale_factors (float):
scale factors to scale the boxes.
"""
self.tensor[:, :6] *= scale_factor
self.tensor[:, 7:] *= scale_factor
def limit_yaw(self, offset=0.5, period=np.pi):
"""Limit the yaw to a given period and offset
Args:
offset (float): the offset of the yaw
period (float): the expected period
"""
self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
def nonempty(self, threshold: float = 0.0):
"""Find boxes that are non-empty.
A box is considered empty,
if either of its side is no larger than threshold.
Args:
threshold (float): the threshold of minimal sizes
Returns:
Tensor:
a binary vector which represents whether each box is empty
(False) or non-empty (True).
"""
box = self.tensor
size_x = box[..., 3]
size_y = box[..., 4]
size_z = box[..., 5]
keep = ((size_x > threshold)
& (size_y > threshold) & (size_z > threshold))
return keep
def __getitem__(self, item):
"""
Note:
            The following usages are allowed:
1. `new_boxes = boxes[3]`:
return a `Boxes` that contains only one box.
2. `new_boxes = boxes[2:10]`:
return a slice of boxes.
3. `new_boxes = boxes[vector]`:
where vector is a torch.BoolTensor with `length = len(boxes)`.
Nonzero elements in the vector will be selected.
Note that the returned Boxes might share storage with this Boxes,
            subject to PyTorch's indexing semantics.
Returns:
Boxes: Create a new :class:`Boxes` by indexing.
"""
        original_type = type(self)
        if isinstance(item, int):
            # preserve box_dim so subclasses with extra dims round-trip
            return original_type(
                self.tensor[item].view(1, -1), box_dim=self.box_dim)
        b = self.tensor[item]
        assert b.dim() == 2, \
            f'Indexing on Boxes with {item} failed to return a matrix!'
        return original_type(b, box_dim=self.box_dim)
def __len__(self):
return self.tensor.shape[0]
def __repr__(self):
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, boxes_list):
"""Concatenates a list of Boxes into a single Boxes
Arguments:
boxes_list (list[Boxes])
Returns:
Boxes: the concatenated Boxes
"""
assert isinstance(boxes_list, (list, tuple))
if len(boxes_list) == 0:
return cls(torch.empty(0))
assert all(isinstance(box, cls) for box in boxes_list)
# use torch.cat (v.s. layers.cat)
# so the returned boxes never share storage with input
        cat_boxes = cls(
            torch.cat([b.tensor for b in boxes_list], dim=0),
            box_dim=boxes_list[0].box_dim)
return cat_boxes
def to(self, device):
original_type = type(self)
        return original_type(self.tensor.to(device), box_dim=self.box_dim)
def clone(self):
"""Clone the Boxes.
Returns:
Boxes
"""
original_type = type(self)
return original_type(self.tensor.clone())
@property
def device(self):
return self.tensor.device
def __iter__(self):
"""
Yield a box as a Tensor of shape (4,) at a time.
"""
yield from self.tensor
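A minimal usage sketch of the indexing and concatenation semantics documented above, assuming the import layout this commit introduces (LiDARInstance3DBoxes is defined below):

import torch
from mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

# two boxes as (x, y, z, x_size, y_size, z_size, yaw) rows
boxes = LiDARInstance3DBoxes(torch.tensor([
    [0.0, 0.0, 0.0, 4.0, 2.0, 1.5, 0.0],
    [5.0, 1.0, 0.0, 4.0, 2.0, 1.5, 0.3],
]))
single = boxes[0]  # integer indexing keeps the box type, length 1
merged = LiDARInstance3DBoxes.cat([boxes, single])
assert len(merged) == 3
assert merged.volume.shape == (3,)  # 4 * 2 * 1.5 = 12.0 per box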
from enum import IntEnum, unique
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
@unique
class Box3DMode(IntEnum):
r"""Enum of different ways to represent a box.
Coordinates in LiDAR:
.. code-block:: none
                        up z
                           ^   x front
                           |  /
                           | /
        left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
.. code-block:: none
                    z front
                   /
                  /
                 0 ------> x right
                 |
                 |
                 v
            down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
.. code-block:: none
                        up z
                           ^   y front
                           |  /
                           | /
            0 ------> x right
The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
"""
LIDAR = 0
CAM = 1
DEPTH = 2
@staticmethod
def convert(box, src, dst, rt_mat=None):
"""Convert boxes from `src` mode to `dst` mode.
Args:
box (tuple | list | np.ndarray | torch.Tensor):
can be a k-tuple, k-list or an Nxk array/tensor, where k = 7
            src (Box3DMode): the source box mode
            dst (Box3DMode): the target box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor):
The converted box of the same type.
"""
if src == dst:
return box
is_numpy = isinstance(box, np.ndarray)
is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) >= 7, (
'BoxMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 7')
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
elif is_Instance3DBoxes:
arr = box.tensor.clone()
else:
arr = box.clone()
# convert box from `src` mode to `dst` mode.
x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
else:
raise NotImplementedError(
f'Conversion from Box3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 6:]
arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)
# convert arr to the original type
original_type = type(box)
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_Instance3DBoxes:
if dst == Box3DMode.CAM:
target_type = CameraInstance3DBoxes
elif dst == Box3DMode.LIDAR:
target_type = LiDARInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(arr, box_dim=arr.size(-1))
else:
return arr
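A short sketch of Box3DMode.convert for the LiDAR-to-camera case. With rt_mat left as None the default axis permutation is used; a real sensor setup would pass the calibration matrix instead:

import torch
from mmdet3d.core.bbox.structures import Box3DMode, LiDARInstance3DBoxes

lidar_boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 2.0, 0.0, 4.0, 2.0, 1.5, 0.0]]))
cam_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
# with the default rt_mat, (x, y, z) -> (-y, -z, x), and the sizes are
# permuted from (x_size, y_size, z_size) to (y_size, z_size, x_size)
assert torch.allclose(cam_boxes.tensor[0, :3], torch.tensor([-2.0, 0.0, 1.0]))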
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in CAM coordinates
Coordinates in camera:
.. code-block:: none
                    z front
                   /
                  /
                 0 ------> x right
                 |
                 |
                 v
            down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): the dimension of each box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
return gravity_center
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
        Convert the boxes to corners in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
.. code-block:: none
                     front z
                          /
                         /
           (x0, y0, z1) + -----------  + (x1, y0, z1)
                       /|            / |
                      / |           /  |
        (x0, y0, z0) + ----------- +   + (x1, y1, z1)
                     |  /      .   |  /
                     | / origin    | /
        (x0, y1, z0) + ----------- + -------> x right
                     |             (x1, y1, z0)
                     |
                     v
                down y
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 1, 0.5]
corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around y axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=1)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
@property
    def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XZWHR format
bev_rotated_boxes = self.tensor[:, [0, 2, 3, 5, 6]]
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
        # if the normalized rotation exceeds pi/4, swap width and height
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], [0, 1, 0],
[rot_sin, 0, rot_cos]])
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
In CAM coordinates, it flips the x axis.
"""
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, z_min, x_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 2] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 2] < box_range[3]))
return in_range_flags
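A quick sketch of the camera-frame convention above: the stored center is the bottom center, and the gravity center lies half a box height towards -y:

import torch
from mmdet3d.core.bbox.structures import CameraInstance3DBoxes

# (x, y, z, x_size, y_size, z_size, yaw) with y_size = 1.5
cam_boxes = CameraInstance3DBoxes(
    torch.tensor([[0.0, 1.5, 10.0, 4.0, 1.5, 2.0, 0.0]]))
assert torch.allclose(cam_boxes.gravity_center,
                      torch.tensor([[0.0, 0.75, 10.0]]))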
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in LIDAR coordinates
Coordinates in LiDAR:
.. code-block:: none
                        up z    x front
                           ^   ^
                           |  /
                           | /
        left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): the dimension of each box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, :2] = bottom_center[:, :2]
gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
return gravity_center
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
        Convert the boxes to corners in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
.. code-block:: none
                                   up z
                    front x           ^
                         /            |
                        /             |
          (x1, y0, z1) + -----------  + (x1, y1, z1)
                      /|            / |
                     / |           /  |
       (x0, y0, z1) + ----------- +   + (x1, y1, z0)
                    |  /      .   |  /
                    | / origin    | /
    left y <------- + ----------- + (x0, y1, z0)
        (x0, y0, z0)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around z axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
@property
    def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.tensor[:, [0, 1, 3, 4, 6]]
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
        # if the normalized rotation exceeds pi/4, swap width and height
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0], [0, 0, 1]])
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
In LIDAR coordinates, it flips the y axis.
"""
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
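A similar sketch for the LiDAR frame: the gravity center is half a height above the bottom center, and rotate() spins boxes in-place around the z axis:

import numpy as np
import torch
from mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 0.0, 0.0, 4.0, 2.0, 1.5, 0.0]]))
assert torch.allclose(boxes.gravity_center, torch.tensor([[1.0, 0.0, 0.75]]))
boxes.rotate(np.pi / 2)  # (1, 0, 0) maps to (0, -1, 0) under this convention
assert torch.allclose(boxes.center, torch.tensor([[0.0, -1.0, 0.0]]), atol=1e-6)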
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
"""Limit the value into a period for periodic function.
Args:
val (torch.Tensor): The value to be converted
offset (float, optional): Offset to set the value range.
Defaults to 0.5.
        period (float, optional): Period of the value. Defaults to np.pi.
Returns:
torch.Tensor: value in the range of
[-offset * period, (1-offset) * period]
"""
return val - torch.floor(val / period + offset) * period
def rotation_3d_in_axis(points, angles, axis=0):
"""Rotate points by angles according to axis
Args:
points (torch.Tensor): Points of shape (N, M, 3).
angles (torch.Tensor): Vector of angles in shape (N,)
        axis (int, optional): The axis to rotate around. Defaults to 0.
Raises:
ValueError: when the axis is not in range [0, 1, 2], it will
raise value error.
Returns:
torch.Tensor: rotated points in shape (N, M, 3)
"""
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
ones = torch.ones_like(rot_cos)
zeros = torch.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, zeros, -rot_sin]),
torch.stack([zeros, ones, zeros]),
torch.stack([rot_sin, zeros, rot_cos])
])
elif axis == 2 or axis == -1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin, zeros]),
torch.stack([rot_sin, rot_cos, zeros]),
torch.stack([zeros, zeros, ones])
])
elif axis == 0:
rot_mat_T = torch.stack([
torch.stack([zeros, rot_cos, -rot_sin]),
torch.stack([zeros, rot_sin, rot_cos]),
torch.stack([ones, zeros, zeros])
])
else:
        raise ValueError(f'axis should be in range [0, 1, 2], got {axis}')
return torch.einsum('aij,jka->aik', (points, rot_mat_T))
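Two worked examples for the helpers above, assuming the module path mmdet3d.core.bbox.structures.utils used elsewhere in this commit:

import numpy as np
import torch
from mmdet3d.core.bbox.structures.utils import limit_period, rotation_3d_in_axis

# with the defaults (offset=0.5, period=pi), 0.75*pi wraps to -0.25*pi,
# i.e. values land in [-pi/2, pi/2)
val = torch.tensor([0.75 * np.pi])
assert torch.allclose(limit_period(val), torch.tensor([-0.25 * np.pi]))

# rotating the point (1, 0, 0) by pi/2 around the z axis gives (0, -1, 0)
points = torch.tensor([[[1.0, 0.0, 0.0]]])  # shape (N, M, 3) with N = M = 1
angles = torch.tensor([np.pi / 2])
rotated = rotation_3d_in_axis(points, angles, axis=2)
assert torch.allclose(rotated, torch.tensor([[[0.0, -1.0, 0.0]]]), atol=1e-6)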
@@ -6,9 +6,11 @@ from .kitti_dataset import KittiDataset
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .nuscenes_dataset import NuScenesDataset
 from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
-                        IndoorPointsColorJitter, ObjectNoise,
-                        ObjectRangeFilter, ObjectSample, PointShuffle,
-                        PointsRangeFilter, RandomFlip3D)
+                        IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                        IndoorPointsColorJitter, IndoorPointsColorNormalize,
+                        ObjectNoise, ObjectRangeFilter, ObjectSample,
+                        PointSample, PointShuffle, PointsRangeFilter,
+                        RandomFlip3D)

 __all__ = [
     'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
@@ -16,5 +18,7 @@ __all__ = [
     'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
     'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
     'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'IndoorPointsColorJitter', 'IndoorGlobalRotScale', 'IndoorFlipData'
+    'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize', 'PointSample',
+    'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter',
+    'IndoorGlobalRotScale', 'IndoorFlipData'
 ]
@@ -238,6 +238,8 @@ class KittiDataset(torch_data.Dataset):
                              axis=1).astype(np.float32)
         difficulty = annos['difficulty']
         # this changes gt_bboxes_3d to velodyne coordinates
+        import pdb
+        pdb.set_trace()
         gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
                                                       Trv2c)
         # only center format is allowed. so we need to convert
...
@@ -3,6 +3,9 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
                              IndoorPointsColorJitter)
+from .indoor_loading import (IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                             IndoorPointsColorNormalize)
+from .indoor_sample import PointSample
 from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
@@ -14,5 +17,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler'
+    'MMDataBaseSampler', 'IndoorLoadPointsFromFile',
+    'IndoorPointsColorNormalize', 'IndoorLoadAnnotations3D', 'PointSample'
 ]
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointsColorNormalize(object):
"""Indoor Points Color Normalize
Normalize color of the points.
Args:
color_mean (List[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expect points have channel >=6, got {points.shape[1]}'
        # normalize colors as (color - mean) / 256, following the
        # VoteNet convention
        points[:, 3:6] = (points[:, 3:6] - np.array(self.color_mean)) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
"""Indoor Load Points From File.
Load sunrgbd and scannet points from file.
Args:
use_height (bool): Whether to use height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (List[int]): Which dimensions of the points to be used.
Default: [0, 1, 2].
"""
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
self.use_height = use_height
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
def __call__(self, results):
pts_filename = results['pts_filename']
mmcv.check_file_exist(pts_filename)
points = np.load(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
        if self.use_height:
            # the 0.99th (near-minimum) percentile of z approximates
            # the floor height
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(use_height={})'.format(self.use_height)
        repr_str += '(load_dim={})'.format(self.load_dim)
        repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
@PIPELINES.register_module()
class IndoorLoadAnnotations3D(object):
"""Indoor Load Annotations3D.
Load instance mask and semantic mask of points.
"""
def __init__(self):
pass
def __call__(self, results):
pts_instance_mask_path = results['pts_instance_mask_path']
pts_semantic_mask_path = results['pts_semantic_mask_path']
mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path)
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
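A minimal sketch of how these transforms could be chained in a dataset pipeline config; the color_mean values below are placeholders, not measured dataset statistics:

train_pipeline = [
    dict(type='IndoorLoadPointsFromFile',
         use_height=True,
         load_dim=6,
         use_dim=[0, 1, 2, 3, 4, 5]),  # keep xyz + rgb so colors can be normalized
    dict(type='IndoorLoadAnnotations3D'),
    dict(type='IndoorPointsColorNormalize',
         color_mean=[127.5, 127.5, 127.5]),  # placeholder mean color
]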
 from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
+from .single_roiaware_extractor import Single3DRoIAwareExtractor

-__all__ = ['SingleRoIExtractor']
+__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
import torch
import torch.nn as nn
from mmdet3d import ops
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIAwareExtractor(nn.Module):
"""Point-wise roi-aware Extractor
Extract Point-wise roi features.
Args:
roi_layer (dict): the config of roi layer
"""
def __init__(self, roi_layer=None):
super(Single3DRoIAwareExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features
Args:
feats (FloatTensor): point-wise features with
shape (batch, npoints, channels) for pooling
coordinate (FloatTensor): coordinate of each point
batch_inds (longTensor): indicate the batch of each point
rois (FloatTensor): roi boxes with batch indices
Returns:
FloatTensor: pooled features
"""
pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1):
roi_inds = (rois[..., 0].int() == batch_idx)
coors_inds = (batch_inds.int() == batch_idx)
pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
coordinate[coors_inds],
feats[coors_inds])
pooled_roi_feats.append(pooled_roi_feat)
pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
return pooled_roi_feats
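A configuration sketch for the extractor, assuming this commit's module layout. RoIAwarePool3d is the op added to mmdet3d.ops in this commit; the layer arguments shown here are assumptions about its signature, since build_roi_layers only checks hasattr(ops, type):

from mmdet3d.models.roi_heads.roi_extractors import Single3DRoIAwareExtractor

roi_extractor = Single3DRoIAwareExtractor(
    roi_layer=dict(type='RoIAwarePool3d',
                   out_size=14,  # pooled voxel grid resolution (assumed arg)
                   max_pts_per_voxel=128))  # assumed arg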
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                        get_compiling_cuda_version, nms, roi_align,
                        sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
+                              points_in_boxes_gpu)
 from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
                            SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization

 __all__ = [
-    'nms',
-    'soft_nms',
-    'RoIAlign',
-    'roi_align',
-    'get_compiler_version',
-    'get_compiling_cuda_version',
-    'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d',
-    'batched_nms',
-    'Voxelization',
-    'voxelization',
-    'dynamic_scatter',
-    'DynamicScatter',
-    'sigmoid_focal_loss',
-    'SigmoidFocalLoss',
-    'SparseBasicBlockV0',
-    'SparseBottleneckV0',
-    'SparseBasicBlock',
-    'SparseBottleneck',
+    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
+    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
+    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
+    'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
+    'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
+    'points_in_boxes_gpu', 'points_in_boxes_cpu'
 ]
import os.path as osp
import mmcv
import numpy as np
from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
IndoorLoadPointsFromFile)
def test_indoor_load_points_from_file():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
sunrgbd_load_points_from_file = IndoorLoadPointsFromFile(True, 6)
sunrgbd_results = dict()
data_path = './tests/data/sunrgbd/sunrgbd_trainval'
sunrgbd_info = sunrgbd_info[0]
scan_name = sunrgbd_info['point_cloud']['lidar_idx']
sunrgbd_results['pts_filename'] = osp.join(data_path, 'lidar',
f'{scan_name:06d}.npy')
sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
sunrgbd_point_cloud = sunrgbd_results['points']
assert sunrgbd_point_cloud.shape == (100, 4)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
scannet_load_data = IndoorLoadPointsFromFile(True)
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
scannet_results['data_path'] = data_path
scannet_info = scannet_info[0]
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_filename'] = osp.join(data_path,
f'{scan_name}_vert.npy')
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.shape == (100, 4)
def test_load_annotations3D():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
if sunrgbd_info['annos']['gt_num'] != 0:
sunrgbd_gt_bboxes_3d = sunrgbd_info['annos']['gt_boxes_upright_depth']
sunrgbd_gt_labels = sunrgbd_info['annos']['class'].reshape(-1, 1)
sunrgbd_gt_bboxes_3d_mask = np.ones_like(sunrgbd_gt_labels)
else:
sunrgbd_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
sunrgbd_gt_labels = np.zeros((1, 1))
sunrgbd_gt_bboxes_3d_mask = np.zeros((1, 1))
assert sunrgbd_gt_bboxes_3d.shape == (3, 7)
assert sunrgbd_gt_labels.shape == (3, 1)
assert sunrgbd_gt_bboxes_3d_mask.shape == (3, 1)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = IndoorLoadAnnotations3D()
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
if scannet_info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
scannet_gt_labels = scannet_info['annos']['class'].reshape(-1, 1)
scannet_gt_bboxes_3d_mask = np.ones_like(scannet_gt_labels)
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels = np.zeros((1, 1))
scannet_gt_bboxes_3d_mask = np.zeros((1, 1))
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_instance_mask_path'] = osp.join(
data_path, f'{scan_name}_ins_label.npy')
scannet_results['pts_semantic_mask_path'] = osp.join(
data_path, scan_name + '_sem_label.npy')
scannet_results['info'] = scannet_info
scannet_results['gt_bboxes_3d'] = scannet_gt_bboxes_3d
scannet_results['gt_labels'] = scannet_gt_labels
scannet_results['gt_bboxes_3d_mask'] = scannet_gt_bboxes_3d_mask
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_gt_boxes = scannet_results['gt_bboxes_3d']
    scannet_gt_labels = scannet_results['gt_labels']
scannet_gt_boxes_mask = scannet_results['gt_bboxes_3d_mask']
scannet_pts_instance_mask = scannet_results['pts_instance_mask']
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert scannet_gt_boxes.shape == (27, 6)
    assert scannet_gt_labels.shape == (27, 1)
assert scannet_gt_boxes_mask.shape == (27, 1)
assert scannet_pts_instance_mask.shape == (100, )
assert scannet_pts_semantic_mask.shape == (100, )