Unverified Commit b4b9af6b authored by Xiang Xu's avatar Xiang Xu Committed by GitHub
Browse files

Add typehints for `data structures` (#2406)

* add typehint

* fix UT

* update docs
parent a65171ab
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import abstractmethod
from typing import Iterator, Optional, Sequence, Tuple, Union
import numpy as np
import torch
from mmcv.ops import box_iou_rotated, points_in_boxes_all, points_in_boxes_part
from torch import Tensor
from mmdet3d.structures.points import BasePoints
from .utils import limit_period
class BaseInstance3DBoxes(object):
class BaseInstance3DBoxes:
"""Base class for 3D Boxes.
Note:
The box is bottom centered, i.e. the relative position of origin in
the box is (0.5, 0.5, 0).
The box is bottom centered, i.e. the relative position of origin in the
box is (0.5, 0.5, 0).
Args:
tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.
box_dim (int): Number of the dimension of a box.
Each row is (x, y, z, x_size, y_size, z_size, yaw).
Defaults to 7.
with_yaw (bool): Whether the box is with yaw rotation.
If False, the value of yaw will be set to 0 as minmax boxes.
Defaults to True.
origin (tuple[float], optional): Relative position of the box origin.
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
data with shape (N, box_dim).
box_dim (int): Number of the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
with_yaw (bool): Whether the box is with yaw rotation. If False, the
value of yaw will be set to 0 as minmax boxes. Defaults to True.
origin (Tuple[float]): Relative position of the box origin.
Defaults to (0.5, 0.5, 0). This will guide the box be converted to
(0.5, 0.5, 0) mode.
Attributes:
tensor (torch.Tensor): Float matrix of N x box_dim.
box_dim (int): Integer indicating the dimension of a box.
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
tensor (Tensor): Float matrix with shape (N, box_dim).
box_dim (int): Integer indicating the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): If True, the value of yaw will be set to 0 as minmax
boxes.
"""
def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
if isinstance(tensor, torch.Tensor):
def __init__(
self,
tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
box_dim: int = 7,
with_yaw: bool = True,
origin: Tuple[float, float, float] = (0.5, 0.5, 0)
) -> None:
if isinstance(tensor, Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
# Use reshape, so we don't end up creating a new tensor that does
# not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((-1, box_dim))
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
('The box dimension must be 2 and the length of the last '
f'dimension must be {box_dim}, but got boxes with shape '
f'{tensor.shape}.')
if tensor.shape[-1] == 6:
# If the dimension of boxes is 6, we expand box_dim by padding
# 0 as a fake yaw and set with_yaw to False.
# If the dimension of boxes is 6, we expand box_dim by padding 0 as
# a fake yaw and set with_yaw to False
assert box_dim == 6
fake_rot = tensor.new_zeros(tensor.shape[0], 1)
tensor = torch.cat((tensor, fake_rot), dim=-1)
......@@ -68,82 +78,82 @@ class BaseInstance3DBoxes(object):
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
@property
def volume(self) -> Tensor:
    """Tensor: A vector with volume of each box in shape (N, )."""
    # volume = x_size * y_size * z_size
    return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
@property
def dims(self) -> Tensor:
    """Tensor: Size dimensions of each box in shape (N, 3)."""
    # columns 3:6 are (x_size, y_size, z_size)
    return self.tensor[:, 3:6]
@property
def yaw(self) -> Tensor:
    """Tensor: A vector with yaw of each box in shape (N, )."""
    # column 6 stores the yaw angle
    return self.tensor[:, 6]
@property
def height(self) -> Tensor:
    """Tensor: A vector with height of each box in shape (N, )."""
    # column 5 is z_size, i.e. the height of the box
    return self.tensor[:, 5]
@property
def top_height(self) -> Tensor:
    """Tensor: A vector with top height of each box in shape (N, )."""
    # the box is bottom centered, so top = bottom + height
    return self.bottom_height + self.height
@property
def bottom_height(self) -> Tensor:
    """Tensor: A vector with bottom height of each box in shape (N, )."""
    # the z coordinate of a bottom-centered box is its bottom height
    return self.tensor[:, 2]
@property
def center(self) -> Tensor:
    """Calculate the center of all the boxes.

    Note:
        In MMDetection3D's convention, the bottom center is usually taken
        as the default center.

        The relative position of the centers in different kinds of boxes
        are different, e.g., the relative center of a boxes is
        (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar. It is
        recommended to use ``bottom_center`` or ``gravity_center`` for
        clearer usage.

    Returns:
        Tensor: A tensor with center of each box in shape (N, 3).
    """
    return self.bottom_center
@property
def bottom_center(self) -> Tensor:
    """Tensor: A tensor with center of each box in shape (N, 3)."""
    # boxes are bottom centered, so (x, y, z) is the bottom center
    return self.tensor[:, :3]
@property
def gravity_center(self) -> Tensor:
    """Tensor: A tensor with center of each box in shape (N, 3)."""
    bottom_center = self.bottom_center
    gravity_center = torch.zeros_like(bottom_center)
    gravity_center[:, :2] = bottom_center[:, :2]
    # lift the bottom center by half of the box height (column 5)
    gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
    return gravity_center
@property
def corners(self) -> Tensor:
    """Tensor: A tensor with 8 corners of each box in shape (N, 8, 3)."""
    # implemented by the coordinate-specific subclasses
    pass
@property
def bev(self) -> Tensor:
    """Tensor: 2D BEV box of each box with rotation in XYWHR format, in
    shape (N, 5)."""
    # pick (x, y, x_size, y_size, yaw) out of the 7-dim box
    return self.tensor[:, [0, 1, 3, 4, 6]]
@property
def nearest_bev(self):
"""torch.Tensor: A tensor of 2D BEV box of each box
without rotation."""
def nearest_bev(self) -> Tensor:
"""Tensor: A tensor of 2D BEV box of each box without rotation."""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.bev
# convert the rotation to a valid range
......@@ -161,20 +171,23 @@ class BaseInstance3DBoxes(object):
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
def in_range_bev(self, box_range):
def in_range_bev(
self, box_range: Union[Tensor, np.ndarray,
Sequence[float]]) -> Tensor:
"""Check whether the boxes are in the given range.
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
box_range (Tensor or np.ndarray or Sequence[float]): The range of
box in order of (x_min, y_min, x_max, y_max).
Note:
The original implementation of SECOND checks whether boxes in
a range by checking whether the points are in a convex
polygon, we reduce the burden for simpler cases.
The original implementation of SECOND checks whether boxes in a
range by checking whether the points are in a convex polygon, we
reduce the burden for simpler cases.
Returns:
torch.Tensor: Whether each box is inside the reference range.
Tensor: A binary vector indicating whether each box is inside the
reference range.
"""
in_range_flags = ((self.bev[:, 0] > box_range[0])
& (self.bev[:, 1] > box_range[1])
......@@ -183,55 +196,77 @@ class BaseInstance3DBoxes(object):
return in_range_flags
@abstractmethod
def rotate(
    self,
    angle: Union[Tensor, np.ndarray, float],
    points: Optional[Union[Tensor, np.ndarray, 'BasePoints']] = None
) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
           Tuple['BasePoints', Tensor], None]:
    """Rotate boxes with points (optional) with the given angle or rotation
    matrix.

    Args:
        angle (Tensor or np.ndarray or float): Rotation angle or rotation
            matrix.
        points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
            Points to rotate. Defaults to None.

    Returns:
        tuple or None: When ``points`` is None, the function returns None,
        otherwise it returns the rotated points and the rotation matrix
        ``rot_mat_T``.
    """
    # abstract: implemented by the coordinate-specific subclasses
    pass
@abstractmethod
def flip(
    self,
    bev_direction: str = 'horizontal',
    points: Optional[Union[Tensor, np.ndarray, 'BasePoints']] = None
) -> Union[Tensor, np.ndarray, 'BasePoints', None]:
    """Flip the boxes in BEV along given BEV direction.

    Args:
        bev_direction (str): Direction by which to flip. Can be chosen from
            'horizontal' and 'vertical'. Defaults to 'horizontal'.
        points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
            Points to flip. Defaults to None.

    Returns:
        Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points``
        is None, the function returns None, otherwise it returns the
        flipped points.
    """
    # abstract: implemented by the coordinate-specific subclasses
    pass
def translate(self, trans_vector: Union[Tensor, np.ndarray]) -> None:
    """Translate boxes with the given translation vector.

    Args:
        trans_vector (Tensor or np.ndarray): Translation vector of size
            1x3.
    """
    if not isinstance(trans_vector, Tensor):
        # convert array-likes to a tensor matching self.tensor's dtype/device
        trans_vector = self.tensor.new_tensor(trans_vector)
    self.tensor[:, :3] += trans_vector
def in_range_3d(self, box_range):
def in_range_3d(
self, box_range: Union[Tensor, np.ndarray,
Sequence[float]]) -> Tensor:
"""Check whether the boxes are in the given range.
Args:
box_range (list | torch.Tensor): The range of box
(x_min, y_min, z_min, x_max, y_max, z_max)
box_range (Tensor or np.ndarray or Sequence[float]): The range of
box (x_min, y_min, z_min, x_max, y_max, z_max).
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burden for simpler cases.
In the original implementation of SECOND, checking whether a box in
the range checks whether the points are in a convex polygon, we try
to reduce the burden for simpler cases.
Returns:
torch.Tensor: A binary vector indicating whether each box is
inside the reference range.
Tensor: A binary vector indicating whether each point is inside the
reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
......@@ -242,25 +277,30 @@ class BaseInstance3DBoxes(object):
return in_range_flags
@abstractmethod
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
               correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Box mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.
        correct_yaw (bool): Whether to convert the yaw angle to the target
            coordinate. Defaults to False.

    Returns:
        :obj:`BaseInstance3DBoxes`: The converted box of the same type in
        the ``dst`` mode.
    """
    # abstract: implemented by the coordinate-specific subclasses
    pass
def scale(self, scale_factor: float) -> None:
    """Scale the box with horizontal and vertical scaling factors.

    Args:
        scale_factor (float): Scale factor to scale the boxes. It scales
            the box center and size (and velocity when present); the yaw
            angle (column 6) is left unchanged.
    """
    self.tensor[:, :6] *= scale_factor
    self.tensor[:, 7:] *= scale_factor  # velocity
def limit_yaw(self, offset: float = 0.5, period: float = np.pi) -> None:
    """Limit the yaw to a given period and offset.

    Args:
        offset (float): The offset of the yaw. Defaults to 0.5.
        period (float): The expected period. Defaults to np.pi.
    """
    # limit_period is the shared utility from .utils; it wraps the yaw
    # angle (column 6) into [-offset * period, (1 - offset) * period)
    self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
def nonempty(self, threshold=0.0):
def nonempty(self, threshold: float = 0.0) -> Tensor:
"""Find boxes that are non-empty.
A box is considered empty,
if either of its side is no larger than threshold.
A box is considered empty if either of its side is no larger than
threshold.
Args:
threshold (float, optional): The threshold of minimal sizes.
Defaults to 0.0.
threshold (float): The threshold of minimal sizes. Defaults to 0.0.
Returns:
torch.Tensor: A binary vector which represents whether each
box is empty (False) or non-empty (True).
Tensor: A binary vector which represents whether each box is empty
(False) or non-empty (True).
"""
box = self.tensor
size_x = box[..., 3]
......@@ -300,23 +339,29 @@ class BaseInstance3DBoxes(object):
& (size_y > threshold) & (size_z > threshold))
return keep
def __getitem__(self, item):
def __getitem__(
self, item: Union[int, slice, np.ndarray,
Tensor]) -> 'BaseInstance3DBoxes':
"""
Args:
item (int or slice or np.ndarray or Tensor): Index of boxes.
Note:
The following usage are allowed:
1. `new_boxes = boxes[3]`:
return a `Boxes` that contains only one box.
2. `new_boxes = boxes[2:10]`:
return a slice of boxes.
3. `new_boxes = boxes[vector]`:
where vector is a torch.BoolTensor with `length = len(boxes)`.
Nonzero elements in the vector will be selected.
1. `new_boxes = boxes[3]`: Return a `Boxes` that contains only one
box.
2. `new_boxes = boxes[2:10]`: Return a slice of boxes.
3. `new_boxes = boxes[vector]`: Where vector is a
torch.BoolTensor with `length = len(boxes)`. Nonzero elements in
the vector will be selected.
Note that the returned Boxes might share storage with this Boxes,
subject to Pytorch's indexing semantics.
subject to PyTorch's indexing semantics.
Returns:
:obj:`BaseInstance3DBoxes`: A new object of
:class:`BaseInstance3DBoxes` after indexing.
:class:`BaseInstance3DBoxes` after indexing.
"""
original_type = type(self)
if isinstance(item, int):
......@@ -329,23 +374,24 @@ class BaseInstance3DBoxes(object):
f'Indexing on Boxes with {item} failed to return a matrix!'
return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
def __len__(self) -> int:
    """int: Number of boxes in the current object."""
    return self.tensor.shape[0]
def __repr__(self) -> str:
    """str: Return a string that describes the object."""
    return self.__class__.__name__ + '(\n    ' + str(self.tensor) + ')'
@classmethod
def cat(cls, boxes_list):
def cat(cls, boxes_list: Sequence['BaseInstance3DBoxes']
) -> 'BaseInstance3DBoxes':
"""Concatenate a list of Boxes into a single Boxes.
Args:
boxes_list (list[:obj:`BaseInstance3DBoxes`]): List of boxes.
boxes_list (Sequence[:obj:`BaseInstance3DBoxes`]): List of boxes.
Returns:
:obj:`BaseInstance3DBoxes`: The concatenated Boxes.
:obj:`BaseInstance3DBoxes`: The concatenated boxes.
"""
assert isinstance(boxes_list, (list, tuple))
if len(boxes_list) == 0:
......@@ -356,19 +402,20 @@ class BaseInstance3DBoxes(object):
# so the returned boxes never share storage with input
cat_boxes = cls(
torch.cat([b.tensor for b in boxes_list], dim=0),
box_dim=boxes_list[0].tensor.shape[1],
box_dim=boxes_list[0].box_dim,
with_yaw=boxes_list[0].with_yaw)
return cat_boxes
def to(self, device, *args, **kwargs):
def to(self, device: Union[str, torch.device], *args,
**kwargs) -> 'BaseInstance3DBoxes':
"""Convert current boxes to a specific device.
Args:
device (str | :obj:`torch.device`): The name of the device.
device (str or :obj:`torch.device`): The name of the device.
Returns:
:obj:`BaseInstance3DBoxes`: A new boxes object on the
specific device.
:obj:`BaseInstance3DBoxes`: A new boxes object on the specific
device.
"""
original_type = type(self)
return original_type(
......@@ -376,50 +423,51 @@ class BaseInstance3DBoxes(object):
box_dim=self.box_dim,
with_yaw=self.with_yaw)
def clone(self) -> 'BaseInstance3DBoxes':
    """Clone the boxes.

    Returns:
        :obj:`BaseInstance3DBoxes`: Box object with the same properties as
        self.
    """
    # build a new instance of the same (sub)class with a copied tensor
    original_type = type(self)
    return original_type(
        self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)
@property
def device(self) -> torch.device:
    """torch.device: The device of the boxes are on."""
    return self.tensor.device
def __iter__(self) -> Iterator[Tensor]:
    """Yield a box as a Tensor at a time.

    Returns:
        Iterator[Tensor]: A box of shape (box_dim, ).
    """
    # iterating the (N, box_dim) tensor yields one row per box
    yield from self.tensor
@classmethod
def height_overlaps(cls, boxes1, boxes2, mode='iou'):
def height_overlaps(cls, boxes1: 'BaseInstance3DBoxes',
boxes2: 'BaseInstance3DBoxes') -> Tensor:
"""Calculate height overlaps of two boxes.
Note:
This function calculates the height overlaps between boxes1 and
boxes2, boxes1 and boxes2 should be in the same type.
This function calculates the height overlaps between ``boxes1`` and
``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
Args:
boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
Returns:
torch.Tensor: Calculated iou of boxes.
Tensor: Calculated height overlap of the boxes.
"""
assert isinstance(boxes1, BaseInstance3DBoxes)
assert isinstance(boxes2, BaseInstance3DBoxes)
assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
assert type(boxes1) == type(boxes2), \
'"boxes1" and "boxes2" should be in the same type, ' \
f'but got {type(boxes1)} and {type(boxes2)}.'
boxes1_top_height = boxes1.top_height.view(-1, 1)
boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
......@@ -433,7 +481,10 @@ class BaseInstance3DBoxes(object):
return overlaps_h
@classmethod
def overlaps(cls, boxes1, boxes2, mode='iou'):
def overlaps(cls,
boxes1: 'BaseInstance3DBoxes',
boxes2: 'BaseInstance3DBoxes',
mode: str = 'iou') -> Tensor:
"""Calculate 3D overlaps of two boxes.
Note:
......@@ -443,15 +494,16 @@ class BaseInstance3DBoxes(object):
Args:
boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
mode (str): Mode of iou calculation. Defaults to 'iou'.
Returns:
torch.Tensor: Calculated 3D overlaps of the boxes.
Tensor: Calculated 3D overlap of the boxes.
"""
assert isinstance(boxes1, BaseInstance3DBoxes)
assert isinstance(boxes2, BaseInstance3DBoxes)
assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
assert type(boxes1) == type(boxes2), \
'"boxes1" and "boxes2" should be in the same type, ' \
f'but got {type(boxes1)} and {type(boxes2)}.'
assert mode in ['iou', 'iof']
......@@ -467,7 +519,7 @@ class BaseInstance3DBoxes(object):
# ``box_iou_rotated``.
boxes1_bev, boxes2_bev = boxes1.bev, boxes2.bev
boxes1_bev[:, 2:4] = boxes1_bev[:, 2:4].clamp(min=1e-4)
boxes2_bev[:, 2:4] = boxes2.bev[:, 2:4].clamp(min=1e-4)
boxes2_bev[:, 2:4] = boxes2_bev[:, 2:4].clamp(min=1e-4)
# bev overlap
iou2d = box_iou_rotated(boxes1_bev, boxes2_bev)
......@@ -492,68 +544,81 @@ class BaseInstance3DBoxes(object):
return iou3d
def new_box(
    self, data: Union[Tensor, np.ndarray, Sequence[Sequence[float]]]
) -> 'BaseInstance3DBoxes':
    """Create a new box object with data.

    The new box and its tensor has the similar properties as self and
    self.tensor, respectively.

    Args:
        data (Tensor or np.ndarray or Sequence[Sequence[float]]): Data to
            be copied.

    Returns:
        :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, the
        object's other properties are similar to ``self``.
    """
    # tensors are moved to self's device; array-likes inherit self.tensor's
    # dtype/device via new_tensor
    new_tensor = self.tensor.new_tensor(data) \
        if not isinstance(data, Tensor) else data.to(self.device)
    original_type = type(self)
    return original_type(
        new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
def points_in_boxes_part(
        self,
        points: Tensor,
        boxes_override: Optional[Tensor] = None) -> Tensor:
    """Find the box in which each point is.

    Args:
        points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
            are (x, y, z) in LiDAR or depth coordinate.
        boxes_override (Tensor, optional): Boxes to override `self.tensor`.
            Defaults to None.

    Note:
        If a point is enclosed by multiple boxes, the index of the first
        box will be returned.

    Returns:
        Tensor: The index of the first box that each point is in with shape
        (M, ). Default value is -1 (if the point is not enclosed by any
        box).
    """
    if boxes_override is not None:
        boxes = boxes_override
    else:
        boxes = self.tensor

    # keep only (x, y, z); a clone avoids mutating the caller's tensor
    points_clone = points.clone()[..., :3]
    if points_clone.dim() == 2:
        points_clone = points_clone.unsqueeze(0)
    else:
        # the mmcv op expects a batch dimension of exactly 1
        assert points_clone.dim() == 3 and points_clone.shape[0] == 1

    boxes = boxes.to(points_clone.device).unsqueeze(0)
    # points_in_boxes_part here is the CUDA/CPU op imported from mmcv.ops
    box_idx = points_in_boxes_part(points_clone, boxes)
    return box_idx.squeeze(0)
def points_in_boxes_all(self,
points: Tensor,
boxes_override: Optional[Tensor] = None) -> Tensor:
"""Find all boxes in which each point is.
Args:
points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
3 dimensions are (x, y, z) in LiDAR or depth coordinate.
boxes_override (torch.Tensor, optional): Boxes to override
`self.tensor`. Defaults to None.
points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
are (x, y, z) in LiDAR or depth coordinate.
boxes_override (Tensor, optional): Boxes to override `self.tensor`.
Defaults to None.
Returns:
torch.Tensor: A tensor indicating whether a point is in a box,
in shape (M, T). T is the number of boxes. Denote this
tensor as A, if the m^th point is in the t^th box, then
`A[m, t] == 1`, elsewise `A[m, t] == 0`.
Tensor: A tensor indicating whether a point is in a box with shape
(M, T). T is the number of boxes. Denote this tensor as A, it the
m^th point is in the t^th box, then `A[m, t] == 1`, otherwise
`A[m, t] == 0`.
"""
if boxes_override is not None:
boxes = boxes_override
......@@ -571,13 +636,17 @@ class BaseInstance3DBoxes(object):
return box_idxs_of_pts.squeeze(0)
def points_in_boxes(self,
                    points: Tensor,
                    boxes_override: Optional[Tensor] = None) -> Tensor:
    """Deprecated wrapper that delegates to :meth:`points_in_boxes_part`.

    Args:
        points (Tensor): Points in shape (1, M, 3) or (M, 3).
        boxes_override (Tensor, optional): Boxes to override `self.tensor`.
            Defaults to None.

    Returns:
        Tensor: The index of the first box that each point is in.
    """
    warnings.warn('DeprecationWarning: points_in_boxes is a deprecated '
                  'method, please consider using points_in_boxes_part.')
    return self.points_in_boxes_part(points, boxes_override)
def points_in_boxes_batch(self, points, boxes_override=None):
def points_in_boxes_batch(
self,
points: Tensor,
boxes_override: Optional[Tensor] = None) -> Tensor:
warnings.warn('DeprecationWarning: points_in_boxes_batch is a '
'deprecated method, please consider using '
'points_in_boxes_all.')
......
# Copyright (c) OpenMMLab. All rights reserved.
from enum import IntEnum, unique
from typing import Optional, Sequence, Union
import numpy as np
import torch
from torch import Tensor
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
......@@ -13,7 +15,7 @@ from .utils import limit_period
@unique
class Box3DMode(IntEnum):
r"""Enum of different ways to represent a box.
"""Enum of different ways to represent a box.
Coordinates in LiDAR:
......@@ -28,7 +30,7 @@ class Box3DMode(IntEnum):
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
Coordinates in Camera:
.. code-block:: none
......@@ -44,7 +46,7 @@ class Box3DMode(IntEnum):
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
Coordinates in Depth:
.. code-block:: none
......@@ -63,30 +65,37 @@ class Box3DMode(IntEnum):
DEPTH = 2
@staticmethod
def convert(box, src, dst, rt_mat=None, with_yaw=True, correct_yaw=False):
"""Convert boxes from `src` mode to `dst` mode.
def convert(
box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
src: 'Box3DMode',
dst: 'Box3DMode',
rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
with_yaw: bool = True,
correct_yaw: bool = False
) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
"""Convert boxes from ``src`` mode to ``dst`` mode.
Args:
box (tuple | list | np.ndarray |
torch.Tensor | :obj:`BaseInstance3DBoxes`):
Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
src (:obj:`Box3DMode`): The src Box mode.
dst (:obj:`Box3DMode`): The target Box mode.
rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
box (Sequence[float] or np.ndarray or Tensor or
:obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk
array/tensor.
src (:obj:`Box3DMode`): The source box mode.
dst (:obj:`Box3DMode`): The target box mode.
rt_mat (np.ndarray or Tensor, optional): The rotation and
translation matrix between different coordinates.
Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
with_yaw (bool, optional): If `box` is an instance of
Defaults to None. The conversion from ``src`` coordinates to
``dst`` coordinates usually comes along the change of sensors,
e.g., from camera to LiDAR. This requires a transformation
matrix.
with_yaw (bool): If ``box`` is an instance of
:obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
Defaults to True.
correct_yaw (bool): If the yaw is rotated by rt_mat.
Defaults to False.
Returns:
(tuple | list | np.ndarray | torch.Tensor |
:obj:`BaseInstance3DBoxes`):
The converted box of the same type.
Sequence[float] or np.ndarray or Tensor or
:obj:`BaseInstance3DBoxes`: The converted box of the same type.
"""
if src == dst:
return box
......@@ -208,7 +217,7 @@ class Box3DMode(IntEnum):
f'Conversion from Box3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
if not isinstance(rt_mat, Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
......@@ -251,8 +260,8 @@ class Box3DMode(IntEnum):
target_type = DepthInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
f'Conversion to {dst} through {original_type} '
'is not supported yet')
return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)
else:
return arr
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmdet3d.structures.points import BasePoints
from .base_box3d import BaseInstance3DBoxes
......@@ -10,7 +13,7 @@ from .utils import rotation_3d_in_axis, yaw2local
class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in CAM coordinates.
Coordinates in camera:
Coordinates in Camera:
.. code-block:: none
......@@ -24,39 +27,54 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
down y
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
The yaw is 0 at the positive direction of x axis, and decreases from
the positive direction of x to the positive direction of z.
and the yaw is around the y axis, thus the rotation axis=1. The yaw is 0 at
the positive direction of x axis, and decreases from the positive direction
of x to the positive direction of z.
Args:
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
data with shape (N, box_dim).
box_dim (int): Number of the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
with_yaw (bool): Whether the box is with yaw rotation. If False, the
value of yaw will be set to 0 as minmax boxes. Defaults to True.
origin (Tuple[float]): Relative position of the box origin.
Defaults to (0.5, 1.0, 0.5). This will guide the box be converted
to (0.5, 1.0, 0.5) mode.
Attributes:
tensor (torch.Tensor): Float matrix in shape (N, box_dim).
box_dim (int): Integer indicating the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): If True, the value of yaw will be set to 0 as
axis-aligned boxes tightly enclosing the original boxes.
tensor (Tensor): Float matrix with shape (N, box_dim).
box_dim (int): Integer indicating the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): If True, the value of yaw will be set to 0 as minmax
boxes.
"""
YAW_AXIS = 1
def __init__(self,
tensor,
box_dim=7,
with_yaw=True,
origin=(0.5, 1.0, 0.5)):
if isinstance(tensor, torch.Tensor):
def __init__(
self,
tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
box_dim: int = 7,
with_yaw: bool = True,
origin: Tuple[float, float, float] = (0.5, 1.0, 0.5)
) -> None:
if isinstance(tensor, Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
# Use reshape, so we don't end up creating a new tensor that does
# not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((-1, box_dim))
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
('The box dimension must be 2 and the length of the last '
f'dimension must be {box_dim}, but got boxes with shape '
f'{tensor.shape}.')
if tensor.shape[-1] == 6:
# If the dimension of boxes is 6, we expand box_dim by padding
# 0 as a fake yaw and set with_yaw to False.
# If the dimension of boxes is 6, we expand box_dim by padding 0 as
# a fake yaw and set with_yaw to False
assert box_dim == 6
fake_rot = tensor.new_zeros(tensor.shape[0], 1)
tensor = torch.cat((tensor, fake_rot), dim=-1)
......@@ -73,31 +91,27 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
@property
def height(self) -> Tensor:
    """Tensor: A vector with height of each box in shape (N, ).

    In camera coordinates the box size is (x_size, y_size, z_size) at
    columns 3:6, and the height is the y extent (column 4).
    """
    return self.tensor[:, 4]
@property
def top_height(self) -> Tensor:
    """Tensor: A vector with top height of each box in shape (N, ).

    In camera coordinates the positive direction of the y axis points
    down, so the top is *below* the bottom numerically.
    """
    # the positive direction is down rather than up
    return self.bottom_height - self.height
@property
def bottom_height(self) -> Tensor:
    """Tensor: A vector with bottom height of each box in shape (N, ).

    The box center stored in ``tensor`` is bottom-centered, so the y
    coordinate (column 1) is directly the bottom height.
    """
    return self.tensor[:, 1]
@property
def local_yaw(self):
"""torch.Tensor:
A vector with local yaw of each box in shape (N, ).
local_yaw equals to alpha in kitti, which is commonly
used in monocular 3D object detection task, so only
:obj:`CameraInstance3DBoxes` has the property.
"""
def local_yaw(self) -> Tensor:
"""Tensor: A vector with local yaw of each box in shape (N, ).
local_yaw equals to alpha in kitti, which is commonly used in monocular
3D object detection task, so only :obj:`CameraInstance3DBoxes` has the
property."""
yaw = self.yaw
loc = self.gravity_center
local_yaw = yaw2local(yaw, loc)
......@@ -105,8 +119,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return local_yaw
@property
def gravity_center(self):
"""torch.Tensor: A tensor with center of each box in shape (N, 3)."""
def gravity_center(self) -> Tensor:
"""Tensor: A tensor with center of each box in shape (N, 3)."""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
......@@ -114,12 +128,9 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return gravity_center
@property
def corners(self):
"""torch.Tensor: Coordinates of corners of all the boxes in
shape (N, 8, 3).
Convert the boxes to in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
def corners(self) -> Tensor:
"""Convert boxes to corners in clockwise order, in the form of (x0y0z0,
x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
.. code-block:: none
......@@ -132,11 +143,14 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
(x0, y0, z0) + ----------- + + (x1, y1, z1)
| / . | /
| / origin | /
(x0, y1, z0) + ----------- + -------> x right
(x0, y1, z0) + ----------- + -------> right x
| (x1, y1, z0)
|
v
down y
Returns:
Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
"""
if self.tensor.numel() == 0:
return torch.empty([0, 8, 3], device=self.tensor.device)
......@@ -147,7 +161,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 1, 0.5]
# use relative origin (0.5, 1, 0.5)
corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
......@@ -157,9 +171,9 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return corners
@property
def bev(self):
"""torch.Tensor: 2D BEV box of each box with rotation
in XYWHR format, in shape (N, 5)."""
def bev(self) -> Tensor:
"""Tensor: 2D BEV box of each box with rotation in XYWHR format, in
shape (N, 5)."""
bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()
# positive direction of the gravity axis
# in cam coord system points to the earth
......@@ -167,22 +181,27 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
bev[:, -1] = -bev[:, -1]
return bev
def rotate(self, angle, points=None):
def rotate(
self,
angle: Union[Tensor, np.ndarray, float],
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray], Tuple[
BasePoints, Tensor], None]:
"""Rotate boxes with points (optional) with the given angle or rotation
matrix.
Args:
angle (float | torch.Tensor | np.ndarray):
Rotation angle or rotation matrix.
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
angle (Tensor or np.ndarray or float): Rotation angle or rotation
matrix.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to rotate. Defaults to None.
Returns:
tuple or None: When ``points`` is None, the function returns
None, otherwise it returns the rotated points and the
rotation matrix ``rot_mat_T``.
tuple or None: When ``points`` is None, the function returns None,
otherwise it returns the rotated points and the rotation matrix
``rot_mat_T``.
"""
if not isinstance(angle, torch.Tensor):
if not isinstance(angle, Tensor):
angle = self.tensor.new_tensor(angle)
assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
......@@ -204,7 +223,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 6] += angle
if points is not None:
if isinstance(points, torch.Tensor):
if isinstance(points, Tensor):
points[:, :3] = points[:, :3] @ rot_mat_T
elif isinstance(points, np.ndarray):
rot_mat_T = rot_mat_T.cpu().numpy()
......@@ -215,18 +234,25 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
raise ValueError
return points, rot_mat_T
def flip(self, bev_direction='horizontal', points=None):
def flip(
self,
bev_direction: str = 'horizontal',
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tensor, np.ndarray, BasePoints, None]:
"""Flip the boxes in BEV along given BEV direction.
In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
bev_direction (str): Direction by which to flip. Can be chosen from
'horizontal' and 'vertical'. Defaults to 'horizontal'.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to flip. Defaults to None.
Returns:
torch.Tensor, numpy.ndarray or None: Flipped points.
Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points``
is None, the function returns None, otherwise it returns the
flipped points.
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
......@@ -239,8 +265,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 6] = -self.tensor[:, 6]
if points is not None:
assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
if isinstance(points, (torch.Tensor, np.ndarray)):
assert isinstance(points, (Tensor, np.ndarray, BasePoints))
if isinstance(points, (Tensor, np.ndarray)):
if bev_direction == 'horizontal':
points[:, 0] = -points[:, 0]
elif bev_direction == 'vertical':
......@@ -250,19 +276,20 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return points
@classmethod
def height_overlaps(cls, boxes1, boxes2, mode='iou'):
def height_overlaps(cls, boxes1: 'CameraInstance3DBoxes',
boxes2: 'CameraInstance3DBoxes') -> Tensor:
"""Calculate height overlaps of two boxes.
This function calculates the height overlaps between ``boxes1`` and
``boxes2``, where ``boxes1`` and ``boxes2`` should be in the same type.
Note:
This function calculates the height overlaps between ``boxes1`` and
``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
Args:
boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.
boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.
mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
Returns:
torch.Tensor: Calculated iou of boxes' heights.
Tensor: Calculated height overlap of the boxes.
"""
assert isinstance(boxes1, CameraInstance3DBoxes)
assert isinstance(boxes2, CameraInstance3DBoxes)
......@@ -280,22 +307,26 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
return overlaps_h
def convert_to(self, dst, rt_mat=None, correct_yaw=False):
def convert_to(self,
dst: int,
rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Box3DMode`): The target Box mode.
rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
dst (int): The target Box mode.
rt_mat (Tensor or np.ndarray, optional): The rotation and
translation matrix between different coordinates.
Defaults to None.
The conversion from ``src`` coordinates to ``dst`` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Defaults to None. The conversion from ``src`` coordinates to
``dst`` coordinates usually comes along the change of sensors,
e.g., from camera to LiDAR. This requires a transformation
matrix.
correct_yaw (bool): Whether to convert the yaw angle to the target
coordinate. Defaults to False.
Returns:
:obj:`BaseInstance3DBoxes`:
The converted box of the same type in the ``dst`` mode.
:obj:`BaseInstance3DBoxes`: The converted box of the same type in
the ``dst`` mode.
"""
from .box_3d_mode import Box3DMode
......@@ -307,19 +338,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
rt_mat=rt_mat,
correct_yaw=correct_yaw)
def points_in_boxes_part(self, points, boxes_override=None):
def points_in_boxes_part(
self,
points: Tensor,
boxes_override: Optional[Tensor] = None) -> Tensor:
"""Find the box in which each point is.
Args:
points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
3 dimensions are (x, y, z) in LiDAR or depth coordinate.
boxes_override (torch.Tensor, optional): Boxes to override
`self.tensor `. Defaults to None.
points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
are (x, y, z) in LiDAR or depth coordinate.
boxes_override (Tensor, optional): Boxes to override `self.tensor`.
Defaults to None.
Returns:
torch.Tensor: The index of the box in which
each point is, in shape (M, ). Default value is -1
(if the point is not enclosed by any box).
Tensor: The index of the first box that each point is in with shape
(M, ). Default value is -1 (if the point is not enclosed by any
box).
"""
from .coord_3d_mode import Coord3DMode
......@@ -328,24 +362,29 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
if boxes_override is not None:
boxes_lidar = boxes_override
else:
boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
Coord3DMode.LIDAR)
boxes_lidar = Coord3DMode.convert(
self.tensor,
Coord3DMode.CAM,
Coord3DMode.LIDAR,
is_point=False)
box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar)
return box_idx
def points_in_boxes_all(self, points, boxes_override=None):
def points_in_boxes_all(self,
points: Tensor,
boxes_override: Optional[Tensor] = None) -> Tensor:
"""Find all boxes in which each point is.
Args:
points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
3 dimensions are (x, y, z) in LiDAR or depth coordinate.
boxes_override (torch.Tensor, optional): Boxes to override
`self.tensor `. Defaults to None.
points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
are (x, y, z) in LiDAR or depth coordinate.
boxes_override (Tensor, optional): Boxes to override `self.tensor`.
Defaults to None.
Returns:
torch.Tensor: The index of all boxes in which each point is,
in shape (B, M, T).
Tensor: The index of all boxes in which each point is with shape
(M, T).
"""
from .coord_3d_mode import Coord3DMode
......@@ -354,8 +393,11 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
if boxes_override is not None:
boxes_lidar = boxes_override
else:
boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
Coord3DMode.LIDAR)
boxes_lidar = Coord3DMode.convert(
self.tensor,
Coord3DMode.CAM,
Coord3DMode.LIDAR,
is_point=False)
box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar)
return box_idx
# Copyright (c) OpenMMLab. All rights reserved.
from enum import IntEnum, unique
from typing import Optional, Sequence, Union
import numpy as np
import torch
from torch import Tensor
from mmdet3d.structures.points import (BasePoints, CameraPoints, DepthPoints,
LiDARPoints)
......@@ -12,8 +14,7 @@ from .box_3d_mode import Box3DMode
@unique
class Coord3DMode(IntEnum):
r"""Enum of different ways to represent a box
and point cloud.
"""Enum of different ways to represent a box and point cloud.
Coordinates in LiDAR:
......@@ -28,7 +29,7 @@ class Coord3DMode(IntEnum):
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
Coordinates in Camera:
.. code-block:: none
......@@ -44,7 +45,7 @@ class Coord3DMode(IntEnum):
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
Coordinates in Depth:
.. code-block:: none
......@@ -63,96 +64,133 @@ class Coord3DMode(IntEnum):
DEPTH = 2
@staticmethod
def convert(
    input: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes,
                 BasePoints],
    src: Union[Box3DMode, 'Coord3DMode'],
    dst: Union[Box3DMode, 'Coord3DMode'],
    rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
    with_yaw: bool = True,
    correct_yaw: bool = False,
    is_point: bool = True
) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes,
           BasePoints]:
    """Convert boxes or points from ``src`` mode to ``dst`` mode.

    Args:
        input (Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes` or :obj:`BasePoints`): Can be a
            k-tuple, k-list or an Nxk array/tensor.
        src (:obj:`Box3DMode` or :obj:`Coord3DMode`): The source mode.
        dst (:obj:`Box3DMode` or :obj:`Coord3DMode`): The target mode.
        rt_mat (np.ndarray or Tensor, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.
        with_yaw (bool): If ``box`` is an instance of
            :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
            Defaults to True.
        correct_yaw (bool): If the yaw is rotated by rt_mat.
            Defaults to False.
        is_point (bool): If ``input`` is neither an instance of
            :obj:`BaseInstance3DBoxes` nor an instance of
            :obj:`BasePoints`, whether or not it is point data.
            Defaults to True.

    Returns:
        Sequence[float] or np.ndarray or Tensor or
        :obj:`BaseInstance3DBoxes` or :obj:`BasePoints`: The converted box
        or points of the same type.
    """
    # Box and point wrapper types carry their own semantics; raw
    # arrays/sequences are disambiguated by the ``is_point`` flag.
    if isinstance(input, BaseInstance3DBoxes):
        return Coord3DMode.convert_box(
            input,
            src,
            dst,
            rt_mat=rt_mat,
            with_yaw=with_yaw,
            correct_yaw=correct_yaw)
    elif isinstance(input, BasePoints):
        return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)
    elif isinstance(input, (tuple, list, np.ndarray, Tensor)):
        if is_point:
            return Coord3DMode.convert_point(
                input, src, dst, rt_mat=rt_mat)
        else:
            return Coord3DMode.convert_box(
                input,
                src,
                dst,
                rt_mat=rt_mat,
                with_yaw=with_yaw,
                correct_yaw=correct_yaw)
    else:
        raise NotImplementedError
@staticmethod
def convert_box(
    box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
    src: Box3DMode,
    dst: Box3DMode,
    rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
    with_yaw: bool = True,
    correct_yaw: bool = False
) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
    """Convert boxes from ``src`` mode to ``dst`` mode.

    Args:
        box (Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk
            array/tensor.
        src (:obj:`Box3DMode`): The source box mode.
        dst (:obj:`Box3DMode`): The target box mode.
        rt_mat (np.ndarray or Tensor, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.
        with_yaw (bool): If ``box`` is an instance of
            :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
            Defaults to True.
        correct_yaw (bool): If the yaw is rotated by rt_mat.
            Defaults to False.

    Returns:
        Sequence[float] or np.ndarray or Tensor or
        :obj:`BaseInstance3DBoxes`: The converted box of the same type.
    """
    # Thin delegation: Box3DMode owns the actual coordinate math.
    return Box3DMode.convert(
        box,
        src,
        dst,
        rt_mat=rt_mat,
        with_yaw=with_yaw,
        correct_yaw=correct_yaw)
@staticmethod
def convert_point(point, src, dst, rt_mat=None):
"""Convert points from `src` mode to `dst` mode.
def convert_point(
point: Union[Sequence[float], np.ndarray, Tensor, BasePoints],
src: 'Coord3DMode',
dst: 'Coord3DMode',
rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
) -> Union[Sequence[float], np.ndarray, Tensor, BasePoints]:
"""Convert points from ``src`` mode to ``dst`` mode.
Args:
point (tuple | list | np.ndarray |
torch.Tensor | :obj:`BasePoints`):
point (Sequence[float] or np.ndarray or Tensor or :obj:`BasePoints`):
Can be a k-tuple, k-list or an Nxk array/tensor.
src (:obj:`CoordMode`): The src Point mode.
dst (:obj:`CoordMode`): The target Point mode.
rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
src (:obj:`Coord3DMode`): The source point mode.
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray or Tensor, optional): The rotation and
translation matrix between different coordinates.
Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Defaults to None. The conversion from ``src`` coordinates to
``dst`` coordinates usually comes along the change of sensors,
e.g., from camera to LiDAR. This requires a transformation
matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor | :obj:`BasePoints`):
The converted point of the same type.
Sequence[float] or np.ndarray or Tensor or :obj:`BasePoints`: The
converted point of the same type.
"""
if src == dst:
return point
......@@ -162,7 +200,7 @@ class Coord3DMode(IntEnum):
single_point = isinstance(point, (list, tuple))
if single_point:
assert len(point) >= 3, (
'CoordMode.convert takes either a k-tuple/list or '
'Coord3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 3')
arr = torch.tensor(point)[None, :]
else:
......@@ -198,7 +236,7 @@ class Coord3DMode(IntEnum):
f'Conversion from Coord3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
if not isinstance(rt_mat, Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
......@@ -225,8 +263,8 @@ class Coord3DMode(IntEnum):
target_type = DepthPoints
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
f'Conversion to {dst} through {original_type} '
'is not supported yet')
return target_type(
arr,
points_dim=arr.size(-1),
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmdet3d.structures.points import BasePoints
from .base_box3d import BaseInstance3DBoxes
......@@ -8,68 +11,54 @@ from .utils import rotation_3d_in_axis
class DepthInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in Depth coordinates.
"""3D boxes of instances in DEPTH coordinates.
Coordinates in Depth:
.. code-block:: none
up z y front (yaw=0.5*pi)
^ ^
| /
| /
0 ------> x right (yaw=0)
up z y front (yaw=0.5*pi)
^ ^
| /
| /
0 ------> x right (yaw=0)
The relative coordinate of bottom center in a Depth box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the positive direction of x axis, and decreases from
the positive direction of x to the positive direction of y.
Also note that rotation of DepthInstance3DBoxes is counterclockwise,
which is reverse to the definition of the yaw angle (clockwise).
A refactor is ongoing to make the three coordinate systems
easier to understand and convert between each other.
and the yaw is around the z axis, thus the rotation axis=2. The yaw is 0 at
the positive direction of x axis, and increases from the positive direction
of x to the positive direction of y.
Attributes:
tensor (torch.Tensor): Float matrix of N x box_dim.
box_dim (int): Integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
tensor (Tensor): Float matrix with shape (N, box_dim).
box_dim (int): Integer indicating the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): If True, the value of yaw will be set to 0 as minmax
boxes.
"""
YAW_AXIS = 2
@property
def gravity_center(self) -> Tensor:
    """Tensor: A tensor with center of each box in shape (N, 3).

    The stored center is bottom-centered; the gravity (geometric) center
    shares x, y and lifts z by half the box height (column 5).
    """
    bottom_center = self.bottom_center
    gravity_center = torch.zeros_like(bottom_center)
    gravity_center[:, :2] = bottom_center[:, :2]
    gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
    return gravity_center
@property
def corners(self):
"""torch.Tensor: Coordinates of corners of all the boxes
in shape (N, 8, 3).
Convert the boxes to corners in clockwise order, in form of
``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
def corners(self) -> Tensor:
"""Convert boxes to corners in clockwise order, in the form of (x0y0z0,
x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
.. code-block:: none
up z
front y ^
/ |
/ |
(x0, y1, z1) + ----------- + (x1, y1, z1)
/| / |
/ | / |
(x0, y0, z1) + ----------- + + (x1, y1, z0)
| / . | /
| / origin | /
(x0, y0, z0) + ----------- + --------> right x
(x1, y0, z0)
up z
front y ^
/ |
/ |
(x0, y1, z1) + ----------- + (x1, y1, z1)
/| / |
/ | / |
(x0, y0, z1) + ----------- + + (x1, y1, z0)
| / . | /
| / origin | /
(x0, y0, z0) + ----------- + --------> right x
(x1, y0, z0)
Returns:
Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
"""
if self.tensor.numel() == 0:
return torch.empty([0, 8, 3], device=self.tensor.device)
......@@ -90,22 +79,27 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
def rotate(self, angle, points=None):
def rotate(
self,
angle: Union[Tensor, np.ndarray, float],
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray], Tuple[
BasePoints, Tensor], None]:
"""Rotate boxes with points (optional) with the given angle or rotation
matrix.
Args:
angle (float | torch.Tensor | np.ndarray):
Rotation angle or rotation matrix.
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
angle (Tensor or np.ndarray or float): Rotation angle or rotation
matrix.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to rotate. Defaults to None.
Returns:
tuple or None: When ``points`` is None, the function returns
None, otherwise it returns the rotated points and the
rotation matrix ``rot_mat_T``.
tuple or None: When ``points`` is None, the function returns None,
otherwise it returns the rotated points and the rotation matrix
``rot_mat_T``.
"""
if not isinstance(angle, torch.Tensor):
if not isinstance(angle, Tensor):
angle = self.tensor.new_tensor(angle)
assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
......@@ -139,7 +133,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)
if points is not None:
if isinstance(points, torch.Tensor):
if isinstance(points, Tensor):
points[:, :3] = points[:, :3] @ rot_mat_T
elif isinstance(points, np.ndarray):
rot_mat_T = rot_mat_T.cpu().numpy()
......@@ -150,19 +144,25 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
raise ValueError
return points, rot_mat_T
def flip(self, bev_direction='horizontal', points=None):
def flip(
self,
bev_direction: str = 'horizontal',
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tensor, np.ndarray, BasePoints, None]:
"""Flip the boxes in BEV along given BEV direction.
In Depth coordinates, it flips x (horizontal) or y (vertical) axis.
In Depth coordinates, it flips the x (horizontal) or y (vertical) axis.
Args:
bev_direction (str, optional): Flip direction
(horizontal or vertical). Defaults to 'horizontal'.
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
bev_direction (str): Direction by which to flip. Can be chosen from
'horizontal' and 'vertical'. Defaults to 'horizontal'.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to flip. Defaults to None.
Returns:
torch.Tensor, numpy.ndarray or None: Flipped points.
Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points``
is None, the function returns None, otherwise it returns the
flipped points.
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
......@@ -175,8 +175,8 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 6] = -self.tensor[:, 6]
if points is not None:
assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
if isinstance(points, (torch.Tensor, np.ndarray)):
assert isinstance(points, (Tensor, np.ndarray, BasePoints))
if isinstance(points, (Tensor, np.ndarray)):
if bev_direction == 'horizontal':
points[:, 0] = -points[:, 0]
elif bev_direction == 'vertical':
......@@ -185,31 +185,41 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
points.flip(bev_direction)
return points
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
               correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Box mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.
        correct_yaw (bool): Whether to convert the yaw angle to the target
            coordinate. Defaults to False.

    Returns:
        :obj:`BaseInstance3DBoxes`: The converted box of the same type in
        the ``dst`` mode.
    """
    # Local import avoids a circular dependency between the box classes
    # and the mode-conversion module.
    from .box_3d_mode import Box3DMode
    return Box3DMode.convert(
        box=self,
        src=Box3DMode.DEPTH,
        dst=dst,
        rt_mat=rt_mat,
        correct_yaw=correct_yaw)
def enlarged_box(self, extra_width):
"""Enlarge the length, width and height boxes.
def enlarged_box(
self, extra_width: Union[float, Tensor]) -> 'DepthInstance3DBoxes':
"""Enlarge the length, width and height of boxes.
Args:
extra_width (float | torch.Tensor): Extra width to enlarge the box.
extra_width (float or Tensor): Extra width to enlarge the box.
Returns:
:obj:`DepthInstance3DBoxes`: Enlarged boxes.
......@@ -220,11 +230,11 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
enlarged_boxes[:, 2] -= extra_width
return self.new_box(enlarged_boxes)
def get_surface_line_center(self):
def get_surface_line_center(self) -> Tuple[Tensor, Tensor]:
"""Compute surface and line center of bounding boxes.
Returns:
torch.Tensor: Surface and line center of bounding boxes.
Tuple[Tensor, Tensor]: Surface and line center of bounding boxes.
"""
obj_size = self.dims
center = self.gravity_center.view(-1, 1, 3)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmdet3d.structures.points import BasePoints
from .base_box3d import BaseInstance3DBoxes
......@@ -14,45 +17,30 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
.. code-block:: none
up z x front (yaw=0)
^ ^
| /
| /
(yaw=0.5*pi) left y <------ 0
up z x front (yaw=0)
^ ^
| /
| /
(yaw=0.5*pi) left y <------ 0
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of y.
A refactor is ongoing to make the three coordinate systems
easier to understand and convert between each other.
and the yaw is around the z axis, thus the rotation axis=2. The yaw is 0 at
the positive direction of x axis, and increases from the positive direction
of x to the positive direction of y.
Attributes:
tensor (torch.Tensor): Float matrix of N x box_dim.
box_dim (int): Integer indicating the dimension of a box.
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
tensor (Tensor): Float matrix with shape (N, box_dim).
box_dim (int): Integer indicating the dimension of a box. Each row is
(x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): If True, the value of yaw will be set to 0 as minmax
boxes.
"""
YAW_AXIS = 2
@property
def gravity_center(self) -> Tensor:
    """Tensor: A tensor with center of each box in shape (N, 3).

    The boxes are bottom-centered, so the gravity (geometric) center is the
    bottom center lifted by half the box height (stored in column 5 of
    ``self.tensor``).
    """
    bottom_center = self.bottom_center
    gravity_center = torch.zeros_like(bottom_center)
    # x, y are shared with the bottom center; only z is shifted.
    gravity_center[:, :2] = bottom_center[:, :2]
    gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
    return gravity_center
@property
def corners(self):
"""torch.Tensor: Coordinates of corners of all the boxes
in shape (N, 8, 3).
Convert the boxes to corners in clockwise order, in form of
``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
def corners(self) -> Tensor:
"""Convert boxes to corners in clockwise order, in the form of (x0y0z0,
x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
.. code-block:: none
......@@ -66,8 +54,11 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
(x0, y0, z1) + ----------- + + (x1, y1, z0)
| / . | /
| / origin | /
left y<-------- + ----------- + (x0, y1, z0)
left y <------- + ----------- + (x0, y1, z0)
(x0, y0, z0)
Returns:
Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
"""
if self.tensor.numel() == 0:
return torch.empty([0, 8, 3], device=self.tensor.device)
......@@ -78,7 +69,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
# use relative origin (0.5, 0.5, 0)
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
......@@ -88,22 +79,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
def rotate(self, angle, points=None):
def rotate(
self,
angle: Union[Tensor, np.ndarray, float],
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray], Tuple[
BasePoints, Tensor], None]:
"""Rotate boxes with points (optional) with the given angle or rotation
matrix.
Args:
angles (float | torch.Tensor | np.ndarray):
Rotation angle or rotation matrix.
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
angle (Tensor or np.ndarray or float): Rotation angle or rotation
matrix.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to rotate. Defaults to None.
Returns:
tuple or None: When ``points`` is None, the function returns
None, otherwise it returns the rotated points and the
rotation matrix ``rot_mat_T``.
tuple or None: When ``points`` is None, the function returns None,
otherwise it returns the rotated points and the rotation matrix
``rot_mat_T``.
"""
if not isinstance(angle, torch.Tensor):
if not isinstance(angle, Tensor):
angle = self.tensor.new_tensor(angle)
assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
......@@ -129,7 +125,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]
if points is not None:
if isinstance(points, torch.Tensor):
if isinstance(points, Tensor):
points[:, :3] = points[:, :3] @ rot_mat_T
elif isinstance(points, np.ndarray):
rot_mat_T = rot_mat_T.cpu().numpy()
......@@ -140,18 +136,25 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
raise ValueError
return points, rot_mat_T
def flip(self, bev_direction='horizontal', points=None):
def flip(
self,
bev_direction: str = 'horizontal',
points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
) -> Union[Tensor, np.ndarray, BasePoints, None]:
"""Flip the boxes in BEV along given BEV direction.
In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
bev_direction (str): Direction by which to flip. Can be chosen from
'horizontal' and 'vertical'. Defaults to 'horizontal'.
points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
Points to flip. Defaults to None.
Returns:
torch.Tensor, numpy.ndarray or None: Flipped points.
Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points``
is None, the function returns None, otherwise it returns the
flipped points.
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
......@@ -164,8 +167,8 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
if points is not None:
assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
if isinstance(points, (torch.Tensor, np.ndarray)):
assert isinstance(points, (Tensor, np.ndarray, BasePoints))
if isinstance(points, (Tensor, np.ndarray)):
if bev_direction == 'horizontal':
points[:, 1] = -points[:, 1]
elif bev_direction == 'vertical':
......@@ -174,22 +177,26 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
points.flip(bev_direction)
return points
def convert_to(self, dst, rt_mat=None, correct_yaw=False):
def convert_to(self,
dst: int,
rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Box3DMode`): the target Box mode
rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
dst (int): The target Box mode.
rt_mat (Tensor or np.ndarray, optional): The rotation and
translation matrix between different coordinates.
Defaults to None.
The conversion from ``src`` coordinates to ``dst`` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
correct_yaw (bool): If convert the yaw angle to the target
Defaults to None. The conversion from ``src`` coordinates to
``dst`` coordinates usually comes along the change of sensors,
e.g., from camera to LiDAR. This requires a transformation
matrix.
correct_yaw (bool): Whether to convert the yaw angle to the target
coordinate. Defaults to False.
Returns:
:obj:`BaseInstance3DBoxes`:
The converted box of the same type in the ``dst`` mode.
:obj:`BaseInstance3DBoxes`: The converted box of the same type in
the ``dst`` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
......@@ -199,11 +206,12 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
rt_mat=rt_mat,
correct_yaw=correct_yaw)
def enlarged_box(self, extra_width):
"""Enlarge the length, width and height boxes.
def enlarged_box(
self, extra_width: Union[float, Tensor]) -> 'LiDARInstance3DBoxes':
"""Enlarge the length, width and height of boxes.
Args:
extra_width (float | torch.Tensor): Extra width to enlarge the box.
extra_width (float or Tensor): Extra width to enlarge the box.
Returns:
:obj:`LiDARInstance3DBoxes`: Enlarged boxes.
......
# Copyright (c) OpenMMLab. All rights reserved.
from logging import warning
from typing import Tuple, Union
import numpy as np
import torch
from torch import Tensor
from mmdet3d.utils.array_converter import array_converter
from mmdet3d.utils import array_converter
@array_converter(apply_to=('val', ))
def limit_period(val: Union[np.ndarray, Tensor],
                 offset: float = 0.5,
                 period: float = np.pi) -> Union[np.ndarray, Tensor]:
    """Limit the value into a period for periodic function.

    Args:
        val (np.ndarray or Tensor): The value to be converted.
        offset (float): Offset to set the value range. Defaults to 0.5.
        period (float): Period of the value. Defaults to np.pi.

    Returns:
        np.ndarray or Tensor: Value in the range of
        [-offset * period, (1 - offset) * period].
    """
    # Subtract the whole number of periods so the remainder falls inside
    # the half-open window defined by ``offset``.
    limited_val = val - torch.floor(val / period + offset) * period
    return limited_val
@array_converter(apply_to=('points', 'angles'))
def rotation_3d_in_axis(points,
angles,
axis=0,
return_mat=False,
clockwise=False):
def rotation_3d_in_axis(
points: Union[np.ndarray, Tensor],
angles: Union[np.ndarray, Tensor, float],
axis: int = 0,
return_mat: bool = False,
clockwise: bool = False
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Tensor, Tensor], np.ndarray,
Tensor]:
"""Rotate points by angles according to axis.
Args:
points (np.ndarray | torch.Tensor | list | tuple ):
Points of shape (N, M, 3).
angles (np.ndarray | torch.Tensor | list | tuple | float):
Vector of angles in shape (N,)
axis (int, optional): The axis to be rotated. Defaults to 0.
return_mat: Whether or not return the rotation matrix (transposed).
Defaults to False.
clockwise: Whether the rotation is clockwise. Defaults to False.
points (np.ndarray or Tensor): Points with shape (N, M, 3).
angles (np.ndarray or Tensor or float): Vector of angles with shape
(N, ).
axis (int): The axis to be rotated. Defaults to 0.
return_mat (bool): Whether or not to return the rotation matrix
(transposed). Defaults to False.
clockwise (bool): Whether the rotation is clockwise. Defaults to False.
Raises:
ValueError: when the axis is not in range [0, 1, 2], it will
raise value error.
ValueError: When the axis is not in range [-3, -2, -1, 0, 1, 2], it
will raise ValueError.
Returns:
(torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3).
Tuple[np.ndarray, np.ndarray] or Tuple[Tensor, Tensor] or np.ndarray or
Tensor: Rotated points with shape (N, M, 3) and rotation matrix with
shape (N, 3, 3).
"""
batch_free = len(points.shape) == 2
if batch_free:
......@@ -57,8 +64,8 @@ def rotation_3d_in_axis(points,
if isinstance(angles, float) or len(angles.shape) == 0:
angles = torch.full(points.shape[:1], angles)
assert len(points.shape) == 3 and len(angles.shape) == 1 \
and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \
assert len(points.shape) == 3 and len(angles.shape) == 1 and \
points.shape[0] == angles.shape[0], 'Incorrect shape of points ' \
f'angles: {points.shape}, {angles.shape}'
assert points.shape[-1] in [2, 3], \
......@@ -89,8 +96,8 @@ def rotation_3d_in_axis(points,
torch.stack([zeros, -rot_sin, rot_cos])
])
else:
raise ValueError(f'axis should in range '
f'[-3, -2, -1, 0, 1, 2], got {axis}')
raise ValueError(
f'axis should in range [-3, -2, -1, 0, 1, 2], got {axis}')
else:
rot_mat_T = torch.stack([
torch.stack([rot_cos, rot_sin]),
......@@ -118,14 +125,15 @@ def rotation_3d_in_axis(points,
@array_converter(apply_to=('boxes_xywhr', ))
def xywhr2xyxyr(boxes_xywhr):
def xywhr2xyxyr(
boxes_xywhr: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
"""Convert a rotated boxes in XYWHR format to XYXYR format.
Args:
boxes_xywhr (torch.Tensor | np.ndarray): Rotated boxes in XYWHR format.
boxes_xywhr (Tensor or np.ndarray): Rotated boxes in XYWHR format.
Returns:
(torch.Tensor | np.ndarray): Converted boxes in XYXYR format.
Tensor or np.ndarray: Converted boxes in XYXYR format.
"""
boxes = torch.zeros_like(boxes_xywhr)
half_w = boxes_xywhr[..., 2] / 2
......@@ -139,16 +147,16 @@ def xywhr2xyxyr(boxes_xywhr):
return boxes
def get_box_type(box_type):
def get_box_type(box_type: str) -> Tuple[type, int]:
"""Get the type and mode of box structure.
Args:
box_type (str): The type of box structure.
The valid value are "LiDAR", "Camera", or "Depth".
box_type (str): The type of box structure. The valid value are "LiDAR",
"Camera" and "Depth".
Raises:
ValueError: A ValueError is raised when `box_type`
does not belong to the three valid types.
ValueError: A ValueError is raised when ``box_type`` does not belong to
the three valid types.
Returns:
tuple: Box type and box mode.
......@@ -166,36 +174,39 @@ def get_box_type(box_type):
box_type_3d = DepthInstance3DBoxes
box_mode_3d = Box3DMode.DEPTH
else:
raise ValueError('Only "box_type" of "camera", "lidar", "depth"'
f' are supported, got {box_type}')
raise ValueError('Only "box_type" of "camera", "lidar", "depth" are '
f'supported, got {box_type}')
return box_type_3d, box_mode_3d
@array_converter(apply_to=('points_3d', 'proj_mat'))
def points_cam2img(points_3d, proj_mat, with_depth=False):
def points_cam2img(points_3d: Union[Tensor, np.ndarray],
proj_mat: Union[Tensor, np.ndarray],
with_depth: bool = False) -> Union[Tensor, np.ndarray]:
"""Project points in camera coordinates to image coordinates.
Args:
points_3d (torch.Tensor | np.ndarray): Points in shape (N, 3)
proj_mat (torch.Tensor | np.ndarray):
Transformation matrix between coordinates.
with_depth (bool, optional): Whether to keep depth in the output.
points_3d (Tensor or np.ndarray): Points in shape (N, 3).
proj_mat (Tensor or np.ndarray): Transformation matrix between
coordinates.
with_depth (bool): Whether to keep depth in the output.
Defaults to False.
Returns:
(torch.Tensor | np.ndarray): Points in image coordinates,
with shape [N, 2] if `with_depth=False`, else [N, 3].
Tensor or np.ndarray: Points in image coordinates with shape [N, 2] if
``with_depth=False``, else [N, 3].
"""
points_shape = list(points_3d.shape)
points_shape[-1] = 1
assert len(proj_mat.shape) == 2, 'The dimension of the projection'\
f' matrix should be 2 instead of {len(proj_mat.shape)}.'
assert len(proj_mat.shape) == 2, \
'The dimension of the projection matrix should be 2 ' \
f'instead of {len(proj_mat.shape)}.'
d1, d2 = proj_mat.shape[:2]
assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (
d1 == 4 and d2 == 4), 'The shape of the projection matrix'\
f' ({d1}*{d2}) is not supported.'
assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or \
(d1 == 4 and d2 == 4), 'The shape of the projection matrix ' \
f'({d1}*{d2}) is not supported.'
if d1 == 3:
proj_mat_expanded = torch.eye(
4, device=proj_mat.device, dtype=proj_mat.dtype)
......@@ -215,18 +226,20 @@ def points_cam2img(points_3d, proj_mat, with_depth=False):
@array_converter(apply_to=('points', 'cam2img'))
def points_img2cam(points, cam2img):
def points_img2cam(
points: Union[Tensor, np.ndarray],
cam2img: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
"""Project points in image coordinates to camera coordinates.
Args:
points (torch.Tensor): 2.5D points in 2D images, [N, 3],
3 corresponds with x, y in the image and depth.
cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be
[3, 3], [3, 4] or [4, 4].
points (Tensor or np.ndarray): 2.5D points in 2D images with shape
[N, 3], 3 corresponds with x, y in the image and depth.
cam2img (Tensor or np.ndarray): Camera intrinsic matrix. The shape can
be [3, 3], [3, 4] or [4, 4].
Returns:
torch.Tensor: points in 3D space. [N, 3],
3 corresponds with x, y, z in 3D space.
Tensor or np.ndarray: Points in 3D space with shape [N, 3], 3
corresponds with x, y, z in 3D space.
"""
assert cam2img.shape[0] <= 4
assert cam2img.shape[1] <= 4
......@@ -260,8 +273,8 @@ def mono_cam_box2vis(cam_box):
Args:
cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate
system before conversion. Could be gt bbox loaded from dataset
or network prediction output.
system before conversion. Could be gt bbox loaded from dataset or
network prediction output.
Returns:
:obj:`CameraInstance3DBoxes`: Box after conversion.
......@@ -269,7 +282,7 @@ def mono_cam_box2vis(cam_box):
warning.warn('DeprecationWarning: The hack of yaw and dimension in the '
'monocular 3D detection on nuScenes has been removed. The '
'function mono_cam_box2vis will be deprecated.')
from . import CameraInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
assert isinstance(cam_box, CameraInstance3DBoxes), \
'input bbox should be CameraInstance3DBoxes!'
......@@ -294,16 +307,16 @@ def mono_cam_box2vis(cam_box):
return cam_box
def get_proj_mat_by_coord_type(img_meta, coord_type):
def get_proj_mat_by_coord_type(img_meta: dict, coord_type: str) -> Tensor:
"""Obtain image features using points.
Args:
img_meta (dict): Meta info.
coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
Can be case-insensitive.
img_meta (dict): Meta information.
coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'. Can be case-
insensitive.
Returns:
torch.Tensor: transformation matrix.
Tensor: Transformation matrix.
"""
coord_type = coord_type.upper()
mapping = {'LIDAR': 'lidar2img', 'DEPTH': 'depth2img', 'CAMERA': 'cam2img'}
......@@ -311,18 +324,16 @@ def get_proj_mat_by_coord_type(img_meta, coord_type):
return img_meta[mapping[coord_type]]
def yaw2local(yaw, loc):
def yaw2local(yaw: Tensor, loc: Tensor) -> Tensor:
"""Transform global yaw to local yaw (alpha in kitti) in camera
coordinates, ranges from -pi to pi.
Args:
yaw (torch.Tensor): A vector with local yaw of each box.
shape: (N, )
loc (torch.Tensor): gravity center of each box.
shape: (N, 3)
yaw (Tensor): A vector with local yaw of each box in shape (N, ).
loc (Tensor): Gravity center of each box in shape (N, 3).
Returns:
torch.Tensor: local yaw (alpha in kitti).
Tensor: Local yaw (alpha in kitti).
"""
local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
......@@ -335,7 +346,7 @@ def yaw2local(yaw, loc):
return local_yaw
def get_lidar2img(cam2img, lidar2cam):
def get_lidar2img(cam2img: Tensor, lidar2cam: Tensor) -> Tensor:
"""Get the projection matrix of lidar2img.
Args:
......@@ -343,7 +354,7 @@ def get_lidar2img(cam2img, lidar2cam):
lidar2cam (torch.Tensor): A 3x3 or 4x4 projection matrix.
Returns:
torch.Tensor: transformation matrix with shape 4x4.
Tensor: Transformation matrix with shape 4x4.
"""
if cam2img.shape == (3, 3):
temp = cam2img.new_zeros(4, 4)
......
......@@ -56,7 +56,7 @@ class Det3DDataSample(DetDataSample):
>>> from mmengine.structures import InstanceData
>>> from mmdet3d.structures import Det3DDataSample
>>> from mmdet3d.structures import BaseInstance3DBoxes
>>> from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes
>>> data_sample = Det3DDataSample()
>>> meta_info = dict(
......@@ -80,15 +80,15 @@ class Det3DDataSample(DetDataSample):
DATA FIELDS
labels_3d: tensor([1, 0, 2, 0, 1])
bboxes_3d: BaseInstance3DBoxes(
tensor([[1.9115e-01, 3.6061e-01, 6.7707e-01, 5.2902e-01, 8.0736e-01, 8.2759e-01, # noqa E501
tensor([[1.9115e-01, 3.6061e-01, 6.7707e-01, 5.2902e-01, 8.0736e-01, 8.2759e-01,
2.4328e-01],
[5.6272e-01, 2.7508e-01, 5.7966e-01, 9.2410e-01, 3.0456e-01, 1.8912e-01, # noqa E501
[5.6272e-01, 2.7508e-01, 5.7966e-01, 9.2410e-01, 3.0456e-01, 1.8912e-01,
3.3176e-01],
[8.1069e-01, 2.8684e-01, 7.7689e-01, 9.2397e-02, 5.5849e-01, 3.8007e-01, # noqa E501
[8.1069e-01, 2.8684e-01, 7.7689e-01, 9.2397e-02, 5.5849e-01, 3.8007e-01,
4.6719e-01],
[6.6346e-01, 4.8005e-01, 5.2318e-02, 4.4137e-01, 4.1163e-01, 8.9339e-01, # noqa E501
[6.6346e-01, 4.8005e-01, 5.2318e-02, 4.4137e-01, 4.1163e-01, 8.9339e-01,
7.2847e-01],
[2.4800e-01, 7.1944e-01, 3.4766e-01, 7.8583e-01, 8.5507e-01, 6.3729e-02, # noqa E501
[2.4800e-01, 7.1944e-01, 3.4766e-01, 7.8583e-01, 8.5507e-01, 6.3729e-02,
7.5161e-05]]))
) at 0x7f7e29de3a00>
) at 0x7f7e2a0e8640>
......@@ -108,8 +108,8 @@ class Det3DDataSample(DetDataSample):
>>> data_sample = Det3DDataSample()
>>> gt_instances_3d_data = dict(
... bboxes_3d=BaseInstance3DBoxes(torch.rand((2, 7))),
... labels_3d=torch.rand(2))
... bboxes_3d=BaseInstance3DBoxes(torch.rand((2, 7))),
... labels_3d=torch.rand(2))
>>> gt_instances_3d = InstanceData(**gt_instances_3d_data)
>>> data_sample.gt_instances_3d = gt_instances_3d
>>> assert 'gt_instances_3d' in data_sample
......@@ -118,8 +118,8 @@ class Det3DDataSample(DetDataSample):
>>> from mmdet3d.structures import PointData
>>> data_sample = Det3DDataSample()
>>> gt_pts_seg_data = dict(
... pts_instance_mask=torch.rand(2),
... pts_semantic_mask=torch.rand(2))
... pts_instance_mask=torch.rand(2),
... pts_semantic_mask=torch.rand(2))
>>> data_sample.gt_pts_seg = PointData(**gt_pts_seg_data)
>>> print(data_sample)
<Det3DDataSample(
......@@ -132,7 +132,7 @@ class Det3DDataSample(DetDataSample):
pts_instance_mask: tensor([0.7363, 0.8096])
) at 0x7f7e2962cc40>
) at 0x7f7e29ff0d60>
"""
""" # noqa: E501
@property
def gt_instances_3d(self) -> InstanceData:
......
......@@ -7,24 +7,25 @@ from .lidar_points import LiDARPoints
__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']
def get_points_type(points_type: str) -> type:
    """Get the class of points according to coordinate type.

    Args:
        points_type (str): The type of points coordinate. The valid value are
            "CAMERA", "LIDAR" and "DEPTH". The comparison is case-insensitive.

    Raises:
        ValueError: If ``points_type`` is not one of the three valid types.

    Returns:
        type: Points type.
    """
    # Normalize once so callers may pass any casing (e.g. 'lidar').
    points_type_upper = points_type.upper()
    if points_type_upper == 'CAMERA':
        points_cls = CameraPoints
    elif points_type_upper == 'LIDAR':
        points_cls = LiDARPoints
    elif points_type_upper == 'DEPTH':
        points_cls = DepthPoints
    else:
        raise ValueError('Only "points_type" of "CAMERA", "LIDAR" and "DEPTH" '
                         f'are supported, got {points_type}')
    return points_cls
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import abstractmethod
from typing import Iterator, Optional, Sequence, Union
import numpy as np
import torch
from torch import Tensor
from ..bbox_3d.utils import rotation_3d_in_axis
from mmdet3d.structures.bbox_3d.utils import rotation_3d_in_axis
class BasePoints(object):
class BasePoints:
"""Base class for Points.
Args:
tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
points_dim (int, optional): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the
meaning of extra dimension. Defaults to None.
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
data with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (bool): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
tensor (Tensor): Float matrix with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...).
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
rotation_axis (int): Default rotation axis for points rotation.
"""
def __init__(self,
             tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
             points_dim: int = 3,
             attribute_dims: Optional[dict] = None) -> None:
    """Initialize the points container.

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The
            points data with shape (N, points_dim).
        points_dim (int): Integer indicating the dimension of a point. Each
            row is (x, y, z, ...). Defaults to 3.
        attribute_dims (dict, optional): Dictionary to indicate the meaning
            of extra dimension. Defaults to None.
    """
    # Keep the device of an incoming Tensor; any other input is built on CPU.
    if isinstance(tensor, Tensor):
        device = tensor.device
    else:
        device = torch.device('cpu')
    tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
    if tensor.numel() == 0:
        # Use reshape, so we don't end up creating a new tensor that does
        # not depend on the inputs (and consequently confuses jit)
        tensor = tensor.reshape((-1, points_dim))
    assert tensor.dim() == 2 and tensor.size(-1) == points_dim, \
        ('The points dimension must be 2 and the length of the last '
         f'dimension must be {points_dim}, but got points with shape '
         f'{tensor.shape}.')
    # Clone so the stored tensor never aliases the caller's data.
    self.tensor = tensor.clone()
    self.points_dim = points_dim
    self.attribute_dims = attribute_dims
    # Default rotation axis for points rotation.
    self.rotation_axis = 0
@property
def coord(self) -> Tensor:
    """Tensor: Coordinates of each point in shape (N, 3)."""
    return self.tensor[:, :3]
@coord.setter
def coord(self, tensor: Union[Tensor, np.ndarray]) -> None:
    """Set the coordinates of each point.

    Args:
        tensor (Tensor or np.ndarray): Coordinates of each point with shape
            (N, 3).

    Raises:
        ValueError: If ``tensor`` cannot be viewed as shape (N, 3).
    """
    try:
        tensor = tensor.reshape(self.shape[0], 3)
    except (RuntimeError, ValueError):  # for torch.Tensor and np.ndarray
        raise ValueError(f'got unexpected shape {tensor.shape}')
    if not isinstance(tensor, Tensor):
        tensor = self.tensor.new_tensor(tensor)
    self.tensor[:, :3] = tensor
@property
def height(self):
"""torch.Tensor:
A vector with height of each point in shape (N, 1), or None."""
def height(self) -> Union[Tensor, None]:
"""Tensor or None: Returns a vector with height of each point in shape
(N, )."""
if self.attribute_dims is not None and \
'height' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['height']]
......@@ -73,13 +85,18 @@ class BasePoints(object):
return None
@height.setter
def height(self, tensor):
"""Set the height of each point."""
def height(self, tensor: Union[Tensor, np.ndarray]) -> None:
"""Set the height of each point.
Args:
tensor (Tensor or np.ndarray): Height of each point with shape
(N, ).
"""
try:
tensor = tensor.reshape(self.shape[0])
except (RuntimeError, ValueError): # for torch.Tensor and np.ndarray
raise ValueError(f'got unexpected shape {tensor.shape}')
if not isinstance(tensor, torch.Tensor):
if not isinstance(tensor, Tensor):
tensor = self.tensor.new_tensor(tensor)
if self.attribute_dims is not None and \
'height' in self.attribute_dims.keys():
......@@ -94,9 +111,9 @@ class BasePoints(object):
self.points_dim += 1
@property
def color(self):
"""torch.Tensor:
A vector with color of each point in shape (N, 3), or None."""
def color(self) -> Union[Tensor, None]:
"""Tensor or None: Returns a vector with color of each point in shape
(N, 3)."""
if self.attribute_dims is not None and \
'color' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['color']]
......@@ -104,15 +121,20 @@ class BasePoints(object):
return None
@color.setter
def color(self, tensor):
"""Set the color of each point."""
def color(self, tensor: Union[Tensor, np.ndarray]) -> None:
"""Set the color of each point.
Args:
tensor (Tensor or np.ndarray): Color of each point with shape
(N, 3).
"""
try:
tensor = tensor.reshape(self.shape[0], 3)
except (RuntimeError, ValueError): # for torch.Tensor and np.ndarray
raise ValueError(f'got unexpected shape {tensor.shape}')
if tensor.max() >= 256 or tensor.min() < 0:
warnings.warn('point got color value beyond [0, 255]')
if not isinstance(tensor, torch.Tensor):
if not isinstance(tensor, Tensor):
tensor = self.tensor.new_tensor(tensor)
if self.attribute_dims is not None and \
'color' in self.attribute_dims.keys():
......@@ -128,32 +150,36 @@ class BasePoints(object):
self.points_dim += 3
@property
def shape(self) -> torch.Size:
    """torch.Size: Shape of points."""
    return self.tensor.shape
def shuffle(self) -> Tensor:
    """Shuffle the points in place.

    Returns:
        Tensor: The shuffled index.
    """
    idx = torch.randperm(self.__len__(), device=self.tensor.device)
    self.tensor = self.tensor[idx]
    return idx
def rotate(self, rotation, axis=None):
def rotate(self,
rotation: Union[Tensor, np.ndarray, float],
axis: Optional[int] = None) -> Tensor:
"""Rotate points with the given rotation matrix or angle.
Args:
rotation (float | np.ndarray | torch.Tensor): Rotation matrix
or angle.
rotation (Tensor or np.ndarray or float): Rotation matrix or angle.
axis (int, optional): Axis to rotate at. Defaults to None.
Returns:
Tensor: Rotation matrix.
"""
if not isinstance(rotation, torch.Tensor):
if not isinstance(rotation, Tensor):
rotation = self.tensor.new_tensor(rotation)
assert rotation.shape == torch.Size([3, 3]) or \
rotation.numel() == 1, f'invalid rotation shape {rotation.shape}'
assert rotation.shape == torch.Size([3, 3]) or rotation.numel() == 1, \
f'invalid rotation shape {rotation.shape}'
if axis is None:
axis = self.rotation_axis
......@@ -171,22 +197,23 @@ class BasePoints(object):
return rot_mat_T
@abstractmethod
def flip(self, bev_direction: str = 'horizontal') -> None:
    """Flip the points along given BEV direction.

    Args:
        bev_direction (str): Flip direction (horizontal or vertical).
            Defaults to 'horizontal'.
    """
    pass
def translate(self, trans_vector):
def translate(self, trans_vector: Union[Tensor, np.ndarray]) -> None:
"""Translate points with the given translation vector.
Args:
trans_vector (np.ndarray, torch.Tensor): Translation
vector of size 3 or nx3.
trans_vector (Tensor or np.ndarray): Translation vector of size 3
or nx3.
"""
if not isinstance(trans_vector, torch.Tensor):
if not isinstance(trans_vector, Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
trans_vector = trans_vector.squeeze(0)
if trans_vector.dim() == 1:
......@@ -200,21 +227,23 @@ class BasePoints(object):
)
self.tensor[:, :3] += trans_vector
def in_range_3d(self, point_range):
def in_range_3d(
self, point_range: Union[Tensor, np.ndarray,
Sequence[float]]) -> Tensor:
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
(x_min, y_min, z_min, x_max, y_max, z_max)
point_range (Tensor or np.ndarray or Sequence[float]): The range of
point (x_min, y_min, z_min, x_max, y_max, z_max).
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burden for simpler cases.
In the original implementation of SECOND, checking whether a box in
the range checks whether the points are in a convex polygon, we try
to reduce the burden for simpler cases.
Returns:
torch.Tensor: A binary vector indicating whether each point is
inside the reference range.
Tensor: A binary vector indicating whether each point is inside the
reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
......@@ -225,20 +254,22 @@ class BasePoints(object):
return in_range_flags
@property
def bev(self) -> Tensor:
    """Tensor: BEV of the points in shape (N, 2)."""
    return self.tensor[:, [0, 1]]
def in_range_bev(self, point_range):
def in_range_bev(
self, point_range: Union[Tensor, np.ndarray,
Sequence[float]]) -> Tensor:
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
point_range (Tensor or np.ndarray or Sequence[float]): The range of
point in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside
the reference range.
Tensor: A binary vector indicating whether each point is inside the
reference range.
"""
in_range_flags = ((self.bev[:, 0] > point_range[0])
& (self.bev[:, 1] > point_range[1])
......@@ -247,25 +278,28 @@ class BasePoints(object):
return in_range_flags
@abstractmethod
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor,
                                      np.ndarray]] = None) -> 'BasePoints':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Point mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.

    Returns:
        :obj:`BasePoints`: The converted point of the same type in the
        ``dst`` mode.
    """
    pass
def scale(self, scale_factor):
def scale(self, scale_factor: float) -> None:
"""Scale the points with horizontal and vertical scaling factors.
Args:
......@@ -273,27 +307,34 @@ class BasePoints(object):
"""
self.tensor[:, :3] *= scale_factor
def __getitem__(self, item):
def __getitem__(
self, item: Union[int, tuple, slice, np.ndarray,
Tensor]) -> 'BasePoints':
"""
Args:
item (int or tuple or slice or np.ndarray or Tensor): Index of
points.
Note:
The following usage are allowed:
1. `new_points = points[3]`:
return a `Points` that contains only one point.
2. `new_points = points[2:10]`:
return a slice of points.
3. `new_points = points[vector]`:
where vector is a torch.BoolTensor with `length = len(points)`.
Nonzero elements in the vector will be selected.
4. `new_points = points[3:11, vector]`:
return a slice of points and attribute dims.
5. `new_points = points[4:12, 2]`:
return a slice of points with single attribute.
1. `new_points = points[3]`: Return a `Points` that contains only
one point.
2. `new_points = points[2:10]`: Return a slice of points.
3. `new_points = points[vector]`: Where vector is a
torch.BoolTensor with `length = len(points)`. Nonzero elements
in the vector will be selected.
4. `new_points = points[3:11, vector]`: Return a slice of points
and attribute dims.
5. `new_points = points[4:12, 2]`: Return a slice of points with
single attribute.
Note that the returned Points might share storage with this Points,
subject to Pytorch's indexing semantics.
subject to PyTorch's indexing semantics.
Returns:
:obj:`BasePoints`: A new object of
:class:`BasePoints` after indexing.
:obj:`BasePoints`: A new object of :class:`BasePoints` after
indexing.
"""
original_type = type(self)
if isinstance(item, int):
......@@ -304,8 +345,8 @@ class BasePoints(object):
elif isinstance(item, tuple) and len(item) == 2:
if isinstance(item[1], slice):
start = 0 if item[1].start is None else item[1].start
stop = self.tensor.shape[1] if \
item[1].stop is None else item[1].stop
stop = self.tensor.shape[1] \
if item[1].stop is None else item[1].stop
step = 1 if item[1].step is None else item[1].step
item = list(item)
item[1] = list(range(start, stop, step))
......@@ -334,7 +375,7 @@ class BasePoints(object):
attribute_dims.pop(key)
else:
attribute_dims = None
elif isinstance(item, (slice, np.ndarray, torch.Tensor)):
elif isinstance(item, (slice, np.ndarray, Tensor)):
p = self.tensor[item]
attribute_dims = self.attribute_dims
else:
......@@ -345,23 +386,23 @@ class BasePoints(object):
return original_type(
p, points_dim=p.shape[1], attribute_dims=attribute_dims)
def __len__(self) -> int:
    """int: Number of points in the current object."""
    return self.tensor.shape[0]
def __repr__(self) -> str:
    """str: Return a string that describes the object."""
    return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, points_list):
def cat(cls, points_list: Sequence['BasePoints']) -> 'BasePoints':
"""Concatenate a list of Points into a single Points.
Args:
points_list (list[:obj:`BasePoints`]): List of points.
points_list (Sequence[:obj:`BasePoints`]): List of points.
Returns:
:obj:`BasePoints`: The concatenated Points.
:obj:`BasePoints`: The concatenated points.
"""
assert isinstance(points_list, (list, tuple))
if len(points_list) == 0:
......@@ -372,32 +413,31 @@ class BasePoints(object):
# so the returned points never share storage with input
cat_points = cls(
torch.cat([p.tensor for p in points_list], dim=0),
points_dim=points_list[0].tensor.shape[1],
points_dim=points_list[0].points_dim,
attribute_dims=points_list[0].attribute_dims)
return cat_points
def to(self, device: Union[str, torch.device], *args,
       **kwargs) -> 'BasePoints':
    """Convert current points to a specific device.

    Args:
        device (str or :obj:`torch.device`): The name of the device.

    Returns:
        :obj:`BasePoints`: A new points object on the specific device.
    """
    # Rebuild through the concrete subclass so the returned object keeps
    # the caller's type, point dimensionality and attribute mapping.
    original_type = type(self)
    return original_type(
        self.tensor.to(device, *args, **kwargs),
        points_dim=self.points_dim,
        attribute_dims=self.attribute_dims)
def clone(self):
"""Clone the Points.
def clone(self) -> 'BasePoints':
"""Clone the points.
Returns:
:obj:`BasePoints`: Box object with the same properties
as self.
:obj:`BasePoints`: Point object with the same properties as self.
"""
original_type = type(self)
return original_type(
......@@ -406,33 +446,36 @@ class BasePoints(object):
attribute_dims=self.attribute_dims)
@property
def device(self) -> torch.device:
    """torch.device: The device the points are on."""
    return self.tensor.device
def __iter__(self) -> Iterator[Tensor]:
    """Yield a point as a Tensor at a time.

    Returns:
        Iterator[Tensor]: A point of shape (points_dim, ).
    """
    yield from self.tensor
def new_point(self, data):
def new_point(
self, data: Union[Tensor, np.ndarray, Sequence[Sequence[float]]]
) -> 'BasePoints':
"""Create a new point object with data.
The new point and its tensor has the similar properties
as self and self.tensor, respectively.
The new point and its tensor has the similar properties as self and
self.tensor, respectively.
Args:
data (torch.Tensor | numpy.array | list): Data to be copied.
data (Tensor or np.ndarray or Sequence[Sequence[float]]): Data to
be copied.
Returns:
:obj:`BasePoints`: A new point object with ``data``,
the object's other properties are similar to ``self``.
:obj:`BasePoints`: A new point object with ``data``, the object's
other properties are similar to ``self``.
"""
new_tensor = self.tensor.new_tensor(data) \
if not isinstance(data, torch.Tensor) else data.to(self.device)
if not isinstance(data, Tensor) else data.to(self.device)
original_type = type(self)
return original_type(
new_tensor,
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Union
import numpy as np
from torch import Tensor
from .base_points import BasePoints
......@@ -6,58 +11,67 @@ class CameraPoints(BasePoints):
"""Points of instances in CAM coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
points_dim (int, optional): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the
meaning of extra dimension. Defaults to None.
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
data with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (bool): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
tensor (Tensor): Float matrix with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...).
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
rotation_axis (int): Default rotation axis for points rotation.
"""
def __init__(self,
             tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
             points_dim: int = 3,
             attribute_dims: Optional[dict] = None) -> None:
    """Initialize points in CAM coordinates.

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The
            points data with shape (N, points_dim).
        points_dim (int): Integer indicating the dimension of a point.
            Each row is (x, y, z, ...). Defaults to 3.
        attribute_dims (dict, optional): Dictionary to indicate the
            meaning of extra dimension. Defaults to None.
    """
    super(CameraPoints, self).__init__(
        tensor, points_dim=points_dim, attribute_dims=attribute_dims)
    # In camera coordinates the gravity-aligned axis is y (axis 1), so
    # rotation is performed about axis 1 by default.
    self.rotation_axis = 1
def flip(self, bev_direction: str = 'horizontal') -> None:
    """Flip the points along given BEV direction in place.

    Args:
        bev_direction (str): Flip direction (horizontal or vertical).
            Defaults to 'horizontal'.
    """
    assert bev_direction in ('horizontal', 'vertical')
    # Camera coordinates: horizontal flip negates x (axis 0), vertical
    # flip negates z (axis 2).
    axis = 0 if bev_direction == 'horizontal' else 2
    self.tensor[:, axis] = -self.tensor[:, axis]
@property
def bev(self) -> Tensor:
    """Tensor: BEV of the points in shape (N, 2).

    In camera coordinates the BEV plane is spanned by the x and z axes.
    """
    return self.tensor[:, [0, 2]]
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor,
                                      np.ndarray]] = None) -> 'BasePoints':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Point mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.

    Returns:
        :obj:`BasePoints`: The converted point of the same type in the
        ``dst`` mode.
    """
    # Imported locally to avoid a circular import with the bbox module.
    from mmdet3d.structures.bbox_3d import Coord3DMode
    return Coord3DMode.convert_point(
        point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Union
import numpy as np
from torch import Tensor
from .base_points import BasePoints
......@@ -6,53 +11,62 @@ class DepthPoints(BasePoints):
"""Points of instances in DEPTH coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
points_dim (int, optional): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the
meaning of extra dimension. Defaults to None.
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
data with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (bool): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
tensor (Tensor): Float matrix with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...).
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
rotation_axis (int): Default rotation axis for points rotation.
"""
def __init__(self,
             tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
             points_dim: int = 3,
             attribute_dims: Optional[dict] = None) -> None:
    """Initialize points in DEPTH coordinates.

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The
            points data with shape (N, points_dim).
        points_dim (int): Integer indicating the dimension of a point.
            Each row is (x, y, z, ...). Defaults to 3.
        attribute_dims (dict, optional): Dictionary to indicate the
            meaning of extra dimension. Defaults to None.
    """
    super(DepthPoints, self).__init__(
        tensor, points_dim=points_dim, attribute_dims=attribute_dims)
    # In depth coordinates the up axis is z (axis 2), so rotation is
    # performed about axis 2 by default.
    self.rotation_axis = 2
def flip(self, bev_direction: str = 'horizontal') -> None:
    """Flip the points along given BEV direction in place.

    Args:
        bev_direction (str): Flip direction (horizontal or vertical).
            Defaults to 'horizontal'.
    """
    assert bev_direction in ('horizontal', 'vertical')
    # Depth coordinates: horizontal flip negates x (axis 0), vertical
    # flip negates y (axis 1).
    axis = 0 if bev_direction == 'horizontal' else 1
    self.tensor[:, axis] = -self.tensor[:, axis]
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor,
                                      np.ndarray]] = None) -> 'BasePoints':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Point mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.

    Returns:
        :obj:`BasePoints`: The converted point of the same type in the
        ``dst`` mode.
    """
    # Imported locally to avoid a circular import with the bbox module.
    from mmdet3d.structures.bbox_3d import Coord3DMode
    return Coord3DMode.convert_point(
        point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Union
import numpy as np
from torch import Tensor
from .base_points import BasePoints
......@@ -6,53 +11,62 @@ class LiDARPoints(BasePoints):
"""Points of instances in LIDAR coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
points_dim (int, optional): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the
meaning of extra dimension. Defaults to None.
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
data with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...). Defaults to 3.
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (bool): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
tensor (Tensor): Float matrix with shape (N, points_dim).
points_dim (int): Integer indicating the dimension of a point. Each row
is (x, y, z, ...).
attribute_dims (dict, optional): Dictionary to indicate the meaning of
extra dimension. Defaults to None.
rotation_axis (int): Default rotation axis for points rotation.
"""
def __init__(self,
             tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
             points_dim: int = 3,
             attribute_dims: Optional[dict] = None) -> None:
    """Initialize points in LIDAR coordinates.

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The
            points data with shape (N, points_dim).
        points_dim (int): Integer indicating the dimension of a point.
            Each row is (x, y, z, ...). Defaults to 3.
        attribute_dims (dict, optional): Dictionary to indicate the
            meaning of extra dimension. Defaults to None.
    """
    super(LiDARPoints, self).__init__(
        tensor, points_dim=points_dim, attribute_dims=attribute_dims)
    # In LiDAR coordinates the up axis is z (axis 2), so rotation is
    # performed about axis 2 by default.
    self.rotation_axis = 2
def flip(self, bev_direction: str = 'horizontal') -> None:
    """Flip the points along given BEV direction in place.

    Args:
        bev_direction (str): Flip direction (horizontal or vertical).
            Defaults to 'horizontal'.
    """
    assert bev_direction in ('horizontal', 'vertical')
    # LiDAR coordinates: horizontal flip negates y (axis 1), vertical
    # flip negates x (axis 0).
    axis = 1 if bev_direction == 'horizontal' else 0
    self.tensor[:, axis] = -self.tensor[:, axis]
def convert_to(self,
               dst: int,
               rt_mat: Optional[Union[Tensor,
                                      np.ndarray]] = None) -> 'BasePoints':
    """Convert self to ``dst`` mode.

    Args:
        dst (int): The target Point mode.
        rt_mat (Tensor or np.ndarray, optional): The rotation and
            translation matrix between different coordinates.
            Defaults to None. The conversion from ``src`` coordinates to
            ``dst`` coordinates usually comes along the change of sensors,
            e.g., from camera to LiDAR. This requires a transformation
            matrix.

    Returns:
        :obj:`BasePoints`: The converted point of the same type in the
        ``dst`` mode.
    """
    # Imported locally to avoid a circular import with the bbox module.
    from mmdet3d.structures.bbox_3d import Coord3DMode
    return Coord3DMode.convert_point(
        point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
# Copyright (c) OpenMMLab. All rights reserved.
import functools
from inspect import getfullargspec
from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Tuple, Type, Union
import numpy as np
import torch
TemplateArrayType = Union[tuple, list, int, float, np.ndarray, torch.Tensor]
OptArrayType = Optional[Union[np.ndarray, torch.Tensor]]
TemplateArrayType = Union[np.ndarray, torch.Tensor, list, tuple, int, float]
def array_converter(to_torch: bool = True,
......@@ -16,37 +15,36 @@ def array_converter(to_torch: bool = True,
recover: bool = True) -> Callable:
"""Wrapper function for data-type agnostic processing.
First converts input arrays to PyTorch tensors or NumPy ndarrays
for middle calculation, then convert output to original data-type if
`recover=True`.
First converts input arrays to PyTorch tensors or NumPy arrays for middle
calculation, then convert output to original data-type if `recover=True`.
Args:
to_torch (bool): Whether convert to PyTorch tensors
for middle calculation. Defaults to True.
apply_to (Tuple[str, ...]): The arguments to which we apply
data-type conversion. Defaults to an empty tuple.
template_arg_name_ (str, optional): Argument serving as the template (
return arrays should have the same dtype and device
as the template). Defaults to None. If None, we will use the
first argument in `apply_to` as the template argument.
recover (bool): Whether or not recover the wrapped function
outputs to the `template_arg_name_` type. Defaults to True.
to_torch (bool): Whether to convert to PyTorch tensors for middle
calculation. Defaults to True.
apply_to (Tuple[str]): The arguments to which we apply data-type
conversion. Defaults to an empty tuple.
template_arg_name_ (str, optional): Argument serving as the template
(return arrays should have the same dtype and device as the
template). Defaults to None. If None, we will use the first
argument in `apply_to` as the template argument.
recover (bool): Whether or not to recover the wrapped function outputs
to the `template_arg_name_` type. Defaults to True.
Raises:
ValueError: When template_arg_name_ is not among all args, or
when apply_to contains an arg which is not among all args,
a ValueError will be raised. When the template argument or
an argument to convert is a list or tuple, and cannot be
converted to a NumPy array, a ValueError will be raised.
TypeError: When the type of the template argument or
an argument to convert does not belong to the above range,
or the contents of such an list-or-tuple-type argument
do not share the same data type, a TypeError is raised.
ValueError: When template_arg_name_ is not among all args, or when
apply_to contains an arg which is not among all args, a ValueError
will be raised. When the template argument or an argument to
convert is a list or tuple, and cannot be converted to a NumPy
array, a ValueError will be raised.
TypeError: When the type of the template argument or an argument to
convert does not belong to the above range, or the contents of such
a list-or-tuple-type argument do not share the same data type, a
TypeError will be raised.
Returns:
(function): wrapped function.
Callable: Wrapped function.
Example:
Examples:
>>> import torch
>>> import numpy as np
>>>
......@@ -67,7 +65,7 @@ def array_converter(to_torch: bool = True,
>>> def simple_add(a, b):
>>> return a + b
>>>
>>> simple_add()
>>> simple_add(a, b)
>>>
>>> # Use torch funcs for floor(a) if flag=True else ceil(a),
>>> # and return the torch tensor
......@@ -126,8 +124,8 @@ def array_converter(to_torch: bool = True,
# inspect apply_to
for arg_to_apply in apply_to:
if arg_to_apply not in all_arg_names:
raise ValueError(f'{arg_to_apply} is not '
f'an argument of {func_name}')
raise ValueError(
f'{arg_to_apply} is not an argument of {func_name}')
new_args = []
new_kwargs = {}
......@@ -207,8 +205,8 @@ class ArrayConverter:
"""Utility class for data-type agnostic processing.
Args:
template_array (tuple | list | int | float | np.ndarray |
torch.Tensor, optional): template array. Defaults to None.
template_array (np.ndarray or torch.Tensor or list or tuple or int or
float, optional): Template array. Defaults to None.
"""
SUPPORTED_NON_ARRAY_TYPES = (int, float, np.int8, np.int16, np.int32,
np.int64, np.uint8, np.uint16, np.uint32,
......@@ -223,15 +221,15 @@ class ArrayConverter:
"""Set template array.
Args:
array (tuple | list | int | float | np.ndarray | torch.Tensor):
Template array.
array (np.ndarray or torch.Tensor or list or tuple or int or
float): Template array.
Raises:
ValueError: If input is list or tuple and cannot be converted to
to a NumPy array, a ValueError is raised.
TypeError: If input type does not belong to the above range,
or the contents of a list or tuple do not share the
same data type, a TypeError is raised.
ValueError: If input is list or tuple and cannot be converted to a
NumPy array, a ValueError is raised.
TypeError: If input type does not belong to the above range, or the
contents of a list or tuple do not share the same data type, a
TypeError is raised.
"""
self.array_type = type(array)
self.is_num = False
......@@ -249,41 +247,40 @@ class ArrayConverter:
raise TypeError
self.dtype = array.dtype
except (ValueError, TypeError):
print(f'The following list cannot be converted to'
f' a numpy array of supported dtype:\n{array}')
print('The following list cannot be converted to a numpy '
f'array of supported dtype:\n{array}')
raise
elif isinstance(array, self.SUPPORTED_NON_ARRAY_TYPES):
elif isinstance(array, (int, float)):
self.array_type = np.ndarray
self.is_num = True
self.dtype = np.dtype(type(array))
else:
raise TypeError(f'Template type {self.array_type}'
f' is not supported.')
raise TypeError(
f'Template type {self.array_type} is not supported.')
def convert(
self,
input_array: TemplateArrayType,
target_type: Optional[type] = None,
target_array: OptArrayType = None
self,
input_array: TemplateArrayType,
target_type: Optional[Type] = None,
target_array: Optional[Union[np.ndarray, torch.Tensor]] = None
) -> Union[np.ndarray, torch.Tensor]:
"""Convert input array to target data type.
Args:
input_array (tuple | list | int | float | np.ndarray |
torch.Tensor): Input array.
target_type (:class:`np.ndarray` or :class:`torch.Tensor`,
optional): Type to which input array is converted.
Defaults to None.
target_array (np.ndarray | torch.Tensor, optional):
Template array to which input array is converted.
input_array (np.ndarray or torch.Tensor or list or tuple or int or
float): Input array.
target_type (Type, optional): Type to which input array is
converted. It should be `np.ndarray` or `torch.Tensor`.
Defaults to None.
target_array (np.ndarray or torch.Tensor, optional): Template array
to which input array is converted. Defaults to None.
Raises:
ValueError: If input is list or tuple and cannot be converted to
to a NumPy array, a ValueError is raised.
TypeError: If input type does not belong to the above range,
or the contents of a list or tuple do not share the
same data type, a TypeError is raised.
ValueError: If input is list or tuple and cannot be converted to a
NumPy array, a ValueError is raised.
TypeError: If input type does not belong to the above range, or the
contents of a list or tuple do not share the same data type, a
TypeError is raised.
Returns:
np.ndarray or torch.Tensor: The converted array.
......@@ -294,8 +291,8 @@ class ArrayConverter:
if input_array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES:
raise TypeError
except (ValueError, TypeError):
print(f'The input cannot be converted to'
f' a single-type numpy array:\n{input_array}')
print('The input cannot be converted to a single-type numpy '
f'array:\n{input_array}')
raise
elif isinstance(input_array, self.SUPPORTED_NON_ARRAY_TYPES):
input_array = np.array(input_array)
......@@ -328,14 +325,14 @@ class ArrayConverter:
def recover(
self, input_array: Union[np.ndarray, torch.Tensor]
) -> Union[np.ndarray, torch.Tensor]:
) -> Union[np.ndarray, torch.Tensor, int, float]:
"""Recover input type to original array type.
Args:
input_array (np.ndarray | torch.Tensor): Input array.
input_array (np.ndarray or torch.Tensor): Input array.
Returns:
np.ndarray or torch.Tensor: Converted array.
np.ndarray or torch.Tensor or int or float: Converted array.
"""
assert isinstance(input_array, (np.ndarray, torch.Tensor)), \
'invalid input array type'
......
......@@ -4,15 +4,15 @@ __version__ = '1.1.0'
short_version = __version__
def parse_version_info(version_str):
def parse_version_info(version_str: str) -> tuple:
"""Parse a version string into a tuple.
Args:
version_str (str): The version string.
Returns:
tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
(1, 3, 0), and "2.0.0rc4" is parsed into (2, 0, 0, 'rc4').
tuple: The version info, e.g., "1.3.0" is parsed into (1, 3, 0), and
"2.0.0rc4" is parsed into (2, 0, 0, 'rc4').
"""
version_info = []
for x in version_str.split('.'):
......
......@@ -1772,10 +1772,10 @@ def test_points_in_boxes():
[1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1], [0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 1],
[0, 0, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]],
[0, 0, 1, 0, 1, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0],
[1, 0, 0, 1, 0, 0], [1, 0, 0, 1, 0, 0]],
dtype=torch.int32).cuda()
assert point_indices.shape == torch.Size([23, 6])
assert (point_indices == expected_point_indices).all()
......@@ -1785,8 +1785,8 @@ def test_points_in_boxes():
point_indices = cam_boxes.points_in_boxes_part(cam_pts)
expected_point_indices = torch.tensor([
0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 3, -1, -1, 2, 3, 3, 2, 2, 3,
0, 0
0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1, 2, -1, 2, 5, 2,
-1, 0, 0
],
dtype=torch.int32).cuda()
assert point_indices.shape == torch.Size([23])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment