OpenDAS / mmdetection3d / Commits / b4b9af6b

Unverified commit b4b9af6b, authored Apr 24, 2023 by Xiang Xu, committed via GitHub on Apr 24, 2023.

Add typehints for `data structures` (#2406)

* add typehint
* fix UT
* update docs
Parent commit: a65171ab

Showing 16 changed files with 1133 additions and 863 deletions (+1133, -863).
mmdet3d/structures/bbox_3d/base_box3d.py        +259  -190
mmdet3d/structures/bbox_3d/box_3d_mode.py        +31   -22
mmdet3d/structures/bbox_3d/cam_box3d.py         +140   -98
mmdet3d/structures/bbox_3d/coord_3d_mode.py      +96   -58
mmdet3d/structures/bbox_3d/depth_box3d.py        +86   -76
mmdet3d/structures/bbox_3d/lidar_box3d.py        +67   -59
mmdet3d/structures/bbox_3d/utils.py              +84   -73
mmdet3d/structures/det3d_data_sample.py          +11   -11
mmdet3d/structures/points/__init__.py            +10    -9
mmdet3d/structures/points/base_points.py        +168  -125
mmdet3d/structures/points/cam_points.py          +38   -24
mmdet3d/structures/points/depth_points.py        +36   -22
mmdet3d/structures/points/lidar_points.py        +36   -22
mmdet3d/utils/array_converter.py                 +62   -65
mmdet3d/version.py                                +3    -3
tests/test_structures/test_bbox/test_box3d.py     +6    -6
mmdet3d/structures/bbox_3d/base_box3d.py (view file @ b4b9af6b)

This diff is collapsed and not shown here.
mmdet3d/structures/bbox_3d/box_3d_mode.py (view file @ b4b9af6b)

The module now imports the typing helpers and `Tensor` directly:

    # Copyright (c) OpenMMLab. All rights reserved.
    from enum import IntEnum, unique
    from typing import Optional, Sequence, Union

    import numpy as np
    import torch
    from torch import Tensor

    from .base_box3d import BaseInstance3DBoxes
    from .cam_box3d import CameraInstance3DBoxes
    ...

In the `Box3DMode` docstring, the raw-string prefix is dropped, "Coordinates in camera:" becomes "Coordinates in Camera:", and "Coordinates in Depth mode:" becomes "Coordinates in Depth:".

`Box3DMode.convert` gains full annotations and a docstring that matches them:

    @staticmethod
    def convert(
        box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
        src: 'Box3DMode',
        dst: 'Box3DMode',
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
        with_yaw: bool = True,
        correct_yaw: bool = False
    ) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
        """Convert boxes from ``src`` mode to ``dst`` mode.

        Args:
            box (Sequence[float] or np.ndarray or Tensor or
                :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an
                Nxk array/tensor, where k = 7.
            src (:obj:`Box3DMode`): The source box mode.
            dst (:obj:`Box3DMode`): The target box mode.
            rt_mat (np.ndarray or Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
                The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.
            with_yaw (bool): If ``box`` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
            correct_yaw (bool): If the yaw is rotated by rt_mat.
                Defaults to False.

        Returns:
            Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes`: The converted box of the same type.
        """
        if src == dst:
            return box

Later in the method, the check against `torch.Tensor` now uses the imported name:

    if not isinstance(rt_mat, Tensor):
        rt_mat = arr.new_tensor(rt_mat)
    if rt_mat.size(1) == 4:
        extended_xyz = torch.cat(
    ...

The only other changes in the body are whitespace fixes in the 'Conversion from Box3DMode {src} to {dst} is not supported yet' and 'Conversion to {dst} through {original_type} is not supported yet' error messages; the final `return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)` / `return arr` branches are unchanged.
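To make the annotated signature concrete, here is a minimal usage sketch. The top-level `mmdet3d.structures` import path and the fallback to a built-in axis permutation when `rt_mat` is None are assumptions about the surrounding package, not part of this diff; in a real pipeline the calibration matrix of the dataset would be passed explicitly.

    import torch
    from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes

    # One (x, y, z, x_size, y_size, z_size, yaw) box in LiDAR coordinates.
    lidar_boxes = LiDARInstance3DBoxes(
        torch.tensor([[1.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3]]))

    # Convert to camera mode; the return type follows the input type, so a
    # BaseInstance3DBoxes subclass comes back as CameraInstance3DBoxes.
    cam_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
    print(type(cam_boxes).__name__, cam_boxes.tensor.shape)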
mmdet3d/structures/bbox_3d/cam_box3d.py (view file @ b4b9af6b)

The imports gain `from typing import Optional, Sequence, Tuple, Union` and `from torch import Tensor` alongside the existing `numpy`, `torch`, `BasePoints` and `BaseInstance3DBoxes` imports.

In the `CameraInstance3DBoxes` docstring, "Coordinates in camera:" becomes "Coordinates in Camera:", the yaw description is re-wrapped ("The yaw is 0 at the positive direction of x axis, and decreases from the positive direction of x to the positive direction of z."), the `with_yaw` attribute is now described as producing axis-aligned boxes tightly enclosing the original boxes when yaw is dropped, and an Args section is added:

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
            data with shape (N, box_dim).
        box_dim (int): Number of the dimension of a box. Each row is
            (x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
        with_yaw (bool): Whether the box is with yaw rotation. If False, the
            value of yaw will be set to 0 as minmax boxes. Defaults to True.
        origin (Tuple[float]): Relative position of the box origin.
            Defaults to (0.5, 1.0, 0.5). This will guide the box be converted
            to (0.5, 1.0, 0.5) mode.

    Attributes:
        tensor (Tensor): Float matrix with shape (N, box_dim).
        box_dim (int): Integer indicating the dimension of a box. Each row is
            (x, y, z, x_size, y_size, z_size, yaw, ...).

`__init__` is annotated, the empty-tensor branch is simplified, and the shape assertion gets a descriptive message:

    YAW_AXIS = 1

    def __init__(
        self,
        tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
        box_dim: int = 7,
        with_yaw: bool = True,
        origin: Tuple[float, float, float] = (0.5, 1.0, 0.5)
    ) -> None:
        if isinstance(tensor, Tensor):
            device = tensor.device
        else:
            device = torch.device('cpu')
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
        if tensor.numel() == 0:
            # Use reshape, so we don't end up creating a new tensor that does
            # not depend on the inputs (and consequently confuses jit)
            tensor = tensor.reshape((-1, box_dim))
        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
            ('The box dimension must be 2 and the length of the last '
             f'dimension must be {box_dim}, but got boxes with shape '
             f'{tensor.shape}.')

        if tensor.shape[-1] == 6:
            # If the dimension of boxes is 6, we expand box_dim by padding 0
            # as a fake yaw and set with_yaw to False
            assert box_dim == 6
            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
            tensor = torch.cat((tensor, fake_rot), dim=-1)
    ...
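As a quick illustration of the constructor behaviour above, a short sketch (the top-level `mmdet3d.structures` import path is assumed from the package layout):

    import torch
    from mmdet3d.structures import CameraInstance3DBoxes

    # A 6-dim box (x, y, z, x_size, y_size, z_size) without a yaw column.
    boxes = CameraInstance3DBoxes(
        torch.tensor([[0.0, 1.5, 10.0, 1.8, 1.6, 4.0]]), box_dim=6)

    # A zero yaw is padded as the 7th column and with_yaw is switched off.
    print(boxes.tensor.shape)  # torch.Size([1, 7])
    print(boxes.with_yaw)      # False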
@@ -73,31 +91,27 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):

The property getters gain return annotations and their docstrings drop the `torch.` prefix:

    @property
    def height(self) -> Tensor:
        """Tensor: A vector with height of each box in shape (N, )."""
        return self.tensor[:, 4]

    @property
    def top_height(self) -> Tensor:
        """Tensor: A vector with top height of each box in shape (N, )."""
        # the positive direction is down rather than up
        return self.bottom_height - self.height

    @property
    def bottom_height(self) -> Tensor:
        """Tensor: A vector with bottom height of each box in shape (N, )."""
        return self.tensor[:, 1]

    @property
    def local_yaw(self) -> Tensor:
        """Tensor: A vector with local yaw of each box in shape (N, ).
        local_yaw equals to alpha in kitti, which is commonly used in monocular
        3D object detection task, so only :obj:`CameraInstance3DBoxes` has the
        property."""
        yaw = self.yaw
        loc = self.gravity_center
        local_yaw = yaw2local(yaw, loc)
        ...
        return local_yaw

    @property
    def gravity_center(self) -> Tensor:
        """Tensor: A tensor with center of each box in shape (N, 3)."""
        bottom_center = self.bottom_center
        gravity_center = torch.zeros_like(bottom_center)
        gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
        ...
        return gravity_center

The `corners` docstring is rewritten to open with "Convert boxes to corners in clockwise order, in the form of (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)." and to close with an explicit Returns section ("Tensor: A tensor with 8 corners of each box in shape (N, 8, 3)."); in its ASCII diagram the axis label is now "-------> right x", and the relative-origin comment reads "# use relative origin (0.5, 1, 0.5)". The bodies are unchanged apart from the annotations:

    @property
    def corners(self) -> Tensor:
        ...
        if self.tensor.numel() == 0:
            return torch.empty([0, 8, 3], device=self.tensor.device)
        ...
        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
        # use relative origin (0.5, 1, 0.5)
        corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
        ...
        return corners

    @property
    def bev(self) -> Tensor:
        """Tensor: 2D BEV box of each box with rotation in XYWHR format, in
        shape (N, 5)."""
        bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()
        # positive direction of the gravity axis
        # in cam coord system points to the earth
        ...
        bev[:, -1] = -bev[:, -1]
        return bev
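The annotated properties are plain accessors, so a short read-only sketch is enough to show the shapes involved (again assuming the `mmdet3d.structures` re-export):

    import torch
    from mmdet3d.structures import CameraInstance3DBoxes

    boxes = CameraInstance3DBoxes(
        torch.tensor([[0.0, 1.5, 10.0, 1.8, 1.6, 4.0, 0.2],
                      [3.0, 1.4, 15.0, 1.7, 1.5, 3.8, -0.4]]))

    print(boxes.height.shape)          # torch.Size([2])
    print(boxes.gravity_center.shape)  # torch.Size([2, 3])
    print(boxes.corners.shape)         # torch.Size([2, 8, 3])
    print(boxes.bev.shape)             # torch.Size([2, 5])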
`rotate`, `flip`, `height_overlaps`, `convert_to`, `points_in_boxes_part` and `points_in_boxes_all` are annotated, and their docstrings switch from the "torch.Tensor | np.ndarray" style to "Tensor or np.ndarray":

    def rotate(
        self,
        angle: Union[Tensor, np.ndarray, float],
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
               Tuple[BasePoints, Tensor], None]:

    def flip(
        self,
        bev_direction: str = 'horizontal',
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tensor, np.ndarray, BasePoints, None]:

    @classmethod
    def height_overlaps(cls, boxes1: 'CameraInstance3DBoxes',
                        boxes2: 'CameraInstance3DBoxes') -> Tensor:

    def convert_to(self,
                   dst: int,
                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
                   correct_yaw: bool = False) -> 'BaseInstance3DBoxes':

    def points_in_boxes_part(
            self,
            points: Tensor,
            boxes_override: Optional[Tensor] = None) -> Tensor:

    def points_in_boxes_all(
            self,
            points: Tensor,
            boxes_override: Optional[Tensor] = None) -> Tensor:

Other changes in these methods:

* Inside `rotate` and `flip`, `isinstance(..., torch.Tensor)` becomes `isinstance(..., Tensor)`; the bodies are otherwise unchanged.
* The `rotate` docstring documents "angle (Tensor or np.ndarray or float): Rotation angle or rotation matrix." and "points (Tensor or np.ndarray or :obj:`BasePoints`, optional): Points to rotate. Defaults to None."
* The `flip` docstring spells out the valid directions ("Direction by which to flip. Can be chosen from 'horizontal' and 'vertical'. Defaults to 'horizontal'.") and the return value ("Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points`` is None, the function returns None, otherwise it returns the flipped points.").
* `height_overlaps` drops its unused `mode='iou'` argument and the matching docstring line; the return is now described as "Tensor: Calculated height overlap of the boxes."
* The `convert_to` docstring documents `dst` as an int and `rt_mat` as "Tensor or np.ndarray, optional".
* In `points_in_boxes_part` and `points_in_boxes_all`, the internal conversion now passes `is_point=False`:

    boxes_lidar = Coord3DMode.convert(
        self.tensor,
        Coord3DMode.CAM,
        Coord3DMode.LIDAR,
        is_point=False)

  and the return docstrings are clarified to "Tensor: The index of the first box that each point is in with shape (M, ). Default value is -1 (if the point is not enclosed by any box)." and "Tensor: The index of all boxes in which each point is with shape (M, T)." respectively.
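`height_overlaps` only relies on the top and bottom heights, so it can be sketched on CPU; the values below are purely illustrative and the (N, M) pairwise output shape is inferred from the method's behaviour rather than stated in this diff:

    import torch
    from mmdet3d.structures import CameraInstance3DBoxes

    # In camera coordinates y points down, so tensor[:, 1] is the bottom height.
    boxes1 = CameraInstance3DBoxes(
        torch.tensor([[0.0, 1.5, 10.0, 1.8, 1.6, 4.0, 0.0]]))
    boxes2 = CameraInstance3DBoxes(
        torch.tensor([[0.2, 1.0, 10.5, 1.8, 1.2, 4.0, 0.0]]))

    overlap = CameraInstance3DBoxes.height_overlaps(boxes1, boxes2)
    print(overlap.shape)  # expected: torch.Size([1, 1]), pairwise N x M overlaps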
mmdet3d/structures/bbox_3d/coord_3d_mode.py (view file @ b4b9af6b)

The imports add `from typing import Optional, Sequence, Union` and `from torch import Tensor` next to the existing `numpy`, `torch` and point-class imports. The `Coord3DMode` docstring gets the same wording pass as `Box3DMode`: the raw-string prefix is dropped ("Enum of different ways to represent a box and point cloud."), "Coordinates in camera:" becomes "Coordinates in Camera:", and "Coordinates in Depth mode:" becomes "Coordinates in Depth:".

The three static converters are annotated, and `convert`/`convert_box` gain a `correct_yaw` argument that is forwarded (together with `with_yaw`) to `Box3DMode.convert`:

    @staticmethod
    def convert(input: Union[Sequence[float], np.ndarray, Tensor,
                             BaseInstance3DBoxes, BasePoints],
                src: Union[Box3DMode, 'Coord3DMode'],
                dst: Union[Box3DMode, 'Coord3DMode'],
                rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
                with_yaw: bool = True,
                correct_yaw: bool = False,
                is_point: bool = True):
        """Convert boxes or points from ``src`` mode to ``dst`` mode."""

    @staticmethod
    def convert_box(
        box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
        src: Box3DMode,
        dst: Box3DMode,
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
        with_yaw: bool = True,
        correct_yaw: bool = False
    ) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
        """Convert boxes from ``src`` mode to ``dst`` mode."""
        return Box3DMode.convert(
            box,
            src,
            dst,
            rt_mat=rt_mat,
            with_yaw=with_yaw,
            correct_yaw=correct_yaw)

    @staticmethod
    def convert_point(
        point: Union[Sequence[float], np.ndarray, Tensor, BasePoints],
        src: 'Coord3DMode',
        dst: 'Coord3DMode',
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
    ) -> Union[Sequence[float], np.ndarray, Tensor, BasePoints]:
        """Convert points from ``src`` mode to ``dst`` mode."""
        if src == dst:
            return point

The Args/Returns sections follow the same "Sequence[float] or np.ndarray or Tensor or ..." style as in box_3d_mode.py, and `convert` dispatches to `convert_box`/`convert_point` exactly as before, but with `correct_yaw` passed through for boxes and the tuple/list/array branch checking against the imported `Tensor`. Further down, the assertion message now reads 'Coord3DMode.convert takes either a k-tuple/list or an Nxk array/tensor, where k >= 3' instead of 'CoordMode.convert ...', and the `isinstance(rt_mat, torch.Tensor)` check becomes `isinstance(rt_mat, Tensor)` ahead of the final `return target_type(arr, points_dim=arr.size(-1), ...)`.
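A short sketch of the point-conversion path (the `mmdet3d.structures` re-exports and the fallback to a default axis permutation when `rt_mat` is None are assumptions about the surrounding package):

    import torch
    from mmdet3d.structures import Coord3DMode, LiDARPoints

    # Three LiDAR points (x, y, z).
    points = LiDARPoints(torch.tensor([[10.0, 2.0, -1.0],
                                       [5.0, -3.0, 0.5],
                                       [8.0, 0.0, -1.5]]))

    # Convert the wrapped points into the camera coordinate system; the result
    # comes back as CameraPoints because the input was a BasePoints subclass.
    cam_points = Coord3DMode.convert_point(points, Coord3DMode.LIDAR,
                                           Coord3DMode.CAM)
    print(type(cam_points).__name__, cam_points.tensor.shape)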
mmdet3d/structures/bbox_3d/depth_box3d.py (view file @ b4b9af6b)

The imports add `from typing import Optional, Tuple, Union` and `from torch import Tensor`. In the `DepthInstance3DBoxes` docstring, "3D boxes of instances in Depth coordinates." becomes "3D boxes of instances in DEPTH coordinates.", the yaw description is corrected to "The yaw is 0 at the positive direction of x axis, and increases from the positive direction of x to the positive direction of y." (it previously said "decreases"), and the notes about counterclockwise rotation and the ongoing coordinate-system refactor are removed. The Attributes section now reads "tensor (Tensor): Float matrix with shape (N, box_dim)." and "box_dim (int): Integer indicating the dimension of a box."

The duplicated `gravity_center` property is deleted (the base-class implementation is used instead), and the remaining methods are annotated:

    @property
    def corners(self) -> Tensor:
        """Convert boxes to corners in clockwise order, in the form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
        ...
        Returns:
            Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
        """

    def rotate(
        self,
        angle: Union[Tensor, np.ndarray, float],
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
               Tuple[BasePoints, Tensor], None]:

    def flip(
        self,
        bev_direction: str = 'horizontal',
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tensor, np.ndarray, BasePoints, None]:

    def convert_to(self,
                   dst: int,
                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
                   correct_yaw: bool = False) -> 'BaseInstance3DBoxes':

    def enlarged_box(
            self, extra_width: Union[float, Tensor]) -> 'DepthInstance3DBoxes':

    def get_surface_line_center(self) -> Tuple[Tensor, Tensor]:

The docstring updates mirror cam_box3d.py: "Tensor or np.ndarray" parameter types, the flip direction spelled out as 'horizontal'/'vertical' ("In Depth coordinates, it flips the x (horizontal) or y (vertical) axis."), and `isinstance(..., torch.Tensor)` replaced by `isinstance(..., Tensor)`. `convert_to` gains the `correct_yaw` argument and forwards it to `Box3DMode.convert`, with its return documented as :obj:`BaseInstance3DBoxes` instead of :obj:`DepthInstance3DBoxes`. `enlarged_box` is documented as "Enlarge the length, width and height of boxes." with "extra_width (float or Tensor): Extra width to enlarge the box.", and `get_surface_line_center` now returns "Tuple[Tensor, Tensor]: Surface and line center of bounding boxes."
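For instance, the annotated `enlarged_box` simply pads every box symmetrically; a sketch assuming the `mmdet3d.structures` re-export:

    import torch
    from mmdet3d.structures import DepthInstance3DBoxes

    boxes = DepthInstance3DBoxes(
        torch.tensor([[1.0, 2.0, 0.5, 2.0, 1.0, 1.5, 0.3]]))

    bigger = boxes.enlarged_box(0.2)
    print(boxes.dims)   # sizes (2.0, 1.0, 1.5)
    print(bigger.dims)  # each size grows by 2 * 0.2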
mmdet3d/structures/bbox_3d/lidar_box3d.py (view file @ b4b9af6b)

The imports add `from typing import Optional, Tuple, Union` and `from torch import Tensor`. The `LiDARInstance3DBoxes` docstring re-wraps the yaw description ("The yaw is 0 at the positive direction of x axis, and increases from the positive direction of x to the positive direction of y."), drops the "A refactor is ongoing ..." note, and the Attributes section now reads "tensor (Tensor): Float matrix with shape (N, box_dim)."

As in depth_box3d.py, the duplicated `gravity_center` property is removed in favour of the base-class implementation, and the remaining methods are annotated:

    @property
    def corners(self) -> Tensor:
        """Convert boxes to corners in clockwise order, in the form of
        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
        ...
        Returns:
            Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
        """

    def rotate(
        self,
        angle: Union[Tensor, np.ndarray, float],
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
               Tuple[BasePoints, Tensor], None]:

    def flip(
        self,
        bev_direction: str = 'horizontal',
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tensor, np.ndarray, BasePoints, None]:

Smaller touches: the corners diagram adjusts the arrow spacing around "left y <------- + ----------- + (x0, y1, z0)", the relative-origin comment becomes "# use relative origin (0.5, 0.5, 0)", the `rotate` docstring documents the parameter as `angle` (previously `angles`) with type "Tensor or np.ndarray or float", the `flip` docstring spells out the 'horizontal'/'vertical' choices and the returned flipped points, and `isinstance(..., torch.Tensor)` checks become `isinstance(..., Tensor)`. A further hunk at @@ -164,8 +167,8 @@ continues inside `flip`.
...
@@ -164,8 +167,8 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
self
.
tensor
[:,
6
]
=
-
self
.
tensor
[:,
6
]
+
np
.
pi
self
.
tensor
[:,
6
]
=
-
self
.
tensor
[:,
6
]
+
np
.
pi
if
points
is
not
None
:
if
points
is
not
None
:
assert
isinstance
(
points
,
(
torch
.
Tensor
,
np
.
ndarray
,
BasePoints
))
assert
isinstance
(
points
,
(
Tensor
,
np
.
ndarray
,
BasePoints
))
if
isinstance
(
points
,
(
torch
.
Tensor
,
np
.
ndarray
)):
if
isinstance
(
points
,
(
Tensor
,
np
.
ndarray
)):
if
bev_direction
==
'horizontal'
:
if
bev_direction
==
'horizontal'
:
points
[:,
1
]
=
-
points
[:,
1
]
points
[:,
1
]
=
-
points
[:,
1
]
elif
bev_direction
==
'vertical'
:
elif
bev_direction
==
'vertical'
:
...
@@ -174,22 +177,26 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
...
@@ -174,22 +177,26 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
points
.
flip
(
bev_direction
)
points
.
flip
(
bev_direction
)
return
points
return
points
def
convert_to
(
self
,
dst
,
rt_mat
=
None
,
correct_yaw
=
False
):
def
convert_to
(
self
,
dst
:
int
,
rt_mat
:
Optional
[
Union
[
Tensor
,
np
.
ndarray
]]
=
None
,
correct_yaw
:
bool
=
False
)
->
'BaseInstance3DBoxes'
:
"""Convert self to ``dst`` mode.
"""Convert self to ``dst`` mode.
Args:
Args:
dst (
:obj:`Box3DMode`
):
t
he target Box mode
dst (
int
):
T
he target Box mode
.
rt_mat (np.ndarray
| torch.Tensor
, optional): The rotation and
rt_mat (
Tensor or
np.ndarray, optional): The rotation and
translation matrix between different coordinates.
translation matrix between different coordinates.
Defaults to None.
Defaults to None.
The conversion from ``src`` coordinates to
The conversion from ``src`` coordinates to ``dst`` coordinates
``dst`` coordinates usually comes along the change of sensors,
usually comes along the change of sensors, e.g., from camera
e.g., from camera to LiDAR. This requires a transformation
to LiDAR. This requires a transformation
matrix.
matrix.
correct_yaw (bool):
If
convert the yaw angle to the target
correct_yaw (bool):
Whether to
convert the yaw angle to the target
coordinate. Defaults to False.
coordinate. Defaults to False.
Returns:
Returns:
:obj:`BaseInstance3DBoxes`:
:obj:`BaseInstance3DBoxes`:
The converted box of the same type in
The converted box of the same type in
the ``dst`` mode.
the ``dst`` mode.
"""
"""
from
.box_3d_mode
import
Box3DMode
from
.box_3d_mode
import
Box3DMode
return
Box3DMode
.
convert
(
return
Box3DMode
.
convert
(
...
@@ -199,11 +206,12 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
...
@@ -199,11 +206,12 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
rt_mat
=
rt_mat
,
rt_mat
=
rt_mat
,
correct_yaw
=
correct_yaw
)
correct_yaw
=
correct_yaw
)
def
enlarged_box
(
self
,
extra_width
):
def
enlarged_box
(
"""Enlarge the length, width and height boxes.
self
,
extra_width
:
Union
[
float
,
Tensor
])
->
'LiDARInstance3DBoxes'
:
"""Enlarge the length, width and height of boxes.
Args:
Args:
extra_width (float
| torch.
Tensor): Extra width to enlarge the box.
extra_width (float
or
Tensor): Extra width to enlarge the box.
Returns:
Returns:
:obj:`LiDARInstance3DBoxes`: Enlarged boxes.
:obj:`LiDARInstance3DBoxes`: Enlarged boxes.
...
...
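The corner layout documented in the ``corners`` hunk above can be reproduced with a few lines of plain numpy. The following is a hedged sketch of the relative-origin arithmetic only (single box, bottom-center origin (0.5, 0.5, 0), no yaw rotation applied); the values are illustrative, not taken from the repository:

import itertools
import numpy as np

dims = np.array([4.0, 2.0, 1.5])      # x_size, y_size, z_size
center = np.array([10.0, 3.0, -1.0])  # bottom center (x, y, z)

# unit-cube corners, reordered into the documented clockwise order
corners_norm = np.array(list(itertools.product([0, 1], repeat=3)), dtype=float)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - np.array([0.5, 0.5, 0.0])  # relative origin
corners = dims * corners_norm + center
print(corners.shape)  # (8, 3)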
mmdet3d/structures/bbox_3d/utils.py
View file @
b4b9af6b
 # Copyright (c) OpenMMLab. All rights reserved.
 from logging import warning
+from typing import Tuple, Union
 import numpy as np
 import torch
+from torch import Tensor
-from mmdet3d.utils.array_converter import array_converter
+from mmdet3d.utils import array_converter
 @array_converter(apply_to=('val', ))
-def limit_period(val, offset=0.5, period=np.pi):
+def limit_period(val: Union[np.ndarray, Tensor],
+                 offset: float = 0.5,
+                 period: float = np.pi) -> Union[np.ndarray, Tensor]:
     """Limit the value into a period for periodic function.
     Args:
-        val (torch.Tensor | np.ndarray): The value to be converted.
-        offset (float, optional): Offset to set the value range.
-            Defaults to 0.5.
-        period ([type], optional): Period of the value. Defaults to np.pi.
+        val (np.ndarray or Tensor): The value to be converted.
+        offset (float): Offset to set the value range. Defaults to 0.5.
+        period (float): Period of the value. Defaults to np.pi.
     Returns:
-        (torch.Tensor | np.ndarray): Value in the range of
-        [-offset * period, (1-offset) * period]
+        np.ndarray or Tensor: Value in the range of
+        [-offset * period, (1-offset) * period].
     """
     limited_val = val - torch.floor(val / period + offset) * period
     return limited_val
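The wrapped formula is easy to check by hand; a minimal torch sketch of the same arithmetic (the @array_converter decorator additionally accepts np.ndarray and float inputs and recovers the original type):

import numpy as np
import torch

val = torch.tensor([0.0, 1.5 * np.pi, -0.5 * np.pi])
# limit into [-0.5 * pi, 0.5 * pi) with offset=0.5, period=np.pi
limited = val - torch.floor(val / np.pi + 0.5) * np.pi
print(limited)  # approximately tensor([0.0000, -1.5708, -1.5708])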
 @array_converter(apply_to=('points', 'angles'))
-def rotation_3d_in_axis(points,
-                        angles,
-                        axis=0,
-                        return_mat=False,
-                        clockwise=False):
+def rotation_3d_in_axis(
+    points: Union[np.ndarray, Tensor],
+    angles: Union[np.ndarray, Tensor, float],
+    axis: int = 0,
+    return_mat: bool = False,
+    clockwise: bool = False
+) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Tensor, Tensor], np.ndarray,
+           Tensor]:
     """Rotate points by angles according to axis.
     Args:
-        points (np.ndarray | torch.Tensor | list | tuple ):
-            Points of shape (N, M, 3).
-        angles (np.ndarray | torch.Tensor | list | tuple | float):
-            Vector of angles in shape (N,)
-        axis (int, optional): The axis to be rotated. Defaults to 0.
-        return_mat: Whether or not return the rotation matrix (transposed).
-            Defaults to False.
-        clockwise: Whether the rotation is clockwise. Defaults to False.
+        points (np.ndarray or Tensor): Points with shape (N, M, 3).
+        angles (np.ndarray or Tensor or float): Vector of angles with shape
+            (N, ).
+        axis (int): The axis to be rotated. Defaults to 0.
+        return_mat (bool): Whether or not to return the rotation matrix
+            (transposed). Defaults to False.
+        clockwise (bool): Whether the rotation is clockwise. Defaults to False.
     Raises:
-        ValueError: when the axis is not in range [0, 1, 2], it will
-            raise value error.
+        ValueError: When the axis is not in range [-3, -2, -1, 0, 1, 2], it
+            will raise ValueError.
     Returns:
-        (torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3).
+        Tuple[np.ndarray, np.ndarray] or Tuple[Tensor, Tensor] or np.ndarray or
+        Tensor: Rotated points with shape (N, M, 3) and rotation matrix with
+        shape (N, 3, 3).
     """
     batch_free = len(points.shape) == 2
     if batch_free:
...
@@ -57,8 +64,8 @@ def rotation_3d_in_axis(points,
     if isinstance(angles, float) or len(angles.shape) == 0:
         angles = torch.full(points.shape[:1], angles)
-    assert len(points.shape) == 3 and len(angles.shape) == 1 \
-        and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \
+    assert len(points.shape) == 3 and len(angles.shape) == 1 and \
+        points.shape[0] == angles.shape[0], 'Incorrect shape of points ' \
         f'angles: {points.shape}, {angles.shape}'
     assert points.shape[-1] in [2, 3], \
...
@@ -89,8 +96,8 @@ def rotation_3d_in_axis(points,
                 torch.stack([zeros, -rot_sin, rot_cos])
             ])
         else:
-            raise ValueError(f'axis should in range '
-                             f'[-3, -2, -1, 0, 1, 2], got {axis}')
+            raise ValueError(
+                f'axis should in range [-3, -2, -1, 0, 1, 2], got {axis}')
     else:
         rot_mat_T = torch.stack([
             torch.stack([rot_cos, rot_sin]),
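For a single yaw angle about the z axis, the same math reduces to one rotation matrix; a hedged numpy sketch that mirrors the right-multiplication convention (``points @ rot_mat_T``) used by the boxes' ``rotate`` method shown earlier:

import numpy as np

def rotate_z(points, angle):
    # points: (N, 3); angle: scalar yaw in radians (counter-clockwise)
    rot_cos, rot_sin = np.cos(angle), np.sin(angle)
    # transposed rotation matrix for right-multiplication
    rot_mat_T = np.array([[rot_cos, rot_sin, 0.0],
                          [-rot_sin, rot_cos, 0.0],
                          [0.0, 0.0, 1.0]])
    return points @ rot_mat_T

pts = np.array([[1.0, 0.0, 0.0]])
print(rotate_z(pts, np.pi / 2))  # approximately [[0., 1., 0.]]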
...
@@ -118,14 +125,15 @@ def rotation_3d_in_axis(points,
 @array_converter(apply_to=('boxes_xywhr', ))
-def xywhr2xyxyr(boxes_xywhr):
+def xywhr2xyxyr(
+        boxes_xywhr: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
     """Convert a rotated boxes in XYWHR format to XYXYR format.
     Args:
-        boxes_xywhr (torch.Tensor | np.ndarray): Rotated boxes in XYWHR format.
+        boxes_xywhr (Tensor or np.ndarray): Rotated boxes in XYWHR format.
     Returns:
-        (torch.Tensor | np.ndarray): Converted boxes in XYXYR format.
+        Tensor or np.ndarray: Converted boxes in XYXYR format.
     """
     boxes = torch.zeros_like(boxes_xywhr)
     half_w = boxes_xywhr[..., 2] / 2
...
@@ -139,16 +147,16 @@ def xywhr2xyxyr(boxes_xywhr):
     return boxes
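The conversion itself is a center/size to min/max rewrite; a hedged numpy sketch (the half-height handling mirrors the half-width line visible in the hunk above):

import numpy as np

def xywhr_to_xyxyr(boxes_xywhr):
    # boxes_xywhr: (N, 5) as (x_center, y_center, w, h, yaw)
    boxes = np.zeros_like(boxes_xywhr)
    half_w = boxes_xywhr[..., 2] / 2
    half_h = boxes_xywhr[..., 3] / 2
    boxes[..., 0] = boxes_xywhr[..., 0] - half_w  # x_min
    boxes[..., 1] = boxes_xywhr[..., 1] - half_h  # y_min
    boxes[..., 2] = boxes_xywhr[..., 0] + half_w  # x_max
    boxes[..., 3] = boxes_xywhr[..., 1] + half_h  # y_max
    boxes[..., 4] = boxes_xywhr[..., 4]           # yaw kept as-is
    return boxes

print(xywhr_to_xyxyr(np.array([[0.0, 0.0, 2.0, 4.0, 0.3]])))
# [[-1. -2.  1.  2.  0.3]]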
-def get_box_type(box_type):
+def get_box_type(box_type: str) -> Tuple[type, int]:
     """Get the type and mode of box structure.
     Args:
         box_type (str): The type of box structure. The valid value are "LiDAR",
-            "Camera", or "Depth".
+            "Camera" and "Depth".
     Raises:
-        ValueError: A ValueError is raised when `box_type`
-            does not belong to the three valid types.
+        ValueError: A ValueError is raised when ``box_type`` does not belong to
+            the three valid types.
     Returns:
         tuple: Box type and box mode.
...
@@ -166,36 +174,39 @@ def get_box_type(box_type):
         box_type_3d = DepthInstance3DBoxes
         box_mode_3d = Box3DMode.DEPTH
     else:
-        raise ValueError('Only "box_type" of "camera", "lidar", "depth"'
-                         f' are supported, got {box_type}')
+        raise ValueError('Only "box_type" of "camera", "lidar", "depth" are '
+                         f'supported, got {box_type}')
     return box_type_3d, box_mode_3d
 @array_converter(apply_to=('points_3d', 'proj_mat'))
-def points_cam2img(points_3d, proj_mat, with_depth=False):
+def points_cam2img(points_3d: Union[Tensor, np.ndarray],
+                   proj_mat: Union[Tensor, np.ndarray],
+                   with_depth: bool = False) -> Union[Tensor, np.ndarray]:
     """Project points in camera coordinates to image coordinates.
     Args:
-        points_3d (torch.Tensor | np.ndarray): Points in shape (N, 3)
-        proj_mat (torch.Tensor | np.ndarray):
-            Transformation matrix between coordinates.
-        with_depth (bool, optional): Whether to keep depth in the output.
+        points_3d (Tensor or np.ndarray): Points in shape (N, 3).
+        proj_mat (Tensor or np.ndarray): Transformation matrix between
+            coordinates.
+        with_depth (bool): Whether to keep depth in the output.
             Defaults to False.
     Returns:
-        (torch.Tensor | np.ndarray): Points in image coordinates,
-            with shape [N, 2] if `with_depth=False`, else [N, 3].
+        Tensor or np.ndarray: Points in image coordinates with shape [N, 2] if
+        ``with_depth=False``, else [N, 3].
     """
     points_shape = list(points_3d.shape)
     points_shape[-1] = 1
-    assert len(proj_mat.shape) == 2, 'The dimension of the projection' \
-        f' matrix should be 2 instead of {len(proj_mat.shape)}.'
+    assert len(proj_mat.shape) == 2, \
+        'The dimension of the projection matrix should be 2 ' \
+        f'instead of {len(proj_mat.shape)}.'
     d1, d2 = proj_mat.shape[:2]
-    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or (
-        d1 == 4 and d2 == 4), 'The shape of the projection matrix' \
-        f' ({d1}*{d2}) is not supported.'
+    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or \
+        (d1 == 4 and d2 == 4), 'The shape of the projection matrix ' \
+        f'({d1}*{d2}) is not supported.'
     if d1 == 3:
         proj_mat_expanded = torch.eye(
             4, device=proj_mat.device, dtype=proj_mat.dtype)
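In homogeneous form the projection is a matrix product followed by a perspective divide; a minimal numpy sketch with an assumed 3x3 pinhole intrinsic matrix (the values are illustrative only):

import numpy as np

cam2img = np.array([[721.5, 0.0, 609.6],   # assumed fx, 0, cx
                    [0.0, 721.5, 172.9],   # 0, fy, cy
                    [0.0, 0.0, 1.0]])
points_cam = np.array([[2.0, 1.0, 10.0]])  # (N, 3) in camera coordinates

uvw = points_cam @ cam2img.T               # homogeneous image coordinates
uv = uvw[:, :2] / uvw[:, 2:3]              # perspective divide by depth
print(uv)                                  # roughly [[753.9, 245.05]]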
...
@@ -215,18 +226,20 @@ def points_cam2img(points_3d, proj_mat, with_depth=False):
 @array_converter(apply_to=('points', 'cam2img'))
-def points_img2cam(points, cam2img):
+def points_img2cam(
+        points: Union[Tensor, np.ndarray],
+        cam2img: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
     """Project points in image coordinates to camera coordinates.
     Args:
-        points (torch.Tensor): 2.5D points in 2D images, [N, 3],
-            3 corresponds with x, y in the image and depth.
-        cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be
-            [3, 3], [3, 4] or [4, 4].
+        points (Tensor or np.ndarray): 2.5D points in 2D images with shape
+            [N, 3], 3 corresponds with x, y in the image and depth.
+        cam2img (Tensor or np.ndarray): Camera intrinsic matrix. The shape can
+            be [3, 3], [3, 4] or [4, 4].
     Returns:
-        torch.Tensor: points in 3D space. [N, 3],
-            3 corresponds with x, y, z in 3D space.
+        Tensor or np.ndarray: Points in 3D space with shape [N, 3], 3
+        corresponds with x, y, z in 3D space.
     """
     assert cam2img.shape[0] <= 4
     assert cam2img.shape[1] <= 4
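Back-projection inverts the pinhole model from the previous sketch; a hedged numpy version for a plain 3x3 intrinsic matrix (the library implementation additionally pads the matrix to 4x4):

import numpy as np

cam2img = np.array([[721.5, 0.0, 609.6],
                    [0.0, 721.5, 172.9],
                    [0.0, 0.0, 1.0]])
points_2_5d = np.array([[753.9, 245.05, 10.0]])  # (u, v, depth)

uv, depth = points_2_5d[:, :2], points_2_5d[:, 2:3]
xyz = np.concatenate([uv * depth, depth], axis=1) @ np.linalg.inv(cam2img).T
print(xyz)  # roughly [[2., 1., 10.]]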
...
@@ -260,8 +273,8 @@ def mono_cam_box2vis(cam_box):
     Args:
         cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate
             system before conversion. Could be gt bbox loaded from dataset
             or network prediction output.
     Returns:
         :obj:`CameraInstance3DBoxes`: Box after conversion.
...
@@ -269,7 +282,7 @@ def mono_cam_box2vis(cam_box):
     warning.warn('DeprecationWarning: The hack of yaw and dimension in the '
                  'monocular 3D detection on nuScenes has been removed. The '
                  'function mono_cam_box2vis will be deprecated.')
-    from . import CameraInstance3DBoxes
+    from .cam_box3d import CameraInstance3DBoxes
     assert isinstance(cam_box, CameraInstance3DBoxes), \
         'input bbox should be CameraInstance3DBoxes!'
...
@@ -294,16 +307,16 @@ def mono_cam_box2vis(cam_box):
     return cam_box
-def get_proj_mat_by_coord_type(img_meta, coord_type):
+def get_proj_mat_by_coord_type(img_meta: dict, coord_type: str) -> Tensor:
     """Obtain image features using points.
     Args:
-        img_meta (dict): Meta info.
-        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
-            Can be case-insensitive.
+        img_meta (dict): Meta information.
+        coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'. Can be
+            case-insensitive.
     Returns:
-        torch.Tensor: transformation matrix.
+        Tensor: Transformation matrix.
     """
     coord_type = coord_type.upper()
     mapping = {'LIDAR': 'lidar2img', 'DEPTH': 'depth2img', 'CAMERA': 'cam2img'}
...
@@ -311,18 +324,16 @@ def get_proj_mat_by_coord_type(img_meta, coord_type):
     return img_meta[mapping[coord_type]]
-def yaw2local(yaw, loc):
+def yaw2local(yaw: Tensor, loc: Tensor) -> Tensor:
     """Transform global yaw to local yaw (alpha in kitti) in camera
     coordinates, ranges from -pi to pi.
     Args:
-        yaw (torch.Tensor): A vector with local yaw of each box.
-            shape: (N, )
-        loc (torch.Tensor): gravity center of each box.
-            shape: (N, 3)
+        yaw (Tensor): A vector with local yaw of each box in shape (N, ).
+        loc (Tensor): Gravity center of each box in shape (N, 3).
     Returns:
-        torch.Tensor: local yaw (alpha in kitti).
+        Tensor: Local yaw (alpha in kitti).
     """
     local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
     larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
...
@@ -335,7 +346,7 @@ def yaw2local(yaw, loc):
     return local_yaw
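The KITTI observation angle follows directly from the formula shown above; a small torch sketch with the wrap-around into [-pi, pi] written out explicitly (the numeric inputs are illustrative):

import numpy as np
import torch

yaw = torch.tensor([1.0, 3.0])          # global rotation_y per box
loc = torch.tensor([[5.0, 1.5, 10.0],   # gravity centers in camera coords
                    [-3.0, 1.5, 8.0]])

alpha = yaw - torch.atan2(loc[:, 0], loc[:, 2])
alpha = (alpha + np.pi) % (2 * np.pi) - np.pi  # fold back into [-pi, pi]
print(alpha)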
-def get_lidar2img(cam2img, lidar2cam):
+def get_lidar2img(cam2img: Tensor, lidar2cam: Tensor) -> Tensor:
     """Get the projection matrix of lidar2img.
     Args:
...
@@ -343,7 +354,7 @@ def get_lidar2img(cam2img, lidar2cam):
         lidar2cam (torch.Tensor): A 3x3 or 4x4 projection matrix.
     Returns:
-        torch.Tensor: transformation matrix with shape 4x4.
+        Tensor: Transformation matrix with shape 4x4.
     """
     if cam2img.shape == (3, 3):
         temp = cam2img.new_zeros(4, 4)
...
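Composing the two transforms is a single 4x4 product once both are padded to homogeneous form; a hedged torch sketch (the padding helper here is an illustrative stand-in for the ``new_zeros`` logic in the hunk above):

import torch

def to_4x4(mat):
    # pad a 3x3 (or 3x4) matrix into homogeneous 4x4 form
    out = torch.eye(4, dtype=mat.dtype)
    out[:mat.shape[0], :mat.shape[1]] = mat
    return out

cam2img = torch.eye(3)    # placeholder intrinsics
lidar2cam = torch.eye(4)  # placeholder extrinsics
lidar2img = to_4x4(cam2img) @ lidar2cam
print(lidar2img.shape)    # torch.Size([4, 4])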
mmdet3d/structures/det3d_data_sample.py
View file @
b4b9af6b
...
@@ -56,7 +56,7 @@ class Det3DDataSample(DetDataSample):
         >>> from mmengine.structures import InstanceData
         >>> from mmdet3d.structures import Det3DDataSample
-        >>> from mmdet3d.structures import BaseInstance3DBoxes
+        >>> from mmdet3d.structures.bbox_3d import BaseInstance3DBoxes
         >>> data_sample = Det3DDataSample()
         >>> meta_info = dict(
...
@@ -80,15 +80,15 @@ class Det3DDataSample(DetDataSample):
             DATA FIELDS
             labels_3d: tensor([1, 0, 2, 0, 1])
             bboxes_3d: BaseInstance3DBoxes(
-        tensor([[1.9115e-01, 3.6061e-01, 6.7707e-01, 5.2902e-01, 8.0736e-01, 8.2759e-01,  # noqa E501
+        tensor([[1.9115e-01, 3.6061e-01, 6.7707e-01, 5.2902e-01, 8.0736e-01, 8.2759e-01,
                  2.4328e-01],
-                [5.6272e-01, 2.7508e-01, 5.7966e-01, 9.2410e-01, 3.0456e-01, 1.8912e-01,  # noqa E501
+                [5.6272e-01, 2.7508e-01, 5.7966e-01, 9.2410e-01, 3.0456e-01, 1.8912e-01,
                  3.3176e-01],
-                [8.1069e-01, 2.8684e-01, 7.7689e-01, 9.2397e-02, 5.5849e-01, 3.8007e-01,  # noqa E501
+                [8.1069e-01, 2.8684e-01, 7.7689e-01, 9.2397e-02, 5.5849e-01, 3.8007e-01,
                  4.6719e-01],
-                [6.6346e-01, 4.8005e-01, 5.2318e-02, 4.4137e-01, 4.1163e-01, 8.9339e-01,  # noqa E501
+                [6.6346e-01, 4.8005e-01, 5.2318e-02, 4.4137e-01, 4.1163e-01, 8.9339e-01,
                  7.2847e-01],
-                [2.4800e-01, 7.1944e-01, 3.4766e-01, 7.8583e-01, 8.5507e-01, 6.3729e-02,  # noqa E501
+                [2.4800e-01, 7.1944e-01, 3.4766e-01, 7.8583e-01, 8.5507e-01, 6.3729e-02,
                  7.5161e-05]]))
-            ) at 0x7f7e29de3a00>
+            ) at 0x7f7e2a0e8640>
...
@@ -132,7 +132,7 @@ class Det3DDataSample(DetDataSample):
                 pts_instance_mask: tensor([0.7363, 0.8096])
-            ) at 0x7f7e2962cc40>
+            ) at 0x7f7e29ff0d60>
-    """
+    """  # noqa: E501
     @property
     def gt_instances_3d(self) -> InstanceData:
...
mmdet3d/structures/points/__init__.py
View file @
b4b9af6b
...
@@ -7,24 +7,25 @@ from .lidar_points import LiDARPoints
 __all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']
-def get_points_type(points_type):
+def get_points_type(points_type: str) -> type:
     """Get the class of points according to coordinate type.
     Args:
         points_type (str): The type of points coordinate. The valid value are
-            "CAMERA", "LIDAR", or "DEPTH".
+            "CAMERA", "LIDAR" and "DEPTH".
     Returns:
-        class: Points type.
+        type: Points type.
     """
-    if points_type == 'CAMERA':
+    points_type_upper = points_type.upper()
+    if points_type_upper == 'CAMERA':
         points_cls = CameraPoints
-    elif points_type == 'LIDAR':
+    elif points_type_upper == 'LIDAR':
         points_cls = LiDARPoints
-    elif points_type == 'DEPTH':
+    elif points_type_upper == 'DEPTH':
         points_cls = DepthPoints
     else:
-        raise ValueError('Only "points_type" of "CAMERA", "LIDAR", or "DEPTH"'
-                         f' are supported, got {points_type}')
+        raise ValueError('Only "points_type" of "CAMERA", "LIDAR" and "DEPTH" '
+                         f'are supported, got {points_type}')
     return points_cls
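A quick usage sketch of the updated helper; the case-insensitive lookup is the behavioral change introduced above (assuming an mmdet3d installation):

import torch
from mmdet3d.structures.points import get_points_type

points_cls = get_points_type('lidar')  # lower case is now accepted
points = points_cls(torch.rand(5, 4), points_dim=4)
print(type(points).__name__)           # LiDARPoints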
mmdet3d/structures/points/base_points.py
View file @
b4b9af6b
This diff is collapsed.
Click to expand it.
mmdet3d/structures/points/cam_points.py
View file @
b4b9af6b
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional, Sequence, Union
+import numpy as np
+from torch import Tensor
 from .base_points import BasePoints
...
@@ -6,58 +11,67 @@ class CameraPoints(BasePoints):
     """Points of instances in CAM coordinates.
     Args:
-        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
-        points_dim (int, optional): Number of the dimension of a point.
-            Each row is (x, y, z). Defaults to 3.
-        attribute_dims (dict, optional): Dictionary to indicate the
-            meaning of extra dimension. Defaults to None.
+        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
+            data with shape (N, points_dim).
+        points_dim (int): Integer indicating the dimension of a point. Each row
+            is (x, y, z, ...). Defaults to 3.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
     Attributes:
-        tensor (torch.Tensor): Float matrix of N x points_dim.
+        tensor (Tensor): Float matrix with shape (N, points_dim).
         points_dim (int): Integer indicating the dimension of a point. Each row
             is (x, y, z, ...).
-        attribute_dims (bool): Dictionary to indicate the meaning of extra
-            dimension. Defaults to None.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
        rotation_axis (int): Default rotation axis for points rotation.
    """
-    def __init__(self, tensor, points_dim=3, attribute_dims=None):
+    def __init__(self,
+                 tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
+                 points_dim: int = 3,
+                 attribute_dims: Optional[dict] = None) -> None:
        super(CameraPoints, self).__init__(
            tensor, points_dim=points_dim, attribute_dims=attribute_dims)
        self.rotation_axis = 1
-    def flip(self, bev_direction='horizontal'):
+    def flip(self, bev_direction: str = 'horizontal') -> None:
        """Flip the points along given BEV direction.
        Args:
            bev_direction (str): Flip direction (horizontal or vertical).
+                Defaults to 'horizontal'.
        """
+        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            self.tensor[:, 0] = -self.tensor[:, 0]
        elif bev_direction == 'vertical':
            self.tensor[:, 2] = -self.tensor[:, 2]
    @property
-    def bev(self):
-        """torch.Tensor: BEV of the points in shape (N, 2)."""
+    def bev(self) -> Tensor:
+        """Tensor: BEV of the points in shape (N, 2)."""
        return self.tensor[:, [0, 2]]
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self,
+                   dst: int,
+                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None
+                   ) -> 'BasePoints':
        """Convert self to ``dst`` mode.
        Args:
-            dst (:obj:`CoordMode`): The target Point mode.
-            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
+            dst (int): The target Point mode.
+            rt_mat (Tensor or np.ndarray, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
                The conversion from ``src`` coordinates to ``dst`` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
        Returns:
            :obj:`BasePoints`: The converted point of the same type in the
            ``dst`` mode.
        """
-        from mmdet3d.structures import Coord3DMode
+        from mmdet3d.structures.bbox_3d import Coord3DMode
        return Coord3DMode.convert_point(
            point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
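A short usage sketch of the camera-frame point container described above (assuming an mmdet3d installation); for CameraPoints the BEV plane is (x, z) and the default rotation axis is y:

import torch
from mmdet3d.structures.points import CameraPoints

pts = CameraPoints(torch.tensor([[1.0, 2.0, 3.0],
                                 [4.0, 5.0, 6.0]]), points_dim=3)
print(pts.bev)           # x/z columns: tensor([[1., 3.], [4., 6.]])
pts.flip('horizontal')   # negates x in place
print(pts.tensor[:, 0])  # tensor([-1., -4.])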
mmdet3d/structures/points/depth_points.py
View file @
b4b9af6b
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional, Sequence, Union
+import numpy as np
+from torch import Tensor
 from .base_points import BasePoints
...
@@ -6,53 +11,62 @@ class DepthPoints(BasePoints):
     """Points of instances in DEPTH coordinates.
     Args:
-        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
-        points_dim (int, optional): Number of the dimension of a point.
-            Each row is (x, y, z). Defaults to 3.
-        attribute_dims (dict, optional): Dictionary to indicate the
-            meaning of extra dimension. Defaults to None.
+        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
+            data with shape (N, points_dim).
+        points_dim (int): Integer indicating the dimension of a point. Each row
+            is (x, y, z, ...). Defaults to 3.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
     Attributes:
-        tensor (torch.Tensor): Float matrix of N x points_dim.
+        tensor (Tensor): Float matrix with shape (N, points_dim).
         points_dim (int): Integer indicating the dimension of a point. Each row
             is (x, y, z, ...).
-        attribute_dims (bool): Dictionary to indicate the meaning of extra
-            dimension. Defaults to None.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
        rotation_axis (int): Default rotation axis for points rotation.
    """
-    def __init__(self, tensor, points_dim=3, attribute_dims=None):
+    def __init__(self,
+                 tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
+                 points_dim: int = 3,
+                 attribute_dims: Optional[dict] = None) -> None:
        super(DepthPoints, self).__init__(
            tensor, points_dim=points_dim, attribute_dims=attribute_dims)
        self.rotation_axis = 2
-    def flip(self, bev_direction='horizontal'):
+    def flip(self, bev_direction: str = 'horizontal') -> None:
        """Flip the points along given BEV direction.
        Args:
            bev_direction (str): Flip direction (horizontal or vertical).
+                Defaults to 'horizontal'.
        """
+        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            self.tensor[:, 0] = -self.tensor[:, 0]
        elif bev_direction == 'vertical':
            self.tensor[:, 1] = -self.tensor[:, 1]
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self,
+                   dst: int,
+                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None
+                   ) -> 'BasePoints':
        """Convert self to ``dst`` mode.
        Args:
-            dst (:obj:`CoordMode`): The target Point mode.
-            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
+            dst (int): The target Point mode.
+            rt_mat (Tensor or np.ndarray, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
                The conversion from ``src`` coordinates to ``dst`` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
        Returns:
            :obj:`BasePoints`: The converted point of the same type in the
            ``dst`` mode.
        """
-        from mmdet3d.structures import Coord3DMode
+        from mmdet3d.structures.bbox_3d import Coord3DMode
        return Coord3DMode.convert_point(
            point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
mmdet3d/structures/points/lidar_points.py
View file @
b4b9af6b
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional, Sequence, Union
+import numpy as np
+from torch import Tensor
 from .base_points import BasePoints
...
@@ -6,53 +11,62 @@ class LiDARPoints(BasePoints):
     """Points of instances in LIDAR coordinates.
     Args:
-        tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix.
-        points_dim (int, optional): Number of the dimension of a point.
-            Each row is (x, y, z). Defaults to 3.
-        attribute_dims (dict, optional): Dictionary to indicate the
-            meaning of extra dimension. Defaults to None.
+        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The points
+            data with shape (N, points_dim).
+        points_dim (int): Integer indicating the dimension of a point. Each row
+            is (x, y, z, ...). Defaults to 3.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
     Attributes:
-        tensor (torch.Tensor): Float matrix of N x points_dim.
+        tensor (Tensor): Float matrix with shape (N, points_dim).
         points_dim (int): Integer indicating the dimension of a point. Each row
             is (x, y, z, ...).
-        attribute_dims (bool): Dictionary to indicate the meaning of extra
-            dimension. Defaults to None.
+        attribute_dims (dict, optional): Dictionary to indicate the meaning of
+            extra dimension. Defaults to None.
        rotation_axis (int): Default rotation axis for points rotation.
    """
-    def __init__(self, tensor, points_dim=3, attribute_dims=None):
+    def __init__(self,
+                 tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
+                 points_dim: int = 3,
+                 attribute_dims: Optional[dict] = None) -> None:
        super(LiDARPoints, self).__init__(
            tensor, points_dim=points_dim, attribute_dims=attribute_dims)
        self.rotation_axis = 2
-    def flip(self, bev_direction='horizontal'):
+    def flip(self, bev_direction: str = 'horizontal') -> None:
        """Flip the points along given BEV direction.
        Args:
            bev_direction (str): Flip direction (horizontal or vertical).
+                Defaults to 'horizontal'.
        """
+        assert bev_direction in ('horizontal', 'vertical')
        if bev_direction == 'horizontal':
            self.tensor[:, 1] = -self.tensor[:, 1]
        elif bev_direction == 'vertical':
            self.tensor[:, 0] = -self.tensor[:, 0]
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self,
+                   dst: int,
+                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None
+                   ) -> 'BasePoints':
        """Convert self to ``dst`` mode.
        Args:
-            dst (:obj:`CoordMode`): The target Point mode.
-            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
+            dst (int): The target Point mode.
+            rt_mat (Tensor or np.ndarray, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
                The conversion from ``src`` coordinates to ``dst`` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
        Returns:
            :obj:`BasePoints`: The converted point of the same type in the
            ``dst`` mode.
        """
-        from mmdet3d.structures import Coord3DMode
+        from mmdet3d.structures.bbox_3d import Coord3DMode
        return Coord3DMode.convert_point(
            point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
mmdet3d/utils/array_converter.py
View file @
b4b9af6b
 # Copyright (c) OpenMMLab. All rights reserved.
 import functools
 from inspect import getfullargspec
-from typing import Callable, Optional, Tuple, Union
+from typing import Callable, Optional, Tuple, Type, Union
 import numpy as np
 import torch
-TemplateArrayType = Union[tuple, list, int, float, np.ndarray, torch.Tensor]
-OptArrayType = Optional[Union[np.ndarray, torch.Tensor]]
+TemplateArrayType = Union[np.ndarray, torch.Tensor, list, tuple, int, float]
 def array_converter(to_torch: bool = True,
...
@@ -16,37 +15,36 @@ def array_converter(to_torch: bool = True,
                     recover: bool = True) -> Callable:
     """Wrapper function for data-type agnostic processing.
-    First converts input arrays to PyTorch tensors or NumPy ndarrays
-    for middle calculation, then convert output to original data-type if
-    `recover=True`.
+    First converts input arrays to PyTorch tensors or NumPy arrays for middle
+    calculation, then convert output to original data-type if `recover=True`.
     Args:
-        to_torch (bool): Whether convert to PyTorch tensors for middle
+        to_torch (bool): Whether to convert to PyTorch tensors for middle
             calculation. Defaults to True.
-        apply_to (Tuple[str, ...]): The arguments to which we apply data-type
+        apply_to (Tuple[str]): The arguments to which we apply data-type
             conversion. Defaults to an empty tuple.
         template_arg_name_ (str, optional): Argument serving as the template
             (return arrays should have the same dtype and device as the
             template). Defaults to None. If None, we will use the first
             argument in `apply_to` as the template argument.
-        recover (bool): Whether or not recover the wrapped function outputs
+        recover (bool): Whether or not to recover the wrapped function outputs
             to the `template_arg_name_` type. Defaults to True.
     Raises:
         ValueError: When template_arg_name_ is not among all args, or when
             apply_to contains an arg which is not among all args, a ValueError
             will be raised. When the template argument or an argument to
             convert is a list or tuple, and cannot be converted to a NumPy
             array, a ValueError will be raised.
         TypeError: When the type of the template argument or an argument to
             convert does not belong to the above range, or the contents of such
             an list-or-tuple-type argument do not share the same data type, a
-            TypeError is raised.
+            TypeError will be raised.
     Returns:
-        (function): wrapped function.
-    Example:
+        Callable: Wrapped function.
+    Examples:
        >>> import torch
        >>> import numpy as np
        >>>
...
@@ -67,7 +65,7 @@ def array_converter(to_torch: bool = True,
        >>> def simple_add(a, b):
        >>>     return a + b
        >>>
-        >>> simple_add()
+        >>> simple_add(a, b)
        >>>
        >>> # Use torch funcs for floor(a) if flag=True else ceil(a),
        >>> # and return the torch tensor
...
@@ -126,8 +124,8 @@ def array_converter(to_torch: bool = True,
        # inspect apply_to
        for arg_to_apply in apply_to:
            if arg_to_apply not in all_arg_names:
-                raise ValueError(f'{arg_to_apply} is not '
-                                 f'an argument of {func_name}')
+                raise ValueError(
+                    f'{arg_to_apply} is not an argument of {func_name}')
        new_args = []
        new_kwargs = {}
...
@@ -207,8 +205,8 @@ class ArrayConverter:
    """Utility class for data-type agnostic processing.
    Args:
-        template_array (tuple | list | int | float | np.ndarray |
-            torch.Tensor, optional): template array. Defaults to None.
+        template_array (np.ndarray or torch.Tensor or list or tuple or int or
+            float, optional): Template array. Defaults to None.
    """
    SUPPORTED_NON_ARRAY_TYPES = (int, float, np.int8, np.int16, np.int32,
                                 np.int64, np.uint8, np.uint16, np.uint32,
...
@@ -223,15 +221,15 @@ class ArrayConverter:
        """Set template array.
        Args:
-            array (tuple | list | int | float | np.ndarray | torch.Tensor):
-                Template array.
+            array (np.ndarray or torch.Tensor or list or tuple or int or
+                float): Template array.
        Raises:
            ValueError: If input is list or tuple and cannot be converted to a
                NumPy array, a ValueError is raised.
            TypeError: If input type does not belong to the above range, or the
                contents of a list or tuple do not share the same data type, a
                TypeError is raised.
        """
        self.array_type = type(array)
        self.is_num = False
...
@@ -249,41 +247,40 @@ class ArrayConverter:
                raise TypeError
            self.dtype = array.dtype
        except (ValueError, TypeError):
-            print(f'The following list cannot be converted to'
-                  f' a numpy array of supported dtype:\n{array}')
+            print('The following list cannot be converted to a numpy '
+                  f'array of supported dtype:\n{array}')
            raise
-        elif isinstance(array, self.SUPPORTED_NON_ARRAY_TYPES):
+        elif isinstance(array, (int, float)):
            self.array_type = np.ndarray
            self.is_num = True
            self.dtype = np.dtype(type(array))
        else:
-            raise TypeError(f'Template type {self.array_type}'
-                            f' is not supported.')
+            raise TypeError(
+                f'Template type {self.array_type} is not supported.')
    def convert(
            self,
            input_array: TemplateArrayType,
-            target_type: Optional[type] = None,
-            target_array: OptArrayType = None
+            target_type: Optional[Type] = None,
+            target_array: Optional[Union[np.ndarray, torch.Tensor]] = None
    ) -> Union[np.ndarray, torch.Tensor]:
        """Convert input array to target data type.
        Args:
-            input_array (tuple | list | int | float | np.ndarray |
-                torch.Tensor): Input array.
-            target_type (:class:`np.ndarray` or :class:`torch.Tensor`,
-                optional): Type to which input array is converted.
-                Defaults to None.
-            target_array (np.ndarray | torch.Tensor, optional):
-                Template array to which input array is converted.
-                Defaults to None.
+            input_array (np.ndarray or torch.Tensor or list or tuple or int or
+                float): Input array.
+            target_type (Type, optional): Type to which input array is
+                converted. It should be `np.ndarray` or `torch.Tensor`.
+                Defaults to None.
+            target_array (np.ndarray or torch.Tensor, optional): Template array
+                to which input array is converted. Defaults to None.
        Raises:
            ValueError: If input is list or tuple and cannot be converted to a
                NumPy array, a ValueError is raised.
            TypeError: If input type does not belong to the above range, or the
                contents of a list or tuple do not share the same data type, a
                TypeError is raised.
        Returns:
            np.ndarray or torch.Tensor: The converted array.
...
@@ -294,8 +291,8 @@ class ArrayConverter:
            if input_array.dtype not in self.SUPPORTED_NON_ARRAY_TYPES:
                raise TypeError
        except (ValueError, TypeError):
-            print(f'The input cannot be converted to'
-                  f' a single-type numpy array:\n{input_array}')
+            print('The input cannot be converted to a single-type numpy '
+                  f'array:\n{input_array}')
            raise
        elif isinstance(input_array, self.SUPPORTED_NON_ARRAY_TYPES):
            input_array = np.array(input_array)
...
@@ -328,14 +325,14 @@ class ArrayConverter:
    def recover(
            self, input_array: Union[np.ndarray, torch.Tensor]
-    ) -> Union[np.ndarray, torch.Tensor]:
+    ) -> Union[np.ndarray, torch.Tensor, int, float]:
        """Recover input type to original array type.
        Args:
-            input_array (np.ndarray | torch.Tensor): Input array.
+            input_array (np.ndarray or torch.Tensor): Input array.
        Returns:
-            np.ndarray or torch.Tensor: Converted array.
+            np.ndarray or torch.Tensor or int or float: Converted array.
        """
        assert isinstance(input_array, (np.ndarray, torch.Tensor)), \
            'invalid input array type'
...
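A hedged sketch of how the decorator is meant to be applied to a new helper, mirroring the docstring examples above (the function name and arguments here are illustrative, and the decorator is assumed to behave as documented):

import numpy as np
import torch
from mmdet3d.utils import array_converter

@array_converter(apply_to=('val', ))
def scaled_sin(val, scale=2.0):
    # inside the wrapper, `val` arrives as a torch.Tensor regardless of
    # whether the caller passed an ndarray, a tensor or a plain number
    return torch.sin(val) * scale

out = scaled_sin(np.array([0.0, np.pi / 2]))
print(type(out), out)  # recovered to np.ndarray: [0. 2.]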
mmdet3d/version.py
View file @
b4b9af6b
...
@@ -4,15 +4,15 @@ __version__ = '1.1.0'
 short_version = __version__
-def parse_version_info(version_str):
+def parse_version_info(version_str: str) -> tuple:
     """Parse a version string into a tuple.
     Args:
         version_str (str): The version string.
     Returns:
-        tuple[int | str]: The version info, e.g., "1.3.0" is parsed into
-            (1, 3, 0), and "2.0.0rc4" is parsed into (2, 0, 0, 'rc4').
+        tuple: The version info, e.g., "1.3.0" is parsed into (1, 3, 0), and
+            "2.0.0rc4" is parsed into (2, 0, 0, 'rc4').
     """
     version_info = []
     for x in version_str.split('.'):
...
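The parsing behavior documented above is easy to reproduce; a minimal stand-alone sketch (the 'rc' handling is an assumption based on the documented example, not a copy of the repository code):

def parse_version(version_str: str) -> tuple:
    info = []
    for x in version_str.split('.'):
        if x.isdigit():
            info.append(int(x))
        elif 'rc' in x:
            major, rc = x.split('rc')
            info.extend([int(major), f'rc{rc}'])
    return tuple(info)

print(parse_version('1.3.0'))     # (1, 3, 0)
print(parse_version('2.0.0rc4'))  # (2, 0, 0, 'rc4')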
tests/test_structures/test_bbox/test_box3d.py
View file @
b4b9af6b
...
@@ -1772,10 +1772,10 @@ def test_points_in_boxes():
          [1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1], [0, 1, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
-         [0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
-         [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 1],
-         [0, 0, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0],
-         [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]],
+         [0, 0, 1, 0, 1, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
+         [0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 1, 0],
+         [0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0],
+         [1, 0, 0, 1, 0, 0], [1, 0, 0, 1, 0, 0]],
         dtype=torch.int32).cuda()
     assert point_indices.shape == torch.Size([23, 6])
     assert (point_indices == expected_point_indices).all()
...
@@ -1785,8 +1785,8 @@ def test_points_in_boxes():
     point_indices = cam_boxes.points_in_boxes_part(cam_pts)
     expected_point_indices = torch.tensor([
-        0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 3, -1, -1, 2, 3, 3, 2, 2, 3,
-        0, 0],
+        0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1, 2, -1, 2, 5, 2, 0, 0,
+        -1, 0, 0],
         dtype=torch.int32).cuda()
     assert point_indices.shape == torch.Size([23])
...