Unverified commit 20b163a4 authored by encore-zhou, committed by GitHub

[Feature]: Add points structure (#196)

* add h3d backbone

* add h3d backbone

* add h3dnet

* modify scannet config

* fix bugs for proposal refine

* fix bugs for test backbone

* add primitive head test

* modify h3dhead

* modify h3d head

* update loss weight config

* fix bugs for h3d head loss

* modify h3d head get targets function

* update h3dnet base config

* modify weighted loss

* Revert "Merge branch 'h3d_u2' into 'master'"

This reverts merge request !5

* add points structure

* modify points rotation and add coord_3d_mode unittest

* modify docstring
parent 37ce1871
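For orientation, the following is a minimal usage sketch of what this commit adds (it is editorial, not part of the diff); the tensor values are made up, and it assumes mmdet3d is installed at this revision:

import torch
from mmdet3d.core.bbox import Coord3DMode
from mmdet3d.core.points import LiDARPoints

# 10 random points: x, y, z plus one extra attribute column (height).
points = LiDARPoints(
    torch.rand(10, 4), points_dim=4, attribute_dims=dict(height=3))
# convert_to returns a CameraPoints object with the same extra attributes.
cam_points = points.convert_to(Coord3DMode.CAM)
print(type(cam_points).__name__, cam_points.coord.shape)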
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .points import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .visualizer import * # noqa: F401, F403
@@ -7,9 +7,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
-                         DepthInstance3DBoxes, LiDARInstance3DBoxes,
-                         get_box_type, limit_period, points_cam2img,
-                         xywhr2xyxyr)
+                         Coord3DMode, DepthInstance3DBoxes,
+                         LiDARInstance3DBoxes, get_box_type, limit_period,
+                         points_cam2img, xywhr2xyxyr)
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
__all__ = [
@@ -21,5 +21,5 @@ __all__ = [
'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
-    'get_box_type'
+    'get_box_type', 'Coord3DMode'
]
from .base_box3d import BaseInstance3DBoxes
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, limit_period, points_cam2img,
@@ -9,5 +10,6 @@ from .utils import (get_box_type, limit_period, points_cam2img,
__all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
-    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img'
+    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
+    'Coord3DMode'
]
import numpy as np
import torch
from enum import IntEnum, unique
from mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints,
LiDARPoints)
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
@unique
class Coord3DMode(IntEnum):
r"""Enum of different ways to represent a box
and point cloud.
Coordinates in LiDAR:
.. code-block:: none
            up z
               ^   x front
               |  /
               | /
left y <------ 0
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
.. code-block:: none
        z front
       /
      /
     0 ------> x right
     |
     |
     v
down y
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
.. code-block:: none
up z
   ^   y front
   |  /
   | /
   0 ------> x right
The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
"""
LIDAR = 0
CAM = 1
DEPTH = 2
@staticmethod
def convert(input, src, dst, rt_mat=None):
"""Convert boxes or points from `src` mode to `dst` mode."""
if isinstance(input, BaseInstance3DBoxes):
return Coord3DMode.convert_box(input, src, dst, rt_mat=rt_mat)
elif isinstance(input, BasePoints):
return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)
else:
raise NotImplementedError
@staticmethod
def convert_box(box, src, dst, rt_mat=None):
"""Convert boxes from `src` mode to `dst` mode.
Args:
box (tuple | list | np.ndarray |
torch.Tensor | BaseInstance3DBoxes):
Can be a k-tuple, k-list or an Nxk array/tensor, where k >= 7.
src (:obj:`Coord3DMode`): The source box mode.
dst (:obj:`Coord3DMode`): The target box mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes): \
The converted box of the same type.
"""
if src == dst:
return box
is_numpy = isinstance(box, np.ndarray)
is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) >= 7, (
'Coord3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 7')
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
elif is_Instance3DBoxes:
arr = box.tensor.clone()
else:
arr = box.clone()
# convert box from `src` mode to `dst` mode.
x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
else:
raise NotImplementedError(
f'Conversion from Coord3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 6:]
arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)
# convert arr to the original type
original_type = type(box)
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_Instance3DBoxes:
if dst == Coord3DMode.CAM:
target_type = CameraInstance3DBoxes
elif dst == Coord3DMode.LIDAR:
target_type = LiDARInstance3DBoxes
elif dst == Coord3DMode.DEPTH:
target_type = DepthInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(
arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)
else:
return arr
@staticmethod
def convert_point(point, src, dst, rt_mat=None):
"""Convert points from `src` mode to `dst` mode.
Args:
point (tuple | list | np.ndarray |
torch.Tensor | BasePoints):
Can be a k-tuple, k-list or an Nxk array/tensor, where k >= 3.
src (:obj:`Coord3DMode`): The source point mode.
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor | BasePoints): \
The converted point of the same type.
"""
if src == dst:
return point
is_numpy = isinstance(point, np.ndarray)
is_InstancePoints = isinstance(point, BasePoints)
single_point = isinstance(point, (list, tuple))
if single_point:
assert len(point) >= 3, (
'Coord3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 3')
arr = torch.tensor(point)[None, :]
else:
# avoid modifying the input point
if is_numpy:
arr = torch.from_numpy(np.asarray(point)).clone()
elif is_InstancePoints:
arr = point.tensor.clone()
else:
arr = point.clone()
# convert point from `src` mode to `dst` mode.
if rt_mat is not None:
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
else:
raise NotImplementedError(
f'Conversion from Coord3DMode {src} to {dst} '
'is not supported yet')
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 3:]
arr = torch.cat([xyz[:, :3], remains], dim=-1)
# convert arr to the original type
original_type = type(point)
if single_point:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_InstancePoints:
if dst == Coord3DMode.CAM:
target_type = CameraPoints
elif dst == Coord3DMode.LIDAR:
target_type = LiDARPoints
elif dst == Coord3DMode.DEPTH:
target_type = DepthPoints
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(
arr,
points_dim=arr.size(-1),
attribute_dims=point.attribute_dims)
else:
return arr
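A short sketch of how `Coord3DMode.convert` and `convert_point` above can be called; the box and point values are illustrative, and the default axis-swap matrices are used because no `rt_mat` is passed:

import torch
from mmdet3d.core.bbox import Coord3DMode, LiDARInstance3DBoxes

# One LiDAR box as (x, y, z, x_size, y_size, z_size, yaw); values are made up.
lidar_boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 2.0, -1.5, 1.6, 3.9, 1.5, 0.3]]))
cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.CAM)
# Plain tensors go through convert_point directly; only the xyz columns are
# rotated, any remaining columns are appended unchanged.
xyz_cam = Coord3DMode.convert_point(
    torch.tensor([[1.0, 2.0, -1.5]]), Coord3DMode.LIDAR, Coord3DMode.CAM)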
from .base_points import BasePoints
from .cam_points import CameraPoints
from .depth_points import DepthPoints
from .lidar_points import LiDARPoints
__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']
import torch
from abc import abstractmethod
class BasePoints(object):
"""Base class for Points.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, points_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == \
points_dim, tensor.size()
self.tensor = tensor
self.points_dim = points_dim
self.attribute_dims = attribute_dims
@property
def coord(self):
"""torch.Tensor: Coordinates of each point with size (N, 3)."""
return self.tensor[:, :3]
@property
def height(self):
"""torch.Tensor: A vector with height of each point."""
if self.attribute_dims is not None and \
'height' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['height']]
else:
return None
@property
def color(self):
"""torch.Tensor: A vector with color of each point."""
if self.attribute_dims is not None and \
'color' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['color']]
else:
return None
def shuffle(self):
"""Shuffle the points."""
self.tensor = self.tensor[torch.randperm(
self.__len__(), device=self.tensor.device)]
def rotate(self, rotation, axis=2):
"""Rotate points with the given rotation matrix or angle.
Args:
rotation (float, np.ndarray, torch.Tensor): Rotation matrix
or angle.
axis (int): Axis to rotate at. Defaults to 2.
"""
if not isinstance(rotation, torch.Tensor):
rotation = self.tensor.new_tensor(rotation)
assert rotation.shape == torch.Size([3, 3]) or \
rotation.numel() == 1
if rotation.numel() == 1:
rot_sin = torch.sin(rotation)
rot_cos = torch.cos(rotation)
if axis == 1:
rot_mat_T = rotation.new_tensor([[rot_cos, 0, -rot_sin],
[0, 1, 0],
[rot_sin, 0, rot_cos]])
elif axis == 2 or axis == -1:
rot_mat_T = rotation.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0],
[0, 0, 1]])
elif axis == 0:
rot_mat_T = rotation.new_tensor([[0, rot_cos, -rot_sin],
[0, rot_sin, rot_cos],
[1, 0, 0]])
else:
raise ValueError('axis should be in range [-1, 0, 1, 2]')
rot_mat_T = rot_mat_T.T
elif rotation.numel() == 9:
rot_mat_T = rotation
else:
raise NotImplementedError
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
@abstractmethod
def flip(self, bev_direction='horizontal'):
"""Flip the points in BEV along given BEV direction."""
pass
def translate(self, trans_vector):
"""Translate points with the given translation vector.
Args:
trans_vector (np.ndarray, torch.Tensor): Translation
vector of size 3 or nx3.
"""
if not isinstance(trans_vector, torch.Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
trans_vector = trans_vector.squeeze(0)
if trans_vector.dim() == 1:
assert trans_vector.shape[0] == 3
elif trans_vector.dim() == 2:
assert trans_vector.shape[0] == self.tensor.shape[0] and \
trans_vector.shape[1] == 3
else:
raise NotImplementedError(
'Unsupported translation vector of shape {}'.format(
trans_vector.shape))
self.tensor[:, :3] += trans_vector
def in_range_3d(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
(x_min, y_min, z_min, x_max, y_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box is in the given range requires checking whether its points lie
inside a convex polygon; here we only compare coordinates against
the range to reduce the burden for simpler cases.
Returns:
torch.Tensor: A binary vector indicating whether each point is \
inside the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 2] > point_range[2])
& (self.tensor[:, 0] < point_range[3])
& (self.tensor[:, 1] < point_range[4])
& (self.tensor[:, 2] < point_range[5]))
return in_range_flags
@abstractmethod
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
pass
@abstractmethod
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
pass
def scale(self, scale_factor):
"""Scale the points with horizontal and vertical scaling factors.
Args:
scale_factors (float): Scale factors to scale the points.
"""
self.tensor[:, :3] *= scale_factor
def __getitem__(self, item):
"""
Note:
The following usages are allowed:
1. `new_points = points[3]`:
return a `Points` that contains only one point.
2. `new_points = points[2:10]`:
return a slice of points.
3. `new_points = points[vector]`:
where vector is a torch.BoolTensor with `length = len(points)`.
Nonzero elements in the vector will be selected.
Note that the returned Points might share storage with this Points,
subject to PyTorch's indexing semantics.
Returns:
:obj:`BasePoints`: A new object of \
:class:`BasePoints` after indexing.
"""
original_type = type(self)
if isinstance(item, int):
return original_type(
self.tensor[item].view(1, -1),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
p = self.tensor[item]
assert p.dim() == 2, \
f'Indexing on Points with {item} failed to return a matrix!'
return original_type(
p, points_dim=self.points_dim, attribute_dims=self.attribute_dims)
def __len__(self):
"""int: Number of points in the current object."""
return self.tensor.shape[0]
def __repr__(self):
"""str: Return a strings that describes the object."""
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, points_list):
"""Concatenate a list of Points into a single Points.
Args:
points_list (list[:obj:`BasePoints`]): List of points.
Returns:
:obj:`BasePoints`: The concatenated points.
"""
assert isinstance(points_list, (list, tuple))
if len(points_list) == 0:
return cls(torch.empty(0))
assert all(isinstance(points, cls) for points in points_list)
# use torch.cat (v.s. layers.cat)
# so the returned points never share storage with input
cat_points = cls(
torch.cat([p.tensor for p in points_list], dim=0),
points_dim=points_list[0].tensor.shape[1],
attribute_dims=points_list[0].attribute_dims)
return cat_points
def to(self, device):
"""Convert current points to a specific device.
Args:
device (str | :obj:`torch.device`): The name of the device.
Returns:
:obj:`BasePoints`: A new points object on the \
specific device.
"""
original_type = type(self)
return original_type(
self.tensor.to(device),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
def clone(self):
"""Clone the Points.
Returns:
:obj:`BasePoints`: Point object with the same properties \
as self.
"""
original_type = type(self)
return original_type(
self.tensor.clone(),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
@property
def device(self):
"""str: The device of the points are on."""
return self.tensor.device
def __iter__(self):
"""Yield a point as a Tensor of shape (4,) at a time.
Returns:
torch.Tensor: A point of shape (4,).
"""
yield from self.tensor
def new_point(self, data):
"""Create a new point object with data.
The new point and its tensor have similar properties \
as self and self.tensor, respectively.
Args:
data (torch.Tensor | numpy.array | list): Data to be copied.
Returns:
:obj:`BasePoints`: A new point object with ``data``, \
the object's other properties are similar to ``self``.
"""
new_tensor = self.tensor.new_tensor(data) \
if not isinstance(data, torch.Tensor) else data.to(self.device)
original_type = type(self)
return original_type(
new_tensor,
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
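The BasePoints interface above is easiest to see through one of its concrete subclasses; a small sketch (values are arbitrary) exercising rotation, translation, indexing and concatenation:

import torch
from mmdet3d.core.points import DepthPoints

pts = DepthPoints(
    torch.rand(8, 7), points_dim=7,
    attribute_dims=dict(color=[3, 4, 5], height=6))
pts.rotate(0.1)                        # rotate xyz around the z axis by 0.1 rad
pts.translate([1.0, 0.0, 0.0])         # shift every point by +1 along x
subset = pts[pts.coord[:, 0] > 1.5]    # boolean indexing returns a new DepthPoints
merged = DepthPoints.cat([pts, subset])
print(len(merged), merged.height.shape, merged.color.shape)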
from .base_points import BasePoints
class CameraPoints(BasePoints):
"""Points of instances in CAM coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(CameraPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 0] = -self.tensor[:, 0]
elif bev_direction == 'vertical':
self.tensor[:, 2] = -self.tensor[:, 2]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 2] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 2] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
from .base_points import BasePoints
class DepthPoints(BasePoints):
"""Points of instances in DEPTH coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(DepthPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 0] = -self.tensor[:, 0]
elif bev_direction == 'vertical':
self.tensor[:, 1] = -self.tensor[:, 1]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 1] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
from .base_points import BasePoints
class LiDARPoints(BasePoints):
"""Points of instances in LIDAR coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(LiDARPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 1] = -self.tensor[:, 1]
elif bev_direction == 'vertical':
self.tensor[:, 0] = -self.tensor[:, 0]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 1] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
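When a calibration between sensors is known, an explicit `rt_mat` can be passed to `convert_to` instead of relying on the default axis swaps. The sketch below uses a made-up 3 x 4 [R|t] extrinsic (replace it with real calibration data); with a 4-column matrix, `convert_point` appends a homogeneous 1 to each point before the multiplication:

import torch
from mmdet3d.core.bbox import Coord3DMode
from mmdet3d.core.points import LiDARPoints

pts = LiDARPoints(torch.rand(5, 3))
# Hypothetical LiDAR-to-camera extrinsic [R|t]; values are illustrative only.
rt_mat = torch.tensor([[0., -1., 0., 0.2],
                       [0., 0., -1., -0.1],
                       [1., 0., 0., 0.5]])
cam_pts = pts.convert_to(Coord3DMode.CAM, rt_mat=rt_mat)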
import numpy as np
import torch
from mmdet3d.core.bbox import (CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints
def test_points_conversion():
"""Test the conversion of points between different modes."""
points_np = np.array([[
-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,
0.4974, 0.9409
],
[
-2.66751588e+01, 5.59499564e+00, -9.14345860e-01,
0.1502, 0.3707, 0.1086, 0.6297
],
[
-5.80979675e+00, 3.54092357e+01, 2.00889888e-01,
0.6565, 0.6248, 0.6954, 0.2538
],
[
-3.13086877e+01, 1.09007628e+00, -1.94612112e-01,
0.2803, 0.0258, 0.4896, 0.3269
]],
dtype=np.float32)
# test CAM to LIDAR and DEPTH
cam_points = CameraPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_lidar_points = cam_points.convert_to(Coord3DMode.LIDAR)
expected_tensor = torch.tensor([[
2.9757e-01, 5.2422e+00, -4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-9.1435e-01, 2.6675e+01, -5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
2.0089e-01, 5.8098e+00, -3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.9461e-01, 3.1309e+01, -1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
lidar_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
Coord3DMode.CAM,
Coord3DMode.LIDAR)
assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)
assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
1e-4)
convert_depth_points = cam_points.convert_to(Coord3DMode.DEPTH)
expected_tensor = torch.tensor([[
-5.2422e+00, -2.9757e-01, 4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-2.6675e+01, 9.1435e-01, 5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-5.8098e+00, -2.0089e-01, 3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-3.1309e+01, 1.9461e-01, 1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
depth_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
Coord3DMode.CAM,
Coord3DMode.DEPTH)
assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
1e-4)
# test LIDAR to CAM and DEPTH
lidar_points = LiDARPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_cam_points = lidar_points.convert_to(Coord3DMode.CAM)
expected_tensor = torch.tensor([[
-4.0021e+01, -2.9757e-01, -5.2422e+00, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-5.5950e+00, 9.1435e-01, -2.6675e+01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-3.5409e+01, -2.0089e-01, -5.8098e+00,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.0901e+00, 1.9461e-01, -3.1309e+01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
cam_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
Coord3DMode.LIDAR,
Coord3DMode.CAM)
assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)
convert_depth_points = lidar_points.convert_to(Coord3DMode.DEPTH)
expected_tensor = torch.tensor([[
-4.0021e+01, -5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-5.5950e+00, -2.6675e+01, -9.1435e-01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-3.5409e+01, -5.8098e+00, 2.0089e-01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.0901e+00, -3.1309e+01, -1.9461e-01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
depth_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
1e-4)
# test DEPTH to CAM and LIDAR
depth_points = DepthPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_cam_points = depth_points.convert_to(Coord3DMode.CAM)
expected_tensor = torch.tensor([[
-5.2422e+00, 2.9757e-01, -4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-2.6675e+01, -9.1435e-01, -5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-5.8098e+00, 2.0089e-01, -3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-3.1309e+01, -1.9461e-01, -1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
cam_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
Coord3DMode.DEPTH,
Coord3DMode.CAM)
assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)
convert_lidar_points = depth_points.convert_to(Coord3DMode.LIDAR)
expected_tensor = torch.tensor([[
4.0021e+01, 5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01, 4.9740e-01,
9.4090e-01
],
[
5.5950e+00, 2.6675e+01, -9.1435e-01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
3.5409e+01, 5.8098e+00, 2.0089e-01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
1.0901e+00, 3.1309e+01, -1.9461e-01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
lidar_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
Coord3DMode.DEPTH,
Coord3DMode.LIDAR)
assert torch.allclose(expected_tensor, convert_lidar_points.tensor,
1e-4)
assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
1e-4)
def test_boxes_conversion():
"""Test the conversion of boxes between different modes."""
# test CAM to LIDAR and DEPTH
cam_boxes = CameraInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
Coord3DMode.LIDAR)
expected_tensor = torch.tensor(
[[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
[-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
[-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
[-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
[-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)
convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
Coord3DMode.DEPTH)
expected_tensor = torch.tensor(
[[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
[8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
[28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
[26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
[31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)
# test LIDAR to CAM and DEPTH
lidar_boxes = LiDARInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
Coord3DMode.CAM)
expected_tensor = torch.tensor(
[[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
[-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
[0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
[-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
[-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)
convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
expected_tensor = torch.tensor(
[[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
[-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
[0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
[-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
[-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)
# test DEPTH to CAM and LIDAR
depth_boxes = DepthInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
Coord3DMode.CAM)
expected_tensor = torch.tensor(
[[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
[8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
[28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
[26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
[31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)
convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
Coord3DMode.LIDAR)
expected_tensor = torch.tensor(
[[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
[2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
[-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
[21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
[8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)