Unverified commit 20b163a4 authored by encore-zhou, committed by GitHub

[Feature]: Add points structure (#196)

* add h3d backbone

* add h3d backbone

* add h3dnet

* modify scannet config

* fix bugs for proposal refine

* fix bugs for test backbone

* add primitive head test

* modify h3dhead

* modify h3d head

* update loss weight config

* fix bugs for h3d head loss

* modify h3d head get targets function

* update h3dnet base config

* modify weighted loss

* Revert "Merge branch 'h3d_u2' into 'master'"

This reverts merge request !5

* add points structure

* modify points rotation and add coord_3d_mode unittest

* modify docstring
parent 37ce1871
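For orientation, the following is a minimal usage sketch of what this commit adds (it is editorial, not part of the diff); the tensor values are made up, and it assumes mmdet3d is installed at this revision:

import torch
from mmdet3d.core.bbox import Coord3DMode
from mmdet3d.core.points import LiDARPoints

# 10 random points: x, y, z plus one extra attribute column (height).
points = LiDARPoints(
    torch.rand(10, 4), points_dim=4, attribute_dims=dict(height=3))
# convert_to returns a CameraPoints object with the same extra attributes.
cam_points = points.convert_to(Coord3DMode.CAM)
print(type(cam_points).__name__, cam_points.coord.shape)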
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .points import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .visualizer import * # noqa: F401, F403
@@ -7,9 +7,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
-                         DepthInstance3DBoxes, LiDARInstance3DBoxes,
-                         get_box_type, limit_period, points_cam2img,
-                         xywhr2xyxyr)
+                         Coord3DMode, DepthInstance3DBoxes,
+                         LiDARInstance3DBoxes, get_box_type, limit_period,
+                         points_cam2img, xywhr2xyxyr)
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
__all__ = [
@@ -21,5 +21,5 @@ __all__ = [
'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
-    'get_box_type'
+    'get_box_type', 'Coord3DMode'
]
from .base_box3d import BaseInstance3DBoxes
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, limit_period, points_cam2img,
@@ -9,5 +10,6 @@ from .utils import (get_box_type, limit_period, points_cam2img,
__all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
-    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img'
+    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
+    'Coord3DMode'
]
import numpy as np
import torch
from enum import IntEnum, unique
from mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints,
LiDARPoints)
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
@unique
class Coord3DMode(IntEnum):
r"""Enum of different ways to represent a box
and point cloud.
Coordinates in LiDAR:
.. code-block:: none
            up z
               ^   x front
               |  /
               | /
left y <------ 0
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
.. code-block:: none
        z front
       /
      /
     0 ------> x right
     |
     |
     v
down y
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
.. code-block:: none
up z
   ^   y front
   |  /
   | /
   0 ------> x right
The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
"""
LIDAR = 0
CAM = 1
DEPTH = 2
@staticmethod
def convert(input, src, dst, rt_mat=None):
"""Convert boxes or points from `src` mode to `dst` mode."""
if isinstance(input, BaseInstance3DBoxes):
return Coord3DMode.convert_box(input, src, dst, rt_mat=rt_mat)
elif isinstance(input, BasePoints):
return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)
else:
raise NotImplementedError
@staticmethod
def convert_box(box, src, dst, rt_mat=None):
"""Convert boxes from `src` mode to `dst` mode.
Args:
box (tuple | list | np.ndarray |
torch.Tensor | BaseInstance3DBoxes):
Can be a k-tuple, k-list or an Nxk array/tensor, where k >= 7.
src (:obj:`Coord3DMode`): The source box mode.
dst (:obj:`Coord3DMode`): The target box mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor | BaseInstance3DBoxes): \
The converted box of the same type.
"""
if src == dst:
return box
is_numpy = isinstance(box, np.ndarray)
is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) >= 7, (
'Coord3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 7')
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
elif is_Instance3DBoxes:
arr = box.tensor.clone()
else:
arr = box.clone()
# convert box from `src` mode to `dst` mode.
x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([y_size, x_size, z_size], dim=-1)
else:
raise NotImplementedError(
f'Conversion from Coord3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 6:]
arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)
# convert arr to the original type
original_type = type(box)
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_Instance3DBoxes:
if dst == Coord3DMode.CAM:
target_type = CameraInstance3DBoxes
elif dst == Coord3DMode.LIDAR:
target_type = LiDARInstance3DBoxes
elif dst == Coord3DMode.DEPTH:
target_type = DepthInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(
arr, box_dim=arr.size(-1), with_yaw=box.with_yaw)
else:
return arr
@staticmethod
def convert_point(point, src, dst, rt_mat=None):
"""Convert points from `src` mode to `dst` mode.
Args:
point (tuple | list | np.ndarray |
torch.Tensor | BasePoints):
Can be a k-tuple, k-list or an Nxk array/tensor, where k >= 3.
src (:obj:`Coord3DMode`): The source point mode.
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor | BasePoints): \
The converted point of the same type.
"""
if src == dst:
return point
is_numpy = isinstance(point, np.ndarray)
is_InstancePoints = isinstance(point, BasePoints)
single_point = isinstance(point, (list, tuple))
if single_point:
assert len(point) >= 3, (
'Coord3DMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 3')
arr = torch.tensor(point)[None, :]
else:
# avoid modifying the input point
if is_numpy:
arr = torch.from_numpy(np.asarray(point)).clone()
elif is_InstancePoints:
arr = point.tensor.clone()
else:
arr = point.clone()
# convert point from `src` mode to `dst` mode.
if rt_mat is not None:
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
else:
raise NotImplementedError(
f'Conversion from Coord3DMode {src} to {dst} '
'is not supported yet')
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 3:]
arr = torch.cat([xyz[:, :3], remains], dim=-1)
# convert arr to the original type
original_type = type(point)
if single_point:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_InstancePoints:
if dst == Coord3DMode.CAM:
target_type = CameraPoints
elif dst == Coord3DMode.LIDAR:
target_type = LiDARPoints
elif dst == Coord3DMode.DEPTH:
target_type = DepthPoints
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(
arr,
points_dim=arr.size(-1),
attribute_dims=point.attribute_dims)
else:
return arr
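A short sketch of how `Coord3DMode.convert` and `convert_point` above can be called; the box and point values are illustrative, and the default axis-swap matrices are used because no `rt_mat` is passed:

import torch
from mmdet3d.core.bbox import Coord3DMode, LiDARInstance3DBoxes

# One LiDAR box as (x, y, z, x_size, y_size, z_size, yaw); values are made up.
lidar_boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 2.0, -1.5, 1.6, 3.9, 1.5, 0.3]]))
cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR, Coord3DMode.CAM)
# Plain tensors go through convert_point directly; only the xyz columns are
# rotated, any remaining columns are appended unchanged.
xyz_cam = Coord3DMode.convert_point(
    torch.tensor([[1.0, 2.0, -1.5]]), Coord3DMode.LIDAR, Coord3DMode.CAM)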
from .base_points import BasePoints
from .cam_points import CameraPoints
from .depth_points import DepthPoints
from .lidar_points import LiDARPoints
__all__ = ['BasePoints', 'CameraPoints', 'DepthPoints', 'LiDARPoints']
import torch
from abc import abstractmethod
class BasePoints(object):
"""Base class for Points.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, points_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == \
points_dim, tensor.size()
self.tensor = tensor
self.points_dim = points_dim
self.attribute_dims = attribute_dims
@property
def coord(self):
"""torch.Tensor: Coordinates of each point with size (N, 3)."""
return self.tensor[:, :3]
@property
def height(self):
"""torch.Tensor: A vector with height of each point."""
if self.attribute_dims is not None and \
'height' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['height']]
else:
return None
@property
def color(self):
"""torch.Tensor: A vector with color of each point."""
if self.attribute_dims is not None and \
'color' in self.attribute_dims.keys():
return self.tensor[:, self.attribute_dims['color']]
else:
return None
def shuffle(self):
"""Shuffle the points."""
self.tensor = self.tensor[torch.randperm(
self.__len__(), device=self.tensor.device)]
def rotate(self, rotation, axis=2):
"""Rotate points with the given rotation matrix or angle.
Args:
rotation (float, np.ndarray, torch.Tensor): Rotation matrix
or angle.
axis (int): Axis to rotate at. Defaults to 2.
"""
if not isinstance(rotation, torch.Tensor):
rotation = self.tensor.new_tensor(rotation)
assert rotation.shape == torch.Size([3, 3]) or \
rotation.numel() == 1
if rotation.numel() == 1:
rot_sin = torch.sin(rotation)
rot_cos = torch.cos(rotation)
if axis == 1:
rot_mat_T = rotation.new_tensor([[rot_cos, 0, -rot_sin],
[0, 1, 0],
[rot_sin, 0, rot_cos]])
elif axis == 2 or axis == -1:
rot_mat_T = rotation.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0],
[0, 0, 1]])
elif axis == 0:
rot_mat_T = rotation.new_tensor([[0, rot_cos, -rot_sin],
[0, rot_sin, rot_cos],
[1, 0, 0]])
else:
raise ValueError('axis should be in range [-1, 0, 1, 2]')
rot_mat_T = rot_mat_T.T
elif rotation.numel() == 9:
rot_mat_T = rotation
else:
raise NotImplementedError
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
@abstractmethod
def flip(self, bev_direction='horizontal'):
"""Flip the points in BEV along given BEV direction."""
pass
def translate(self, trans_vector):
"""Translate points with the given translation vector.
Args:
trans_vector (np.ndarray, torch.Tensor): Translation
vector of size 3 or nx3.
"""
if not isinstance(trans_vector, torch.Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
trans_vector = trans_vector.squeeze(0)
if trans_vector.dim() == 1:
assert trans_vector.shape[0] == 3
elif trans_vector.dim() == 2:
assert trans_vector.shape[0] == self.tensor.shape[0] and \
trans_vector.shape[1] == 3
else:
raise NotImplementedError(
'Unsupported translation vector of shape {}'.format(
trans_vector.shape))
self.tensor[:, :3] += trans_vector
def in_range_3d(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
(x_min, y_min, z_min, x_max, y_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box is in the given range requires checking whether its points lie
inside a convex polygon; here we only compare coordinates against
the range to reduce the burden for simpler cases.
Returns:
torch.Tensor: A binary vector indicating whether each point is \
inside the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 2] > point_range[2])
& (self.tensor[:, 0] < point_range[3])
& (self.tensor[:, 1] < point_range[4])
& (self.tensor[:, 2] < point_range[5]))
return in_range_flags
@abstractmethod
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
pass
@abstractmethod
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
pass
def scale(self, scale_factor):
"""Scale the points with horizontal and vertical scaling factors.
Args:
scale_factors (float): Scale factors to scale the points.
"""
self.tensor[:, :3] *= scale_factor
def __getitem__(self, item):
"""
Note:
The following usages are allowed:
1. `new_points = points[3]`:
return a `Points` that contains only one point.
2. `new_points = points[2:10]`:
return a slice of points.
3. `new_points = points[vector]`:
where vector is a torch.BoolTensor with `length = len(points)`.
Nonzero elements in the vector will be selected.
Note that the returned Points might share storage with this Points,
subject to PyTorch's indexing semantics.
Returns:
:obj:`BasePoints`: A new object of \
:class:`BasePoints` after indexing.
"""
original_type = type(self)
if isinstance(item, int):
return original_type(
self.tensor[item].view(1, -1),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
p = self.tensor[item]
assert p.dim() == 2, \
f'Indexing on Points with {item} failed to return a matrix!'
return original_type(
p, points_dim=self.points_dim, attribute_dims=self.attribute_dims)
def __len__(self):
"""int: Number of points in the current object."""
return self.tensor.shape[0]
def __repr__(self):
"""str: Return a strings that describes the object."""
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, points_list):
"""Concatenate a list of Points into a single Points.
Args:
points_list (list[:obj:`BasePoints`]): List of points.
Returns:
:obj:`BasePoints`: The concatenated points.
"""
assert isinstance(points_list, (list, tuple))
if len(points_list) == 0:
return cls(torch.empty(0))
assert all(isinstance(points, cls) for points in points_list)
# use torch.cat (v.s. layers.cat)
# so the returned points never share storage with input
cat_points = cls(
torch.cat([p.tensor for p in points_list], dim=0),
points_dim=points_list[0].tensor.shape[1],
attribute_dims=points_list[0].attribute_dims)
return cat_points
def to(self, device):
"""Convert current points to a specific device.
Args:
device (str | :obj:`torch.device`): The name of the device.
Returns:
:obj:`BasePoints`: A new points object on the \
specific device.
"""
original_type = type(self)
return original_type(
self.tensor.to(device),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
def clone(self):
"""Clone the Points.
Returns:
:obj:`BasePoints`: Point object with the same properties \
as self.
"""
original_type = type(self)
return original_type(
self.tensor.clone(),
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
@property
def device(self):
"""str: The device of the points are on."""
return self.tensor.device
def __iter__(self):
"""Yield a point as a Tensor of shape (4,) at a time.
Returns:
torch.Tensor: A point of shape (4,).
"""
yield from self.tensor
def new_point(self, data):
"""Create a new point object with data.
The new point and its tensor have similar properties \
as self and self.tensor, respectively.
Args:
data (torch.Tensor | numpy.array | list): Data to be copied.
Returns:
:obj:`BasePoints`: A new point object with ``data``, \
the object's other properties are similar to ``self``.
"""
new_tensor = self.tensor.new_tensor(data) \
if not isinstance(data, torch.Tensor) else data.to(self.device)
original_type = type(self)
return original_type(
new_tensor,
points_dim=self.points_dim,
attribute_dims=self.attribute_dims)
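The BasePoints interface above is easiest to see through one of its concrete subclasses; a small sketch (values are arbitrary) exercising rotation, translation, indexing and concatenation:

import torch
from mmdet3d.core.points import DepthPoints

pts = DepthPoints(
    torch.rand(8, 7), points_dim=7,
    attribute_dims=dict(color=[3, 4, 5], height=6))
pts.rotate(0.1)                        # rotate xyz around the z axis by 0.1 rad
pts.translate([1.0, 0.0, 0.0])         # shift every point by +1 along x
subset = pts[pts.coord[:, 0] > 1.5]    # boolean indexing returns a new DepthPoints
merged = DepthPoints.cat([pts, subset])
print(len(merged), merged.height.shape, merged.color.shape)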
from .base_points import BasePoints
class CameraPoints(BasePoints):
"""Points of instances in CAM coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(CameraPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 0] = -self.tensor[:, 0]
elif bev_direction == 'vertical':
self.tensor[:, 2] = -self.tensor[:, 2]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 2] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 2] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.CAM, dst=dst, rt_mat=rt_mat)
from .base_points import BasePoints
class DepthPoints(BasePoints):
"""Points of instances in DEPTH coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(DepthPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 0] = -self.tensor[:, 0]
elif bev_direction == 'vertical':
self.tensor[:, 1] = -self.tensor[:, 1]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 1] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
from .base_points import BasePoints
class LiDARPoints(BasePoints):
"""Points of instances in LIDAR coordinates.
Args:
tensor (torch.Tensor | np.ndarray | list): An N x points_dim matrix.
points_dim (int): Number of the dimension of a point.
Each row is (x, y, z). Defaults to 3.
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
Attributes:
tensor (torch.Tensor): Float matrix of N x points_dim.
points_dim (int): Integer indicating the dimension of a point.
Each row is (x, y, z, ...).
attribute_dims (dict): Dictionary to indicate the meaning of extra
dimension. Defaults to None.
"""
def __init__(self, tensor, points_dim=3, attribute_dims=None):
super(LiDARPoints, self).__init__(
tensor, points_dim=points_dim, attribute_dims=attribute_dims)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction."""
if bev_direction == 'horizontal':
self.tensor[:, 1] = -self.tensor[:, 1]
elif bev_direction == 'vertical':
self.tensor[:, 0] = -self.tensor[:, 0]
def in_range_bev(self, point_range):
"""Check whether the points are in the given range.
Args:
point_range (list | torch.Tensor): The range of point
in order of (x_min, y_min, x_max, y_max).
Returns:
torch.Tensor: Indicating whether each point is inside \
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > point_range[0])
& (self.tensor[:, 1] > point_range[1])
& (self.tensor[:, 0] < point_range[2])
& (self.tensor[:, 1] < point_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to ``dst`` mode.
Args:
dst (:obj:`Coord3DMode`): The target point mode.
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
:obj:`BasePoints`: The converted point of the same type \
in the `dst` mode.
"""
from mmdet3d.core.bbox import Coord3DMode
return Coord3DMode.convert_point(
point=self, src=Coord3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
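When a calibration between sensors is known, an explicit `rt_mat` can be passed to `convert_to` instead of relying on the default axis swaps. The sketch below uses a made-up 3 x 4 [R|t] extrinsic (replace it with real calibration data); with a 4-column matrix, `convert_point` appends a homogeneous 1 to each point before the multiplication:

import torch
from mmdet3d.core.bbox import Coord3DMode
from mmdet3d.core.points import LiDARPoints

pts = LiDARPoints(torch.rand(5, 3))
# Hypothetical LiDAR-to-camera extrinsic [R|t]; values are illustrative only.
rt_mat = torch.tensor([[0., -1., 0., 0.2],
                       [0., 0., -1., -0.1],
                       [1., 0., 0., 0.5]])
cam_pts = pts.convert_to(Coord3DMode.CAM, rt_mat=rt_mat)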
import numpy as np
import torch
from mmdet3d.core.bbox import (CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.points import CameraPoints, DepthPoints, LiDARPoints
def test_points_conversion():
"""Test the conversion of points between different modes."""
points_np = np.array([[
-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 0.6666, 0.1956,
0.4974, 0.9409
],
[
-2.66751588e+01, 5.59499564e+00, -9.14345860e-01,
0.1502, 0.3707, 0.1086, 0.6297
],
[
-5.80979675e+00, 3.54092357e+01, 2.00889888e-01,
0.6565, 0.6248, 0.6954, 0.2538
],
[
-3.13086877e+01, 1.09007628e+00, -1.94612112e-01,
0.2803, 0.0258, 0.4896, 0.3269
]],
dtype=np.float32)
# test CAM to LIDAR and DEPTH
cam_points = CameraPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_lidar_points = cam_points.convert_to(Coord3DMode.LIDAR)
expected_tensor = torch.tensor([[
2.9757e-01, 5.2422e+00, -4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-9.1435e-01, 2.6675e+01, -5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
2.0089e-01, 5.8098e+00, -3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.9461e-01, 3.1309e+01, -1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
lidar_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
Coord3DMode.CAM,
Coord3DMode.LIDAR)
assert torch.allclose(expected_tensor, convert_lidar_points.tensor, 1e-4)
assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
1e-4)
convert_depth_points = cam_points.convert_to(Coord3DMode.DEPTH)
expected_tensor = torch.tensor([[
-5.2422e+00, -2.9757e-01, 4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-2.6675e+01, 9.1435e-01, 5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-5.8098e+00, -2.0089e-01, 3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-3.1309e+01, 1.9461e-01, 1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
depth_point_tensor = Coord3DMode.convert_point(cam_points.tensor,
Coord3DMode.CAM,
Coord3DMode.DEPTH)
assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
1e-4)
# test LIDAR to CAM and DEPTH
lidar_points = LiDARPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_cam_points = lidar_points.convert_to(Coord3DMode.CAM)
expected_tensor = torch.tensor([[
-4.0021e+01, -2.9757e-01, -5.2422e+00, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-5.5950e+00, 9.1435e-01, -2.6675e+01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-3.5409e+01, -2.0089e-01, -5.8098e+00,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.0901e+00, 1.9461e-01, -3.1309e+01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
cam_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
Coord3DMode.LIDAR,
Coord3DMode.CAM)
assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)
convert_depth_points = lidar_points.convert_to(Coord3DMode.DEPTH)
expected_tensor = torch.tensor([[
-4.0021e+01, -5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-5.5950e+00, -2.6675e+01, -9.1435e-01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-3.5409e+01, -5.8098e+00, 2.0089e-01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-1.0901e+00, -3.1309e+01, -1.9461e-01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
depth_point_tensor = Coord3DMode.convert_point(lidar_points.tensor,
Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
assert torch.allclose(expected_tensor, convert_depth_points.tensor, 1e-4)
assert torch.allclose(depth_point_tensor, convert_depth_points.tensor,
1e-4)
# test DEPTH to CAM and LIDAR
depth_points = DepthPoints(
points_np,
points_dim=7,
attribute_dims=dict(color=[3, 4, 5], height=6))
convert_cam_points = depth_points.convert_to(Coord3DMode.CAM)
expected_tensor = torch.tensor([[
-5.2422e+00, 2.9757e-01, -4.0021e+01, 6.6660e-01, 1.9560e-01,
4.9740e-01, 9.4090e-01
],
[
-2.6675e+01, -9.1435e-01, -5.5950e+00,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
-5.8098e+00, 2.0089e-01, -3.5409e+01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
-3.1309e+01, -1.9461e-01, -1.0901e+00,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
cam_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
Coord3DMode.DEPTH,
Coord3DMode.CAM)
assert torch.allclose(expected_tensor, convert_cam_points.tensor, 1e-4)
assert torch.allclose(cam_point_tensor, convert_cam_points.tensor, 1e-4)
convert_lidar_points = depth_points.convert_to(Coord3DMode.LIDAR)
expected_tensor = torch.tensor([[
4.0021e+01, 5.2422e+00, 2.9757e-01, 6.6660e-01, 1.9560e-01, 4.9740e-01,
9.4090e-01
],
[
5.5950e+00, 2.6675e+01, -9.1435e-01,
1.5020e-01, 3.7070e-01, 1.0860e-01,
6.2970e-01
],
[
3.5409e+01, 5.8098e+00, 2.0089e-01,
6.5650e-01, 6.2480e-01, 6.9540e-01,
2.5380e-01
],
[
1.0901e+00, 3.1309e+01, -1.9461e-01,
2.8030e-01, 2.5800e-02, 4.8960e-01,
3.2690e-01
]])
lidar_point_tensor = Coord3DMode.convert_point(depth_points.tensor,
Coord3DMode.DEPTH,
Coord3DMode.LIDAR)
assert torch.allclose(expected_tensor, convert_lidar_points.tensor,
1e-4)
assert torch.allclose(lidar_point_tensor, convert_lidar_points.tensor,
1e-4)
def test_boxes_conversion():
"""Test the conversion of boxes between different modes."""
# test CAM to LIDAR and DEPTH
cam_boxes = CameraInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_lidar_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
Coord3DMode.LIDAR)
expected_tensor = torch.tensor(
[[-1.7501, -1.7802, -2.5162, 1.6500, 1.7500, 3.3900, 1.4800],
[-1.6357, -8.9594, -2.4567, 1.5700, 1.5400, 4.0100, 1.6200],
[-1.3033, -28.2967, 0.5558, 1.4800, 1.4700, 2.2300, -1.5700],
[-1.7361, -26.6690, -21.8230, 1.4000, 1.5600, 3.4800, -1.6900],
[-1.6218, -31.3198, -8.1621, 1.4800, 1.7400, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)
convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
Coord3DMode.DEPTH)
expected_tensor = torch.tensor(
[[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
[8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
[28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
[26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
[31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)
# test LIDAR to CAM and DEPTH
lidar_boxes = LiDARInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_cam_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
Coord3DMode.CAM)
expected_tensor = torch.tensor(
[[-2.5162, 1.7501, 1.7802, 3.3900, 1.6500, 1.7500, 1.4800],
[-2.4567, 1.6357, 8.9594, 4.0100, 1.5700, 1.5400, 1.6200],
[0.5558, 1.3033, 28.2967, 2.2300, 1.4800, 1.4700, -1.5700],
[-21.8230, 1.7361, 26.6690, 3.4800, 1.4000, 1.5600, -1.6900],
[-8.1621, 1.6218, 31.3198, 3.7700, 1.4800, 1.7400, 2.7900]])
assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)
convert_depth_boxes = Coord3DMode.convert(lidar_boxes, Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
expected_tensor = torch.tensor(
[[-2.5162, 1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
[-2.4567, 8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
[0.5558, 28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
[-21.8230, 26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
[-8.1621, 31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)
# test DEPTH to CAM and LIDAR
depth_boxes = DepthInstance3DBoxes(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
[8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
[28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
[26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
Coord3DMode.CAM)
expected_tensor = torch.tensor(
[[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, 1.4800],
[8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, 1.6200],
[28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, -1.5700],
[26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, -1.6900],
[31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, 2.7900]])
assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)
convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
Coord3DMode.LIDAR)
expected_tensor = torch.tensor(
[[2.5162, -1.7802, -1.7501, 3.3900, 1.7500, 1.6500, 1.4800],
[2.4567, -8.9594, -1.6357, 4.0100, 1.5400, 1.5700, 1.6200],
[-0.5558, -28.2967, -1.3033, 2.2300, 1.4700, 1.4800, -1.5700],
[21.8230, -26.6690, -1.7361, 3.4800, 1.5600, 1.4000, -1.6900],
[8.1621, -31.3198, -1.6218, 3.7700, 1.7400, 1.4800, 2.7900]])
assert torch.allclose(expected_tensor, convert_lidar_boxes.tensor, 1e-3)