Commit f2b01720 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 08c8adb6 47850641
......@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
Coordinates in camera:
.. code-block:: none
z front
z front (yaw=0.5*pi)
/
/
0 ------> x right
0 ------> x right (yaw=0)
|
|
v
......@@ -22,11 +22,15 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of z.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
boxes.
"""
@property
......@@ -75,7 +79,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate the coordinates of corners of all the boxes.
Convert the boxes to in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none
......@@ -85,7 +89,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
(x0, y0, z1) + ----------- + (x1, y0, z1)
/| / |
/ | / |
(x0, y0, z0) + ----------- + + (x1, y1, z0)
(x0, y0, z0) + ----------- + + (x1, y1, z1)
| / . | /
| / origin | /
(x0, y1, z0) + ----------- + -------> x right
......@@ -123,7 +127,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return self.tensor[:, [0, 2, 3, 5, 6]]
@property
def nearset_bev(self):
def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
......@@ -150,11 +154,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
angle (float | torch.Tensor): rotation angle
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
......@@ -166,13 +166,23 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
In CAM coordinates, it flips the x axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 2::7] = -self.tensor[:, 2::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
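A minimal usage sketch of the two flip directions above (illustrative values; the yaw updates follow the two branches shown):

import torch
from mmdet3d.core.bbox import CameraInstance3DBoxes

boxes = CameraInstance3DBoxes(
    torch.tensor([[1.0, 1.5, 10.0, 1.6, 1.5, 4.0, 0.3]]))
boxes.flip('horizontal')  # negates x; yaw becomes pi - 0.3
boxes.flip('vertical')    # negates z; yaw is negated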
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
......@@ -188,7 +198,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
TODO: check whether this will affect the performance
Returns:
a binary vector, indicating whether each box is inside
torch.Tensor: Indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
......@@ -230,3 +240,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
return overlaps_h
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
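A hedged usage sketch of convert_to, assuming Box3DMode.convert supplies a default CAM->LIDAR transform when rt_mat is None (the new unit test at the end of this diff calls it the same way):

import torch
from mmdet3d.core.bbox import Box3DMode, CameraInstance3DBoxes

cam_boxes = CameraInstance3DBoxes(
    torch.tensor([[1.0, 1.5, 10.0, 1.6, 1.5, 4.0, 0.3]]))
lidar_boxes = cam_boxes.convert_to(Box3DMode.LIDAR)  # rt_mat=None -> default mapping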
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class DepthInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in Depth coordinates
Coordinates in Depth:
.. code-block:: none
up z y front (yaw=0.5*pi)
^ ^
| /
| /
0 ------> x right (yaw=0)
The relative coordinate of bottom center in a Depth box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of y.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
boxes.
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, :2] = bottom_center[:, :2]
gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
return gravity_center
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
Convert the boxes to corners in clockwise order, in form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none
up z
front y ^
/ |
/ |
(x0, y1, z1) + ----------- + (x1, y1, z1)
/| / |
/ | / |
(x0, y0, z1) + ----------- + + (x1, y1, z0)
| / . | /
| / origin | /
(x0, y0, z0) + ----------- + --------> right x
(x1, y0, z0)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around z axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
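A minimal sketch of the corners property (illustrative values; DepthInstance3DBoxes is exported alongside the other box classes, as the updated test import below shows):

import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[0.0, 0.0, 0.0, 2.0, 1.0, 1.5, 0.0],
                  [1.0, 2.0, 0.0, 2.0, 1.0, 1.5, 0.5]]))
corners = boxes.corners  # expected shape (2, 8, 3), in the corner order documented above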
@property
def bev(self):
"""Calculate the 2D bounding boxes in BEV with rotation
Returns:
torch.Tensor: a nx5 tensor of 2D BEV box of each box.
The box is in XYWHR format
"""
return self.tensor[:, [0, 1, 3, 4, 6]]
@property
def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.bev
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
# find the center of boxes
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
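An illustrative sketch of nearest_bev (made-up values):

import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[1.0, 2.0, 0.0, 2.0, 1.0, 1.5, 1.2]]))
aligned = boxes.nearest_bev  # shape (1, 4): (x1, y1, x2, y2)
# |limit_period(1.2)| > pi / 4, so width and height are swapped before the
# min/max corners are formed, per the `conditions` branch above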
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angle (float | torch.Tensor): rotation angle
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0], [0, 0, 1]])
self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat.T
if self.with_yaw:
self.tensor[:, 6] -= angle
else:
corners_rot = self.corners @ rot_mat.T
new_x_size = corners_rot[..., 0].max(
dim=1, keepdim=True)[0] - corners_rot[..., 0].min(
dim=1, keepdim=True)[0]
new_y_size = corners_rot[..., 1].max(
dim=1, keepdim=True)[0] - corners_rot[..., 1].min(
dim=1, keepdim=True)[0]
self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
In Depth coordinates, it flips x (horizontal) or y (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Note:
In the original implementation of SECOND, checking whether
a box is in the range checks whether the points are in a convex
polygon; here we try to reduce the burden for simpler cases.
TODO: check whether this will affect the performance
Returns:
torch.Tensor: Indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
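A short sketch of in_range_bev combined with boolean indexing, the same pattern ObjectRangeFilter adopts later in this diff (values are illustrative):

import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[1.0, 2.0, 0.0, 2.0, 1.0, 1.5, 0.0],
                  [80.0, 2.0, 0.0, 2.0, 1.0, 1.5, 0.0]]))
mask = boxes.in_range_bev([-40.0, -40.0, 40.0, 40.0])  # tensor([True, False])
kept = boxes[mask]  # keep only the boxes inside the BEV range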
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
import numpy as np
import torch
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
......@@ -11,19 +12,23 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
Coordinates in LiDAR:
.. code-block:: none
up z x front
up z x front (yaw=0.5*pi)
^ ^
| /
| /
left y <------ 0
(yaw=pi) left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the negative direction of y axis, and increases from
the negative direction of y to the positive direction of x.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if True, the value of yaw will be set to 0 as minmax
boxes.
"""
@property
......@@ -44,7 +49,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate the coordinates of corners of all the boxes.
Convert the boxes to corners in clockwise order, in form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none
......@@ -90,7 +95,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
return self.tensor[:, [0, 1, 3, 4, 6]]
@property
def nearset_bev(self):
def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
......@@ -117,11 +122,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
angle (float | torch.Tensor): rotation angle
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
......@@ -133,13 +134,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
if self.tensor.shape[1] == 9:
# rotate velo vector
self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
In LIDAR coordinates, it flips the y axis.
In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
......@@ -155,7 +170,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
TODO: check whether this will affect the performance
Returns:
a binary vector, indicating whether each box is inside
torch.Tensor: Indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
......@@ -163,3 +178,51 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
def enlarged_box(self, extra_width):
"""Enlarge the length, width and height boxes
Args:
extra_width (float | torch.Tensor): extra width to enlarge the box
Returns:
:obj:LiDARInstance3DBoxes: enlarged boxes
"""
enlarged_boxes = self.tensor.clone()
enlarged_boxes[:, 3:6] += extra_width * 2
# bottom center z minus extra_width
enlarged_boxes[:, 2] -= extra_width
return self.new_box(enlarged_boxes)
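A minimal sketch of enlarged_box (illustrative values):

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[5.0, 2.0, -1.5, 1.6, 4.0, 1.5, 0.3]]))
bigger = boxes.enlarged_box(0.2)
# sizes grow by 2 * 0.2 in x/y/z and the bottom-center z drops by 0.2;
# the original boxes are untouched since the tensor is cloned above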
def points_in_boxes(self, points):
"""Find the box which the points are in.
Args:
points (:obj:torch.Tensor): Points in shape Nx3
Returns:
torch.Tensor: The index of the box each point is in.
"""
box_idx = points_in_boxes_gpu(
points.unsqueeze(0),
self.tensor.unsqueeze(0).to(points.device)).squeeze(0)
return box_idx
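A hedged sketch of points_in_boxes; a CUDA device is assumed since it relies on points_in_boxes_gpu:

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[5.0, 2.0, -1.5, 1.6, 4.0, 1.5, 0.0]]))
points = torch.rand(1000, 3, device='cuda') * 10
box_idx = boxes.points_in_boxes(points)  # shape (1000,); -1 for points outside every box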
from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
from mmcv.utils import build_from_cfg
from mmdet3d.utils import get_root_logger
from mmdet.core.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
from .cocktail_optimizer import CocktailOptimizer
......
from mmcv.runner.optimizer import OPTIMIZERS
from torch.optim import Optimizer
from mmdet.core.optimizer import OPTIMIZERS
@OPTIMIZERS.register_module()
class CocktailOptimizer(Optimizer):
......@@ -9,6 +8,11 @@ class CocktailOptimizer(Optimizer):
This optimizer applies the cocktail optimization for multi-modality models.
Args:
optimizers (list[:obj:`torch.optim.Optimizer`]): The list containing
different optimizers that optimize different parameters
step_intervals (list[int]): Step intervals of each optimizer
"""
def __init__(self, optimizers, step_intervals=None):
......@@ -18,6 +22,9 @@ class CocktailOptimizer(Optimizer):
self.param_groups += optimizer.param_groups
if not isinstance(step_intervals, list):
step_intervals = [1] * len(self.optimizers)
assert len(step_intervals) == len(optimizers), \
'"step_intervals" should contain the same number of intervals as' \
f'len(optimizers)={len(optimizers)}, got {step_intervals}'
self.step_intervals = step_intervals
self.num_step_updated = 0
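A hypothetical construction sketch of CocktailOptimizer, assuming two small sub-networks standing in for the image and point-cloud branches (all names and values are illustrative):

import torch
import torch.nn as nn

img_branch, pts_branch = nn.Linear(4, 4), nn.Linear(4, 4)
optimizers = [
    torch.optim.SGD(img_branch.parameters(), lr=0.01),
    torch.optim.Adam(pts_branch.parameters(), lr=1e-3),
]
# step_intervals must have one entry per optimizer, per the assertion above
cocktail = CocktailOptimizer(optimizers, step_intervals=[1, 2])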
......
......@@ -9,7 +9,7 @@ import torch
from mmcv.utils import print_log
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops
from .custom_3d import Custom3DDataset
from .utils import remove_dontcare
......@@ -87,13 +87,14 @@ class KittiDataset(Custom3DDataset):
# print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# this change gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
Trv2c)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
gt_bboxes = annos['bbox']
selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
# gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
gt_bboxes = gt_bboxes[selected].astype('float32')
gt_names = gt_names[selected]
......
......@@ -7,7 +7,7 @@ import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops
from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
from .custom_3d import Custom3DDataset
......@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset):
# filter out bbox containing no points
mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask]
# the nuscenes box center is [0.5, 0.5, 0.5], we keep it
# the same as KITTI [0.5, 0.5, 0]
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask]
gt_labels_3d = []
for cat in gt_names_3d:
......@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset):
gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# the nuscenes box center is [0.5, 0.5, 0.5], we keep it
# the same as KITTI [0.5, 0.5, 0]
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=[0.5, 0.5, 0.5])
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
......
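The origin argument used in the NuScenes change above is exercised by the new unit test further down; a minimal sketch of the same idea (made-up values):

import numpy as np
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# a gravity-centered [0.5, 0.5, 0.5] box of height 1.5; passing origin converts it
# to the bottom-centered [0.5, 0.5, 0] convention, shifting z down by half the height
box = LiDARInstance3DBoxes(
    np.array([[5.0, 2.0, 0.0, 1.6, 4.0, 1.5, 0.3]], dtype=np.float32),
    origin=[0.5, 0.5, 0.5])
# box.tensor[0, 2] is now -0.75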
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor
......@@ -39,9 +40,8 @@ class DefaultFormatBundle(object):
img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True)
for key in [
'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore',
'gt_labels', 'gt_labels_3d', 'pts_instance_mask',
'pts_semantic_mask'
'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask'
]:
if key not in results:
continue
......@@ -49,6 +49,14 @@ class DefaultFormatBundle(object):
results[key] = DC([to_tensor(res) for res in results[key]])
else:
results[key] = DC(to_tensor(results[key]))
if 'gt_bboxes_3d' in results:
if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
results['gt_bboxes_3d'] = DC(
results['gt_bboxes_3d'], cpu_only=True)
else:
results['gt_bboxes_3d'] = DC(
to_tensor(results['gt_bboxes_3d']))
if 'gt_masks' in results:
results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
if 'gt_semantic_seg' in results:
......
......@@ -26,12 +26,8 @@ class RandomFlip3D(RandomFlip):
self.sync_2d = sync_2d
def random_flip_points(self, gt_bboxes_3d, points):
gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1]
gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
gt_bboxes_3d.flip()
points[:, 1] = -points[:, 1]
if gt_bboxes_3d.shape[1] == 9:
# flip velocitys at the same time
gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
return gt_bboxes_3d, points
def __call__(self, input_dict):
......@@ -121,10 +117,13 @@ class ObjectSample(object):
gt_bboxes_2d = input_dict['gt_bboxes']
# Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d, img=img)
gt_bboxes_3d.tensor.numpy(),
gt_labels_3d,
gt_bboxes_2d=gt_bboxes_2d,
img=img)
else:
sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_labels_3d, img=None)
gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)
if sampled_dict is not None:
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
......@@ -133,8 +132,9 @@ class ObjectSample(object):
gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d
]).astype(np.float32)
gt_bboxes_3d = gt_bboxes_3d.new_box(
np.concatenate(
[gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension
......@@ -178,14 +178,16 @@ class ObjectNoise(object):
points = input_dict['points']
# TODO: check this inplace function
numpy_box = gt_bboxes_3d.tensor.numpy()
noise_per_object_v3_(
gt_bboxes_3d,
numpy_box,
points,
rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range,
num_try=self.num_try)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
input_dict['points'] = points
return input_dict
......@@ -212,7 +214,7 @@ class GlobalRotScale(object):
def _trans_bbox_points(self, gt_boxes, points):
noise_trans = np.random.normal(0, self.trans_normal_noise[0], 3).T
points[:, :3] += noise_trans
gt_boxes[:, :3] += noise_trans
gt_boxes.translate(noise_trans)
return gt_boxes, points, noise_trans
def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):
......@@ -221,16 +223,8 @@ class GlobalRotScale(object):
noise_rotation = np.random.uniform(rotation[0], rotation[1])
points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
points[:, :3], noise_rotation, axis=2)
gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle(
gt_boxes[:, :3], noise_rotation, axis=2)
gt_boxes[:, 6] += noise_rotation
if gt_boxes.shape[1] == 9:
# rotate velo vector
rot_cos = np.cos(noise_rotation)
rot_sin = np.sin(noise_rotation)
rot_mat_T_bev = np.array([[rot_cos, -rot_sin], [rot_sin, rot_cos]],
dtype=points.dtype)
gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
gt_boxes.rotate(noise_rotation)
return gt_boxes, points, rot_mat_T
def _scale_bbox_points(self,
......@@ -240,9 +234,7 @@ class GlobalRotScale(object):
max_scale=1.05):
noise_scale = np.random.uniform(min_scale, max_scale)
points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale
if gt_boxes.shape[1] == 9:
gt_boxes[:, 7:] *= noise_scale
gt_boxes.scale(noise_scale)
return gt_boxes, points, noise_scale
def __call__(self, input_dict):
......@@ -256,7 +248,7 @@ class GlobalRotScale(object):
gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['points'] = points
input_dict['pcd_scale_factor'] = scale_factor
input_dict['pcd_rotation'] = rotation_factor
......@@ -290,10 +282,6 @@ class ObjectRangeFilter(object):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
@staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range.
......@@ -314,14 +302,13 @@ class ObjectRangeFilter(object):
def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range)
mask = gt_bboxes_3d.in_range_bev(self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask]
gt_labels_3d = gt_labels_3d[mask]
# limit rad to [-pi, pi]
gt_bboxes_3d[:, 6] = self.limit_period(
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_labels_3d'] = gt_labels_3d
return input_dict
......
......@@ -168,6 +168,8 @@ class AnchorTrainMixin(object):
labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
if len(gt_bboxes) > 0:
if not isinstance(gt_bboxes, torch.Tensor):
gt_bboxes = gt_bboxes.tensor.to(anchors.device)
assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels)
sampling_result = self.bbox_sampler.sample(assign_result, anchors,
......
......@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
3,
padding=1,
norm_cfg=norm_cfg,
indice_key=f'rcnn_down0',
conv_type='SubMConv3d'))
indice_key='rcnn_down0'))
merge_conv_channel_last = channel
down_conv_channel_last = merge_conv_channel_last
......@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
3,
padding=1,
norm_cfg=norm_cfg,
indice_key=f'rcnn_down1',
conv_type='SubMConv3d'))
indice_key='rcnn_down1'))
down_conv_channel_last = channel
self.conv_down.add_module('merge_conv',
......
......@@ -5,7 +5,6 @@ import torch.nn.functional as F
from mmdet3d.core import multi_apply
from mmdet3d.core.bbox import box_torch_ops
from mmdet3d.models.builder import build_loss
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
from mmdet.models import HEADS
......@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
"""Semantic segmentation head for point-wise segmentation.
Predict point-wise segmentation and part regression results for PartA2.
See https://arxiv.org/abs/1907.03670 for more detials.
See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args:
in_channels (int): the number of input channel.
......@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module):
seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)
def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
"""generate segmentation and part prediction targets
"""generate segmentation and part prediction targets for a single sample
Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3],
the center of voxels
gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes
gt_bboxes_3d (:obj:BaseInstance3DBoxes): gt boxes containing tensor
of shape [box_num, 7].
gt_labels_3d (torch.Tensor): shape [box_num], class label of gt
Returns:
tuple : segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3]
"""
enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar(
gt_bboxes_3d, extra_width=self.extra_width)
gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32)
box_idx = points_in_boxes_gpu(
voxel_centers.unsqueeze(0),
gt_bboxes_3d.unsqueeze(0)).squeeze(0) # -1 ~ box_num
enlarge_box_idx = points_in_boxes_gpu(
voxel_centers.unsqueeze(0),
enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long() # -1 ~ box_num
box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
voxel_centers).long()
gt_labels_pad = F.pad(
gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
......@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module):
ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
seg_targets[ignore_flag] = -1
for k in range(gt_bboxes_3d.shape[0]):
for k in range(len(gt_bboxes_3d)):
k_box_flag = box_idx == k
# no point in current box (caused by velodyne reduce)
if not k_box_flag.any():
continue
fg_voxels = voxel_centers[k_box_flag]
transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3]
transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
transformed_voxels = box_torch_ops.rotation_3d_in_axis(
transformed_voxels.unsqueeze(0),
-gt_bboxes_3d[k, 6].view(1),
-gt_bboxes_3d.yaw[k].view(1),
axis=2)
part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[
k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0])
part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
k] + voxel_centers.new_tensor([0.5, 0.5, 0])
part_targets = torch.clamp(part_targets, min=0)
return seg_targets, part_targets
def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
"""generate segmentation and part prediction targets
Args:
voxels_dict (dict): The dict containing voxel-wise information,
including the voxel centers.
gt_bboxes_3d (list[:obj:BaseInstance3DBoxes]): list of gt boxes
containing tensor of shape [box_num, 7].
gt_labels_3d (list[torch.Tensor]): list of GT labels.
Returns:
tuple : segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3]
"""
batch_size = len(gt_labels_3d)
voxel_center_list = []
for idx in range(batch_size):
......
......@@ -8,7 +8,7 @@ from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module
@HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2"""
......@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx]
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
......@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead):
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes[pred_per_cls],
cur_gt_bboxes[gt_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
......@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead):
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
cur_boxes, cur_gt_bboxes.tensor, gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes,
cur_gt_bboxes,
cur_gt_bboxes.tensor,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
......
from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
from .pillar_encoder import PillarFeatureNet
from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE
__all__ = [
'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
'DynamicSimpleVFE'
]
......@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module()
class PillarFeatureNet(nn.Module):
"""Pillar Feature Net.
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int). Number of input features,
in_channels (int). Number of input features,
either x, y, z or x, y, z, r.
use_norm (bool). Whether to include BatchNorm.
num_filters (list[int]). Number of features in each of the
feat_channels (list[int]). Number of features in each of the
N PFNLayers.
with_distance (bool). Whether to include Euclidean distance
to points.
voxel_size (list[float]). Size of voxels, only utilize x and y
size.
point_cloud_range (list[float>]). Point cloud range, only
utilize x and y min.
point_cloud_range (list[float]). Point cloud range, only
utilizes x and y min.
"""
def __init__(self,
in_channels=4,
feat_channels=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max'):
super(PillarFeatureNet, self).__init__()
assert len(num_filters) > 0
assert len(feat_channels) > 0
if with_cluster_center:
num_input_features += 3
in_channels += 3
if with_voxel_center:
num_input_features += 2
in_channels += 2
if with_distance:
num_input_features += 1
in_channels += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
self.num_input_features = num_input_features
num_filters = [num_input_features] + list(num_filters)
self.in_channels = in_channels
feat_channels = [in_channels] + list(feat_channels)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
for i in range(len(feat_channels) - 1):
in_filters = feat_channels[i]
out_filters = feat_channels[i + 1]
if i < len(feat_channels) - 2:
last_layer = False
else:
last_layer = True
......@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
PFNLayer(
in_filters,
out_filters,
use_norm,
norm_cfg=norm_cfg,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
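A minimal instantiation sketch with the renamed arguments (values are illustrative; the import path is assumed to follow the voxel_encoders package __init__ shown earlier in this diff):

from mmdet3d.models.voxel_encoders import PillarFeatureNet

pfn = PillarFeatureNet(
    in_channels=4,
    feat_channels=(64, ),
    with_distance=False,
    voxel_size=(0.2, 0.2, 4),
    point_cloud_range=(0, -40, -3, 70.4, 40, 1),
    norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
    mode='max')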
......@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
class DynamicPillarFeatureNet(PillarFeatureNet):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
in_channels=4,
feat_channels=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
......@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
"""
super(DynamicPillarFeatureNet, self).__init__(
num_input_features,
use_norm,
num_filters,
in_channels,
feat_channels,
with_distance,
with_cluster_center=with_cluster_center,
with_voxel_center=with_voxel_center,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
norm_cfg=norm_cfg,
mode=mode)
num_filters = [self.num_input_features] + list(num_filters)
feat_channels = [self.in_channels] + list(feat_channels)
pfn_layers = []
# TODO: currently only support one PFNLayer
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
for i in range(len(feat_channels) - 1):
in_filters = feat_channels[i]
out_filters = feat_channels[i + 1]
if i > 0:
in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
......@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
features = torch.cat([point_feats, feat_per_point], dim=1)
return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module()
class AlignedPillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int): Number of input features, either x, y, z
or x, y, z, r.
use_norm (bool): Whether to include BatchNorm.
num_filters (list[int]): Number of features in each of the N
PFNLayers.
with_distance (bool): Whether to include Euclidean distance to
points.
voxel_size (list[float]): Size of voxels, only utilize x and y
size.
point_cloud_range: (list[float]): Point cloud range, only
utilize x and y min.
"""
super(AlignedPillarFeatureNet, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
print('Use cluster center')
num_input_features += 3
if with_voxel_center:
print('Use voxel center')
num_input_features += 2
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(
PFNLayer(
in_filters,
out_filters,
use_norm,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to
# calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
def forward(self, features, num_points, coors):
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
x_distance = features[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
y_distance = features[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
z_distance = features[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
normed_x_distance = 1 - torch.abs(x_distance / self.vx)
normed_y_distance = 1 - torch.abs(y_distance / self.vy)
normed_z_distance = 1 - torch.abs(z_distance / self.vz)
x_mask = torch.gt(normed_x_distance, 0).type_as(features)
y_mask = torch.gt(normed_y_distance, 0).type_as(features)
z_mask = torch.gt(normed_z_distance, 0).type_as(features)
nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
normed_z_distance).mul(nonzero_points_mask)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features[:, :, :2]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to
# whether pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
for pfn in self.pfn_layers:
if pfn.last_vfe:
features = pfn(features, aligned_distance)
else:
features = pfn(features)
return features.squeeze()
......@@ -4,28 +4,15 @@ from torch import nn
from torch.nn import functional as F
class Empty(nn.Module):
def __init__(self, *args, **kwargs):
super(Empty, self).__init__()
def forward(self, *args, **kwargs):
if len(args) == 1:
return args[0]
elif len(args) == 0:
return None
return args
def get_paddings_indicator(actual_num, max_num, axis=0):
"""Create boolean mask by actually number of a padded tensor.
Args:
actual_num ([type]): [description]
max_num ([type]): [description]
actual_num (torch.Tensor): Actual number of points in each voxel.
max_num (int): Max number of points in each voxel
Returns:
[type]: [description]
torch.Tensor: Mask indicates which points are valid inside a voxel.
"""
actual_num = torch.unsqueeze(actual_num, axis + 1)
# tiled_actual_num: [N, M, 1]
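An illustrative sketch of the mask this produces (the import path for the helper is assumed):

import torch
from mmdet3d.models.voxel_encoders.utils import get_paddings_indicator

# three voxels holding 2, 5 and 0 points, padded to max_num=5
actual_num = torch.tensor([2, 5, 0])
mask = get_paddings_indicator(actual_num, max_num=5)
# mask has shape (3, 5); row i is True for the first actual_num[i] slots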
......@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
self.cat_max = cat_max
self.max_out = max_out
# self.units = int(out_channels / 2)
if norm_cfg:
norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
self.norm = norm_layer
self.norm = build_norm_layer(norm_cfg, out_channels)[1]
self.linear = nn.Linear(in_channels, out_channels, bias=False)
else:
self.norm = Empty(out_channels)
self.linear = nn.Linear(in_channels, out_channels, bias=True)
def forward(self, inputs):
# [K, T, 7] tensordot [7, units] = [K, T, units]
......@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
use_norm=True,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
last_layer=False,
mode='max'):
""" Pillar Feature Net Layer.
......@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
use_norm (bool): Whether to include BatchNorm.
norm_cfg (dict): Config dict of normalization layers
last_layer (bool): If last_layer, there is no concatenation of
features.
mode (str): Pooling mode to gather features inside voxels.
Defaults to 'max'.
"""
super().__init__()
......@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
out_channels = out_channels // 2
self.units = out_channels
if use_norm:
self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
self.norm = build_norm_layer(norm_cfg, self.units)[1]
self.linear = nn.Linear(in_channels, self.units, bias=False)
else:
self.norm = Empty(self.unints)
self.linear = nn.Linear(in_channels, self.units, bias=True)
assert mode in ['max', 'avg']
self.mode = mode
def forward(self, inputs, num_voxels=None, aligned_distance=None):
......
import torch
from mmcv.cnn import build_norm_layer
from torch import nn
from torch.nn import functional as F
from mmdet3d.ops import DynamicScatter
from .. import builder
from ..registry import VOXEL_ENCODERS
from .utils import Empty, VFELayer, get_paddings_indicator
from .utils import VFELayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractor(nn.Module):
class HardSimpleVFE(nn.Module):
"""Simple voxel feature encoder used in SECOND
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractor, self).__init__()
self.name = name
assert len(num_filters) == 2
num_input_features += 3 # add mean features
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
if use_norm:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
self.norm = Empty(num_filters[1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
# t = time.time()
# torch.cuda.synchronize()
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
# mask = features.max(dim=2, keepdim=True)[0] != 0
# torch.cuda.synchronize()
# print("vfe prep forward time", time.time() - t)
x = self.vfe1(features)
x *= mask
x = self.vfe2(x)
x *= mask
x = self.linear(x)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
x *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(x, dim=1)[0]
return voxelwise
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV2, self).__init__()
self.name = name
assert len(num_filters) > 0
num_input_features += 3
if with_distance:
num_input_features += 1
self._with_distance = with_distance
num_filters = [num_input_features] + num_filters
filters_pairs = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
self.vfe_layers = nn.ModuleList(
[VFELayer(i, o, use_norm) for i, o in filters_pairs])
if use_norm:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=False)
self.norm = nn.BatchNorm1d(
num_filters[-1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=True)
self.norm = Empty(num_filters[-1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
for vfe in self.vfe_layers:
features = vfe(features)
features *= mask
features = self.linear(features)
features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
0, 2, 1).contiguous()
features = F.relu(features)
features *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(features, dim=1)[0]
return voxelwise
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractorV3(nn.Module):
It simply averages the values of points in a voxel.
"""
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV3, self).__init__()
self.name = name
def __init__(self):
super(HardSimpleVFE, self).__init__()
def forward(self, features, num_points, coors):
# features: [concated_num_points, num_voxel_size, 3(4)]
......@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
@VOXEL_ENCODERS.register_module()
class DynamicVFEV3(nn.Module):
class DynamicSimpleVFE(nn.Module):
"""Simple dynamic voxel feature encoder used in DV-SECOND
It simply averages the values of points in a voxel.
But the number of points in a voxel is dynamic and varies.
Args:
voxel_size (tuple[float]): Size of a single voxel
point_cloud_range (tuple[float]): Range of the point cloud and voxels
"""
def __init__(self,
num_input_features=4,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
super(DynamicVFEV3, self).__init__()
super(DynamicSimpleVFE, self).__init__()
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
@torch.no_grad()
......@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
@VOXEL_ENCODERS.register_module()
class DynamicVFE(nn.Module):
"""Dynamic Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image feature into voxel features in a point-wise manner.
The number of points inside the voxel varies.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to the center of
the voxel for each point inside a voxel. Defaults to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
point. Defaults to False.
"""
def __init__(self,
num_input_features=4,
num_filters=[],
in_channels=4,
feat_channels=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
......@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
fusion_layer=None,
return_point_feats=False):
super(DynamicVFE, self).__init__()
assert len(num_filters) > 0
assert mode in ['avg', 'max']
assert len(feat_channels) > 0
if with_cluster_center:
num_input_features += 3
in_channels += 3
if with_voxel_center:
num_input_features += 3
in_channels += 3
if with_distance:
num_input_features += 3
self.num_input_features = num_input_features
in_channels += 3
self.in_channels = in_channels
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
......@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters)
feat_channels = [self.in_channels] + list(feat_channels)
vfe_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
for i in range(len(feat_channels) - 1):
in_filters = feat_channels[i]
out_filters = feat_channels[i + 1]
if i > 0:
in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
......@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
self.fusion_layer = builder.build_fusion_layer(fusion_layer)
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
"""Map voxel features to its corresponding points.
Args:
pts_coors (torch.Tensor): Voxel coordinate of each point.
voxel_mean (torch.Tensor): Voxel features to be mapped.
voxel_coors (torch.Tensor): Coordinates of valid voxels
Returns:
torch.Tensor: Features or centers of each point.
"""
# Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation
canvas_z = int(
......@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
points=None,
img_feats=None,
img_meta=None):
"""
features (torch.Tensor): NxC
coors (torch.Tensor): Nx(1+NDim)
"""Forward functions
Args:
features (torch.Tensor): Features of voxels, shape is NxC.
coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
points (list[torch.Tensor], optional): Raw points used to guide the
multi-modality fusion. Defaults to None.
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_meta (dict, optional): Meta information of the image. Defaults to None.
Returns:
tuple: If `return_point_feats` is False, returns voxel features and
their coordinates. If `return_point_feats` is True, returns the
features of each point inside the voxels.
"""
features_ls = [features]
# Find distance of x, y, and z from cluster center
......@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
@VOXEL_ENCODERS.register_module()
class HardVFE(nn.Module):
"""Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image feature into voxel features in a point-wise manner.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to the center of
the voxel for each point inside a voxel. Defaults to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
point. Defaults to False.
"""
def __init__(self,
num_input_features=4,
num_filters=[],
in_channels=4,
feat_channels=[],
with_distance=False,
with_cluster_center=False,
with_voxel_center=False,
......@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
fusion_layer=None,
return_point_feats=False):
super(HardVFE, self).__init__()
assert len(num_filters) > 0
assert len(feat_channels) > 0
if with_cluster_center:
num_input_features += 3
in_channels += 3
if with_voxel_center:
num_input_features += 3
in_channels += 3
if with_distance:
num_input_features += 3
self.num_input_features = num_input_features
in_channels += 3
self.in_channels = in_channels
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
......@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters)
feat_channels = [self.in_channels] + list(feat_channels)
vfe_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
for i in range(len(feat_channels) - 1):
in_filters = feat_channels[i]
out_filters = feat_channels[i + 1]
if i > 0:
in_filters *= 2
# TODO: pass norm_cfg to VFE
# norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
if i == (len(num_filters) - 2):
if i == (len(feat_channels) - 2):
cat_max = False
max_out = True
if fusion_layer:
......@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
coors,
img_feats=None,
img_meta=None):
"""
features (torch.Tensor): NxMxC
coors (torch.Tensor): Nx(1+NDim)
"""Forward functions
Args:
features (torch.Tensor): Features of voxels, shape is MxNxC.
num_points (torch.Tensor): Number of points in each voxel.
coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_meta (dict, optional): Meta information of the image. Defaults to None.
Returns:
tuple: If `return_point_feats` is False, returns voxel features and
their coordinates. If `return_point_feats` is True, returns the
features of each point inside the voxels.
"""
features_ls = [features]
# Find distance of x, y, and z from cluster center
......@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
for i, vfe in enumerate(self.vfe_layers):
voxel_feats = vfe(voxel_feats)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
if (self.fusion_layer is not None and img_feats is not None):
voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
coors, img_feats, img_meta)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
return voxel_feats
def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
img_meta):
"""Fuse image and point features with mask.
Args:
features (torch.Tensor): Features of voxel, usually it is the
values of points in voxels.
mask (torch.Tensor): Mask indicates valid features in each voxel.
voxel_feats (torch.Tensor): Features of voxels.
coors (torch.Tensor): Coordinates of each single voxel.
img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
img_meta (list(dict)): Meta information of image and points.
Returns:
torch.Tensor: Fused features of each voxel.
"""
# the features consist of a batch of points
batch_size = coors[-1, 0] + 1
points = []
......@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
points.append(features[single_mask][mask[single_mask]])
point_feats = voxel_feats[mask]
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
point_feats = self.fusion_layer(img_feats, points, point_feats,
img_meta)
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
voxel_canvas = voxel_feats.new_zeros(
size=(voxel_feats.size(0), voxel_feats.size(1),
point_feats.size(-1)))
voxel_canvas[mask] = point_feats
out = torch.max(voxel_canvas, dim=1)[0]
if torch.isnan(out).any():
import pdb
pdb.set_trace()
return out
......@@ -3,7 +3,7 @@ import pytest
import torch
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
LiDARInstance3DBoxes)
DepthInstance3DBoxes, LiDARInstance3DBoxes)
def test_lidar_boxes3d():
......@@ -13,6 +13,46 @@ def test_lidar_boxes3d():
assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7
# Test init with origin
gravity_center_box = np.array(
[[
-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,
4.40900000e+00, 1.54800000e+00, -1.48801203e+00
],
[
-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,
4.58000000e-01, 7.82000000e-01, -4.62759755e+00
],
[
-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,
3.96900000e+00, 1.73200000e+00, -4.65203216e+00
],
[
-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,
3.85700000e+00, 1.72300000e+00, -2.81427027e+00
]],
dtype=np.float32)
bottom_center_box = LiDARInstance3DBoxes(
gravity_center_box, origin=[0.5, 0.5, 0.5])
expected_tensor = torch.tensor(
[[
-5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
4.40900000e+00, 1.54800000e+00, -1.48801203e+00
],
[
-2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,
4.58000000e-01, 7.82000000e-01, -4.62759755e+00
],
[
-5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,
3.96900000e+00, 1.73200000e+00, -4.65203216e+00
],
[
-3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,
3.85700000e+00, 1.72300000e+00, -2.81427027e+00
]])
assert torch.allclose(expected_tensor, bottom_center_box.tensor)
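# The conversion above only shifts the origin: with origin=[0.5, 0.5, 0.5]
# the given z is the gravity center, and the stored bottom-center z is
# z_bottom = z_gravity - z_size / 2 (e.g. 2.9757e-01 - 1.548 / 2 =
# -4.7643e-01 for the first box); x, y and the box dimensions are unchanged.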
# Test init with numpy array
np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
......@@ -70,9 +110,19 @@ def test_lidar_boxes3d():
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
boxes.flip('horizontal')
assert torch.allclose(boxes.tensor, expected_tensor)
expected_tensor = torch.tensor(
[[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616],
[-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216],
[-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116],
[-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316],
[-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]])
boxes_flip_vert = boxes.clone()
boxes_flip_vert.flip('vertical')
assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
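# The yaw values above are consistent with the BEV flip conventions of the
# box classes: a horizontal flip maps yaw to pi - yaw (1.48 -> ~1.6616 for
# the first box), while a vertical flip simply negates it (1.6616 -> -1.6616).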
# test box rotation
expected_tensor = torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
......@@ -223,7 +273,7 @@ def test_lidar_boxes3d():
[27.3398, -18.3976, 29.0896, -14.6065]])
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],
......@@ -269,6 +319,25 @@ def test_lidar_boxes3d():
# the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
# test new_box
new_box1 = boxes.new_box([[1, 2, 3, 4, 5, 6, 7]])
assert torch.allclose(
new_box1.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
assert new_box1.device == boxes.device
assert new_box1.with_yaw == boxes.with_yaw
assert new_box1.box_dim == boxes.box_dim
new_box2 = boxes.new_box(np.array([[1, 2, 3, 4, 5, 6, 7]]))
assert torch.allclose(
new_box2.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
new_box3 = boxes.new_box(torch.tensor([[1, 2, 3, 4, 5, 6, 7]]))
assert torch.allclose(
new_box3.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
def test_boxes_conversion():
"""Test the conversion of boxes between different modes.
......@@ -284,6 +353,8 @@ def test_boxes_conversion():
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
Box3DMode.CAM)
expected_box = lidar_boxes.convert_to(Box3DMode.CAM)
assert torch.equal(expected_box.tensor, cam_box_tensor)
# Some properties should be the same
cam_boxes = CameraInstance3DBoxes(cam_box_tensor)
......@@ -310,16 +381,10 @@ def test_boxes_conversion():
Box3DMode.DEPTH, Box3DMode.CAM)
assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)
# test similar mode conversion
same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
Box3DMode.DEPTH)
assert torch.equal(same_results, depth_box_tensor)
# test conversion with a given rt_mat
camera_boxes = CameraInstance3DBoxes(
......@@ -389,6 +454,35 @@ def test_boxes_conversion():
rt_mat.inverse().numpy())
assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())
# test convert from depth to lidar
depth_boxes = torch.tensor(
[[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
dtype=torch.float32)
depth_boxes = DepthInstance3DBoxes(depth_boxes)
depth_to_lidar_box = depth_boxes.convert_to(Box3DMode.LIDAR)
expected_box = depth_to_lidar_box.convert_to(Box3DMode.DEPTH)
assert torch.equal(depth_boxes.tensor, expected_box.tensor)
lidar_to_depth_box = Box3DMode.convert(depth_to_lidar_box, Box3DMode.LIDAR,
Box3DMode.DEPTH)
assert torch.allclose(depth_boxes.tensor, lidar_to_depth_box.tensor)
assert torch.allclose(depth_boxes.volume, lidar_to_depth_box.volume)
# test convert from depth to camera
depth_to_cam_box = Box3DMode.convert(depth_boxes, Box3DMode.DEPTH,
Box3DMode.CAM)
cam_to_depth_box = Box3DMode.convert(depth_to_cam_box, Box3DMode.CAM,
Box3DMode.DEPTH)
expected_tensor = depth_to_cam_box.convert_to(Box3DMode.DEPTH)
assert torch.equal(expected_tensor.tensor, cam_to_depth_box.tensor)
assert torch.allclose(depth_boxes.tensor, cam_to_depth_box.tensor)
assert torch.allclose(depth_boxes.volume, cam_to_depth_box.volume)
with pytest.raises(NotImplementedError):
# assert invalid convert mode
Box3DMode.convert(depth_boxes, Box3DMode.DEPTH, 3)
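# Taken together, these checks assert that mode conversions round-trip:
# converting DEPTH -> LIDAR -> DEPTH or DEPTH -> CAM -> DEPTH (via either
# Box3DMode.convert or convert_to) reproduces the original tensor and volume,
# while converting to an unknown mode raises NotImplementedError.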
def test_camera_boxes3d():
# Test init with numpy array
......@@ -449,9 +543,19 @@ def test_camera_boxes3d():
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
Box3DMode.LIDAR, Box3DMode.CAM)
boxes.flip('horizontal')
assert torch.allclose(boxes.tensor, expected_tensor)
expected_tensor = torch.tensor(
[[2.5162, 1.7501, -1.7802, 3.3900, 1.6500, 1.7500, -1.6616],
[2.4567, 1.6357, -8.9594, 4.0100, 1.5700, 1.5400, -1.5216],
[-0.5558, 1.3033, -28.2967, 2.2300, 1.4800, 1.4700, -4.7116],
[21.8230, 1.7361, -26.6690, 3.4800, 1.4000, 1.5600, -4.8316],
[8.1621, 1.6218, -31.3198, 3.7700, 1.4800, 1.7400, -0.3516]])
boxes_flip_vert = boxes.clone()
boxes_flip_vert.flip('vertical')
assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
# test box rotation
expected_tensor = Box3DMode.convert(
torch.tensor(
......@@ -560,7 +664,7 @@ def test_camera_boxes3d():
expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
# the pytorch print loses some precision
assert torch.allclose(
boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation
expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
......@@ -659,3 +763,130 @@ def test_boxes3d_overlaps():
cam_boxes1.overlaps(cam_boxes1, boxes1)
with pytest.raises(AssertionError):
boxes1.overlaps(cam_boxes1, boxes1)
def test_depth_boxes3d():
# test empty initialization
empty_boxes = []
boxes = DepthInstance3DBoxes(empty_boxes)
assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7
# Test init with numpy array
np_boxes = np.array(
[[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
[2.3262, 3.3065, 0.44255, 0.8234, 0.5325, 1.0099, 2.9971]],
dtype=np.float32)
boxes_1 = DepthInstance3DBoxes(np_boxes)
assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
# test properties
assert boxes_1.volume.size(0) == 2
assert (boxes_1.center == boxes_1.bottom_center).all()
expected_tensor = torch.tensor([[1.4856, 2.5299, -0.1093],
[2.3262, 3.3065, 0.9475]])
assert torch.allclose(boxes_1.gravity_center, expected_tensor)
expected_tensor = torch.tensor([[1.4856, 2.5299, 0.9385, 2.1404, 3.0601],
[2.3262, 3.3065, 0.8234, 0.5325, 2.9971]])
assert torch.allclose(boxes_1.bev, expected_tensor)
expected_tensor = torch.tensor([[1.0164, 1.4597, 1.9548, 3.6001],
[1.9145, 3.0402, 2.7379, 3.5728]])
assert torch.allclose(boxes_1.nearest_bev, expected_tensor, 1e-4)
assert repr(boxes) == (
'DepthInstance3DBoxes(\n tensor([], size=(0, 7)))')
# test init with torch.Tensor
th_boxes = torch.tensor(
[[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
dtype=torch.float32)
boxes_2 = DepthInstance3DBoxes(th_boxes)
assert torch.allclose(boxes_2.tensor, th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = torch.tensor(
[[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
[2.3262, 3.3065, 0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])
boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# concatenate empty list
empty_boxes = DepthInstance3DBoxes.cat([])
assert empty_boxes.tensor.shape[0] == 0
assert empty_boxes.tensor.shape[-1] == 7
# test box flip
expected_tensor = torch.tensor(
[[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815],
[-2.3262, 3.3065, 0.4426, 0.8234, 0.5325, 1.0099, 0.1445],
[-2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 0.0723],
[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815]])
boxes.flip(bev_direction='horizontal')
assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
expected_tensor = torch.tensor(
[[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815],
[-2.3262, -3.3065, 0.4426, 0.8234, 0.5325, 1.0099, -0.1445],
[-2.4593, -2.5870, -0.4321, 0.8597, 0.6193, 1.0204, -0.0723],
[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815]])
boxes.flip(bev_direction='vertical')
assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
# test box rotation
boxes_rot = boxes.clone()
expected_tensor = torch.tensor(
[[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355],
[-2.4758, -3.1960, 0.4426, 0.8234, 0.5325, 1.0099, -0.0985],
[-2.5757, -2.4712, -0.4321, 0.8597, 0.6193, 1.0204, -0.0263],
[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355]])
boxes_rot.rotate(-0.04599790655000615)
assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)
th_boxes = torch.tensor(
[[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772],
[1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]],
dtype=torch.float32)
boxes = DepthInstance3DBoxes(th_boxes, box_dim=6, with_yaw=False)
expected_tensor = torch.tensor([[
0.64884546, 0.78390356, 0.10563634, 1.50373348, 0.23795205, 0.27956772,
0
],
[
1.45139421, 0.43169443, 0.93829232,
0.11967964, 0.93380373, 1.89191735, 0
]])
boxes_3 = boxes.clone()
boxes_3.rotate(-0.04599790655000615)
assert torch.allclose(boxes_3.tensor, expected_tensor)
boxes.rotate(torch.tensor(-0.04599790655000615))
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],
[-0.1030, 0.6649, 0.3852],
[-0.1030, 0.9029, 0.3852],
[-0.1030, 0.9029, 0.1056],
[1.4007, 0.6649, 0.1056],
[1.4007, 0.6649, 0.3852],
[1.4007, 0.9029, 0.3852],
[1.4007, 0.9029, 0.1056]],
[[1.3916, -0.0352, 0.9383],
[1.3916, -0.0352, 2.8302],
[1.3916, 0.8986, 2.8302],
[1.3916, 0.8986, 0.9383],
[1.5112, -0.0352, 0.9383],
[1.5112, -0.0352, 2.8302],
[1.5112, 0.8986, 2.8302],
[1.5112, 0.8986, 0.9383]]])
assert torch.allclose(boxes.corners, expected_tensor, 1e-3)
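# boxes here holds the two 6-DoF boxes defined above (box_dim=6,
# with_yaw=False), so boxes.corners has shape (2, 8, 3): eight xyz corners
# per box, matching the expected tensor.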
import numpy as np
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose
def test_outdoor_aug_pipeline():
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
class_names = ['Car']
np.random.seed(0)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor([
[
2.16902428e+01, -4.06038128e-02, -1.61906636e+00,
1.65999997e+00, 3.20000005e+00, 1.61000001e+00, -1.53999996e+00
],
[
7.05006886e+00, -6.57459593e+00, -1.60107934e+00,
2.27999997e+00, 1.27799997e+01, 3.66000009e+00, 1.54999995e+00
],
[
2.24698811e+01, -6.69203758e+00, -1.50118136e+00,
2.31999993e+00, 1.47299995e+01, 3.64000010e+00, 1.59000003e+00
],
[
3.48291969e+01, -7.09058380e+00, -1.36622977e+00,
2.31999993e+00, 1.00400000e+01, 3.60999990e+00, 1.61000001e+00
],
[
4.62394600e+01, -7.75838804e+00, -1.32405007e+00,
2.33999991e+00, 1.28299999e+01, 3.63000011e+00, 1.63999999e+00
],
[
2.82966995e+01, -5.55755794e-01, -1.30332506e+00,
1.47000003e+00, 2.23000002e+00, 1.48000002e+00, -1.57000005e+00
],
[
2.66690197e+01, 2.18230209e+01, -1.73605704e+00,
1.55999994e+00, 3.48000002e+00, 1.39999998e+00, -1.69000006e+00
],
[
3.13197803e+01, 8.16214371e+00, -1.62177873e+00,
1.74000001e+00, 3.76999998e+00, 1.48000002e+00, 2.78999996e+00
],
[
4.34395561e+01, -1.95209332e+01, -1.20757008e+00,
1.69000006e+00, 4.09999990e+00, 1.40999997e+00, -1.53999996e+00
],
[
3.29882965e+01, -3.79360509e+00, -1.69245458e+00,
1.74000001e+00, 4.09000015e+00, 1.49000001e+00, -1.52999997e+00
],
[
3.85469360e+01, 8.35060215e+00, -1.31423414e+00,
1.59000003e+00, 4.28000021e+00, 1.45000005e+00, 1.73000002e+00
],
[
2.22492104e+01, -1.13536005e+01, -1.38272512e+00,
1.62000000e+00, 3.55999994e+00, 1.71000004e+00, 2.48000002e+00
],
[
3.36115799e+01, -1.97708054e+01, -4.92827654e-01,
1.64999998e+00, 3.54999995e+00, 1.79999995e+00, -1.57000005e+00
],
[
9.85029602e+00, -1.51294518e+00, -1.66834795e+00,
1.59000003e+00, 3.17000008e+00, 1.38999999e+00, -8.39999974e-01
]
],
dtype=torch.float32))
gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
)
output = pipeline(results)
expected_tensor = torch.tensor(
[[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216],
[7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629],
[20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513],
[32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498],
[43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146],
[25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141],
[16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153],
[24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970],
[45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886],
[30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241],
[32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186],
[22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928],
[37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862],
[8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
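# With np.random.seed(0) set above, the random augmentations (ObjectNoise,
# RandomFlip3D, GlobalRotScale) are deterministic, which is why the
# transformed boxes can be compared against a fixed expected tensor.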
def test_outdoor_velocity_aug_pipeline():
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
np.random.seed(0)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[
-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02
],
[
-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03
],
[
-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00
],
[
-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02
],
[
-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00
],
[
-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00
],
[
-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01
],
[
-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00
],
[
-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00
],
[
-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03
]],
dtype=torch.float32),
box_dim=9)
gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
)
output = pipeline(results)
expected_tensor = torch.tensor(
[[
-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02
],
[
-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03
],
[
-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00
],
[
-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02
],
[
-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00
],
[
-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00
],
[
-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01
],
[
-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00
],
[
-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03
]])
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
import pytest
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
def test_PointwiseSemanticHead():
# PointwiseSemanticHead only supports the GPU version currently.
......@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead():
[1, 35, 930, 469]],
dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z)
voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)
gt_bboxes = [
LiDARInstance3DBoxes(
torch.tensor(
[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
dtype=torch.float32).cuda()),
LiDARInstance3DBoxes(
torch.tensor(
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],
dtype=torch.float32).cuda())
]
# batch size is 2 in the unit test
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
# test get_targets
target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)
assert target_dict['seg_targets'].shape == torch.Size(
[voxel_features.shape[0]])
assert torch.allclose(target_dict['seg_targets'],
target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))
assert target_dict['part_targets'].shape == torch.Size(
[voxel_features.shape[0], 3])
assert target_dict['part_targets'].sum() == 0
# test loss
loss_dict = self.loss(feats_dict, target_dict)
......@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead():
assert loss_dict['loss_part'] == 0 # no points in gt_boxes
total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
total_loss.backward()