Commit f2b01720 authored by liyinhao's avatar liyinhao

Merge branch 'master' into process_raw_data

parents 08c8adb6 47850641
...@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
Coordinates in camera: Coordinates in camera:
.. code-block:: none .. code-block:: none
z front z front (yaw=0.5*pi)
/ /
/ /
0 ------> x right 0 ------> x right (yaw=0)
| |
| |
v v
...@@ -22,11 +22,15 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -22,11 +22,15 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5], The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1. and the yaw is around the y axis, thus the rotation axis=1.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of z.
Attributes: Attributes:
tensor (torch.Tensor): float matrix of N x box_dim. tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...). Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if False, the value of yaw will be set to 0 as minmax
boxes.
""" """
@property @property
...@@ -75,7 +79,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -75,7 +79,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate the coordinates of corners of all the boxes. """Calculate the coordinates of corners of all the boxes.
Convert the boxes to in clockwise order, in the form of Convert the boxes to in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1) (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none .. code-block:: none
...@@ -85,7 +89,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -85,7 +89,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
(x0, y0, z1) + ----------- + (x1, y0, z1) (x0, y0, z1) + ----------- + (x1, y0, z1)
/| / | /| / |
/ | / | / | / |
(x0, y0, z0) + ----------- + + (x1, y1, z0) (x0, y0, z0) + ----------- + + (x1, y1, z1)
| / . | / | / . | /
| / origin | / | / origin | /
(x0, y1, z0) + ----------- + -------> x right (x0, y1, z0) + ----------- + -------> x right
...@@ -123,7 +127,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -123,7 +127,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
return self.tensor[:, [0, 2, 3, 5, 6]] return self.tensor[:, [0, 2, 3, 5, 6]]
@property @property
def nearset_bev(self): def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation """Calculate the 2D bounding boxes in BEV without rotation
Returns: Returns:
...@@ -150,11 +154,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -150,11 +154,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate whether the points is in any of the boxes """Calculate whether the points is in any of the boxes
Args: Args:
angles (float | torch.Tensor): rotation angle angle (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
""" """
if not isinstance(angle, torch.Tensor): if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle) angle = self.tensor.new_tensor(angle)
...@@ -166,13 +166,23 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -166,13 +166,23 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle self.tensor[:, 6] += angle
def flip(self): def flip(self, bev_direction='horizontal'):
"""Flip the boxes in horizontal direction """Flip the boxes in BEV along given BEV direction
In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
In CAM coordinates, it flips the x axis. Args:
bev_direction (str): Flip direction (horizontal or vertical).
""" """
self.tensor[:, 0::7] = -self.tensor[:, 0::7] assert bev_direction in ('horizontal', 'vertical')
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi if bev_direction == 'horizontal':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 2::7] = -self.tensor[:, 2::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
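A small consistency check (not part of this commit, numpy only) for the horizontal flip above: mirroring across the x = 0 plane negates the x component of the heading, which is exactly the yaw -> pi - yaw update applied to the tensor.

import numpy as np

yaw = 0.3
heading = np.array([np.cos(yaw), 0.0, np.sin(yaw)])   # CAM heading at this yaw
flipped = heading * np.array([-1.0, 1.0, 1.0])        # flip the x axis
assert np.allclose(flipped,
                   np.array([np.cos(np.pi - yaw), 0.0, np.sin(np.pi - yaw)]))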
def in_range_bev(self, box_range): def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range """Check whether the boxes are in the given range
...@@ -188,8 +198,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -188,8 +198,8 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
TODO: check whether this will affect the performance TODO: check whether this will affect the performance
Returns: Returns:
a binary vector, indicating whether each box is inside torch.Tensor: Indicating whether each box is inside
the reference range. the reference range.
""" """
in_range_flags = ((self.tensor[:, 0] > box_range[0]) in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 2] > box_range[1]) & (self.tensor[:, 2] > box_range[1])
...@@ -230,3 +240,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): ...@@ -230,3 +240,22 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height) lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0) overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
return overlaps_h return overlaps_h
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
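A hedged usage sketch of convert_to, mirroring the KittiDataset change later in this commit; rect and Trv2c stand in for the KITTI calibration matrices and are set to identity here only so the snippet is self-contained.

import numpy as np
from mmdet3d.core.bbox import Box3DMode, CameraInstance3DBoxes

cam_boxes_arr = np.array([[1.0, 1.5, 10.0, 1.6, 1.5, 3.9, 0.2]], dtype=np.float32)
rect = np.eye(4, dtype=np.float32)    # placeholder: real values come from the KITTI calib files
Trv2c = np.eye(4, dtype=np.float32)   # placeholder: real values come from the KITTI calib files
cam_boxes = CameraInstance3DBoxes(cam_boxes_arr)
lidar_boxes = cam_boxes.convert_to(Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))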
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class DepthInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in Depth coordinates
Coordinates in Depth:
.. code-block:: none
up z y front (yaw=0.5*pi)
^ ^
| /
| /
0 ------> x right (yaw=0)
The relative coordinate of bottom center in a Depth box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of y.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if False, the value of yaw will be set to 0 as minmax
boxes.
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, :2] = bottom_center[:, :2]
gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
return gravity_center
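A minimal torch-only sketch (not part of this commit) of the arithmetic above: the gravity center sits half a box height above the bottom center, since the relative origin of a Depth box is [0.5, 0.5, 0].

import torch

box = torch.tensor([[1.0, 2.0, 0.5, 2.0, 1.0, 1.6, 0.0]])  # (x, y, z, dx, dy, dz, yaw)
gravity_center = box[:, :3].clone()
gravity_center[:, 2] += box[:, 5] * 0.5
print(gravity_center)  # tensor([[1.0000, 2.0000, 1.3000]])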
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
Convert the boxes to corners in clockwise order, in form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none
up z
front y ^
/ |
/ |
(x0, y1, z1) + ----------- + (x1, y1, z1)
/| / |
/ | / |
(x0, y0, z1) + ----------- + + (x1, y1, z0)
| / . | /
| / origin | /
(x0, y0, z0) + ----------- + --------> right x
(x1, y0, z0)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around z axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
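A numpy-only sketch of how the corner template above is built: np.unravel_index enumerates the 8 binary offsets and the reindexing [0, 1, 3, 2, 4, 5, 7, 6] produces the documented order (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).

import numpy as np

corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)
print(corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]])
# [[0 0 0] [0 0 1] [0 1 1] [0 1 0] [1 0 0] [1 0 1] [1 1 1] [1 1 0]]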
@property
def bev(self):
"""Calculate the 2D bounding boxes in BEV with rotation
Returns:
torch.Tensor: a nx5 tensor of 2D BEV box of each box.
The box is in XYWHR format
"""
return self.tensor[:, [0, 1, 3, 4, 6]]
@property
def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.bev
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
# find the center of boxes
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
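A numpy-only sketch of the swap rule above, reusing the limit_period formula that also appears in the removed ObjectRangeFilter helper further down: after limiting the yaw to [-pi/2, pi/2), boxes rotated by more than pi/4 have their BEV width and height exchanged before the axis-aligned box is formed.

import numpy as np

def limit_period(val, offset=0.5, period=np.pi):
    return val - np.floor(val / period + offset) * period

yaw = np.array([0.1, np.pi / 2])             # nearly axis-aligned vs. rotated by 90 degrees
normed = np.abs(limit_period(yaw, 0.5, np.pi))
print(normed > np.pi / 4)                    # [False  True] -> swap (w, h) for the second box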
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angle (float | torch.Tensor): rotation angle
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0], [0, 0, 1]])
self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat.T
if self.with_yaw:
self.tensor[:, 6] -= angle
else:
corners_rot = self.corners @ rot_mat.T
new_x_size = corners_rot[..., 0].max(
dim=1, keepdim=True)[0] - corners_rot[..., 0].min(
dim=1, keepdim=True)[0]
new_y_size = corners_rot[..., 1].max(
dim=1, keepdim=True)[0] - corners_rot[..., 1].min(
dim=1, keepdim=True)[0]
self.tensor[:, 3:5] = torch.cat((new_x_size, new_y_size), dim=-1)
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
In Depth coordinates, it flips x (horizontal) or y (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
"""
assert bev_direction in ('horizontal', 'vertical')
if bev_direction == 'horizontal':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Note:
In the original implementation of SECOND, checking whether
a box is in the range checks whether the points are in a convex
polygon; we try to reduce the burden for simpler cases.
TODO: check whether this will affect the performance
Returns:
torch.Tensor: Indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
import numpy as np import numpy as np
import torch import torch
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
from .base_box3d import BaseInstance3DBoxes from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis from .utils import limit_period, rotation_3d_in_axis
...@@ -11,19 +12,23 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -11,19 +12,23 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
Coordinates in LiDAR: Coordinates in LiDAR:
.. code-block:: none .. code-block:: none
up z x front up z x front (yaw=0.5*pi)
^ ^ ^ ^
| / | /
| / | /
left y <------ 0 (yaw=pi) left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0], The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2. and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the negative direction of y axis, and increases from
the negative direction of y to the positive direction of x.
Attributes: Attributes:
tensor (torch.Tensor): float matrix of N x box_dim. tensor (torch.Tensor): float matrix of N x box_dim.
box_dim (int): integer indicates the dimension of a box box_dim (int): integer indicates the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw, ...). Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
with_yaw (bool): if False, the value of yaw will be set to 0 as minmax
boxes.
""" """
@property @property
...@@ -44,7 +49,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -44,7 +49,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate the coordinates of corners of all the boxes. """Calculate the coordinates of corners of all the boxes.
Convert the boxes to corners in clockwise order, in form of Convert the boxes to corners in clockwise order, in form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1) (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
.. code-block:: none .. code-block:: none
...@@ -90,7 +95,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -90,7 +95,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
return self.tensor[:, [0, 1, 3, 4, 6]] return self.tensor[:, [0, 1, 3, 4, 6]]
@property @property
def nearset_bev(self): def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation """Calculate the 2D bounding boxes in BEV without rotation
Returns: Returns:
...@@ -117,11 +122,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -117,11 +122,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""Calculate whether the points is in any of the boxes """Calculate whether the points is in any of the boxes
Args: Args:
angles (float | torch.Tensor): rotation angle angle (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
""" """
if not isinstance(angle, torch.Tensor): if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle) angle = self.tensor.new_tensor(angle)
...@@ -133,13 +134,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -133,13 +134,27 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle self.tensor[:, 6] += angle
def flip(self): if self.tensor.shape[1] == 9:
"""Flip the boxes in horizontal direction # rotate velo vector
self.tensor[:, 7:9] = self.tensor[:, 7:9] @ rot_mat_T[:2, :2]
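A torch-only sketch (not part of this commit) of the velocity rotation added above: the top-left 2 x 2 block of the z-axis rotation matrix rotates the (vx, vy) velocity in the ground plane, matching the full 3D rotation with vz = 0.

import math
import torch

angle = 0.3
rot_cos, rot_sin = math.cos(angle), math.sin(angle)
rot_mat_T = torch.tensor([[rot_cos, -rot_sin, 0.0],
                          [rot_sin, rot_cos, 0.0],
                          [0.0, 0.0, 1.0]])
velo = torch.tensor([[5.0, 0.0]])                       # (vx, vy)
velo_3d = torch.cat([velo, velo.new_zeros(1, 1)], dim=1) @ rot_mat_T
assert torch.allclose(velo @ rot_mat_T[:2, :2], velo_3d[:, :2])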
def flip(self, bev_direction='horizontal'):
"""Flip the boxes in BEV along given BEV direction
In LIDAR coordinates, it flips the y axis. In LIDAR coordinates, it flips the y (horizontal) or x (vertical) axis.
Args:
bev_direction (str): Flip direction (horizontal or vertical).
""" """
self.tensor[:, 1::7] = -self.tensor[:, 1::7] assert bev_direction in ('horizontal', 'vertical')
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi if bev_direction == 'horizontal':
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
elif bev_direction == 'vertical':
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
if self.with_yaw:
self.tensor[:, 6] = -self.tensor[:, 6]
def in_range_bev(self, box_range): def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range """Check whether the boxes are in the given range
...@@ -155,11 +170,59 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): ...@@ -155,11 +170,59 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
TODO: check whether this will affect the performance TODO: check whether this will affect the performance
Returns: Returns:
a binary vector, indicating whether each box is inside torch.Tensor: Indicating whether each box is inside
the reference range. the reference range.
""" """
in_range_flags = ((self.tensor[:, 0] > box_range[0]) in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1]) & (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2]) & (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3])) & (self.tensor[:, 1] < box_range[3]))
return in_range_flags return in_range_flags
def convert_to(self, dst, rt_mat=None):
"""Convert self to `dst` mode.
Args:
dst (BoxMode): the target Box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
BaseInstance3DBoxes:
The converted box of the same type in the `dst` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
def enlarged_box(self, extra_width):
"""Enlarge the length, width and height boxes
Args:
extra_width (float | torch.Tensor): extra width to enlarge the box
Returns:
:obj:LiDARInstance3DBoxes: enlarged boxes
"""
enlarged_boxes = self.tensor.clone()
enlarged_boxes[:, 3:6] += extra_width * 2
# bottom center z minus extra_width
enlarged_boxes[:, 2] -= extra_width
return self.new_box(enlarged_boxes)
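A torch-only sketch of the enlargement above: every size grows by 2 * extra_width and the bottom-center z drops by extra_width, so the box is padded symmetrically on all sides.

import torch

extra_width = 0.2
box = torch.tensor([[0.0, 0.0, -1.0, 4.0, 1.8, 1.6, 0.0]])
enlarged = box.clone()
enlarged[:, 3:6] += extra_width * 2
enlarged[:, 2] -= extra_width
print(enlarged)  # tensor([[ 0.0,  0.0, -1.2,  4.4,  2.2,  2.0,  0.0]])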
def points_in_boxes(self, points):
"""Find the box which the points are in.
Args:
points (:obj:torch.Tensor): Points in shape Nx3
Returns:
torch.Tensor: The index of the box each point is in.
"""
box_idx = points_in_boxes_gpu(
points.unsqueeze(0),
self.tensor.unsqueeze(0).to(points.device)).squeeze(0)
return box_idx
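A hedged usage sketch of points_in_boxes; it needs a CUDA device because points_in_boxes_gpu runs on GPU, and the box and points below are placeholders.

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(torch.tensor([[0.0, 0.0, -1.0, 4.0, 1.8, 1.6, 0.0]]))
points = torch.rand(100, 3).cuda()            # placeholder point cloud
box_idx = boxes.points_in_boxes(points)       # shape (100,), -1 for points outside every box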
from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
from mmcv.utils import build_from_cfg from mmcv.utils import build_from_cfg
from mmdet3d.utils import get_root_logger from mmdet3d.utils import get_root_logger
from mmdet.core.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
from .cocktail_optimizer import CocktailOptimizer from .cocktail_optimizer import CocktailOptimizer
......
from mmcv.runner.optimizer import OPTIMIZERS
from torch.optim import Optimizer from torch.optim import Optimizer
from mmdet.core.optimizer import OPTIMIZERS
@OPTIMIZERS.register_module() @OPTIMIZERS.register_module()
class CocktailOptimizer(Optimizer): class CocktailOptimizer(Optimizer):
...@@ -9,6 +8,11 @@ class CocktailOptimizer(Optimizer): ...@@ -9,6 +8,11 @@ class CocktailOptimizer(Optimizer):
This optimizer applies the cocktail optimization for multi-modality models. This optimizer applies the cocktail optimization for multi-modality models.
Args:
optimizers (list[:obj:`torch.optim.Optimizer`]): The list containing
different optimizers that optimize different parameters
step_intervals (list[int]): Step intervals of each optimizer
""" """
def __init__(self, optimizers, step_intervals=None): def __init__(self, optimizers, step_intervals=None):
...@@ -18,6 +22,9 @@ class CocktailOptimizer(Optimizer): ...@@ -18,6 +22,9 @@ class CocktailOptimizer(Optimizer):
self.param_groups += optimizer.param_groups self.param_groups += optimizer.param_groups
if not isinstance(step_intervals, list): if not isinstance(step_intervals, list):
step_intervals = [1] * len(self.optimizers) step_intervals = [1] * len(self.optimizers)
assert len(step_intervals) == len(optimizers), \
'"step_intervals" should contain the same number of intervals as' \
f'len(optimizers)={len(optimizers)}, got {step_intervals}'
self.step_intervals = step_intervals self.step_intervals = step_intervals
self.num_step_updated = 0 self.num_step_updated = 0
......
...@@ -9,7 +9,7 @@ import torch ...@@ -9,7 +9,7 @@ import torch
from mmcv.utils import print_log from mmcv.utils import print_log
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops
from .custom_3d import Custom3DDataset from .custom_3d import Custom3DDataset
from .utils import remove_dontcare from .utils import remove_dontcare
...@@ -87,13 +87,14 @@ class KittiDataset(Custom3DDataset): ...@@ -87,13 +87,14 @@ class KittiDataset(Custom3DDataset):
# print(gt_names, len(loc)) # print(gt_names, len(loc))
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]], gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32) axis=1).astype(np.float32)
# this change gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect, # convert gt_bboxes_3d to velodyne coordinates
Trv2c) gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
gt_bboxes = annos['bbox'] gt_bboxes = annos['bbox']
selected = self.drop_arrays_by_name(gt_names, ['DontCare']) selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32') # gt_bboxes_3d = gt_bboxes_3d[selected].astype('float32')
gt_bboxes = gt_bboxes[selected].astype('float32') gt_bboxes = gt_bboxes[selected].astype('float32')
gt_names = gt_names[selected] gt_names = gt_names[selected]
......
...@@ -7,7 +7,7 @@ import pyquaternion ...@@ -7,7 +7,7 @@ import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import box_np_ops from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
from .custom_3d import Custom3DDataset from .custom_3d import Custom3DDataset
...@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset): ...@@ -152,10 +152,6 @@ class NuScenesDataset(Custom3DDataset):
# filter out bbox containing no points # filter out bbox containing no points
mask = info['num_lidar_pts'] > 0 mask = info['num_lidar_pts'] > 0
gt_bboxes_3d = info['gt_boxes'][mask] gt_bboxes_3d = info['gt_boxes'][mask]
# the nuscenes box center is [0.5, 0.5, 0.5], we keep it
# the same as KITTI [0.5, 0.5, 0]
box_np_ops.change_box3d_center_(gt_bboxes_3d, [0.5, 0.5, 0.5],
[0.5, 0.5, 0])
gt_names_3d = info['gt_names'][mask] gt_names_3d = info['gt_names'][mask]
gt_labels_3d = [] gt_labels_3d = []
for cat in gt_names_3d: for cat in gt_names_3d:
...@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset): ...@@ -171,6 +167,13 @@ class NuScenesDataset(Custom3DDataset):
gt_velocity[nan_mask] = [0.0, 0.0] gt_velocity[nan_mask] = [0.0, 0.0]
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1) gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# the nuscenes box center is [0.5, 0.5, 0.5], we keep it
# the same as KITTI [0.5, 0.5, 0]
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=[0.5, 0.5, 0.5])
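A numpy-only sketch (not part of this commit) of the origin shift above: moving the box reference point from the geometric center [0.5, 0.5, 0.5] to the bottom center [0.5, 0.5, 0] lowers the stored z by half the box height.

import numpy as np

box = np.array([10.0, 2.0, 1.0, 4.0, 1.8, 1.6, 0.0])   # center-origin annotation
src, dst = np.array([0.5, 0.5, 0.5]), np.array([0.5, 0.5, 0.0])
box[:3] += box[3:6] * (dst - src)
print(box[:3])  # [10.   2.   0.2] -> bottom center, matching the KITTI-style convention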
anns_results = dict( anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d, gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d, gt_labels_3d=gt_labels_3d,
......
import numpy as np import numpy as np
from mmcv.parallel import DataContainer as DC from mmcv.parallel import DataContainer as DC
from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor from mmdet.datasets.pipelines import to_tensor
...@@ -39,9 +40,8 @@ class DefaultFormatBundle(object): ...@@ -39,9 +40,8 @@ class DefaultFormatBundle(object):
img = np.ascontiguousarray(results['img'].transpose(2, 0, 1)) img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True) results['img'] = DC(to_tensor(img), stack=True)
for key in [ for key in [
'proposals', 'gt_bboxes', 'gt_bboxes_3d', 'gt_bboxes_ignore', 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
'gt_labels', 'gt_labels_3d', 'pts_instance_mask', 'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask'
'pts_semantic_mask'
]: ]:
if key not in results: if key not in results:
continue continue
...@@ -49,6 +49,14 @@ class DefaultFormatBundle(object): ...@@ -49,6 +49,14 @@ class DefaultFormatBundle(object):
results[key] = DC([to_tensor(res) for res in results[key]]) results[key] = DC([to_tensor(res) for res in results[key]])
else: else:
results[key] = DC(to_tensor(results[key])) results[key] = DC(to_tensor(results[key]))
if 'gt_bboxes_3d' in results:
if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
results['gt_bboxes_3d'] = DC(
results['gt_bboxes_3d'], cpu_only=True)
else:
results['gt_bboxes_3d'] = DC(
to_tensor(results['gt_bboxes_3d']))
if 'gt_masks' in results: if 'gt_masks' in results:
results['gt_masks'] = DC(results['gt_masks'], cpu_only=True) results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
if 'gt_semantic_seg' in results: if 'gt_semantic_seg' in results:
......
...@@ -26,12 +26,8 @@ class RandomFlip3D(RandomFlip): ...@@ -26,12 +26,8 @@ class RandomFlip3D(RandomFlip):
self.sync_2d = sync_2d self.sync_2d = sync_2d
def random_flip_points(self, gt_bboxes_3d, points): def random_flip_points(self, gt_bboxes_3d, points):
gt_bboxes_3d[:, 1] = -gt_bboxes_3d[:, 1] gt_bboxes_3d.flip()
gt_bboxes_3d[:, 6] = -gt_bboxes_3d[:, 6] + np.pi
points[:, 1] = -points[:, 1] points[:, 1] = -points[:, 1]
if gt_bboxes_3d.shape[1] == 9:
# flip velocitys at the same time
gt_bboxes_3d[:, 8] = -gt_bboxes_3d[:, 8]
return gt_bboxes_3d, points return gt_bboxes_3d, points
def __call__(self, input_dict): def __call__(self, input_dict):
...@@ -121,10 +117,13 @@ class ObjectSample(object): ...@@ -121,10 +117,13 @@ class ObjectSample(object):
gt_bboxes_2d = input_dict['gt_bboxes'] gt_bboxes_2d = input_dict['gt_bboxes']
# Assume for now 3D & 2D bboxes are the same # Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all( sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_labels_3d, gt_bboxes_2d=gt_bboxes_2d, img=img) gt_bboxes_3d.tensor.numpy(),
gt_labels_3d,
gt_bboxes_2d=gt_bboxes_2d,
img=img)
else: else:
sampled_dict = self.db_sampler.sample_all( sampled_dict = self.db_sampler.sample_all(
gt_bboxes_3d, gt_labels_3d, img=None) gt_bboxes_3d.tensor.numpy(), gt_labels_3d, img=None)
if sampled_dict is not None: if sampled_dict is not None:
sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d'] sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d']
...@@ -133,8 +132,9 @@ class ObjectSample(object): ...@@ -133,8 +132,9 @@ class ObjectSample(object):
gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels], gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels],
axis=0) axis=0)
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, sampled_gt_bboxes_3d gt_bboxes_3d = gt_bboxes_3d.new_box(
]).astype(np.float32) np.concatenate(
[gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d]))
points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d) points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d)
# check the points dimension # check the points dimension
...@@ -178,14 +178,16 @@ class ObjectNoise(object): ...@@ -178,14 +178,16 @@ class ObjectNoise(object):
points = input_dict['points'] points = input_dict['points']
# TODO: check this inplace function # TODO: check this inplace function
numpy_box = gt_bboxes_3d.tensor.numpy()
noise_per_object_v3_( noise_per_object_v3_(
gt_bboxes_3d, numpy_box,
points, points,
rotation_perturb=self.rot_uniform_noise, rotation_perturb=self.rot_uniform_noise,
center_noise_std=self.loc_noise_std, center_noise_std=self.loc_noise_std,
global_random_rot_range=self.global_rot_range, global_random_rot_range=self.global_rot_range,
num_try=self.num_try) num_try=self.num_try)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box)
input_dict['points'] = points input_dict['points'] = points
return input_dict return input_dict
...@@ -212,7 +214,7 @@ class GlobalRotScale(object): ...@@ -212,7 +214,7 @@ class GlobalRotScale(object):
def _trans_bbox_points(self, gt_boxes, points): def _trans_bbox_points(self, gt_boxes, points):
noise_trans = np.random.normal(0, self.trans_normal_noise[0], 3).T noise_trans = np.random.normal(0, self.trans_normal_noise[0], 3).T
points[:, :3] += noise_trans points[:, :3] += noise_trans
gt_boxes[:, :3] += noise_trans gt_boxes.translate(noise_trans)
return gt_boxes, points, noise_trans return gt_boxes, points, noise_trans
def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4): def _rot_bbox_points(self, gt_boxes, points, rotation=np.pi / 4):
...@@ -221,16 +223,8 @@ class GlobalRotScale(object): ...@@ -221,16 +223,8 @@ class GlobalRotScale(object):
noise_rotation = np.random.uniform(rotation[0], rotation[1]) noise_rotation = np.random.uniform(rotation[0], rotation[1])
points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle( points[:, :3], rot_mat_T = box_np_ops.rotation_points_single_angle(
points[:, :3], noise_rotation, axis=2) points[:, :3], noise_rotation, axis=2)
gt_boxes[:, :3], _ = box_np_ops.rotation_points_single_angle( gt_boxes.rotate(noise_rotation)
gt_boxes[:, :3], noise_rotation, axis=2)
gt_boxes[:, 6] += noise_rotation
if gt_boxes.shape[1] == 9:
# rotate velo vector
rot_cos = np.cos(noise_rotation)
rot_sin = np.sin(noise_rotation)
rot_mat_T_bev = np.array([[rot_cos, -rot_sin], [rot_sin, rot_cos]],
dtype=points.dtype)
gt_boxes[:, 7:9] = gt_boxes[:, 7:9] @ rot_mat_T_bev
return gt_boxes, points, rot_mat_T return gt_boxes, points, rot_mat_T
def _scale_bbox_points(self, def _scale_bbox_points(self,
...@@ -240,9 +234,7 @@ class GlobalRotScale(object): ...@@ -240,9 +234,7 @@ class GlobalRotScale(object):
max_scale=1.05): max_scale=1.05):
noise_scale = np.random.uniform(min_scale, max_scale) noise_scale = np.random.uniform(min_scale, max_scale)
points[:, :3] *= noise_scale points[:, :3] *= noise_scale
gt_boxes[:, :6] *= noise_scale gt_boxes.scale(noise_scale)
if gt_boxes.shape[1] == 9:
gt_boxes[:, 7:] *= noise_scale
return gt_boxes, points, noise_scale return gt_boxes, points, noise_scale
def __call__(self, input_dict): def __call__(self, input_dict):
...@@ -256,7 +248,7 @@ class GlobalRotScale(object): ...@@ -256,7 +248,7 @@ class GlobalRotScale(object):
gt_bboxes_3d, points, trans_factor = self._trans_bbox_points( gt_bboxes_3d, points, trans_factor = self._trans_bbox_points(
gt_bboxes_3d, points) gt_bboxes_3d, points)
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32') input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['points'] = points input_dict['points'] = points
input_dict['pcd_scale_factor'] = scale_factor input_dict['pcd_scale_factor'] = scale_factor
input_dict['pcd_rotation'] = rotation_factor input_dict['pcd_rotation'] = rotation_factor
...@@ -290,10 +282,6 @@ class ObjectRangeFilter(object): ...@@ -290,10 +282,6 @@ class ObjectRangeFilter(object):
self.pcd_range = np.array(point_cloud_range, dtype=np.float32) self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
self.bev_range = self.pcd_range[[0, 1, 3, 4]] self.bev_range = self.pcd_range[[0, 1, 3, 4]]
@staticmethod
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
@staticmethod @staticmethod
def filter_gt_box_outside_range(gt_bboxes_3d, limit_range): def filter_gt_box_outside_range(gt_bboxes_3d, limit_range):
"""remove gtbox outside training range. """remove gtbox outside training range.
...@@ -314,14 +302,13 @@ class ObjectRangeFilter(object): ...@@ -314,14 +302,13 @@ class ObjectRangeFilter(object):
def __call__(self, input_dict): def __call__(self, input_dict):
gt_bboxes_3d = input_dict['gt_bboxes_3d'] gt_bboxes_3d = input_dict['gt_bboxes_3d']
gt_labels_3d = input_dict['gt_labels_3d'] gt_labels_3d = input_dict['gt_labels_3d']
mask = self.filter_gt_box_outside_range(gt_bboxes_3d, self.bev_range) mask = gt_bboxes_3d.in_range_bev(self.bev_range)
gt_bboxes_3d = gt_bboxes_3d[mask] gt_bboxes_3d = gt_bboxes_3d[mask]
gt_labels_3d = gt_labels_3d[mask] gt_labels_3d = gt_labels_3d[mask]
# limit rad to [-pi, pi] # limit rad to [-pi, pi]
gt_bboxes_3d[:, 6] = self.limit_period( gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
gt_bboxes_3d[:, 6], offset=0.5, period=2 * np.pi) input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_bboxes_3d'] = gt_bboxes_3d.astype('float32')
input_dict['gt_labels_3d'] = gt_labels_3d input_dict['gt_labels_3d'] = gt_labels_3d
return input_dict return input_dict
......
...@@ -168,6 +168,8 @@ class AnchorTrainMixin(object): ...@@ -168,6 +168,8 @@ class AnchorTrainMixin(object):
labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long) labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
if len(gt_bboxes) > 0: if len(gt_bboxes) > 0:
if not isinstance(gt_bboxes, torch.Tensor):
gt_bboxes = gt_bboxes.tensor.to(anchors.device)
assign_result = bbox_assigner.assign(anchors, gt_bboxes, assign_result = bbox_assigner.assign(anchors, gt_bboxes,
gt_bboxes_ignore, gt_labels) gt_bboxes_ignore, gt_labels)
sampling_result = self.bbox_sampler.sample(assign_result, anchors, sampling_result = self.bbox_sampler.sample(assign_result, anchors,
......
...@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module): ...@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
3, 3,
padding=1, padding=1,
norm_cfg=norm_cfg, norm_cfg=norm_cfg,
indice_key=f'rcnn_down0', indice_key='rcnn_down0'))
conv_type='SubMConv3d'))
merge_conv_channel_last = channel merge_conv_channel_last = channel
down_conv_channel_last = merge_conv_channel_last down_conv_channel_last = merge_conv_channel_last
...@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module): ...@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
3, 3,
padding=1, padding=1,
norm_cfg=norm_cfg, norm_cfg=norm_cfg,
indice_key=f'rcnn_down1', indice_key='rcnn_down1'))
conv_type='SubMConv3d'))
down_conv_channel_last = channel down_conv_channel_last = channel
self.conv_down.add_module('merge_conv', self.conv_down.add_module('merge_conv',
......
...@@ -5,7 +5,6 @@ import torch.nn.functional as F ...@@ -5,7 +5,6 @@ import torch.nn.functional as F
from mmdet3d.core import multi_apply from mmdet3d.core import multi_apply
from mmdet3d.core.bbox import box_torch_ops from mmdet3d.core.bbox import box_torch_ops
from mmdet3d.models.builder import build_loss from mmdet3d.models.builder import build_loss
from mmdet3d.ops.roiaware_pool3d import points_in_boxes_gpu
from mmdet.models import HEADS from mmdet.models import HEADS
...@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module): ...@@ -14,7 +13,7 @@ class PointwiseSemanticHead(nn.Module):
"""Semantic segmentation head for point-wise segmentation. """Semantic segmentation head for point-wise segmentation.
Predict point-wise segmentation and part regression results for PartA2. Predict point-wise segmentation and part regression results for PartA2.
See https://arxiv.org/abs/1907.03670 for more details. See `paper <https://arxiv.org/abs/1907.03670>`_ for more details.
Args: Args:
in_channels (int): the number of input channel. in_channels (int): the number of input channel.
...@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module): ...@@ -65,28 +64,27 @@ class PointwiseSemanticHead(nn.Module):
seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats) seg_preds=seg_preds, part_preds=part_preds, part_feats=part_feats)
def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d): def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d):
"""generate segmentation and part prediction targets """generate segmentation and part prediction targets for a single sample
Args: Args:
voxel_centers (torch.Tensor): shape [voxel_num, 3], voxel_centers (torch.Tensor): shape [voxel_num, 3],
the center of voxels the center of voxels
gt_bboxes_3d (torch.Tensor): shape [box_num, 7], gt boxes gt_bboxes_3d (:obj:BaseInstance3DBoxes): gt boxes containing tensor
of shape [box_num, 7].
gt_labels_3d (torch.Tensor): shape [box_num], class label of gt gt_labels_3d (torch.Tensor): shape [box_num], class label of gt
Returns: Returns:
tuple : segmentation targets with shape [voxel_num] tuple : segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3] part prediction targets with shape [voxel_num, 3]
""" """
enlarged_gt_boxes = box_torch_ops.enlarge_box3d_lidar( gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device)
gt_bboxes_3d, extra_width=self.extra_width) enlarged_gt_boxes = gt_bboxes_3d.enlarged_box(self.extra_width)
part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3), part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3),
dtype=torch.float32) dtype=torch.float32)
box_idx = points_in_boxes_gpu( box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers)
voxel_centers.unsqueeze(0), enlarge_box_idx = enlarged_gt_boxes.points_in_boxes(
gt_bboxes_3d.unsqueeze(0)).squeeze(0) # -1 ~ box_num voxel_centers).long()
enlarge_box_idx = points_in_boxes_gpu(
voxel_centers.unsqueeze(0),
enlarged_gt_boxes.unsqueeze(0)).squeeze(0).long() # -1 ~ box_num
gt_labels_pad = F.pad( gt_labels_pad = F.pad(
gt_labels_3d, (1, 0), mode='constant', value=self.num_classes) gt_labels_3d, (1, 0), mode='constant', value=self.num_classes)
...@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module): ...@@ -95,24 +93,37 @@ class PointwiseSemanticHead(nn.Module):
ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1) ignore_flag = fg_pt_flag ^ (enlarge_box_idx > -1)
seg_targets[ignore_flag] = -1 seg_targets[ignore_flag] = -1
for k in range(gt_bboxes_3d.shape[0]): for k in range(len(gt_bboxes_3d)):
k_box_flag = box_idx == k k_box_flag = box_idx == k
# no point in current box (caused by velodyne reduce) # no point in current box (caused by velodyne reduce)
if not k_box_flag.any(): if not k_box_flag.any():
continue continue
fg_voxels = voxel_centers[k_box_flag] fg_voxels = voxel_centers[k_box_flag]
transformed_voxels = fg_voxels - gt_bboxes_3d[k, 0:3] transformed_voxels = fg_voxels - gt_bboxes_3d.bottom_center[k]
transformed_voxels = box_torch_ops.rotation_3d_in_axis( transformed_voxels = box_torch_ops.rotation_3d_in_axis(
transformed_voxels.unsqueeze(0), transformed_voxels.unsqueeze(0),
-gt_bboxes_3d[k, 6].view(1), -gt_bboxes_3d.yaw[k].view(1),
axis=2) axis=2)
part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d[ part_targets[k_box_flag] = transformed_voxels / gt_bboxes_3d.dims[
k, 3:6] + voxel_centers.new_tensor([0.5, 0.5, 0]) k] + voxel_centers.new_tensor([0.5, 0.5, 0])
part_targets = torch.clamp(part_targets, min=0) part_targets = torch.clamp(part_targets, min=0)
return seg_targets, part_targets return seg_targets, part_targets
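A numpy-only sketch of the part-target arithmetic above for a single voxel center and a yaw = 0 box: express the voxel relative to the bottom center, normalize by the box size, and shift so the bottom center maps to (0.5, 0.5, 0).

import numpy as np

bottom_center, dims = np.array([2.0, 0.0, -1.0]), np.array([4.0, 2.0, 1.6])
voxel = np.array([3.0, 0.5, -0.2])
local = voxel - bottom_center                # no rotation needed since yaw = 0
part_target = local / dims + np.array([0.5, 0.5, 0.0])
print(part_target)                           # [0.75 0.75 0.5 ]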
def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d): def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d):
"""generate segmentation and part prediction targets
Args:
voxels_dict (dict): The dict containing voxel-wise information,
including the voxel centers of shape [voxel_num, 3].
gt_bboxes_3d (list[:obj:BaseInstance3DBoxes]): list of gt boxes
containing tensor of shape [box_num, 7].
gt_labels_3d (list[torch.Tensor]): list of GT labels.
Returns:
tuple : segmentation targets with shape [voxel_num]
part prediction targets with shape [voxel_num, 3]
"""
batch_size = len(gt_labels_3d) batch_size = len(gt_labels_3d)
voxel_center_list = [] voxel_center_list = []
for idx in range(batch_size): for idx in range(batch_size):
......
...@@ -8,7 +8,7 @@ from ..builder import build_head, build_roi_extractor ...@@ -8,7 +8,7 @@ from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead from .base_3droi_head import Base3DRoIHead
@HEADS.register_module @HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead): class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2""" """Part aggregation roi head for PartA2"""
...@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead): ...@@ -174,7 +174,7 @@ class PartAggregationROIHead(Base3DRoIHead):
cur_proposal_list = proposal_list[batch_idx] cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d'] cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d'] cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx] cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx] cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0 batch_num_gts = 0
...@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead): ...@@ -189,7 +189,7 @@ class PartAggregationROIHead(Base3DRoIHead):
pred_per_cls = (cur_labels_3d == i) pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign( cur_assign_res = assigner.assign(
cur_boxes[pred_per_cls], cur_boxes[pred_per_cls],
cur_gt_bboxes[gt_per_cls], cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls]) gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result # gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts batch_num_gts += cur_assign_res.num_gts
...@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead): ...@@ -215,11 +215,11 @@ class PartAggregationROIHead(Base3DRoIHead):
batch_gt_labels) batch_gt_labels)
else: # for single class else: # for single class
assign_result = self.bbox_assigner.assign( assign_result = self.bbox_assigner.assign(
cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels) cur_boxes, cur_gt_bboxes.tensor, gt_labels=cur_gt_labels)
# sample boxes # sample boxes
sampling_result = self.bbox_sampler.sample(assign_result, sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes, cur_boxes,
cur_gt_bboxes, cur_gt_bboxes.tensor,
cur_gt_labels) cur_gt_labels)
sampling_results.append(sampling_result) sampling_results.append(sampling_result)
return sampling_results return sampling_results
......
from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet from .pillar_encoder import PillarFeatureNet
from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor, from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE
VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
__all__ = [ __all__ = [
'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor', 'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3' 'DynamicSimpleVFE'
] ]
...@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator ...@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class PillarFeatureNet(nn.Module): class PillarFeatureNet(nn.Module):
"""Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
in_channels (int): Number of input features,
either x, y, z or x, y, z, r.
feat_channels (list[int]): Number of features in each of the
N PFNLayers.
with_distance (bool): Whether to include Euclidean distance
to points.
voxel_size (list[float]): Size of voxels, only utilizes x and y
size.
point_cloud_range (list[float]): Point cloud range, only
utilizes x and y min.
"""
def __init__(self, def __init__(self,
num_input_features=4, in_channels=4,
use_norm=True, feat_channels=(64, ),
num_filters=(64, ),
with_distance=False, with_distance=False,
with_cluster_center=True, with_cluster_center=True,
with_voxel_center=True, with_voxel_center=True,
voxel_size=(0.2, 0.2, 4), voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1), point_cloud_range=(0, -40, -3, 70.4, 40, 1),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
mode='max'): mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int). Number of input features,
either x, y, z or x, y, z, r.
use_norm (bool). Whether to include BatchNorm.
num_filters (list[int]). Number of features in each of the
N PFNLayers.
with_distance (bool). Whether to include Euclidean distance
to points.
voxel_size (list[float]). Size of voxels, only utilize x and y
size.
point_cloud_range (list[float>]). Point cloud range, only
utilize x and y min.
"""
super(PillarFeatureNet, self).__init__() super(PillarFeatureNet, self).__init__()
assert len(num_filters) > 0 assert len(feat_channels) > 0
if with_cluster_center: if with_cluster_center:
num_input_features += 3 in_channels += 3
if with_voxel_center: if with_voxel_center:
num_input_features += 2 in_channels += 2
if with_distance: if with_distance:
num_input_features += 1 in_channels += 1
self._with_distance = with_distance self._with_distance = with_distance
self._with_cluster_center = with_cluster_center self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers # Create PillarFeatureNet layers
self.num_input_features = num_input_features self.in_channels = in_channels
num_filters = [num_input_features] + list(num_filters) feat_channels = [in_channels] + list(feat_channels)
pfn_layers = [] pfn_layers = []
for i in range(len(num_filters) - 1): for i in range(len(feat_channels) - 1):
in_filters = num_filters[i] in_filters = feat_channels[i]
out_filters = num_filters[i + 1] out_filters = feat_channels[i + 1]
if i < len(num_filters) - 2: if i < len(feat_channels) - 2:
last_layer = False last_layer = False
else: else:
last_layer = True last_layer = True
...@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module): ...@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
PFNLayer( PFNLayer(
in_filters, in_filters,
out_filters, out_filters,
use_norm, norm_cfg=norm_cfg,
last_layer=last_layer, last_layer=last_layer,
mode=mode)) mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers) self.pfn_layers = nn.ModuleList(pfn_layers)
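A quick sketch of the channel bookkeeping above: with the default flags the first PFNLayer sees 4 raw point features plus 3 cluster-offset and 2 pillar-offset channels; with_distance would add one more.

in_channels = 4        # x, y, z, reflectance
in_channels += 3       # with_cluster_center
in_channels += 2       # with_voxel_center
print(in_channels)     # 9; with_distance=True would make it 10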
...@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module): ...@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
class DynamicPillarFeatureNet(PillarFeatureNet): class DynamicPillarFeatureNet(PillarFeatureNet):
def __init__(self, def __init__(self,
num_input_features=4, in_channels=4,
use_norm=True, feat_channels=(64, ),
num_filters=(64, ),
with_distance=False, with_distance=False,
with_cluster_center=True, with_cluster_center=True,
with_voxel_center=True, with_voxel_center=True,
...@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet): ...@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
""" """
super(DynamicPillarFeatureNet, self).__init__( super(DynamicPillarFeatureNet, self).__init__(
num_input_features, in_channels,
use_norm, feat_channels,
num_filters,
with_distance, with_distance,
with_cluster_center=with_cluster_center, with_cluster_center=with_cluster_center,
with_voxel_center=with_voxel_center, with_voxel_center=with_voxel_center,
voxel_size=voxel_size, voxel_size=voxel_size,
point_cloud_range=point_cloud_range, point_cloud_range=point_cloud_range,
norm_cfg=norm_cfg,
mode=mode) mode=mode)
num_filters = [self.num_input_features] + list(num_filters) feat_channels = [self.in_channels] + list(feat_channels)
pfn_layers = [] pfn_layers = []
# TODO: currently only support one PFNLayer # TODO: currently only support one PFNLayer
for i in range(len(num_filters) - 1): for i in range(len(feat_channels) - 1):
in_filters = num_filters[i] in_filters = feat_channels[i]
out_filters = num_filters[i + 1] out_filters = feat_channels[i + 1]
if i > 0: if i > 0:
in_filters *= 2 in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters) norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet): ...@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
features = torch.cat([point_feats, feat_per_point], dim=1) features = torch.cat([point_feats, feat_per_point], dim=1)
return voxel_feats, voxel_coors return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module()
class AlignedPillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int): Number of input features, either x, y, z
or x, y, z, r.
use_norm (bool): Whether to include BatchNorm.
num_filters (list[int]): Number of features in each of the N
PFNLayers.
with_distance (bool): Whether to include Euclidean distance to
points.
voxel_size (list[float]): Size of voxels, only utilize x and y
size.
point_cloud_range: (list[float]): Point cloud range, only
utilize x and y min.
"""
super(AlignedPillarFeatureNet, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
print('Use cluster center')
num_input_features += 3
if with_voxel_center:
print('Use voxel center')
num_input_features += 2
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(
PFNLayer(
in_filters,
out_filters,
use_norm,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to
# calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
def forward(self, features, num_points, coors):
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
x_distance = features[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
y_distance = features[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
z_distance = features[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
normed_x_distance = 1 - torch.abs(x_distance / self.vx)
normed_y_distance = 1 - torch.abs(y_distance / self.vy)
normed_z_distance = 1 - torch.abs(z_distance / self.vz)
x_mask = torch.gt(normed_x_distance, 0).type_as(features)
y_mask = torch.gt(normed_y_distance, 0).type_as(features)
z_mask = torch.gt(normed_z_distance, 0).type_as(features)
nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
normed_z_distance).mul(nonzero_points_mask)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features[:, :, :2]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to
# whether pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
for pfn in self.pfn_layers:
if pfn.last_vfe:
features = pfn(features, aligned_distance)
else:
features = pfn(features)
return features.squeeze()
...@@ -4,28 +4,15 @@ from torch import nn ...@@ -4,28 +4,15 @@ from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
class Empty(nn.Module):
def __init__(self, *args, **kwargs):
super(Empty, self).__init__()
def forward(self, *args, **kwargs):
if len(args) == 1:
return args[0]
elif len(args) == 0:
return None
return args
def get_paddings_indicator(actual_num, max_num, axis=0): def get_paddings_indicator(actual_num, max_num, axis=0):
"""Create boolean mask by actually number of a padded tensor. """Create boolean mask by actually number of a padded tensor.
Args: Args:
actual_num ([type]): [description] actual_num (torch.Tensor): Actual number of points in each voxel.
max_num ([type]): [description] max_num (int): Max number of points in each voxel
Returns: Returns:
[type]: [description] torch.Tensor: Mask indicates which points are valid inside a voxel.
""" """
actual_num = torch.unsqueeze(actual_num, axis + 1) actual_num = torch.unsqueeze(actual_num, axis + 1)
# tiled_actual_num: [N, M, 1] # tiled_actual_num: [N, M, 1]
...@@ -52,13 +39,9 @@ class VFELayer(nn.Module): ...@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
self.cat_max = cat_max self.cat_max = cat_max
self.max_out = max_out self.max_out = max_out
# self.units = int(out_channels / 2) # self.units = int(out_channels / 2)
if norm_cfg:
norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels) self.norm = build_norm_layer(norm_cfg, out_channels)[1]
self.norm = norm_layer self.linear = nn.Linear(in_channels, out_channels, bias=False)
self.linear = nn.Linear(in_channels, out_channels, bias=False)
else:
self.norm = Empty(out_channels)
self.linear = nn.Linear(in_channels, out_channels, bias=True)
def forward(self, inputs): def forward(self, inputs):
# [K, T, 7] tensordot [7, units] = [K, T, units] # [K, T, 7] tensordot [7, units] = [K, T, units]
...@@ -89,7 +72,7 @@ class PFNLayer(nn.Module): ...@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
def __init__(self, def __init__(self,
in_channels, in_channels,
out_channels, out_channels,
use_norm=True, norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
last_layer=False, last_layer=False,
mode='max'): mode='max'):
""" Pillar Feature Net Layer. """ Pillar Feature Net Layer.
...@@ -100,9 +83,11 @@ class PFNLayer(nn.Module): ...@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
Args: Args:
in_channels (int): Number of input channels. in_channels (int): Number of input channels.
out_channels (int): Number of output channels. out_channels (int): Number of output channels.
use_norm (bool): Whether to include BatchNorm. norm_cfg (dict): Config dict of normalization layers
last_layer (bool): If last_layer, there is no concatenation of last_layer (bool): If last_layer, there is no concatenation of
features. features.
mode (str): Pooling mode to gather features inside voxels.
Defaults to 'max'.
""" """
super().__init__() super().__init__()
...@@ -112,13 +97,10 @@ class PFNLayer(nn.Module): ...@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
out_channels = out_channels // 2 out_channels = out_channels // 2
self.units = out_channels self.units = out_channels
if use_norm: self.norm = build_norm_layer(norm_cfg, self.units)[1]
self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01) self.linear = nn.Linear(in_channels, self.units, bias=False)
self.linear = nn.Linear(in_channels, self.units, bias=False)
else:
self.norm = Empty(self.unints)
self.linear = nn.Linear(in_channels, self.units, bias=True)
assert mode in ['max', 'avg']
self.mode = mode self.mode = mode
def forward(self, inputs, num_voxels=None, aligned_distance=None): def forward(self, inputs, num_voxels=None, aligned_distance=None):
......
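To illustrate the switch from `use_norm` to `norm_cfg`, here is a minimal sketch (not the layer's exact forward) of how `build_norm_layer` with the default config reproduces the previously hard-coded BatchNorm1d, applied over the channel dimension of padded pillars. The channel sizes are made up for the example.

import torch
from mmcv.cnn import build_norm_layer
from torch import nn

norm_cfg = dict(type='BN1d', eps=1e-3, momentum=0.01)  # same values as the old nn.BatchNorm1d
units = 64
norm_layer = build_norm_layer(norm_cfg, units)[1]
linear = nn.Linear(9, units, bias=False)

pillars = torch.rand(5, 20, 9)  # 5 pillars, 20 padded points, 9 input features
x = linear(pillars)
# BN1d expects [K, C, T], so permute before and after normalization
x = norm_layer(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
x = torch.relu(x)
pooled = x.max(dim=1, keepdim=True)[0]  # 'max' pooling over the points of each pillar
# pooled.shape == torch.Size([5, 1, 64])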
import torch import torch
from mmcv.cnn import build_norm_layer from mmcv.cnn import build_norm_layer
from torch import nn from torch import nn
from torch.nn import functional as F
from mmdet3d.ops import DynamicScatter from mmdet3d.ops import DynamicScatter
from .. import builder from .. import builder
from ..registry import VOXEL_ENCODERS from ..registry import VOXEL_ENCODERS
from .utils import Empty, VFELayer, get_paddings_indicator from .utils import VFELayer, get_paddings_indicator
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractor(nn.Module): class HardSimpleVFE(nn.Module):
"""Simple voxel feature encoder used in SECOND
def __init__(self, It simply averages the values of points in a voxel.
num_input_features=4, """
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractor, self).__init__()
self.name = name
assert len(num_filters) == 2
num_input_features += 3 # add mean features
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
if use_norm:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
self.norm = Empty(num_filters[1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
# t = time.time()
# torch.cuda.synchronize()
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
# mask = features.max(dim=2, keepdim=True)[0] != 0
# torch.cuda.synchronize()
# print("vfe prep forward time", time.time() - t)
x = self.vfe1(features)
x *= mask
x = self.vfe2(x)
x *= mask
x = self.linear(x)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
x *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(x, dim=1)[0]
return voxelwise
def __init__(self):
@VOXEL_ENCODERS.register_module() super(HardSimpleVFE, self).__init__()
class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV2, self).__init__()
self.name = name
assert len(num_filters) > 0
num_input_features += 3
if with_distance:
num_input_features += 1
self._with_distance = with_distance
num_filters = [num_input_features] + num_filters
filters_pairs = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
self.vfe_layers = nn.ModuleList(
[VFELayer(i, o, use_norm) for i, o in filters_pairs])
if use_norm:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=False)
self.norm = nn.BatchNorm1d(
num_filters[-1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=True)
self.norm = Empty(num_filters[-1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
for vfe in self.vfe_layers:
features = vfe(features)
features *= mask
features = self.linear(features)
features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
0, 2, 1).contiguous()
features = F.relu(features)
features *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(features, dim=1)[0]
return voxelwise
@VOXEL_ENCODERS.register_module()
class VoxelFeatureExtractorV3(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV3, self).__init__()
self.name = name
def forward(self, features, num_points, coors): def forward(self, features, num_points, coors):
# features: [concated_num_points, num_voxel_size, 3(4)] # features: [concated_num_points, num_voxel_size, 3(4)]
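The averaging itself is elided from this hunk; a minimal sketch of the forward body, assuming `features` is a zero-padded [M, max_points, C] tensor and `num_points` holds the real point count per voxel:

def forward(self, features, num_points, coors):  # sketch of the body only
    # zero padding drops out of the sum, so dividing by the real point count
    # gives the mean over the valid points of each voxel
    points_mean = features.sum(dim=1) / num_points.type_as(features).view(-1, 1)
    return points_mean.contiguous()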
...@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module): ...@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class DynamicVFEV3(nn.Module): class DynamicSimpleVFE(nn.Module):
"""Simple dynamic voxel feature encoder used in DV-SECOND
It simply averages the values of points in a voxel.
But the number of points in a voxel is dynamic and varies.
Args:
voxel_size (tuple[float]): Size of a single voxel
point_cloud_range (tuple[float]): Range of the point cloud and voxels
"""
def __init__(self, def __init__(self,
num_input_features=4,
voxel_size=(0.2, 0.2, 4), voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1)): point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
super(DynamicVFEV3, self).__init__() super(DynamicSimpleVFE, self).__init__()
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True) self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
@torch.no_grad() @torch.no_grad()
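The decorated forward is elided from this hunk; assuming `DynamicScatter(voxel_size, point_cloud_range, True)` averages point features per voxel and returns `(voxel_feats, voxel_coors)`, a plausible sketch of it is:

# sketch only -- argument and attribute names follow the __init__ shown above
def forward(self, features, coors):
    # features: [M, C] point features; coors: [M, 1 + ndim] voxel coordinates
    voxel_feats, voxel_coors = self.scatter(features, coors)
    return voxel_feats, voxel_coors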
...@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module): ...@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class DynamicVFE(nn.Module): class DynamicVFE(nn.Module):
"""Dynamic Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image features into voxel features in a point-wise manner.
The number of points inside the voxel varies.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to the center of
the voxel for each point inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
point. Default to False.
"""
def __init__(self, def __init__(self,
num_input_features=4, in_channels=4,
num_filters=[], feat_channels=[],
with_distance=False, with_distance=False,
with_cluster_center=False, with_cluster_center=False,
with_voxel_center=False, with_voxel_center=False,
...@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module): ...@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
fusion_layer=None, fusion_layer=None,
return_point_feats=False): return_point_feats=False):
super(DynamicVFE, self).__init__() super(DynamicVFE, self).__init__()
assert len(num_filters) > 0 assert mode in ['avg', 'max']
assert len(feat_channels) > 0
if with_cluster_center: if with_cluster_center:
num_input_features += 3 in_channels += 3
if with_voxel_center: if with_voxel_center:
num_input_features += 3 in_channels += 3
if with_distance: if with_distance:
num_input_features += 3 in_channels += 3
self.num_input_features = num_input_features self.in_channels = in_channels
self._with_distance = with_distance self._with_distance = with_distance
self._with_cluster_center = with_cluster_center self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center self._with_voxel_center = with_voxel_center
...@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module): ...@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
self.point_cloud_range = point_cloud_range self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True) self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters) feat_channels = [self.in_channels] + list(feat_channels)
vfe_layers = [] vfe_layers = []
for i in range(len(num_filters) - 1): for i in range(len(feat_channels) - 1):
in_filters = num_filters[i] in_filters = feat_channels[i]
out_filters = num_filters[i + 1] out_filters = feat_channels[i + 1]
if i > 0: if i > 0:
in_filters *= 2 in_filters *= 2
norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters) norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module): ...@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
self.fusion_layer = builder.build_fusion_layer(fusion_layer) self.fusion_layer = builder.build_fusion_layer(fusion_layer)
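For orientation, a tiny illustrative calculation of the channel bookkeeping in the loop above (the feat_channels values here are made up): because intermediate point features are concatenated with the scattered per-voxel features, every layer after the first sees twice its nominal input width.

in_channels = 4 + 3 + 3                 # raw + cluster-center + voxel-center offsets
feat_channels = [in_channels] + [64, 128]
pairs = []
for i in range(len(feat_channels) - 1):
    in_filters = feat_channels[i]
    out_filters = feat_channels[i + 1]
    if i > 0:
        in_filters *= 2                 # concat of point feature and voxel feature
    pairs.append((in_filters, out_filters))
# pairs == [(10, 64), (128, 128)]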
def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors): def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
"""Map voxel features to its corresponding points.
Args:
pts_coors (torch.Tensor): Voxel coordinate of each point.
voxel_mean (torch.Tensor): Voxel features to be mapped.
voxel_coors (torch.Tensor): Coordinates of valid voxels
Returns:
torch.Tensor: Features or centers of each point.
"""
# Step 1: scatter voxel into canvas # Step 1: scatter voxel into canvas
# Calculate necessary things for canvas creation # Calculate necessary things for canvas creation
canvas_z = int( canvas_z = int(
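The rest of the canvas trick is cut off here; a sketch of the whole lookup, under the assumption that coordinates are ordered (batch_idx, z, y, x), might look like this (a standalone illustration, not the exact method body):

import torch

def map_voxel_center_to_point_sketch(pts_coors, voxel_mean, voxel_coors,
                                     canvas_shape, batch_size):
    canvas_z, canvas_y, canvas_x = canvas_shape
    canvas_len = canvas_z * canvas_y * canvas_x * batch_size
    # Step 1: scatter the row index of every valid voxel into a flat canvas
    canvas = voxel_mean.new_zeros(canvas_len, dtype=torch.long)
    voxel_index = (voxel_coors[:, 0] * canvas_z * canvas_y * canvas_x +
                   voxel_coors[:, 1] * canvas_y * canvas_x +
                   voxel_coors[:, 2] * canvas_x + voxel_coors[:, 3]).long()
    canvas[voxel_index] = torch.arange(
        voxel_coors.size(0), device=voxel_mean.device)
    # Step 2: for every point, read back the feature of the voxel it falls into
    point_index = (pts_coors[:, 0] * canvas_z * canvas_y * canvas_x +
                   pts_coors[:, 1] * canvas_y * canvas_x +
                   pts_coors[:, 2] * canvas_x + pts_coors[:, 3]).long()
    return voxel_mean[canvas[point_index]]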
...@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module): ...@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
points=None, points=None,
img_feats=None, img_feats=None,
img_meta=None): img_meta=None):
""" """Forward functions
features (torch.Tensor): NxC
coors (torch.Tensor): Nx(1+NDim) Args:
features (torch.Tensor): Features of voxels, shape is NxC.
coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
points (list[torch.Tensor], optional): Raw points used to guide the
multi-modality fusion. Defaults to None.
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_meta (dict, optional): Meta information of the image. Defaults to None.
Returns:
tuple: If `return_point_feats` is False, returns voxel features and
their coordinates. If `return_point_feats` is True, returns
the features of each point inside the voxels.
""" """
features_ls = [features] features_ls = [features]
# Find distance of x, y, and z from cluster center # Find distance of x, y, and z from cluster center
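The body is truncated here; as a hedged sketch of what the cluster-center branch typically computes (attribute names are taken from the __init__ shown above and treated as assumptions):

# sketch: per-point offset from the centroid of all points in the same voxel
if self._with_cluster_center:
    voxel_mean, mean_coors = self.scatter(features, coors)
    points_mean = self.map_voxel_center_to_point(coors, voxel_mean, mean_coors)
    f_cluster = features[:, :3] - points_mean[:, :3]
    features_ls.append(f_cluster)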
...@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module): ...@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class HardVFE(nn.Module): class HardVFE(nn.Module):
"""Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image features into voxel features in a point-wise manner.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to the center of
the voxel for each point inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
point. Default to False.
"""
def __init__(self, def __init__(self,
num_input_features=4, in_channels=4,
num_filters=[], feat_channels=[],
with_distance=False, with_distance=False,
with_cluster_center=False, with_cluster_center=False,
with_voxel_center=False, with_voxel_center=False,
...@@ -334,14 +292,14 @@ class HardVFE(nn.Module): ...@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
fusion_layer=None, fusion_layer=None,
return_point_feats=False): return_point_feats=False):
super(HardVFE, self).__init__() super(HardVFE, self).__init__()
assert len(num_filters) > 0 assert len(feat_channels) > 0
if with_cluster_center: if with_cluster_center:
num_input_features += 3 in_channels += 3
if with_voxel_center: if with_voxel_center:
num_input_features += 3 in_channels += 3
if with_distance: if with_distance:
num_input_features += 3 in_channels += 3
self.num_input_features = num_input_features self.in_channels = in_channels
self._with_distance = with_distance self._with_distance = with_distance
self._with_cluster_center = with_cluster_center self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center self._with_voxel_center = with_voxel_center
...@@ -357,16 +315,16 @@ class HardVFE(nn.Module): ...@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
self.point_cloud_range = point_cloud_range self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True) self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters) feat_channels = [self.in_channels] + list(feat_channels)
vfe_layers = [] vfe_layers = []
for i in range(len(num_filters) - 1): for i in range(len(feat_channels) - 1):
in_filters = num_filters[i] in_filters = feat_channels[i]
out_filters = num_filters[i + 1] out_filters = feat_channels[i + 1]
if i > 0: if i > 0:
in_filters *= 2 in_filters *= 2
# TODO: pass norm_cfg to VFE # TODO: pass norm_cfg to VFE
# norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters) # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
if i == (len(num_filters) - 2): if i == (len(feat_channels) - 2):
cat_max = False cat_max = False
max_out = True max_out = True
if fusion_layer: if fusion_layer:
...@@ -394,9 +352,20 @@ class HardVFE(nn.Module): ...@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
coors, coors,
img_feats=None, img_feats=None,
img_meta=None): img_meta=None):
""" """Forward functions
features (torch.Tensor): NxMxC
coors (torch.Tensor): Nx(1+NDim) Args:
features (torch.Tensor): Features of voxels, shape is MxNxC.
num_points (torch.Tensor): Number of points in each voxel.
coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
img_feats (list[torch.Tensor], optional): Image features used for
multi-modality fusion. Defaults to None.
img_meta (dict, optional): Meta information of the image. Defaults to None.
Returns:
tuple: If `return_point_feats` is False, returns voxel features and
their coordinates. If `return_point_feats` is True, returns
the features of each point inside the voxels.
""" """
features_ls = [features] features_ls = [features]
# Find distance of x, y, and z from cluster center # Find distance of x, y, and z from cluster center
...@@ -438,19 +407,29 @@ class HardVFE(nn.Module): ...@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
for i, vfe in enumerate(self.vfe_layers): for i, vfe in enumerate(self.vfe_layers):
voxel_feats = vfe(voxel_feats) voxel_feats = vfe(voxel_feats)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
if (self.fusion_layer is not None and img_feats is not None): if (self.fusion_layer is not None and img_feats is not None):
voxel_feats = self.fusion_with_mask(features, mask, voxel_feats, voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
coors, img_feats, img_meta) coors, img_feats, img_meta)
if torch.isnan(voxel_feats).any():
import pdb
pdb.set_trace()
return voxel_feats return voxel_feats
def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats, def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
img_meta): img_meta):
"""Fuse image and point features with mask.
Args:
features (torch.Tensor): Features of voxels, usually the
values of points in voxels.
mask (torch.Tensor): Mask indicates valid features in each voxel.
voxel_feats (torch.Tensor): Features of voxels.
coors (torch.Tensor): Coordinates of each single voxel.
img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
img_meta (list(dict)): Meta information of image and points.
Returns:
torch.Tensor: Fused features of each voxel.
"""
# the features consist of a batch of points # the features consist of a batch of points
batch_size = coors[-1, 0] + 1 batch_size = coors[-1, 0] + 1
points = [] points = []
...@@ -459,20 +438,13 @@ class HardVFE(nn.Module): ...@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
points.append(features[single_mask][mask[single_mask]]) points.append(features[single_mask][mask[single_mask]])
point_feats = voxel_feats[mask] point_feats = voxel_feats[mask]
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
point_feats = self.fusion_layer(img_feats, points, point_feats, point_feats = self.fusion_layer(img_feats, points, point_feats,
img_meta) img_meta)
if torch.isnan(point_feats).any():
import pdb
pdb.set_trace()
voxel_canvas = voxel_feats.new_zeros( voxel_canvas = voxel_feats.new_zeros(
size=(voxel_feats.size(0), voxel_feats.size(1), size=(voxel_feats.size(0), voxel_feats.size(1),
point_feats.size(-1))) point_feats.size(-1)))
voxel_canvas[mask] = point_feats voxel_canvas[mask] = point_feats
out = torch.max(voxel_canvas, dim=1)[0] out = torch.max(voxel_canvas, dim=1)[0]
if torch.isnan(out).any():
import pdb
pdb.set_trace()
return out return out
...@@ -3,7 +3,7 @@ import pytest ...@@ -3,7 +3,7 @@ import pytest
import torch import torch
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
LiDARInstance3DBoxes) DepthInstance3DBoxes, LiDARInstance3DBoxes)
def test_lidar_boxes3d(): def test_lidar_boxes3d():
...@@ -13,6 +13,46 @@ def test_lidar_boxes3d(): ...@@ -13,6 +13,46 @@ def test_lidar_boxes3d():
assert boxes.tensor.shape[0] == 0 assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7 assert boxes.tensor.shape[1] == 7
# Test init with origin
gravity_center_box = np.array(
[[
-5.24223238e+00, 4.00209696e+01, 2.97570381e-01, 2.06200000e+00,
4.40900000e+00, 1.54800000e+00, -1.48801203e+00
],
[
-2.66751588e+01, 5.59499564e+00, -9.14345860e-01, 3.43000000e-01,
4.58000000e-01, 7.82000000e-01, -4.62759755e+00
],
[
-5.80979675e+00, 3.54092357e+01, 2.00889888e-01, 2.39600000e+00,
3.96900000e+00, 1.73200000e+00, -4.65203216e+00
],
[
-3.13086877e+01, 1.09007628e+00, -1.94612112e-01, 1.94400000e+00,
3.85700000e+00, 1.72300000e+00, -2.81427027e+00
]],
dtype=np.float32)
bottom_center_box = LiDARInstance3DBoxes(
gravity_center_box, origin=[0.5, 0.5, 0.5])
expected_tensor = torch.tensor(
[[
-5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
4.40900000e+00, 1.54800000e+00, -1.48801203e+00
],
[
-2.66751588e+01, 5.59499564e+00, -1.30534586e+00, 3.43000000e-01,
4.58000000e-01, 7.82000000e-01, -4.62759755e+00
],
[
-5.80979675e+00, 3.54092357e+01, -6.65110112e-01, 2.39600000e+00,
3.96900000e+00, 1.73200000e+00, -4.65203216e+00
],
[
-3.13086877e+01, 1.09007628e+00, -1.05611211e+00, 1.94400000e+00,
3.85700000e+00, 1.72300000e+00, -2.81427027e+00
]])
assert torch.allclose(expected_tensor, bottom_center_box.tensor)
# Test init with numpy array # Test init with numpy array
np_boxes = np.array( np_boxes = np.array(
[[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48], [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
...@@ -70,9 +110,19 @@ def test_lidar_boxes3d(): ...@@ -70,9 +110,19 @@ def test_lidar_boxes3d():
[28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927], [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926], [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]) [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]])
boxes.flip() boxes.flip('horizontal')
assert torch.allclose(boxes.tensor, expected_tensor) assert torch.allclose(boxes.tensor, expected_tensor)
expected_tensor = torch.tensor(
[[-1.7802, -2.5162, -1.7501, 1.7500, 3.3900, 1.6500, -1.6616],
[-8.9594, -2.4567, -1.6357, 1.5400, 4.0100, 1.5700, -1.5216],
[-28.2967, 0.5558, -1.3033, 1.4700, 2.2300, 1.4800, -4.7116],
[-26.6690, -21.8230, -1.7361, 1.5600, 3.4800, 1.4000, -4.8316],
[-31.3198, -8.1621, -1.6218, 1.7400, 3.7700, 1.4800, -0.3516]])
boxes_flip_vert = boxes.clone()
boxes_flip_vert.flip('vertical')
assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
# test box rotation # test box rotation
expected_tensor = torch.tensor( expected_tensor = torch.tensor(
[[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663], [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
...@@ -223,7 +273,7 @@ def test_lidar_boxes3d(): ...@@ -223,7 +273,7 @@ def test_lidar_boxes3d():
[27.3398, -18.3976, 29.0896, -14.6065]]) [27.3398, -18.3976, 29.0896, -14.6065]])
# the pytorch print loses some precision # the pytorch print loses some precision
assert torch.allclose( assert torch.allclose(
boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7) boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation # obtained by the print of the original implementation
expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00], expected_tensor = torch.tensor([[[2.4093e+00, -4.4784e+00, -1.9169e+00],
...@@ -269,6 +319,25 @@ def test_lidar_boxes3d(): ...@@ -269,6 +319,25 @@ def test_lidar_boxes3d():
# the pytorch print loses some precision # the pytorch print loses some precision
assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7) assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
# test new_box
new_box1 = boxes.new_box([[1, 2, 3, 4, 5, 6, 7]])
assert torch.allclose(
new_box1.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
assert new_box1.device == boxes.device
assert new_box1.with_yaw == boxes.with_yaw
assert new_box1.box_dim == boxes.box_dim
new_box2 = boxes.new_box(np.array([[1, 2, 3, 4, 5, 6, 7]]))
assert torch.allclose(
new_box2.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
new_box3 = boxes.new_box(torch.tensor([[1, 2, 3, 4, 5, 6, 7]]))
assert torch.allclose(
new_box3.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=boxes.tensor.dtype))
def test_boxes_conversion(): def test_boxes_conversion():
"""Test the conversion of boxes between different modes. """Test the conversion of boxes between different modes.
...@@ -284,6 +353,8 @@ def test_boxes_conversion(): ...@@ -284,6 +353,8 @@ def test_boxes_conversion():
[31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]) [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR, cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
Box3DMode.CAM) Box3DMode.CAM)
expected_box = lidar_boxes.convert_to(Box3DMode.CAM)
assert torch.equal(expected_box.tensor, cam_box_tensor)
# Some properties should be the same # Some properties should be the same
cam_boxes = CameraInstance3DBoxes(cam_box_tensor) cam_boxes = CameraInstance3DBoxes(cam_box_tensor)
...@@ -310,16 +381,10 @@ def test_boxes_conversion(): ...@@ -310,16 +381,10 @@ def test_boxes_conversion():
Box3DMode.DEPTH, Box3DMode.CAM) Box3DMode.DEPTH, Box3DMode.CAM)
assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor) assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)
# test error raise with not supported conversion
with pytest.raises(NotImplementedError):
Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
with pytest.raises(NotImplementedError):
Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
# test similar mode conversion # test similar mode conversion
same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
Box3DMode.DEPTH) Box3DMode.DEPTH)
assert (same_results == depth_box_tensor).all() assert torch.equal(same_results, depth_box_tensor)
# test conversion with a given rt_mat # test conversion with a given rt_mat
camera_boxes = CameraInstance3DBoxes( camera_boxes = CameraInstance3DBoxes(
...@@ -389,6 +454,35 @@ def test_boxes_conversion(): ...@@ -389,6 +454,35 @@ def test_boxes_conversion():
rt_mat.inverse().numpy()) rt_mat.inverse().numpy())
assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy()) assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())
# test convert from depth to lidar
depth_boxes = torch.tensor(
[[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
dtype=torch.float32)
depth_boxes = DepthInstance3DBoxes(depth_boxes)
depth_to_lidar_box = depth_boxes.convert_to(Box3DMode.LIDAR)
expected_box = depth_to_lidar_box.convert_to(Box3DMode.DEPTH)
assert torch.equal(depth_boxes.tensor, expected_box.tensor)
lidar_to_depth_box = Box3DMode.convert(depth_to_lidar_box, Box3DMode.LIDAR,
Box3DMode.DEPTH)
assert torch.allclose(depth_boxes.tensor, lidar_to_depth_box.tensor)
assert torch.allclose(depth_boxes.volume, lidar_to_depth_box.volume)
# test convert from depth to camera
depth_to_cam_box = Box3DMode.convert(depth_boxes, Box3DMode.DEPTH,
Box3DMode.CAM)
cam_to_depth_box = Box3DMode.convert(depth_to_cam_box, Box3DMode.CAM,
Box3DMode.DEPTH)
expected_tensor = depth_to_cam_box.convert_to(Box3DMode.DEPTH)
assert torch.equal(expected_tensor.tensor, cam_to_depth_box.tensor)
assert torch.allclose(depth_boxes.tensor, cam_to_depth_box.tensor)
assert torch.allclose(depth_boxes.volume, cam_to_depth_box.volume)
with pytest.raises(NotImplementedError):
# assert invalid convert mode
Box3DMode.convert(depth_boxes, Box3DMode.DEPTH, 3)
def test_camera_boxes3d(): def test_camera_boxes3d():
# Test init with numpy array # Test init with numpy array
...@@ -449,9 +543,19 @@ def test_camera_boxes3d(): ...@@ -449,9 +543,19 @@ def test_camera_boxes3d():
[26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926], [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
[31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]), [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
Box3DMode.LIDAR, Box3DMode.CAM) Box3DMode.LIDAR, Box3DMode.CAM)
boxes.flip() boxes.flip('horizontal')
assert torch.allclose(boxes.tensor, expected_tensor) assert torch.allclose(boxes.tensor, expected_tensor)
expected_tensor = torch.tensor(
[[2.5162, 1.7501, -1.7802, 3.3900, 1.6500, 1.7500, -1.6616],
[2.4567, 1.6357, -8.9594, 4.0100, 1.5700, 1.5400, -1.5216],
[-0.5558, 1.3033, -28.2967, 2.2300, 1.4800, 1.4700, -4.7116],
[21.8230, 1.7361, -26.6690, 3.4800, 1.4000, 1.5600, -4.8316],
[8.1621, 1.6218, -31.3198, 3.7700, 1.4800, 1.7400, -0.3516]])
boxes_flip_vert = boxes.clone()
boxes_flip_vert.flip('vertical')
assert torch.allclose(boxes_flip_vert.tensor, expected_tensor, 1e-4)
# test box rotation # test box rotation
expected_tensor = Box3DMode.convert( expected_tensor = Box3DMode.convert(
torch.tensor( torch.tensor(
...@@ -560,7 +664,7 @@ def test_camera_boxes3d(): ...@@ -560,7 +664,7 @@ def test_camera_boxes3d():
expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2] expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
# the pytorch print loses some precision # the pytorch print loses some precision
assert torch.allclose( assert torch.allclose(
boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7) boxes.nearest_bev, expected_tensor, rtol=1e-4, atol=1e-7)
# obtained by the print of the original implementation # obtained by the print of the original implementation
expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01], expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
...@@ -659,3 +763,130 @@ def test_boxes3d_overlaps(): ...@@ -659,3 +763,130 @@ def test_boxes3d_overlaps():
cam_boxes1.overlaps(cam_boxes1, boxes1) cam_boxes1.overlaps(cam_boxes1, boxes1)
with pytest.raises(AssertionError): with pytest.raises(AssertionError):
boxes1.overlaps(cam_boxes1, boxes1) boxes1.overlaps(cam_boxes1, boxes1)
def test_depth_boxes3d():
# test empty initialization
empty_boxes = []
boxes = DepthInstance3DBoxes(empty_boxes)
assert boxes.tensor.shape[0] == 0
assert boxes.tensor.shape[1] == 7
# Test init with numpy array
np_boxes = np.array(
[[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
[2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971]],
dtype=np.float32)
boxes_1 = DepthInstance3DBoxes(np_boxes)
assert torch.allclose(boxes_1.tensor, torch.from_numpy(np_boxes))
# test properties
assert boxes_1.volume.size(0) == 2
assert (boxes_1.center == boxes_1.bottom_center).all()
expected_tensor = torch.tensor([[1.4856, 2.5299, -0.1093],
[2.3262, 3.3065, 0.9475]])
assert torch.allclose(boxes_1.gravity_center, expected_tensor)
expected_tensor = torch.tensor([[1.4856, 2.5299, 0.9385, 2.1404, 3.0601],
[2.3262, 3.3065, 0.8234, 0.5325, 2.9971]])
assert torch.allclose(boxes_1.bev, expected_tensor)
expected_tensor = torch.tensor([[1.0164, 1.4597, 1.9548, 3.6001],
[1.9145, 3.0402, 2.7379, 3.5728]])
assert torch.allclose(boxes_1.nearest_bev, expected_tensor, 1e-4)
assert repr(boxes) == (
'DepthInstance3DBoxes(\n tensor([], size=(0, 7)))')
# test init with torch.Tensor
th_boxes = torch.tensor(
[[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]],
dtype=torch.float32)
boxes_2 = DepthInstance3DBoxes(th_boxes)
assert torch.allclose(boxes_2.tensor, th_boxes)
# test clone/to/device
boxes_2 = boxes_2.clone()
boxes_1 = boxes_1.to(boxes_2.device)
# test box concatenation
expected_tensor = torch.tensor(
[[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601],
[2.3262, 3.3065, --0.44255, 0.8234, 0.5325, 1.0099, 2.9971],
[2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 3.0693],
[1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 3.0601]])
boxes = DepthInstance3DBoxes.cat([boxes_1, boxes_2])
assert torch.allclose(boxes.tensor, expected_tensor)
# concatenate empty list
empty_boxes = DepthInstance3DBoxes.cat([])
assert empty_boxes.tensor.shape[0] == 0
assert empty_boxes.tensor.shape[-1] == 7
# test box flip
expected_tensor = torch.tensor(
[[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815],
[-2.3262, 3.3065, 0.4426, 0.8234, 0.5325, 1.0099, 0.1445],
[-2.4593, 2.5870, -0.4321, 0.8597, 0.6193, 1.0204, 0.0723],
[-1.4856, 2.5299, -0.5570, 0.9385, 2.1404, 0.8954, 0.0815]])
boxes.flip(bev_direction='horizontal')
assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
expected_tensor = torch.tensor(
[[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815],
[-2.3262, -3.3065, 0.4426, 0.8234, 0.5325, 1.0099, -0.1445],
[-2.4593, -2.5870, -0.4321, 0.8597, 0.6193, 1.0204, -0.0723],
[-1.4856, -2.5299, -0.5570, 0.9385, 2.1404, 0.8954, -0.0815]])
boxes.flip(bev_direction='vertical')
assert torch.allclose(boxes.tensor, expected_tensor, 1e-3)
# test box rotation
boxes_rot = boxes.clone()
expected_tensor = torch.tensor(
[[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355],
[-2.4758, -3.1960, 0.4426, 0.8234, 0.5325, 1.0099, -0.0985],
[-2.5757, -2.4712, -0.4321, 0.8597, 0.6193, 1.0204, -0.0263],
[-1.6004, -2.4589, -0.5570, 0.9385, 2.1404, 0.8954, -0.0355]])
boxes_rot.rotate(-0.04599790655000615)
assert torch.allclose(boxes_rot.tensor, expected_tensor, 1e-3)
th_boxes = torch.tensor(
[[0.61211395, 0.8129094, 0.10563634, 1.497534, 0.16927195, 0.27956772],
[1.430009, 0.49797538, 0.9382923, 0.07694054, 0.9312509, 1.8919173]],
dtype=torch.float32)
boxes = DepthInstance3DBoxes(th_boxes, box_dim=6, with_yaw=False)
expected_tensor = torch.tensor([[
0.64884546, 0.78390356, 0.10563634, 1.50373348, 0.23795205, 0.27956772,
0
],
[
1.45139421, 0.43169443, 0.93829232,
0.11967964, 0.93380373, 1.89191735, 0
]])
boxes_3 = boxes.clone()
boxes_3.rotate(-0.04599790655000615)
assert torch.allclose(boxes_3.tensor, expected_tensor)
boxes.rotate(torch.tensor(-0.04599790655000615))
assert torch.allclose(boxes.tensor, expected_tensor)
# test bbox in_range_bev
expected_tensor = torch.tensor([1, 1], dtype=torch.bool)
mask = boxes.in_range_bev([0., -40., 70.4, 40.])
assert (mask == expected_tensor).all()
mask = boxes.nonempty()
assert (mask == expected_tensor).all()
expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],
[-0.1030, 0.6649, 0.3852],
[-0.1030, 0.9029, 0.3852],
[-0.1030, 0.9029, 0.1056],
[1.4007, 0.6649, 0.1056],
[1.4007, 0.6649, 0.3852],
[1.4007, 0.9029, 0.3852],
[1.4007, 0.9029, 0.1056]],
[[1.3916, -0.0352, 0.9383],
[1.3916, -0.0352, 2.8302],
[1.3916, 0.8986, 2.8302],
[1.3916, 0.8986, 0.9383],
[1.5112, -0.0352, 0.9383],
[1.5112, -0.0352, 2.8302],
[1.5112, 0.8986, 2.8302],
[1.5112, 0.8986, 0.9383]]])
assert torch.allclose(boxes.corners, expected_tensor, atol=1e-3)
import numpy as np
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
from mmdet3d.datasets.pipelines import Compose
def test_outdoor_aug_pipeline():
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
class_names = ['Car']
np.random.seed(0)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor([
[
2.16902428e+01, -4.06038128e-02, -1.61906636e+00,
1.65999997e+00, 3.20000005e+00, 1.61000001e+00, -1.53999996e+00
],
[
7.05006886e+00, -6.57459593e+00, -1.60107934e+00,
2.27999997e+00, 1.27799997e+01, 3.66000009e+00, 1.54999995e+00
],
[
2.24698811e+01, -6.69203758e+00, -1.50118136e+00,
2.31999993e+00, 1.47299995e+01, 3.64000010e+00, 1.59000003e+00
],
[
3.48291969e+01, -7.09058380e+00, -1.36622977e+00,
2.31999993e+00, 1.00400000e+01, 3.60999990e+00, 1.61000001e+00
],
[
4.62394600e+01, -7.75838804e+00, -1.32405007e+00,
2.33999991e+00, 1.28299999e+01, 3.63000011e+00, 1.63999999e+00
],
[
2.82966995e+01, -5.55755794e-01, -1.30332506e+00,
1.47000003e+00, 2.23000002e+00, 1.48000002e+00, -1.57000005e+00
],
[
2.66690197e+01, 2.18230209e+01, -1.73605704e+00,
1.55999994e+00, 3.48000002e+00, 1.39999998e+00, -1.69000006e+00
],
[
3.13197803e+01, 8.16214371e+00, -1.62177873e+00,
1.74000001e+00, 3.76999998e+00, 1.48000002e+00, 2.78999996e+00
],
[
4.34395561e+01, -1.95209332e+01, -1.20757008e+00,
1.69000006e+00, 4.09999990e+00, 1.40999997e+00, -1.53999996e+00
],
[
3.29882965e+01, -3.79360509e+00, -1.69245458e+00,
1.74000001e+00, 4.09000015e+00, 1.49000001e+00, -1.52999997e+00
],
[
3.85469360e+01, 8.35060215e+00, -1.31423414e+00,
1.59000003e+00, 4.28000021e+00, 1.45000005e+00, 1.73000002e+00
],
[
2.22492104e+01, -1.13536005e+01, -1.38272512e+00,
1.62000000e+00, 3.55999994e+00, 1.71000004e+00, 2.48000002e+00
],
[
3.36115799e+01, -1.97708054e+01, -4.92827654e-01,
1.64999998e+00, 3.54999995e+00, 1.79999995e+00, -1.57000005e+00
],
[
9.85029602e+00, -1.51294518e+00, -1.66834795e+00,
1.59000003e+00, 3.17000008e+00, 1.38999999e+00, -8.39999974e-01
]
],
dtype=torch.float32))
gt_labels_3d = np.array([0, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
)
output = pipeline(results)
expected_tensor = torch.tensor(
[[20.6514, -8.8250, -1.0816, 1.5893, 3.0637, 1.5414, -1.9216],
[7.9374, 4.9457, -1.2008, 2.1829, 12.2357, 3.5041, 1.6629],
[20.8115, -2.0273, -1.8893, 2.2212, 14.1026, 3.4850, 2.6513],
[32.3850, -5.2135, -1.1321, 2.2212, 9.6124, 3.4562, 2.6498],
[43.7022, -7.8316, -0.5090, 2.2403, 12.2836, 3.4754, 2.0146],
[25.3300, -9.6670, -1.0855, 1.4074, 2.1350, 1.4170, -0.7141],
[16.5414, -29.0583, -0.9768, 1.4936, 3.3318, 1.3404, -0.7153],
[24.6548, -18.9226, -1.3567, 1.6659, 3.6094, 1.4170, 1.3970],
[45.8403, 1.8183, -1.1626, 1.6180, 3.9254, 1.3499, -0.6886],
[30.6288, -8.4497, -1.4881, 1.6659, 3.9158, 1.4265, -0.7241],
[32.3316, -22.4611, -1.3131, 1.5223, 4.0977, 1.3882, 2.4186],
[22.4492, 3.2944, -2.1674, 1.5510, 3.4084, 1.6372, 0.3928],
[37.3824, 5.0472, -0.6579, 1.5797, 3.3988, 1.7233, -1.4862],
[8.9259, -1.2578, -1.6081, 1.5223, 3.0350, 1.3308, -1.7212]])
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
def test_outdoor_velocity_aug_pipeline():
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
np.random.seed(0)
train_pipeline = [
dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
pipeline = Compose(train_pipeline)
gt_bboxes_3d = LiDARInstance3DBoxes(
torch.tensor(
[[
-5.2422e+00, 4.0021e+01, -4.7643e-01, 2.0620e+00, 4.4090e+00,
1.5480e+00, -1.4880e+00, 8.5338e-03, 4.4934e-02
],
[
-2.6675e+01, 5.5950e+00, -1.3053e+00, 3.4300e-01, 4.5800e-01,
7.8200e-01, -4.6276e+00, -4.3284e-04, -1.8543e-03
],
[
-5.8098e+00, 3.5409e+01, -6.6511e-01, 2.3960e+00, 3.9690e+00,
1.7320e+00, -4.6520e+00, 0.0000e+00, 0.0000e+00
],
[
-3.1309e+01, 1.0901e+00, -1.0561e+00, 1.9440e+00, 3.8570e+00,
1.7230e+00, -2.8143e+00, -2.7606e-02, -8.0573e-02
],
[
-4.5642e+01, 2.0136e+01, -2.4681e-02, 1.9870e+00, 4.4400e+00,
1.9420e+00, 2.8336e-01, 0.0000e+00, 0.0000e+00
],
[
-5.1617e+00, 1.8305e+01, -1.0879e+00, 2.3230e+00, 4.8510e+00,
1.3710e+00, -1.5803e+00, 0.0000e+00, 0.0000e+00
],
[
-2.5285e+01, 4.1442e+00, -1.2713e+00, 1.7550e+00, 1.9890e+00,
2.2200e+00, -4.4900e+00, -3.1784e-02, -1.5291e-01
],
[
-2.2611e+00, 1.9170e+01, -1.1452e+00, 9.1900e-01, 1.1230e+00,
1.9310e+00, 4.7790e-02, 6.7684e-02, -1.7537e+00
],
[
-6.5878e+01, 1.3500e+01, -2.2528e-01, 1.8200e+00, 3.8520e+00,
1.5450e+00, -2.8757e+00, 0.0000e+00, 0.0000e+00
],
[
-5.4490e+00, 2.8363e+01, -7.7275e-01, 2.2360e+00, 3.7540e+00,
1.5590e+00, -4.6520e+00, -7.9736e-03, 7.7207e-03
]],
dtype=torch.float32),
box_dim=9)
gt_labels_3d = np.array([0, 8, 0, 0, 0, 0, -1, 7, 0, 0])
results = dict(
pts_filename='tests/data/kitti/a.bin',
ann_info=dict(gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d),
bbox3d_fields=[],
)
output = pipeline(results)
expected_tensor = torch.tensor(
[[
-3.7849e+00, -4.1057e+01, -4.8668e-01, 2.1064e+00, 4.5039e+00,
1.5813e+00, -1.6919e+00, 1.0469e-02, -4.5533e-02
],
[
-2.7010e+01, -6.7551e+00, -1.3334e+00, 3.5038e-01, 4.6786e-01,
7.9883e-01, 1.4477e+00, -5.1440e-04, 1.8758e-03
],
[
-4.5448e+00, -3.6372e+01, -6.7942e-01, 2.4476e+00, 4.0544e+00,
1.7693e+00, 1.4721e+00, 0.0000e+00, -0.0000e+00
],
[
-3.1916e+01, -2.3379e+00, -1.0788e+00, 1.9858e+00, 3.9400e+00,
1.7601e+00, -3.6564e-01, -3.1333e-02, 8.1166e-02
],
[
-4.5802e+01, -2.2340e+01, -2.5213e-02, 2.0298e+00, 4.5355e+00,
1.9838e+00, 2.8199e+00, 0.0000e+00, -0.0000e+00
],
[
-4.5526e+00, -1.8887e+01, -1.1114e+00, 2.3730e+00, 4.9554e+00,
1.4005e+00, -1.5997e+00, 0.0000e+00, -0.0000e+00
],
[
-2.5648e+01, -5.2197e+00, -1.2987e+00, 1.7928e+00, 2.0318e+00,
2.2678e+00, 1.3100e+00, -3.8428e-02, 1.5485e-01
],
[
-1.5578e+00, -1.9657e+01, -1.1699e+00, 9.3878e-01, 1.1472e+00,
1.9726e+00, 3.0555e+00, 4.5907e-04, 1.7928e+00
],
[
-4.4522e+00, -2.9166e+01, -7.8938e-01, 2.2841e+00, 3.8348e+00,
1.5925e+00, 1.4721e+00, -7.8371e-03, -8.1931e-03
]])
assert torch.allclose(
output['gt_bboxes_3d']._data.tensor, expected_tensor, atol=1e-3)
import pytest import pytest
import torch import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes
def test_PointwiseSemanticHead(): def test_PointwiseSemanticHead():
# PointwiseSemanticHead only support gpu version currently. # PointwiseSemanticHead only support gpu version currently.
...@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead(): ...@@ -47,19 +49,29 @@ def test_PointwiseSemanticHead():
[1, 35, 930, 469]], [1, 35, 930, 469]],
dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z)
voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates) voxel_dict = dict(voxel_centers=voxel_centers, coors=coordinates)
gt_bboxes = list( gt_bboxes = [
torch.tensor( LiDARInstance3DBoxes(
[[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]], torch.tensor(
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]], [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
dtype=torch.float32).cuda()) dtype=torch.float32).cuda()),
LiDARInstance3DBoxes(
torch.tensor(
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]],
dtype=torch.float32).cuda())
]
# batch size is 2 in the unit test
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda()) gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
# test get_targets # test get_targets
target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels) target_dict = self.get_targets(voxel_dict, gt_bboxes, gt_labels)
assert target_dict['seg_targets'].shape == torch.Size( assert target_dict['seg_targets'].shape == torch.Size(
[voxel_features.shape[0]]) [voxel_features.shape[0]])
assert torch.allclose(target_dict['seg_targets'],
target_dict['seg_targets'].new_tensor([3, -1, 3, 3]))
assert target_dict['part_targets'].shape == torch.Size( assert target_dict['part_targets'].shape == torch.Size(
[voxel_features.shape[0], 3]) [voxel_features.shape[0], 3])
assert target_dict['part_targets'].sum() == 0
# test loss # test loss
loss_dict = self.loss(feats_dict, target_dict) loss_dict = self.loss(feats_dict, target_dict)
...@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead(): ...@@ -67,7 +79,3 @@ def test_PointwiseSemanticHead():
assert loss_dict['loss_part'] == 0 # no points in gt_boxes assert loss_dict['loss_part'] == 0 # no points in gt_boxes
total_loss = loss_dict['loss_seg'] + loss_dict['loss_part'] total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
total_loss.backward() total_loss.backward()
if __name__ == '__main__':
test_PointwiseSemanticHead()