"docs/en/tutorials/customize_models.md" did not exist on "bd44491f6743783cd84a0bd0a989191fd4bf2907"
Commit 94bbd751 authored by liyinhao

merge master

parents f201ba68 84569a41
@@ -110,7 +110,11 @@ mmdetection
 │   ├── VOCdevkit
 │   │   ├── VOC2007
 │   │   ├── VOC2012
+│   ├── ScanNet
+│   │   ├── meta_data
+│   │   ├── scannet_train_instance_data
+│   ├── SUNRGBD
+│   │   ├── sunrgbd_trainval
 ```
 The Cityscapes annotations have to be converted into the COCO format using `tools/convert_datasets/cityscapes.py`:
 ```shell
...
@@ -7,6 +7,7 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
 from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
+from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from .transforms import boxes3d_to_bev_torch_lidar

 from .assign_sampling import (  # isort:skip, avoid recursive imports
@@ -20,5 +21,6 @@ __all__ = [
     'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
-    'bbox_overlaps_3d'
+    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
+    'CameraInstance3DBoxes'
 ]
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
from abc import abstractmethod
import numpy as np
import torch
from .utils import limit_period
class BaseInstance3DBoxes(object):
"""Base class for 3D Boxes
Args:
tensor (torch.Tensor | np.ndarray): a Nxbox_dim matrix.
box_dim (int): number of the dimension of a box
Each row is (x, y, z, x_size, y_size, z_size, yaw).
"""
def __init__(self, tensor, box_dim=7):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
device = torch.device('cpu')
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
if tensor.numel() == 0:
# Use reshape, so we don't end up creating a new tensor that
# does not depend on the inputs (and consequently confuses jit)
tensor = tensor.reshape((0, box_dim)).to(
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
self.box_dim = box_dim
self.tensor = tensor
@property
def volume(self):
"""Computes the volume of all the boxes.
Returns:
torch.Tensor: a vector with volume of each box.
"""
return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
@property
def dims(self):
"""Calculate the length in each dimension of all the boxes.
Convert the boxes to the form of (x_size, y_size, z_size)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
return self.tensor[:, 3:6]
@property
def center(self):
"""Calculate the center of all the boxes.
        Note:
            In MMDetection3D's convention, the bottom center is
            usually taken as the default center.

            The relative position of the center differs between box
            types, e.g., the relative center of a box is
            [0.5, 1.0, 0.5] in camera coordinates and [0.5, 0.5, 0]
            in LiDAR coordinates. It is recommended to use
            `bottom_center` or `gravity_center` for clearer usage.
Returns:
torch.Tensor: a tensor with center of each box.
"""
return self.bottom_center
@property
def bottom_center(self):
"""Calculate the bottom center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
return self.tensor[:, :3]
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
pass
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
Returns:
torch.Tensor: a tensor with 8 corners of each box.
"""
pass
@abstractmethod
def rotate(self, angles, axis=0):
"""Calculate whether the points is in any of the boxes
Args:
angles (float): rotation angles
axis (int): the axis to rotate the boxes
"""
pass
@abstractmethod
def flip(self):
"""Flip the boxes in horizontal direction
"""
pass
def translate(self, trans_vector):
"""Calculate whether the points is in any of the boxes
Args:
trans_vector (torch.Tensor): translation vector of size 1x3
"""
if not isinstance(trans_vector, torch.Tensor):
trans_vector = self.tensor.new_tensor(trans_vector)
self.tensor[:, :3] += trans_vector
def in_range_3d(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, z_min, x_max, y_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 2] > box_range[2])
& (self.tensor[:, 0] < box_range[3])
& (self.tensor[:, 1] < box_range[4])
& (self.tensor[:, 2] < box_range[5]))
return in_range_flags
@abstractmethod
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
pass
def scale(self, scale_factor):
"""Scale the box with horizontal and vertical scaling factors
Args:
scale_factors (float):
scale factors to scale the boxes.
"""
self.tensor[:, :6] *= scale_factor
self.tensor[:, 7:] *= scale_factor
def limit_yaw(self, offset=0.5, period=np.pi):
"""Limit the yaw to a given period and offset
Args:
offset (float): the offset of the yaw
period (float): the expected period
"""
self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
def nonempty(self, threshold: float = 0.0):
"""Find boxes that are non-empty.
A box is considered empty,
if either of its side is no larger than threshold.
Args:
threshold (float): the threshold of minimal sizes
Returns:
Tensor:
a binary vector which represents whether each box is empty
(False) or non-empty (True).
"""
box = self.tensor
size_x = box[..., 3]
size_y = box[..., 4]
size_z = box[..., 5]
keep = ((size_x > threshold)
& (size_y > threshold) & (size_z > threshold))
return keep
def __getitem__(self, item):
"""
Note:
            The following usages are allowed:
1. `new_boxes = boxes[3]`:
return a `Boxes` that contains only one box.
2. `new_boxes = boxes[2:10]`:
return a slice of boxes.
3. `new_boxes = boxes[vector]`:
where vector is a torch.BoolTensor with `length = len(boxes)`.
Nonzero elements in the vector will be selected.
Note that the returned Boxes might share storage with this Boxes,
            subject to PyTorch's indexing semantics.
Returns:
Boxes: Create a new :class:`Boxes` by indexing.
"""
        original_type = type(self)
        if isinstance(item, int):
            # preserve box_dim so subclasses with extra dims round-trip
            return original_type(
                self.tensor[item].view(1, -1), box_dim=self.box_dim)
        b = self.tensor[item]
        assert b.dim() == 2, \
            f'Indexing on Boxes with {item} failed to return a matrix!'
        return original_type(b, box_dim=self.box_dim)
def __len__(self):
return self.tensor.shape[0]
def __repr__(self):
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
@classmethod
def cat(cls, boxes_list):
"""Concatenates a list of Boxes into a single Boxes
Arguments:
boxes_list (list[Boxes])
Returns:
Boxes: the concatenated Boxes
"""
assert isinstance(boxes_list, (list, tuple))
if len(boxes_list) == 0:
return cls(torch.empty(0))
assert all(isinstance(box, cls) for box in boxes_list)
# use torch.cat (v.s. layers.cat)
# so the returned boxes never share storage with input
        cat_boxes = cls(
            torch.cat([b.tensor for b in boxes_list], dim=0),
            box_dim=boxes_list[0].box_dim)
return cat_boxes
def to(self, device):
original_type = type(self)
        return original_type(self.tensor.to(device), box_dim=self.box_dim)
def clone(self):
"""Clone the Boxes.
Returns:
Boxes
"""
original_type = type(self)
return original_type(self.tensor.clone())
@property
def device(self):
return self.tensor.device
def __iter__(self):
"""
Yield a box as a Tensor of shape (4,) at a time.
"""
yield from self.tensor
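A minimal usage sketch of the indexing and concatenation semantics documented above, assuming the import layout this commit introduces (LiDARInstance3DBoxes is defined below):

import torch
from mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

# two boxes as (x, y, z, x_size, y_size, z_size, yaw) rows
boxes = LiDARInstance3DBoxes(torch.tensor([
    [0.0, 0.0, 0.0, 4.0, 2.0, 1.5, 0.0],
    [5.0, 1.0, 0.0, 4.0, 2.0, 1.5, 0.3],
]))
single = boxes[0]  # integer indexing keeps the box type, length 1
merged = LiDARInstance3DBoxes.cat([boxes, single])
assert len(merged) == 3
assert merged.volume.shape == (3,)  # 4 * 2 * 1.5 = 12.0 per box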
from enum import IntEnum, unique
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
@unique
class Box3DMode(IntEnum):
r"""Enum of different ways to represent a box.
Coordinates in LiDAR:
.. code-block:: none
                        up z
                           ^   x front
                           |  /
                           | /
        left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
.. code-block:: none
                    z front
                   /
                  /
                 0 ------> x right
                 |
                 |
                 v
            down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1.
Coordinates in Depth mode:
.. code-block:: none
                        up z
                           ^   y front
                           |  /
                           | /
            0 ------> x right
The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
"""
LIDAR = 0
CAM = 1
DEPTH = 2
@staticmethod
def convert(box, src, dst, rt_mat=None):
"""Convert boxes from `src` mode to `dst` mode.
Args:
box (tuple | list | np.ndarray | torch.Tensor):
can be a k-tuple, k-list or an Nxk array/tensor, where k = 7
            src (Box3DMode): the source box mode
            dst (Box3DMode): the target box mode
rt_mat (np.ndarray | torch.Tensor): The rotation and translation
matrix between different coordinates. Defaults to None.
The conversion from `src` coordinates to `dst` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.
Returns:
(tuple | list | np.ndarray | torch.Tensor):
The converted box of the same type.
"""
if src == dst:
return box
is_numpy = isinstance(box, np.ndarray)
is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
single_box = isinstance(box, (list, tuple))
if single_box:
assert len(box) >= 7, (
'BoxMode.convert takes either a k-tuple/list or '
'an Nxk array/tensor, where k >= 7')
arr = torch.tensor(box)[None, :]
else:
# avoid modifying the input box
if is_numpy:
arr = torch.from_numpy(np.asarray(box)).clone()
elif is_Instance3DBoxes:
arr = box.tensor.clone()
else:
arr = box.clone()
# convert box from `src` mode to `dst` mode.
x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
else:
raise NotImplementedError(
f'Conversion from Box3DMode {src} to {dst} '
'is not supported yet')
if not isinstance(rt_mat, torch.Tensor):
rt_mat = arr.new_tensor(rt_mat)
if rt_mat.size(1) == 4:
extended_xyz = torch.cat(
[arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
xyz = extended_xyz @ rt_mat.t()
else:
xyz = arr[:, :3] @ rt_mat.t()
remains = arr[..., 6:]
arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)
# convert arr to the original type
original_type = type(box)
if single_box:
return original_type(arr.flatten().tolist())
if is_numpy:
return arr.numpy()
elif is_Instance3DBoxes:
if dst == Box3DMode.CAM:
target_type = CameraInstance3DBoxes
elif dst == Box3DMode.LIDAR:
target_type = LiDARInstance3DBoxes
else:
raise NotImplementedError(
f'Conversion to {dst} through {original_type}'
' is not supported yet')
return target_type(arr, box_dim=arr.size(-1))
else:
return arr
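A short sketch of Box3DMode.convert for the LiDAR-to-camera case. With rt_mat left as None the default axis permutation is used; a real sensor setup would pass the calibration matrix instead:

import torch
from mmdet3d.core.bbox.structures import Box3DMode, LiDARInstance3DBoxes

lidar_boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 2.0, 0.0, 4.0, 2.0, 1.5, 0.0]]))
cam_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
# with the default rt_mat, (x, y, z) -> (-y, -z, x), and the sizes are
# permuted from (x_size, y_size, z_size) to (y_size, z_size, x_size)
assert torch.allclose(cam_boxes.tensor[0, :3], torch.tensor([-2.0, 0.0, 1.0]))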
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class CameraInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in CAM coordinates
Coordinates in camera:
.. code-block:: none
                    z front
                   /
                  /
                 0 ------> x right
                 |
                 |
                 v
            down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
and the yaw is around the y axis, thus the rotation axis=1.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): the dimension of each box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
return gravity_center
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
        Convert the boxes to corners in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
.. code-block:: none
                     front z
                          /
                         /
           (x0, y0, z1) + -----------  + (x1, y0, z1)
                       /|            / |
                      / |           /  |
        (x0, y0, z0) + ----------- +   + (x1, y1, z1)
                     |  /      .   |  /
                     | / origin    | /
        (x0, y1, z0) + ----------- + -------> x right
                     |             (x1, y1, z0)
                     |
                     v
                down y
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 1, 0.5]
corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around y axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=1)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
@property
    def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XZWHR format
bev_rotated_boxes = self.tensor[:, [0, 2, 3, 5, 6]]
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
        # if the normalized rotation exceeds pi/4, swap width and height
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat_T = self.tensor.new_tensor([[rot_cos, 0, -rot_sin], [0, 1, 0],
[rot_sin, 0, rot_cos]])
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
In CAM coordinates, it flips the x axis.
"""
self.tensor[:, 0::7] = -self.tensor[:, 0::7]
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, z_min, x_max, z_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 2] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 2] < box_range[3]))
return in_range_flags
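A quick sketch of the camera-frame convention above: the stored center is the bottom center, and the gravity center lies half a box height towards -y:

import torch
from mmdet3d.core.bbox.structures import CameraInstance3DBoxes

# (x, y, z, x_size, y_size, z_size, yaw) with y_size = 1.5
cam_boxes = CameraInstance3DBoxes(
    torch.tensor([[0.0, 1.5, 10.0, 4.0, 1.5, 2.0, 0.0]]))
assert torch.allclose(cam_boxes.gravity_center,
                      torch.tensor([[0.0, 0.75, 10.0]]))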
import numpy as np
import torch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
class LiDARInstance3DBoxes(BaseInstance3DBoxes):
"""3D boxes of instances in LIDAR coordinates
Coordinates in LiDAR:
.. code-block:: none
                        up z    x front
                           ^   ^
                           |  /
                           | /
        left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
and the yaw is around the z axis, thus the rotation axis=2.
Attributes:
tensor (torch.Tensor): float matrix of N x box_dim.
        box_dim (int): the dimension of each box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
"""
@property
def gravity_center(self):
"""Calculate the gravity center of all the boxes.
Returns:
torch.Tensor: a tensor with center of each box.
"""
bottom_center = self.bottom_center
gravity_center = torch.zeros_like(bottom_center)
gravity_center[:, :2] = bottom_center[:, :2]
gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
return gravity_center
@property
def corners(self):
"""Calculate the coordinates of corners of all the boxes.
        Convert the boxes to corners in clockwise order, in the form of
(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z0, x1y1z1)
.. code-block:: none
                                   up z
                    front x           ^
                         /            |
                        /             |
          (x1, y0, z1) + -----------  + (x1, y1, z1)
                      /|            / |
                     / |           /  |
       (x0, y0, z1) + ----------- +   + (x1, y1, z0)
                    |  /      .   |  /
                    | / origin    | /
    left y <------- + ----------- + (x0, y1, z0)
        (x0, y0, z0)
Returns:
torch.Tensor: corners of each box with size (N, 8, 3)
"""
dims = self.dims
corners_norm = torch.from_numpy(
np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
# rotate around z axis
corners = rotation_3d_in_axis(corners, self.tensor[:, 6], axis=2)
corners += self.tensor[:, :3].view(-1, 1, 3)
return corners
@property
    def nearest_bev(self):
"""Calculate the 2D bounding boxes in BEV without rotation
Returns:
torch.Tensor: a tensor of 2D BEV box of each box.
"""
# Obtain BEV boxes with rotation in XYWHR format
bev_rotated_boxes = self.tensor[:, [0, 1, 3, 4, 6]]
# convert the rotation to a valid range
rotations = bev_rotated_boxes[:, -1]
normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
        # if the normalized rotation exceeds pi/4, swap width and height
conditions = (normed_rotations > np.pi / 4)[..., None]
bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
[0, 1, 3, 2]],
bev_rotated_boxes[:, :4])
centers = bboxes_xywh[:, :2]
dims = bboxes_xywh[:, 2:]
bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
return bev_boxes
def rotate(self, angle):
"""Calculate whether the points is in any of the boxes
Args:
angles (float | torch.Tensor): rotation angle
Returns:
None if `return_rot_mat=False`,
torch.Tensor if `return_rot_mat=True`
"""
if not isinstance(angle, torch.Tensor):
angle = self.tensor.new_tensor(angle)
rot_sin = torch.sin(angle)
rot_cos = torch.cos(angle)
rot_mat_T = self.tensor.new_tensor([[rot_cos, -rot_sin, 0],
[rot_sin, rot_cos, 0], [0, 0, 1]])
self.tensor[:, :3] = self.tensor[:, :3] @ rot_mat_T
self.tensor[:, 6] += angle
def flip(self):
"""Flip the boxes in horizontal direction
In LIDAR coordinates, it flips the y axis.
"""
self.tensor[:, 1::7] = -self.tensor[:, 1::7]
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
def in_range_bev(self, box_range):
"""Check whether the boxes are in the given range
Args:
box_range (list | torch.Tensor): the range of box
(x_min, y_min, x_max, y_max)
Note:
In the original implementation of SECOND, checking whether
a box in the range checks whether the points are in a convex
polygon, we try to reduce the burdun for simpler cases.
TODO: check whether this will effect the performance
Returns:
a binary vector, indicating whether each box is inside
the reference range.
"""
in_range_flags = ((self.tensor[:, 0] > box_range[0])
& (self.tensor[:, 1] > box_range[1])
& (self.tensor[:, 0] < box_range[2])
& (self.tensor[:, 1] < box_range[3]))
return in_range_flags
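A similar sketch for the LiDAR frame: the gravity center is half a height above the bottom center, and rotate() spins boxes in-place around the z axis:

import numpy as np
import torch
from mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 0.0, 0.0, 4.0, 2.0, 1.5, 0.0]]))
assert torch.allclose(boxes.gravity_center, torch.tensor([[1.0, 0.0, 0.75]]))
boxes.rotate(np.pi / 2)  # (1, 0, 0) maps to (0, -1, 0) under this convention
assert torch.allclose(boxes.center, torch.tensor([[0.0, -1.0, 0.0]]), atol=1e-6)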
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
"""Limit the value into a period for periodic function.
Args:
val (torch.Tensor): The value to be converted
offset (float, optional): Offset to set the value range.
Defaults to 0.5.
        period (float, optional): Period of the value. Defaults to np.pi.
Returns:
torch.Tensor: value in the range of
[-offset * period, (1-offset) * period]
"""
return val - torch.floor(val / period + offset) * period
def rotation_3d_in_axis(points, angles, axis=0):
"""Rotate points by angles according to axis
Args:
points (torch.Tensor): Points of shape (N, M, 3).
angles (torch.Tensor): Vector of angles in shape (N,)
        axis (int, optional): The axis to rotate around. Defaults to 0.
Raises:
ValueError: when the axis is not in range [0, 1, 2], it will
raise value error.
Returns:
torch.Tensor: rotated points in shape (N, M, 3)
"""
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
ones = torch.ones_like(rot_cos)
zeros = torch.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, zeros, -rot_sin]),
torch.stack([zeros, ones, zeros]),
torch.stack([rot_sin, zeros, rot_cos])
])
elif axis == 2 or axis == -1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin, zeros]),
torch.stack([rot_sin, rot_cos, zeros]),
torch.stack([zeros, zeros, ones])
])
elif axis == 0:
rot_mat_T = torch.stack([
torch.stack([zeros, rot_cos, -rot_sin]),
torch.stack([zeros, rot_sin, rot_cos]),
torch.stack([ones, zeros, zeros])
])
else:
        raise ValueError(f'axis should be in range [0, 1, 2], got {axis}')
return torch.einsum('aij,jka->aik', (points, rot_mat_T))
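Two worked examples for the helpers above, assuming the module path mmdet3d.core.bbox.structures.utils used elsewhere in this commit:

import numpy as np
import torch
from mmdet3d.core.bbox.structures.utils import limit_period, rotation_3d_in_axis

# with the defaults (offset=0.5, period=pi), 0.75*pi wraps to -0.25*pi,
# i.e. values land in [-pi/2, pi/2)
val = torch.tensor([0.75 * np.pi])
assert torch.allclose(limit_period(val), torch.tensor([-0.25 * np.pi]))

# rotating the point (1, 0, 0) by pi/2 around the z axis gives (0, -1, 0)
points = torch.tensor([[[1.0, 0.0, 0.0]]])  # shape (N, M, 3) with N = M = 1
angles = torch.tensor([np.pi / 2])
rotated = rotation_3d_in_axis(points, angles, axis=2)
assert torch.allclose(rotated, torch.tensor([[[0.0, -1.0, 0.0]]]), atol=1e-6)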
@@ -6,9 +6,11 @@ from .kitti_dataset import KittiDataset
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .nuscenes_dataset import NuScenesDataset
 from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
-                        IndoorPointsColorJitter, ObjectNoise,
-                        ObjectRangeFilter, ObjectSample, PointShuffle,
-                        PointsRangeFilter, RandomFlip3D)
+                        IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                        IndoorPointsColorJitter, IndoorPointsColorNormalize,
+                        ObjectNoise, ObjectRangeFilter, ObjectSample,
+                        PointSample, PointShuffle, PointsRangeFilter,
+                        RandomFlip3D)

 __all__ = [
     'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
@@ -16,5 +18,7 @@ __all__ = [
     'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
     'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
     'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'IndoorPointsColorJitter', 'IndoorGlobalRotScale', 'IndoorFlipData'
+    'IndoorLoadPointsFromFile', 'IndoorPointsColorNormalize', 'PointSample',
+    'IndoorLoadAnnotations3D', 'IndoorPointsColorJitter',
+    'IndoorGlobalRotScale', 'IndoorFlipData'
 ]
@@ -238,6 +238,8 @@ class KittiDataset(torch_data.Dataset):
                              axis=1).astype(np.float32)
         difficulty = annos['difficulty']
         # this changes gt_bboxes_3d to velodyne coordinates
+        import pdb
+        pdb.set_trace()
         gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
                                                       Trv2c)
         # only center format is allowed. so we need to convert
...
@@ -3,6 +3,9 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
                              IndoorPointsColorJitter)
+from .indoor_loading import (IndoorLoadAnnotations3D, IndoorLoadPointsFromFile,
+                             IndoorPointsColorNormalize)
+from .indoor_sample import PointSample
 from .loading import LoadMultiViewImageFromFiles, LoadPointsFromFile
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
@@ -14,5 +17,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler'
+    'MMDataBaseSampler', 'IndoorLoadPointsFromFile',
+    'IndoorPointsColorNormalize', 'IndoorLoadAnnotations3D', 'PointSample'
 ]
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module()
class IndoorPointsColorNormalize(object):
"""Indoor Points Color Normalize
Normalize color of the points.
Args:
color_mean (List[float]): Mean color of the point cloud.
"""
def __init__(self, color_mean):
self.color_mean = color_mean
def __call__(self, results):
points = results['points']
assert points.shape[1] >= 6,\
f'Expect points have channel >=6, got {points.shape[1]}'
        # normalize colors as (color - mean) / 256, following the
        # VoteNet convention
        points[:, 3:6] = (points[:, 3:6] - np.array(self.color_mean)) / 256.0
results['points'] = points
return results
def __repr__(self):
repr_str = self.__class__.__name__
repr_str += '(color_mean={})'.format(self.color_mean)
return repr_str
@PIPELINES.register_module()
class IndoorLoadPointsFromFile(object):
"""Indoor Load Points From File.
Load sunrgbd and scannet points from file.
Args:
use_height (bool): Whether to use height.
load_dim (int): The dimension of the loaded points.
Default: 6.
use_dim (List[int]): Which dimensions of the points to be used.
Default: [0, 1, 2].
"""
def __init__(self, use_height, load_dim=6, use_dim=[0, 1, 2]):
self.use_height = use_height
assert max(use_dim) < load_dim, \
f'Expect all used dimensions < {load_dim}, got {use_dim}'
self.load_dim = load_dim
self.use_dim = use_dim
def __call__(self, results):
pts_filename = results['pts_filename']
mmcv.check_file_exist(pts_filename)
points = np.load(pts_filename)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
        if self.use_height:
            # the 0.99th (near-minimum) percentile of z approximates
            # the floor height
            floor_height = np.percentile(points[:, 2], 0.99)
            height = points[:, 2] - floor_height
            points = np.concatenate([points, np.expand_dims(height, 1)], 1)
results['points'] = points
return results
def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(use_height={})'.format(self.use_height)
        repr_str += '(load_dim={})'.format(self.load_dim)
        repr_str += '(use_dim={})'.format(self.use_dim)
return repr_str
@PIPELINES.register_module()
class IndoorLoadAnnotations3D(object):
"""Indoor Load Annotations3D.
Load instance mask and semantic mask of points.
"""
def __init__(self):
pass
def __call__(self, results):
pts_instance_mask_path = results['pts_instance_mask_path']
pts_semantic_mask_path = results['pts_semantic_mask_path']
mmcv.check_file_exist(pts_instance_mask_path)
mmcv.check_file_exist(pts_semantic_mask_path)
pts_instance_mask = np.load(pts_instance_mask_path)
pts_semantic_mask = np.load(pts_semantic_mask_path)
results['pts_instance_mask'] = pts_instance_mask
results['pts_semantic_mask'] = pts_semantic_mask
return results
def __repr__(self):
repr_str = self.__class__.__name__
return repr_str
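A minimal sketch of how these transforms could be chained in a dataset pipeline config; the color_mean values below are placeholders, not measured dataset statistics:

train_pipeline = [
    dict(type='IndoorLoadPointsFromFile',
         use_height=True,
         load_dim=6,
         use_dim=[0, 1, 2, 3, 4, 5]),  # keep xyz + rgb so colors can be normalized
    dict(type='IndoorLoadAnnotations3D'),
    dict(type='IndoorPointsColorNormalize',
         color_mean=[127.5, 127.5, 127.5]),  # placeholder mean color
]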
 from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor
+from .single_roiaware_extractor import Single3DRoIAwareExtractor

-__all__ = ['SingleRoIExtractor']
+__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor']
import torch
import torch.nn as nn
from mmdet3d import ops
from mmdet.models.builder import ROI_EXTRACTORS
@ROI_EXTRACTORS.register_module()
class Single3DRoIAwareExtractor(nn.Module):
"""Point-wise roi-aware Extractor
Extract Point-wise roi features.
Args:
roi_layer (dict): the config of roi layer
"""
def __init__(self, roi_layer=None):
super(Single3DRoIAwareExtractor, self).__init__()
self.roi_layer = self.build_roi_layers(roi_layer)
def build_roi_layers(self, layer_cfg):
cfg = layer_cfg.copy()
layer_type = cfg.pop('type')
assert hasattr(ops, layer_type)
layer_cls = getattr(ops, layer_type)
roi_layers = layer_cls(**cfg)
return roi_layers
def forward(self, feats, coordinate, batch_inds, rois):
"""Extract point-wise roi features
Args:
feats (FloatTensor): point-wise features with
shape (batch, npoints, channels) for pooling
coordinate (FloatTensor): coordinate of each point
batch_inds (longTensor): indicate the batch of each point
rois (FloatTensor): roi boxes with batch indices
Returns:
FloatTensor: pooled features
"""
pooled_roi_feats = []
for batch_idx in range(int(batch_inds.max()) + 1):
roi_inds = (rois[..., 0].int() == batch_idx)
coors_inds = (batch_inds.int() == batch_idx)
pooled_roi_feat = self.roi_layer(rois[..., 1:][roi_inds],
coordinate[coors_inds],
feats[coors_inds])
pooled_roi_feats.append(pooled_roi_feat)
pooled_roi_feats = torch.cat(pooled_roi_feats, 0)
return pooled_roi_feats
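A configuration sketch for the extractor, assuming this commit's module layout. RoIAwarePool3d is the op added to mmdet3d.ops in this commit; the layer arguments shown here are assumptions about its signature, since build_roi_layers only checks hasattr(ops, type):

from mmdet3d.models.roi_heads.roi_extractors import Single3DRoIAwareExtractor

roi_extractor = Single3DRoIAwareExtractor(
    roi_layer=dict(type='RoIAwarePool3d',
                   out_size=14,  # pooled voxel grid resolution (assumed arg)
                   max_pts_per_voxel=128))  # assumed arg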
@@ -2,28 +2,18 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                        get_compiling_cuda_version, nms, roi_align,
                        sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
+                              points_in_boxes_gpu)
 from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
                            SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization

 __all__ = [
-    'nms',
-    'soft_nms',
-    'RoIAlign',
-    'roi_align',
-    'get_compiler_version',
-    'get_compiling_cuda_version',
-    'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d',
-    'batched_nms',
-    'Voxelization',
-    'voxelization',
-    'dynamic_scatter',
-    'DynamicScatter',
-    'sigmoid_focal_loss',
-    'SigmoidFocalLoss',
-    'SparseBasicBlockV0',
-    'SparseBottleneckV0',
-    'SparseBasicBlock',
-    'SparseBottleneck',
+    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
+    'get_compiling_cuda_version', 'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
+    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
+    'SigmoidFocalLoss', 'SparseBasicBlockV0', 'SparseBottleneckV0',
+    'SparseBasicBlock', 'SparseBottleneck', 'RoIAwarePool3d',
+    'points_in_boxes_gpu', 'points_in_boxes_cpu'
 ]
import os.path as osp
import mmcv
import numpy as np
from mmdet3d.datasets.pipelines import (IndoorLoadAnnotations3D,
IndoorLoadPointsFromFile)
def test_indoor_load_points_from_file():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')
sunrgbd_load_points_from_file = IndoorLoadPointsFromFile(True, 6)
sunrgbd_results = dict()
data_path = './tests/data/sunrgbd/sunrgbd_trainval'
sunrgbd_info = sunrgbd_info[0]
scan_name = sunrgbd_info['point_cloud']['lidar_idx']
sunrgbd_results['pts_filename'] = osp.join(data_path, 'lidar',
f'{scan_name:06d}.npy')
sunrgbd_results = sunrgbd_load_points_from_file(sunrgbd_results)
sunrgbd_point_cloud = sunrgbd_results['points']
assert sunrgbd_point_cloud.shape == (100, 4)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')
scannet_load_data = IndoorLoadPointsFromFile(True)
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
scannet_results['data_path'] = data_path
scannet_info = scannet_info[0]
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_filename'] = osp.join(data_path,
f'{scan_name}_vert.npy')
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.shape == (100, 4)
def test_load_annotations3D():
sunrgbd_info = mmcv.load('./tests/data/sunrgbd/sunrgbd_infos.pkl')[0]
if sunrgbd_info['annos']['gt_num'] != 0:
sunrgbd_gt_bboxes_3d = sunrgbd_info['annos']['gt_boxes_upright_depth']
sunrgbd_gt_labels = sunrgbd_info['annos']['class'].reshape(-1, 1)
sunrgbd_gt_bboxes_3d_mask = np.ones_like(sunrgbd_gt_labels)
else:
sunrgbd_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
sunrgbd_gt_labels = np.zeros((1, 1))
sunrgbd_gt_bboxes_3d_mask = np.zeros((1, 1))
assert sunrgbd_gt_bboxes_3d.shape == (3, 7)
assert sunrgbd_gt_labels.shape == (3, 1)
assert sunrgbd_gt_bboxes_3d_mask.shape == (3, 1)
scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]
scannet_load_annotations3D = IndoorLoadAnnotations3D()
scannet_results = dict()
data_path = './tests/data/scannet/scannet_train_instance_data'
if scannet_info['annos']['gt_num'] != 0:
scannet_gt_bboxes_3d = scannet_info['annos']['gt_boxes_upright_depth']
scannet_gt_labels = scannet_info['annos']['class'].reshape(-1, 1)
scannet_gt_bboxes_3d_mask = np.ones_like(scannet_gt_labels)
else:
scannet_gt_bboxes_3d = np.zeros((1, 6), dtype=np.float32)
scannet_gt_labels = np.zeros((1, 1))
scannet_gt_bboxes_3d_mask = np.zeros((1, 1))
scan_name = scannet_info['point_cloud']['lidar_idx']
scannet_results['pts_instance_mask_path'] = osp.join(
data_path, f'{scan_name}_ins_label.npy')
scannet_results['pts_semantic_mask_path'] = osp.join(
data_path, scan_name + '_sem_label.npy')
scannet_results['info'] = scannet_info
scannet_results['gt_bboxes_3d'] = scannet_gt_bboxes_3d
scannet_results['gt_labels'] = scannet_gt_labels
scannet_results['gt_bboxes_3d_mask'] = scannet_gt_bboxes_3d_mask
scannet_results = scannet_load_annotations3D(scannet_results)
scannet_gt_boxes = scannet_results['gt_bboxes_3d']
    scannet_gt_labels = scannet_results['gt_labels']
scannet_gt_boxes_mask = scannet_results['gt_bboxes_3d_mask']
scannet_pts_instance_mask = scannet_results['pts_instance_mask']
scannet_pts_semantic_mask = scannet_results['pts_semantic_mask']
assert scannet_gt_boxes.shape == (27, 6)
    assert scannet_gt_labels.shape == (27, 1)
assert scannet_gt_boxes_mask.shape == (27, 1)
assert scannet_pts_instance_mask.shape == (100, )
assert scannet_pts_semantic_mask.shape == (100, )