Commit d1aac35d authored by zhangwenwei

Initial commit
import torch
from ..geometry import bbox_overlaps_2d
from .max_iou_assigner import MaxIoUAssigner
class ApproxMaxIoUAssigner(MaxIoUAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned `-1`, `0`, or a positive integer
indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
ignore_wrt_candidates=True):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
def assign(self,
approxs,
squares,
approxs_per_octave,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to approxs.
This method assign a gt bbox to each group of approxs (bboxes),
each group of approxs is represent by a base approx (bbox) and
will be assigned with -1, 0, or a positive number.
-1 means don't care, 0 means negative sample,
positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters.
1. assign every bbox to -1
2. use the max IoU of each group of approxs to assign
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
approxs (Tensor): Bounding boxes to be assigned,
shape(approxs_per_octave*n, 4).
squares (Tensor): Base Bounding boxes to be assigned,
shape(n, 4).
approxs_per_octave (int): number of approxs per octave
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
if squares.shape[0] == 0 or gt_bboxes.shape[0] == 0:
raise ValueError('No gt or approxs')
num_squares = squares.size(0)
num_gts = gt_bboxes.size(0)
# re-organize anchors by approxs_per_octave x num_squares
approxs = torch.transpose(
approxs.view(num_squares, approxs_per_octave, 4), 0,
1).contiguous().view(-1, 4)
all_overlaps = bbox_overlaps_2d(approxs, gt_bboxes)
overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,
num_gts).max(dim=0)
overlaps = torch.transpose(overlaps, 0, 1)
bboxes = squares[:, :4]
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = bbox_overlaps_2d(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = bbox_overlaps_2d(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
import torch
class AssignResult(object):
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
def add_gt_(self, gt_labels):
self_inds = torch.arange(
1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
self.gt_inds = torch.cat([self_inds, self.gt_inds])
self.max_overlaps = torch.cat(
[self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
if self.labels is not None:
self.labels = torch.cat([gt_labels, self.labels])
from abc import ABCMeta, abstractmethod
class BaseAssigner(metaclass=ABCMeta):
@abstractmethod
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
pass
import torch
from .. import geometry
from .assign_result import AssignResult
from .base_assigner import BaseAssigner
class MaxIoUAssigner(BaseAssigner):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposal will be assigned `-1`, `0`, or a positive integer
indicating the ground truth index.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
pos_iou_thr (float): IoU threshold for positive bboxes.
neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
min_pos_iou (float): Minimum iou for a bbox to be considered as a
positive bbox. Positive samples can have smaller IoU than
pos_iou_thr due to the 4th step (assign max IoU sample to each gt).
gt_max_assign_all (bool): Whether to assign all bboxes with the same
highest overlap with some gt to that gt.
ignore_iof_thr (float): IoF threshold for ignoring bboxes (if
`gt_bboxes_ignore` is specified). Negative values mean not
ignoring any bboxes.
ignore_wrt_candidates (bool): Whether to compute the iof between
`bboxes` and `gt_bboxes_ignore`, or the contrary.
"""
def __init__(self,
pos_iou_thr,
neg_iou_thr,
min_pos_iou=.0,
gt_max_assign_all=True,
ignore_iof_thr=-1,
iou_type='2d',
ignore_wrt_candidates=True):
self.pos_iou_thr = pos_iou_thr
self.neg_iou_thr = neg_iou_thr
self.min_pos_iou = min_pos_iou
self.gt_max_assign_all = gt_max_assign_all
self.ignore_iof_thr = ignore_iof_thr
self.ignore_wrt_candidates = ignore_wrt_candidates
# iou_type could be 2d, 3d, nearest_3d
self.iou_type = iou_type
self.bbox_overlaps = getattr(geometry,
'bbox_overlaps_{}'.format(iou_type))
def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
"""Assign gt to bboxes.
This method assigns a gt bbox to every bbox (proposal/anchor); each bbox
will be assigned with -1, 0, or a positive number. -1 means don't care,
0 means negative sample, positive number is the index (1-based) of
assigned gt.
The assignment is done in the following steps; the order matters.
1. assign every bbox to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
assign it to that gt
4. for each gt bbox, assign its nearest proposals (may be more than
one) to itself
Args:
bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).
gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
if self.iou_type == '2d':
bboxes = bboxes[:, :4]
overlaps = self.bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0):
if self.ignore_wrt_candidates:
ignore_overlaps = self.bbox_overlaps(
bboxes, gt_bboxes_ignore, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
else:
ignore_overlaps = self.bbox_overlaps(
gt_bboxes_ignore, bboxes, mode='iof')
ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1
assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
return assign_result
def assign_wrt_overlaps(self, overlaps, gt_labels=None):
"""Assign w.r.t. the overlaps of bboxes with gts.
Args:
overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
shape(k, n).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns:
:obj:`AssignResult`: The assign result.
"""
num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
assigned_gt_inds = overlaps.new_full((num_bboxes, ),
-1,
dtype=torch.long)
if num_gts == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = overlaps.new_zeros((num_bboxes, ))
if num_gts == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if gt_labels is None:
assigned_labels = None
else:
assigned_labels = overlaps.new_zeros((num_bboxes, ),
dtype=torch.long)
return AssignResult(
num_gts,
assigned_gt_inds,
max_overlaps,
labels=assigned_labels)
# for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts
max_overlaps, argmax_overlaps = overlaps.max(dim=0)
# for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
# 2. assign negative: below
if isinstance(self.neg_iou_thr, float):
assigned_gt_inds[(max_overlaps >= 0)
& (max_overlaps < self.neg_iou_thr)] = 0
elif isinstance(self.neg_iou_thr, tuple):
assert len(self.neg_iou_thr) == 2
assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
& (max_overlaps < self.neg_iou_thr[1])] = 0
# 3. assign positive: above positive IoU threshold
pos_inds = max_overlaps >= self.pos_iou_thr
assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1
# 4. assign fg: for each gt, proposals with highest IoU
for i in range(num_gts):
if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all:
max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
assigned_gt_inds[max_iou_inds] = i + 1
else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
if gt_labels is not None:
assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze()
if pos_inds.numel() > 0:
assigned_labels[pos_inds] = gt_labels[
assigned_gt_inds[pos_inds] - 1]
else:
assigned_labels = None
return AssignResult(
num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
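# Usage sketch (illustrative values assumed, chosen to exercise all four
# assignment steps above): a toy 2-gt x 5-anchor overlap matrix.
def _demo_assign_wrt_overlaps():
    assigner = MaxIoUAssigner(pos_iou_thr=0.5, neg_iou_thr=0.4, min_pos_iou=0.3)
    overlaps = torch.tensor([[0.9, 0.4, 0.1, 0.0, 0.3],
                             [0.1, 0.6, 0.2, 0.0, 0.45]])
    result = assigner.assign_wrt_overlaps(overlaps)
    # anchor 0 -> gt 1 and anchor 1 -> gt 2 (1-based), two negatives, and
    # anchor 4 stays -1 (0.4 <= IoU < 0.5, and it is no gt's best anchor)
    assert result.gt_inds.tolist() == [1, 2, 0, 0, -1]
    return result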
import numba
import numpy as np
def camera_to_lidar(points, r_rect, velo2cam):
points_shape = list(points.shape[0:-1])
if points.shape[-1] == 3:
points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)
lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)
return lidar_points[..., :3]
def box_camera_to_lidar(data, r_rect, velo2cam):
xyz = data[:, 0:3]
l, h, w = data[:, 3:4], data[:, 4:5], data[:, 5:6]
r = data[:, 6:7]
xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)
return np.concatenate([xyz_lidar, w, l, h, r], axis=1)
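# Sanity sketch (identity calibration assumed): with identity rectification
# and velo-to-cam matrices, camera_to_lidar leaves the coordinates unchanged.
def _demo_camera_to_lidar_identity():
    pts = np.array([[1., 2., 3.]])
    eye = np.eye(4)
    assert np.allclose(camera_to_lidar(pts, eye, eye), pts)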
def corners_nd(dims, origin=0.5):
"""generate relative box corners based on length per dim and
origin point.
Args:
dims (float array, shape=[N, ndim]): array of length per dim
origin (list or array or float): origin point relate to smallest point.
Returns:
float array, shape=[N, 2 ** ndim, ndim]: returned corners.
point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
(3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
where x0 < x1, y0 < y1, z0 < z1
"""
ndim = int(dims.shape[1])
corners_norm = np.stack(
np.unravel_index(np.arange(2**ndim), [2] * ndim),
axis=1).astype(dims.dtype)
# now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
# (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
# so we convert to a format that is convenient for other computations.
# for 2d boxes, the format is clockwise starting from the minimum point;
# for 3d boxes, sketch the layout by hand to verify the ordering.
if ndim == 2:
# generate clockwise box corners
corners_norm = corners_norm[[0, 1, 3, 2]]
elif ndim == 3:
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
[1, 2**ndim, ndim])
return corners
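# Usage sketch (values assumed): a single 2 x 4 box with origin=0.5 yields
# four clockwise corners centered on the origin.
def _demo_corners_nd():
    dims = np.array([[2.0, 4.0]])
    corners = corners_nd(dims, origin=0.5)
    assert np.allclose(corners,
                       [[[-1., -2.], [-1., 2.], [1., 2.], [1., -2.]]])
    return corners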
def rotation_2d(points, angles):
"""rotation 2d points based on origin point clockwise when angle positive.
Args:
points (float array, shape=[N, point_size, 2]): points to be rotated.
angles (float array, shape=[N]): rotation angle.
Returns:
float array: same shape as points
"""
rot_sin = np.sin(angles)
rot_cos = np.cos(angles)
rot_mat_T = np.stack([[rot_cos, -rot_sin], [rot_sin, rot_cos]])
return np.einsum('aij,jka->aik', points, rot_mat_T)
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
"""convert kitti locations, dimensions and angles to corners.
format: center(xy), dims(xy), angles(clockwise when positive)
Args:
centers (float array, shape=[N, 2]): locations in kitti label file.
dims (float array, shape=[N, 2]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
Returns:
float array, shape=[N, 4, 2]: box corners.
"""
# 'length' in kitti format is in x axis.
# xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 4, 2]
if angles is not None:
corners = rotation_2d(corners, angles)
corners += centers.reshape([-1, 1, 2])
return corners
@numba.jit(nopython=True)
def depth_to_points(depth, trunc_pixel):
num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)
points = np.zeros((num_pts, 3), dtype=depth.dtype)
x = np.array([0, 0, 1], dtype=depth.dtype)
k = 0
for i in range(trunc_pixel, depth.shape[0]):
for j in range(depth.shape[1]):
if depth[i, j] > 0.1:
x = np.array([j, i, 1], dtype=depth.dtype)
points[k] = x * depth[i, j]
k += 1
return points
def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):
pts = depth_to_points(depth, trunc_pixel)
points_shape = list(pts.shape[0:-1])
points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)
points = points @ np.linalg.inv(P2.T)
lidar_points = camera_to_lidar(points, r_rect, velo2cam)
return lidar_points
def rotation_3d_in_axis(points, angles, axis=0):
# points: [N, point_size, 3]
rot_sin = np.sin(angles)
rot_cos = np.cos(angles)
ones = np.ones_like(rot_cos)
zeros = np.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = np.stack([[rot_cos, zeros, -rot_sin], [zeros, ones, zeros],
[rot_sin, zeros, rot_cos]])
elif axis == 2 or axis == -1:
rot_mat_T = np.stack([[rot_cos, -rot_sin, zeros],
[rot_sin, rot_cos, zeros], [zeros, zeros, ones]])
elif axis == 0:
rot_mat_T = np.stack([[zeros, rot_cos, -rot_sin],
[zeros, rot_sin, rot_cos], [ones, zeros, zeros]])
else:
raise ValueError('axis should be in range')
return np.einsum('aij,jka->aik', points, rot_mat_T)
def center_to_corner_box3d(centers,
dims,
angles=None,
origin=(0.5, 1.0, 0.5),
axis=1):
"""convert kitti locations, dimensions and angles to corners
Args:
centers (float array, shape=[N, 3]): locations in kitti label file.
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relate to smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
float array, shape=[N, 8, 3]: box corners.
"""
# 'length' in kitti format is in x axis.
# yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 8, 3]
if angles is not None:
corners = rotation_3d_in_axis(corners, angles, axis=axis)
corners += centers.reshape([-1, 1, 3])
return corners
@numba.jit(nopython=True)
def box2d_to_corner_jit(boxes):
num_box = boxes.shape[0]
corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
corners_norm[1, 1] = 1.0
corners_norm[2] = 1.0
corners_norm[3, 0] = 1.0
corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(
1, 4, 2)
rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)
for i in range(num_box):
rot_sin = np.sin(boxes[i, -1])
rot_cos = np.cos(boxes[i, -1])
rot_mat_T[0, 0] = rot_cos
rot_mat_T[0, 1] = -rot_sin
rot_mat_T[1, 0] = rot_sin
rot_mat_T[1, 1] = rot_cos
box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]
return box_corners
@numba.njit
def corner_to_standup_nd_jit(boxes_corner):
num_boxes = boxes_corner.shape[0]
ndim = boxes_corner.shape[-1]
result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)
for i in range(num_boxes):
for j in range(ndim):
result[i, j] = np.min(boxes_corner[i, :, j])
for j in range(ndim):
result[i, j + ndim] = np.max(boxes_corner[i, :, j])
return result
@numba.jit(nopython=True)
def corner_to_surfaces_3d_jit(corners):
"""convert 3d box corners from corner function above
to surfaces that normal vectors all direct to internal.
Args:
corners (float array, [N, 8, 3]): 3d box corners.
Returns:
surfaces (float array, [N, 6, 4, 3]):
"""
# box_corners: [N, 8, 3], must from corner functions in this module
num_boxes = corners.shape[0]
surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)
corner_idxes = np.array([
0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7
]).reshape(6, 4)
for i in range(num_boxes):
for j in range(6):
for k in range(4):
surfaces[i, j, k] = corners[i, corner_idxes[j, k]]
return surfaces
def rotation_points_single_angle(points, angle, axis=0):
# points: [N, 3]
rot_sin = np.sin(angle)
rot_cos = np.cos(angle)
if axis == 1:
rot_mat_T = np.array(
[[rot_cos, 0, -rot_sin], [0, 1, 0], [rot_sin, 0, rot_cos]],
dtype=points.dtype)
elif axis == 2 or axis == -1:
rot_mat_T = np.array(
[[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]],
dtype=points.dtype)
elif axis == 0:
rot_mat_T = np.array(
[[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]],
dtype=points.dtype)
else:
raise ValueError('axis should be in range')
return points @ rot_mat_T, rot_mat_T
def project_to_image(points_3d, proj_mat):
points_shape = list(points_3d.shape)
points_shape[-1] = 1
# pad the homogeneous coordinate with zeros (not ones), matching the
# original implementation; the torch version of project_to_image pads
# with ones instead
points_4 = np.concatenate([points_3d, np.zeros(points_shape)], axis=-1)
point_2d = points_4 @ proj_mat.T
point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
return point_2d_res
def box3d_to_bbox(box3d, rect, Trv2c, P2):
box_corners = center_to_corner_box3d(
box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
box_corners_in_image = project_to_image(box_corners, P2)
# box_corners_in_image: [N, 8, 2]
minxy = np.min(box_corners_in_image, axis=1)
maxxy = np.max(box_corners_in_image, axis=1)
bbox = np.concatenate([minxy, maxxy], axis=1)
return bbox
def corner_to_surfaces_3d(corners):
"""convert 3d box corners from corner function above
to surfaces that normal vectors all direct to internal.
Args:
corners (float array, [N, 8, 3]): 3d box corners.
Returns:
surfaces (float array, [N, 6, 4, 3]):
"""
# box_corners: [N, 8, 3], must from corner functions in this module
surfaces = np.array([
[corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],
[corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],
[corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],
[corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],
[corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],
[corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],
]).transpose([2, 0, 1, 3])
return surfaces
def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
# TODO: this function is different from PointCloud3D, be careful
# when start to use nuscene, check the input
rbbox_corners = center_to_corner_box3d(
rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)
surfaces = corner_to_surfaces_3d(rbbox_corners)
indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)
return indices
def minmax_to_corner_2d(minmax_box):
ndim = minmax_box.shape[-1] // 2
center = minmax_box[..., :ndim]
dims = minmax_box[..., ndim:] - center
return center_to_corner_box2d(center, dims, origin=0.0)
def limit_period(val, offset=0.5, period=np.pi):
return val - np.floor(val / period + offset) * period
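# Worked example (input value assumed): with the defaults offset=0.5 and
# period=pi, the result lands in [-pi/2, pi/2), e.g. 3.5 wraps to 3.5 - pi.
def _demo_limit_period():
    assert abs(limit_period(3.5) - (3.5 - np.pi)) < 1e-9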
def create_anchors_3d_range(feature_size,
anchor_range,
sizes=((1.6, 3.9, 1.56), ),
rotations=(0, np.pi / 2),
dtype=np.float32):
"""
Args:
feature_size: list [D, H, W](zyx)
sizes: [N, 3] list of list or array, size of anchors, xyz
Returns:
anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
"""
anchor_range = np.array(anchor_range, dtype)
z_centers = np.linspace(
anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
y_centers = np.linspace(
anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
x_centers = np.linspace(
anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
rotations = np.array(rotations, dtype=dtype)
rets = np.meshgrid(
x_centers, y_centers, z_centers, rotations, indexing='ij')
tile_shape = [1] * 5
tile_shape[-2] = int(sizes.shape[0])
for i in range(len(rets)):
rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
rets[i] = rets[i][..., np.newaxis] # for concat
sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
tile_size_shape = list(rets[0].shape)
tile_size_shape[3] = 1
sizes = np.tile(sizes, tile_size_shape)
rets.insert(3, sizes)
ret = np.concatenate(rets, axis=-1)
return np.transpose(ret, [2, 1, 0, 3, 4, 5])
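# Shape sketch (KITTI-car-like numbers assumed, not prescribed by this
# module): one anchor size and two rotations on a 1 x 200 x 176 feature map.
def _demo_create_anchors_3d_range():
    anchors = create_anchors_3d_range(
        feature_size=[1, 200, 176],
        anchor_range=[0, -40, -3, 70.4, 40, 1],
        sizes=((1.6, 3.9, 1.56), ),
        rotations=(0, np.pi / 2))
    # [D, H, W, num_sizes, num_rots, 7]
    assert anchors.shape == (1, 200, 176, 1, 2, 7)
    return anchors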
def center_to_minmax_2d_0_5(centers, dims):
return np.concatenate([centers - dims / 2, centers + dims / 2], axis=-1)
def center_to_minmax_2d(centers, dims, origin=0.5):
if origin == 0.5:
return center_to_minmax_2d_0_5(centers, dims)
corners = center_to_corner_box2d(centers, dims, origin=origin)
return corners[:, [0, 2]].reshape([-1, 4])
def rbbox2d_to_near_bbox(rbboxes):
"""convert rotated bbox to nearest 'standing' or 'lying' bbox.
Args:
rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
Returns:
bboxes: [N, 4(xmin, ymin, xmax, ymax)] bboxes
"""
rots = rbboxes[..., -1]
rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))
cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]
bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
return bboxes
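# Sketch (values assumed): a box rotated by pi/2 swaps its x/y extents
# before the min/max conversion, so the result still hugs the rotated box.
def _demo_rbbox2d_to_near_bbox():
    rb = np.array([[0., 0., 4., 2., np.pi / 2]])  # x, y, xdim, ydim, rad
    assert np.allclose(rbbox2d_to_near_bbox(rb), [[-1., -2., 1., 2.]])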
@numba.jit(nopython=True)
def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
"""calculate box iou. note that jit version runs ~10x faster than the
box_overlaps function in mmdet3d.core.evaluation
Parameters
----------
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
-------
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
N = boxes.shape[0]
K = query_boxes.shape[0]
overlaps = np.zeros((N, K), dtype=boxes.dtype)
for k in range(K):
box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *
(query_boxes[k, 3] - query_boxes[k, 1] + eps))
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0]) + eps)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1]) + eps)
if ih > 0:
if mode == 'iou':
ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
(boxes[n, 3] - boxes[n, 1] + eps) + box_area -
iw * ih)
else:
ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
(boxes[n, 3] - boxes[n, 1] + eps))
overlaps[n, k] = iw * ih / ua
return overlaps
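# Usage sketch (boxes assumed): two 10 x 10 boxes overlapping on a 5 x 5
# patch give IoU = 25 / (100 + 100 - 25) with the default eps=0.
def _demo_iou_jit():
    boxes = np.array([[0., 0., 10., 10.]])
    query_boxes = np.array([[5., 5., 15., 15.]])
    assert np.allclose(iou_jit(boxes, query_boxes), [[25. / 175.]])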
def change_box3d_center_(box3d, src, dst):
dst = np.array(dst, dtype=box3d.dtype)
src = np.array(src, dtype=box3d.dtype)
box3d[..., :3] += box3d[..., 3:6] * (dst - src)
def projection_matrix_to_CRT_kitti(proj):
# P = C @ [R|T]
# C is an upper-triangular matrix, so we invert CR and apply a QR
# decomposition: inv(CR) = Rinv @ Cinv with Rinv orthogonal and Cinv
# upper triangular. This is stable for all KITTI camera projection matrices.
CR = proj[0:3, 0:3]
CT = proj[0:3, 3]
RinvCinv = np.linalg.inv(CR)
Rinv, Cinv = np.linalg.qr(RinvCinv)
C = np.linalg.inv(Cinv)
R = np.linalg.inv(Rinv)
T = Cinv @ CT
return C, R, T
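# Sanity sketch (synthetic P assumed): the recovered factors reproduce
# P = C @ [R|T] up to numerics; QR sign conventions may flip individual
# factors, but the products below are stable.
def _demo_projection_matrix_to_CRT_kitti():
    P = np.array([[700., 0., 600., 40.],
                  [0., 700., 180., 2.],
                  [0., 0., 1., 0.01]])
    C, R, T = projection_matrix_to_CRT_kitti(P)
    assert np.allclose(C @ R, P[:3, :3])
    assert np.allclose(C @ T, P[:3, 3])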
def remove_outside_points(points, rect, Trv2c, P2, image_shape):
# 5x faster than remove_outside_points_v1(2ms vs 10ms)
C, R, T = projection_matrix_to_CRT_kitti(P2)
image_bbox = [0, 0, image_shape[1], image_shape[0]]
frustum = get_frustum(image_bbox, C)
frustum -= T
frustum = np.linalg.inv(R) @ frustum.T
frustum = camera_to_lidar(frustum.T, rect, Trv2c)
frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])
indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)
points = points[indices.reshape([-1])]
return points
def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
fku = C[0, 0]
fkv = -C[1, 1]
u0v0 = C[0:2, 2]
z_points = np.array(
[near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]
b = bbox_image
box_corners = np.array(
[[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],
dtype=C.dtype)
near_box_corners = (box_corners - u0v0) / np.array(
[fku / near_clip, -fkv / near_clip], dtype=C.dtype)
far_box_corners = (box_corners - u0v0) / np.array(
[fku / far_clip, -fkv / far_clip], dtype=C.dtype)
ret_xy = np.concatenate([near_box_corners, far_box_corners],
axis=0) # [8, 2]
ret_xyz = np.concatenate([ret_xy, z_points], axis=1)
return ret_xyz
def surface_equ_3d(polygon_surfaces):
# return [a, b, c], d in ax+by+cz+d=0
# polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
1:3, :]
# normal_vec: [..., 3]
normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
# print(normal_vec.shape, points[..., 0, :].shape)
# d = -np.inner(normal_vec, points[..., 0, :])
d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
return normal_vec, -d
@numba.njit
def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
num_surfaces):
max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
num_points = points.shape[0]
num_polygons = polygon_surfaces.shape[0]
ret = np.ones((num_points, num_polygons), dtype=np.bool_)
sign = 0.0
for i in range(num_points):
for j in range(num_polygons):
for k in range(max_num_surfaces):
if k > num_surfaces[j]:
break
sign = (
points[i, 0] * normal_vec[j, k, 0] +
points[i, 1] * normal_vec[j, k, 1] +
points[i, 2] * normal_vec[j, k, 2] + d[j, k])
if sign >= 0:
ret[i, j] = False
break
return ret
def points_in_convex_polygon_3d_jit(points,
polygon_surfaces,
num_surfaces=None):
"""check points is in 3d convex polygons.
Args:
points: [num_points, 3] array.
polygon_surfaces: [num_polygon, max_num_surfaces,
max_num_points_of_surface, 3]
array. all surfaces' normal vector must direct to internal.
max_num_points_of_surface must at least 3.
num_surfaces: [num_polygon] array. indicate how many surfaces
a polygon contain
Returns:
[num_points, num_polygon] bool array.
"""
max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
# num_points = points.shape[0]
num_polygons = polygon_surfaces.shape[0]
if num_surfaces is None:
num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)
normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])
# normal_vec: [num_polygon, max_num_surfaces, 3]
# d: [num_polygon, max_num_surfaces]
return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,
normal_vec, d, num_surfaces)
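# End-to-end sketch (box and points assumed): a unit cube with its bottom
# face at z=0 (origin (0.5, 0.5, 0), as used by points_in_rbbox) contains
# the first query point but not the second.
def _demo_points_in_rbbox():
    box = np.array([[0., 0., 0., 1., 1., 1., 0.]])  # x, y, z, dx, dy, dz, r
    pts = np.array([[0., 0., 0.5], [3., 0., 0.5]])
    assert points_in_rbbox(pts, box).tolist() == [[True], [False]]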
@numba.jit
def points_in_convex_polygon_jit(points, polygon, clockwise=True):
"""check points is in 2d convex polygons. True when point in polygon
Args:
points: [num_points, 2] array.
polygon: [num_polygon, num_points_of_polygon, 2] array.
clockwise: bool. whether the polygon vertices are ordered clockwise.
Returns:
[num_points, num_polygon] bool array.
"""
# first convert polygon to directed lines
num_points_of_polygon = polygon.shape[1]
num_points = points.shape[0]
num_polygons = polygon.shape[0]
# if clockwise:
# vec1 = polygon - polygon[:, [num_points_of_polygon - 1] +
# list(range(num_points_of_polygon - 1)), :]
# else:
# vec1 = polygon[:, [num_points_of_polygon - 1] +
# list(range(num_points_of_polygon - 1)), :] - polygon
# vec1: [num_polygon, num_points_of_polygon, 2]
vec1 = np.zeros((2), dtype=polygon.dtype)
ret = np.zeros((num_points, num_polygons), dtype=np.bool_)
success = True
cross = 0.0
for i in range(num_points):
for j in range(num_polygons):
success = True
for k in range(num_points_of_polygon):
if clockwise:
vec1 = polygon[j, k] - polygon[j, k - 1]
else:
vec1 = polygon[j, k - 1] - polygon[j, k]
cross = vec1[1] * (polygon[j, k, 0] - points[i, 0])
cross -= vec1[0] * (polygon[j, k, 1] - points[i, 1])
if cross >= 0:
success = False
break
ret[i, j] = success
return ret
import numpy as np
import torch
def limit_period(val, offset=0.5, period=np.pi):
return val - torch.floor(val / period + offset) * period
def corners_nd(dims, origin=0.5):
"""generate relative box corners based on length per dim and
origin point.
Args:
dims (float array, shape=[N, ndim]): array of length per dim
origin (list or array or float): origin point relate to smallest point.
Returns:
float array, shape=[N, 2 ** ndim, ndim]: returned corners.
point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
(3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
where x0 < x1, y0 < y1, z0 < z1
"""
ndim = int(dims.shape[1])
corners_norm = np.stack(
np.unravel_index(np.arange(2**ndim), [2] * ndim),
axis=1).astype(dims.dtype)
# now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
# (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
# so we convert to a format that is convenient for other computations.
# for 2d boxes, the format is clockwise starting from the minimum point;
# for 3d boxes, sketch the layout by hand to verify the ordering.
if ndim == 2:
# generate clockwise box corners
corners_norm = corners_norm[[0, 1, 3, 2]]
elif ndim == 3:
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
[1, 2**ndim, ndim])
return corners
def rotation_3d_in_axis(points, angles, axis=0):
# points: [N, point_size, 3]
# angles: [N]
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
ones = torch.ones_like(rot_cos)
zeros = torch.zeros_like(rot_cos)
if axis == 1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, zeros, -rot_sin]),
torch.stack([zeros, ones, zeros]),
torch.stack([rot_sin, zeros, rot_cos])
])
elif axis == 2 or axis == -1:
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin, zeros]),
torch.stack([rot_sin, rot_cos, zeros]),
torch.stack([zeros, zeros, ones])
])
elif axis == 0:
rot_mat_T = torch.stack([
torch.stack([zeros, rot_cos, -rot_sin]),
torch.stack([zeros, rot_sin, rot_cos]),
torch.stack([ones, zeros, zeros])
])
else:
raise ValueError('axis should be in range')
return torch.einsum('aij,jka->aik', (points, rot_mat_T))
def center_to_corner_box3d(centers,
dims,
angles,
origin=[0.5, 1.0, 0.5],
axis=1):
"""convert kitti locations, dimensions and angles to corners
Args:
centers (float array, shape=[N, 3]): locations in kitti label file.
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relate to smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
float array, shape=[N, 8, 3]: box corners.
"""
# 'length' in kitti format is in x axis.
# yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 8, 3]
corners = rotation_3d_in_axis(corners, angles, axis=axis)
corners += centers.view(-1, 1, 3)
return corners
def lidar_to_camera(points, r_rect, velo2cam):
num_points = points.shape[0]
points = torch.cat(
[points, torch.ones(num_points, 1).type_as(points)], dim=-1)
camera_points = points @ (r_rect @ velo2cam).t()
return camera_points[..., :3]
def box_lidar_to_camera(data, r_rect, velo2cam):
xyz_lidar = data[..., 0:3]
w, l, h = data[..., 3:4], data[..., 4:5], data[..., 5:6]
r = data[..., 6:7]
xyz = lidar_to_camera(xyz_lidar, r_rect, velo2cam)
return torch.cat([xyz, l, h, w, r], dim=-1)
def project_to_image(points_3d, proj_mat):
points_num = list(points_3d.shape)[:-1]
points_shape = np.concatenate([points_num, [1]], axis=0).tolist()
# the previous implementation used new_zeros; new_ones yields better results
points_4 = torch.cat(
[points_3d, points_3d.new_ones(*points_shape)], dim=-1)
# point_2d = points_4 @ tf.transpose(proj_mat, [1, 0])
point_2d = torch.matmul(points_4, proj_mat.t())
point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]
return point_2d_res
def rbbox2d_to_near_bbox(rbboxes):
"""convert rotated bbox to nearest 'standing' or 'lying' bbox.
Args:
rbboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
Returns:
bboxes: [N, 4(xmin, ymin, xmax, ymax)] bboxes
"""
rots = rbboxes[..., -1]
rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, np.pi))
cond = (rots_0_pi_div_2 > np.pi / 4)[..., None]
bboxes_center = torch.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
return bboxes
def center_to_minmax_2d_0_5(centers, dims):
return torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
def center_to_minmax_2d(centers, dims, origin=0.5):
if origin == 0.5:
return center_to_minmax_2d_0_5(centers, dims)
corners = center_to_corner_box2d(centers, dims, origin=origin)
return corners[:, [0, 2]].reshape([-1, 4])
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
"""convert kitti locations, dimensions and angles to corners.
format: center(xy), dims(xy), angles(clockwise when positive)
Args:
centers (float array, shape=[N, 2]): locations in kitti label file.
dims (float array, shape=[N, 2]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
Returns:
float array, shape=[N, 4, 2]: box corners.
"""
# 'length' in kitti format is in x axis.
# xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
corners = corners_nd(dims, origin=origin)
# corners: [N, 4, 2]
if angles is not None:
corners = rotation_2d(corners, angles)
corners += centers.reshape([-1, 1, 2])
return corners
def rotation_2d(points, angles):
"""rotation 2d points based on origin point clockwise when angle positive.
Args:
points (float array, shape=[N, point_size, 2]): points to be rotated.
angles (float array, shape=[N]): rotation angle.
Returns:
float array: same shape as points
"""
rot_sin = torch.sin(angles)
rot_cos = torch.cos(angles)
# torch.stack does not accept nested python lists, so stack the rows first
rot_mat_T = torch.stack([
torch.stack([rot_cos, -rot_sin]),
torch.stack([rot_sin, rot_cos])
])
return torch.einsum('aij,jka->aik', points, rot_mat_T)
from .box_coder import ResidualCoder
__all__ = ['ResidualCoder']
import numpy as np
import torch
class ResidualCoder(object):
def __init__(self, code_size=7, mean=None, std=None):
super().__init__()
self.code_size = code_size
self.mean = mean
self.std = std
@staticmethod
def encode_np(boxes, anchors):
"""
:param boxes: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert boxes to z-center format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xg, yg, zg, wg, lg, hg, rg = np.split(boxes, 7, axis=-1)
zg = zg + hg / 2
za = za + ha / 2
diagonal = np.sqrt(la**2 + wa**2) # 4.3
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / ha # 1.6
lt = np.log(lg / la)
wt = np.log(wg / wa)
ht = np.log(hg / ha)
rt = rg - ra
return np.concatenate([xt, yt, zt, wt, lt, ht, rt], axis=-1)
@staticmethod
def decode_np(box_encodings, anchors):
"""
:param box_encodings: (N, 7) x, y, z, w, l, h, r
:param anchors: (N, 7)
:return:
"""
# need to convert box_encodings to z-bottom format
xa, ya, za, wa, la, ha, ra = np.split(anchors, 7, axis=-1)
xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, 7, axis=-1)
za = za + ha / 2
diagonal = np.sqrt(la**2 + wa**2)
xg = xt * diagonal + xa
yg = yt * diagonal + ya
zg = zt * ha + za
lg = np.exp(lt) * la
wg = np.exp(wt) * wa
hg = np.exp(ht) * ha
rg = rt + ra
zg = zg - hg / 2
return np.concatenate([xg, yg, zg, wg, lg, hg, rg], axis=-1)
@staticmethod
def encode_torch(anchors, boxes, means, stds):
"""
:param boxes: (N, 7+n) x, y, z, w, l, h, r, velo*
:param anchors: (N, 7+n)
:return:
"""
box_ndim = anchors.shape[-1]
cas, cgs, cts = [], [], []
if box_ndim > 7:
xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)
xg, yg, zg, wg, lg, hg, rg, *cgs = torch.split(boxes, 1, dim=-1)
cts = [g - a for g, a in zip(cgs, cas)]
else:
xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)
xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1)
za = za + ha / 2
zg = zg + hg / 2
diagonal = torch.sqrt(la**2 + wa**2)
xt = (xg - xa) / diagonal
yt = (yg - ya) / diagonal
zt = (zg - za) / ha
lt = torch.log(lg / la)
wt = torch.log(wg / wa)
ht = torch.log(hg / ha)
rt = rg - ra
return torch.cat([xt, yt, zt, wt, lt, ht, rt, *cts], dim=-1)
@staticmethod
def decode_torch(anchors, box_encodings, means, stds):
"""
:param box_encodings: (N, 7 + n) x, y, z, w, l, h, r
:param anchors: (N, 7 + n)
:return:
"""
cas, cts = [], []
box_ndim = anchors.shape[-1]
if box_ndim > 7:
xa, ya, za, wa, la, ha, ra, *cas = torch.split(anchors, 1, dim=-1)
xt, yt, zt, wt, lt, ht, rt, *cts = torch.split(
box_encodings, 1, dim=-1)
else:
xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)
xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1)
za = za + ha / 2
diagonal = torch.sqrt(la**2 + wa**2)
xg = xt * diagonal + xa
yg = yt * diagonal + ya
zg = zt * ha + za
lg = torch.exp(lt) * la
wg = torch.exp(wt) * wa
hg = torch.exp(ht) * ha
rg = rt + ra
zg = zg - hg / 2
cgs = [t + a for t, a in zip(cts, cas)]
return torch.cat([xg, yg, zg, wg, lg, hg, rg, *cgs], dim=-1)
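# Round-trip sketch (anchor and box values assumed): decode_np inverts
# encode_np exactly, since every residual transform has a closed-form inverse.
def _demo_residual_coder_roundtrip():
    anchors = np.array([[0., 0., -1., 1.6, 3.9, 1.56, 0.]])
    boxes = np.array([[0.5, 1.0, -1.2, 1.7, 4.1, 1.5, 0.3]])
    enc = ResidualCoder.encode_np(boxes, anchors)
    assert np.allclose(ResidualCoder.decode_np(enc, anchors), boxes)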
import torch
from mmdet3d.ops.iou3d import boxes_iou3d_gpu
from . import box_torch_ops
def bbox_overlaps_2d(bboxes1, bboxes2, mode='iou', is_aligned=False):
"""Calculate overlap between two set of bboxes.
If ``is_aligned`` is ``False``, then calculate the ious between each bbox
of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format.
bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format.
If is_aligned is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union) or iof (intersection over
foreground).
Returns:
ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
Example:
>>> bboxes1 = torch.FloatTensor([
>>> [0, 0, 10, 10],
>>> [10, 10, 20, 20],
>>> [32, 32, 38, 42],
>>> ])
>>> bboxes2 = torch.FloatTensor([
>>> [0, 0, 10, 20],
>>> [0, 10, 10, 19],
>>> [10, 10, 20, 20],
>>> ])
>>> bbox_overlaps_2d(bboxes1, bboxes2)
tensor([[0.5238, 0.0500, 0.0041],
[0.0323, 0.0452, 1.0000],
[0.0000, 0.0000, 0.0000]])
Example:
>>> empty = torch.FloatTensor([])
>>> nonempty = torch.FloatTensor([
>>> [0, 0, 10, 9],
>>> ])
>>> assert tuple(bbox_overlaps_2d(empty, nonempty).shape) == (0, 1)
>>> assert tuple(bbox_overlaps_2d(nonempty, empty).shape) == (1, 0)
>>> assert tuple(bbox_overlaps_2d(empty, empty).shape) == (0, 0)
"""
assert mode in ['iou', 'iof']
rows = bboxes1.size(0)
cols = bboxes2.size(0)
if is_aligned:
assert rows == cols
if rows * cols == 0:
return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
if is_aligned:
lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
wh = (rb - lt).clamp(min=0) # [rows, 2]
overlap = wh[:, 0] * wh[:, 1]
area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
bboxes1[:, 3] - bboxes1[:, 1])
if mode == 'iou':
area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
bboxes2[:, 3] - bboxes2[:, 1])
ious = overlap / (area1 + area2 - overlap)
else:
ious = overlap / area1
else:
lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
wh = (rb - lt).clamp(min=0) # [rows, cols, 2]
overlap = wh[:, :, 0] * wh[:, :, 1]
area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (
bboxes1[:, 3] - bboxes1[:, 1])
if mode == 'iou':
area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (
bboxes2[:, 3] - bboxes2[:, 1])
ious = overlap / (area1[:, None] + area2 - overlap)
else:
ious = overlap / (area1[:, None])
return ious
def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou'):
'''
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]
:param mode: mode (str): "iou" (intersection over union) or
iof (intersection over foreground).
:return: iou: (M, N); aligned mode is not supported currently
'''
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d
return boxes_iou3d_gpu(bboxes1, bboxes2, mode)
def bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode='iou', is_aligned=False):
'''
:param bboxes1: Tensor, shape (N, 7) [x, y, z, h, w, l, ry]?
:param bboxes2: Tensor, shape (M, 7) [x, y, z, h, w, l, ry]?
:param mode: mode (str): "iou" (intersection over union) or iof
(intersection over foreground).
:return: iou: (M, N); aligned mode is not supported currently
'''
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_3d
rbboxes1_np = bboxes1.index_select(
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
rbboxes2_np = bboxes2.index_select(
dim=-1, index=bboxes1.new_tensor([0, 1, 3, 4, 6]).long())
# Change the bboxes to bev
# box conversion and iou calculation in torch version on CUDA
# is 10x faster than that in numpy version
bboxes1_bv = box_torch_ops.rbbox2d_to_near_bbox(rbboxes1_np)
bboxes2_bv = box_torch_ops.rbbox2d_to_near_bbox(rbboxes2_np)
ret = bbox_overlaps_2d(
bboxes1_bv, bboxes2_bv, mode=mode, is_aligned=is_aligned)
return ret
from .base_sampler import BaseSampler
from .combined_sampler import CombinedSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .ohem_sampler import OHEMSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .sampling_result import SamplingResult
__all__ = [
'BaseSampler', 'PseudoSampler', 'RandomSampler',
'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
'OHEMSampler', 'SamplingResult'
]
from abc import ABCMeta, abstractmethod
import torch
from .sampling_result import SamplingResult
class BaseSampler(metaclass=ABCMeta):
def __init__(self,
num,
pos_fraction,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
self.num = num
self.pos_fraction = pos_fraction
self.neg_pos_ub = neg_pos_ub
self.add_gt_as_proposals = add_gt_as_proposals
self.pos_sampler = self
self.neg_sampler = self
@abstractmethod
def _sample_pos(self, assign_result, num_expected, **kwargs):
pass
@abstractmethod
def _sample_neg(self, assign_result, num_expected, **kwargs):
pass
def sample(self,
assign_result,
bboxes,
gt_bboxes,
gt_labels=None,
**kwargs):
"""Sample positive and negative bboxes.
This is a simple implementation of bbox sampling given candidates,
assigning results and ground truth bboxes.
Args:
assign_result (:obj:`AssignResult`): Bbox assigning results.
bboxes (Tensor): Boxes to be sampled from.
gt_bboxes (Tensor): Ground truth bboxes.
gt_labels (Tensor, optional): Class labels of ground truth bboxes.
Returns:
:obj:`SamplingResult`: Sampling result.
"""
bboxes = bboxes[:, :4]
gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
if self.add_gt_as_proposals:
bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
assign_result.add_gt_(gt_labels)
gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
gt_flags = torch.cat([gt_ones, gt_flags])
num_expected_pos = int(self.num * self.pos_fraction)
pos_inds = self.pos_sampler._sample_pos(
assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_expected_neg = self.num - num_sampled_pos
if self.neg_pos_ub >= 0:
_pos = max(1, num_sampled_pos)
neg_upper_bound = int(self.neg_pos_ub * _pos)
if num_expected_neg > neg_upper_bound:
num_expected_neg = neg_upper_bound
neg_inds = self.neg_sampler._sample_neg(
assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
neg_inds = neg_inds.unique()
return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
from ..assign_sampling import build_sampler
from .base_sampler import BaseSampler
class CombinedSampler(BaseSampler):
def __init__(self, pos_sampler, neg_sampler, **kwargs):
super(CombinedSampler, self).__init__(**kwargs)
self.pos_sampler = build_sampler(pos_sampler, **kwargs)
self.neg_sampler = build_sampler(neg_sampler, **kwargs)
def _sample_pos(self, **kwargs):
raise NotImplementedError
def _sample_neg(self, **kwargs):
raise NotImplementedError
import numpy as np
import torch
from .random_sampler import RandomSampler
class InstanceBalancedPosSampler(RandomSampler):
def _sample_pos(self, assign_result, num_expected, **kwargs):
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
num_gts = len(unique_gt_inds)
num_per_gt = int(round(num_expected / float(num_gts)) + 1)
sampled_inds = []
for i in unique_gt_inds:
inds = torch.nonzero(assign_result.gt_inds == i.item())
if inds.numel() != 0:
inds = inds.squeeze(1)
else:
continue
if len(inds) > num_per_gt:
inds = self.random_choice(inds, num_per_gt)
sampled_inds.append(inds)
sampled_inds = torch.cat(sampled_inds)
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(
list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
extra_inds = torch.from_numpy(extra_inds).to(
assign_result.gt_inds.device).long()
sampled_inds = torch.cat([sampled_inds, extra_inds])
elif len(sampled_inds) > num_expected:
sampled_inds = self.random_choice(sampled_inds, num_expected)
return sampled_inds
import numpy as np
import torch
from .random_sampler import RandomSampler
class IoUBalancedNegSampler(RandomSampler):
"""IoU Balanced Sampling
arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
Sampling proposals according to their IoU: `floor_fraction` of the needed
RoIs are randomly sampled from proposals whose IoU is lower than
`floor_thr`. The rest are sampled from proposals whose IoU is higher than
`floor_thr`, drawn evenly from `num_bins` bins that evenly partition the
IoU range.
Args:
num (int): number of proposals.
pos_fraction (float): fraction of positive proposals.
floor_thr (float): threshold (minimum) IoU for IoU balanced sampling;
set to -1 to apply IoU balanced sampling to all proposals.
floor_fraction (float): sampling fraction of proposals under floor_thr.
num_bins (int): number of bins in IoU balanced sampling.
"""
def __init__(self,
num,
pos_fraction,
floor_thr=-1,
floor_fraction=0,
num_bins=3,
**kwargs):
super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
**kwargs)
assert floor_thr >= 0 or floor_thr == -1
assert 0 <= floor_fraction <= 1
assert num_bins >= 1
self.floor_thr = floor_thr
self.floor_fraction = floor_fraction
self.num_bins = num_bins
def sample_via_interval(self, max_overlaps, full_set, num_expected):
max_iou = max_overlaps.max()
iou_interval = (max_iou - self.floor_thr) / self.num_bins
per_num_expected = int(num_expected / self.num_bins)
sampled_inds = []
for i in range(self.num_bins):
start_iou = self.floor_thr + i * iou_interval
end_iou = self.floor_thr + (i + 1) * iou_interval
tmp_set = set(
np.where(
np.logical_and(max_overlaps >= start_iou,
max_overlaps < end_iou))[0])
tmp_inds = list(tmp_set & full_set)
if len(tmp_inds) > per_num_expected:
tmp_sampled_set = self.random_choice(tmp_inds,
per_num_expected)
else:
tmp_sampled_set = np.array(tmp_inds, dtype=np.int64)
sampled_inds.append(tmp_sampled_set)
sampled_inds = np.concatenate(sampled_inds)
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(full_set - set(sampled_inds)))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
sampled_inds = np.concatenate([sampled_inds, extra_inds])
return sampled_inds
def _sample_neg(self, assign_result, num_expected, **kwargs):
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
max_overlaps = assign_result.max_overlaps.cpu().numpy()
# balance sampling for negative samples
neg_set = set(neg_inds.cpu().numpy())
if self.floor_thr > 0:
floor_set = set(
np.where(
np.logical_and(max_overlaps >= 0,
max_overlaps < self.floor_thr))[0])
iou_sampling_set = set(
np.where(max_overlaps >= self.floor_thr)[0])
elif self.floor_thr == 0:
floor_set = set(np.where(max_overlaps == 0)[0])
iou_sampling_set = set(
np.where(max_overlaps > self.floor_thr)[0])
else:
floor_set = set()
iou_sampling_set = set(
np.where(max_overlaps > self.floor_thr)[0])
floor_neg_inds = list(floor_set & neg_set)
iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
num_expected_iou_sampling = int(num_expected *
(1 - self.floor_fraction))
if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
if self.num_bins >= 2:
iou_sampled_inds = self.sample_via_interval(
max_overlaps, set(iou_sampling_neg_inds),
num_expected_iou_sampling)
else:
iou_sampled_inds = self.random_choice(
iou_sampling_neg_inds, num_expected_iou_sampling)
else:
iou_sampled_inds = np.array(
iou_sampling_neg_inds, dtype=np.int64)
num_expected_floor = num_expected - len(iou_sampled_inds)
if len(floor_neg_inds) > num_expected_floor:
sampled_floor_inds = self.random_choice(
floor_neg_inds, num_expected_floor)
else:
sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int64)
sampled_inds = np.concatenate(
(sampled_floor_inds, iou_sampled_inds))
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(neg_set - set(sampled_inds)))
if len(extra_inds) > num_extra:
extra_inds = self.random_choice(extra_inds, num_extra)
sampled_inds = np.concatenate((sampled_inds, extra_inds))
sampled_inds = torch.from_numpy(sampled_inds).long().to(
assign_result.gt_inds.device)
return sampled_inds
import torch
from ..transforms import bbox2roi
from .base_sampler import BaseSampler
class OHEMSampler(BaseSampler):
def __init__(self,
num,
pos_fraction,
context,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
add_gt_as_proposals)
if not hasattr(context, 'num_stages'):
self.bbox_roi_extractor = context.bbox_roi_extractor
self.bbox_head = context.bbox_head
else:
self.bbox_roi_extractor = context.bbox_roi_extractor[
context.current_stage]
self.bbox_head = context.bbox_head[context.current_stage]
def hard_mining(self, inds, num_expected, bboxes, labels, feats):
with torch.no_grad():
rois = bbox2roi([bboxes])
bbox_feats = self.bbox_roi_extractor(
feats[:self.bbox_roi_extractor.num_inputs], rois)
cls_score, _ = self.bbox_head(bbox_feats)
loss = self.bbox_head.loss(
cls_score=cls_score,
bbox_pred=None,
labels=labels,
label_weights=cls_score.new_ones(cls_score.size(0)),
bbox_targets=None,
bbox_weights=None,
reduction_override='none')['loss_cls']
_, topk_loss_inds = loss.topk(num_expected)
return inds[topk_loss_inds]
def _sample_pos(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard positive samples
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
assign_result.labels[pos_inds], feats)
def _sample_neg(self,
assign_result,
num_expected,
bboxes=None,
feats=None,
**kwargs):
# Sample some hard negative samples
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
assign_result.labels[neg_inds], feats)
import torch
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult
class PseudoSampler(BaseSampler):
def __init__(self, **kwargs):
pass
def _sample_pos(self, **kwargs):
raise NotImplementedError
def _sample_neg(self, **kwargs):
raise NotImplementedError
def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
pos_inds = torch.nonzero(
assign_result.gt_inds > 0).squeeze(-1).unique()
neg_inds = torch.nonzero(
assign_result.gt_inds == 0).squeeze(-1).unique()
gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
return sampling_result
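# Usage sketch (toy inputs assumed; the stand-in class below carries only
# the two attributes SamplingResult actually reads: gt_inds and labels).
def _demo_pseudo_sampler():
    class _FakeAssignResult(object):
        gt_inds = torch.tensor([1, 0, 0])
        labels = None
    bboxes = torch.tensor([[0., 0., 10., 10.],
                           [20., 20., 30., 30.],
                           [40., 40., 50., 50.]])
    gt_bboxes = torch.tensor([[1., 1., 9., 9.]])
    res = PseudoSampler().sample(_FakeAssignResult(), bboxes, gt_bboxes)
    assert res.pos_inds.tolist() == [0] and res.neg_inds.tolist() == [1, 2]
    return res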
import numpy as np
import torch
from .base_sampler import BaseSampler
class RandomSampler(BaseSampler):
def __init__(self,
num,
pos_fraction,
neg_pos_ub=-1,
add_gt_as_proposals=True,
**kwargs):
super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
add_gt_as_proposals)
@staticmethod
def random_choice(gallery, num):
"""Random select some elements from the gallery.
It seems that Pytorch's implementation is slower than numpy so we use
numpy to randperm the indices.
"""
assert len(gallery) >= num
if isinstance(gallery, list):
gallery = np.array(gallery)
cands = np.arange(len(gallery))
np.random.shuffle(cands)
rand_inds = cands[:num]
if not isinstance(gallery, np.ndarray):
rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
return gallery[rand_inds]
def _sample_pos(self, assign_result, num_expected, **kwargs):
"""Randomly sample some positive samples."""
pos_inds = torch.nonzero(assign_result.gt_inds > 0)
if pos_inds.numel() != 0:
pos_inds = pos_inds.squeeze(1)
if pos_inds.numel() <= num_expected:
return pos_inds
else:
return self.random_choice(pos_inds, num_expected)
def _sample_neg(self, assign_result, num_expected, **kwargs):
"""Randomly sample some negative samples."""
neg_inds = torch.nonzero(assign_result.gt_inds == 0)
if neg_inds.numel() != 0:
neg_inds = neg_inds.squeeze(1)
if len(neg_inds) <= num_expected:
return neg_inds
else:
return self.random_choice(neg_inds, num_expected)
import torch
class SamplingResult(object):
def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
gt_flags):
self.pos_inds = pos_inds
self.neg_inds = neg_inds
self.pos_bboxes = bboxes[pos_inds]
self.neg_bboxes = bboxes[neg_inds]
self.pos_is_gt = gt_flags[pos_inds]
self.num_gts = gt_bboxes.shape[0]
self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
if assign_result.labels is not None:
self.pos_gt_labels = assign_result.labels[pos_inds]
else:
self.pos_gt_labels = None
@property
def bboxes(self):
return torch.cat([self.pos_bboxes, self.neg_bboxes])
import mmcv
import numpy as np
import torch
def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
assert proposals.size() == gt.size()
proposals = proposals.float()
gt = gt.float()
px = (proposals[..., 0] + proposals[..., 2]) * 0.5
py = (proposals[..., 1] + proposals[..., 3]) * 0.5
pw = proposals[..., 2] - proposals[..., 0]
ph = proposals[..., 3] - proposals[..., 1]
gx = (gt[..., 0] + gt[..., 2]) * 0.5
gy = (gt[..., 1] + gt[..., 3]) * 0.5
gw = gt[..., 2] - gt[..., 0]
gh = gt[..., 3] - gt[..., 1]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = torch.log(gw / pw)
dh = torch.log(gh / ph)
deltas = torch.stack([dx, dy, dw, dh], dim=-1)
means = deltas.new_tensor(means).unsqueeze(0)
stds = deltas.new_tensor(stds).unsqueeze(0)
deltas = deltas.sub_(means).div_(stds)
return deltas
def delta2bbox(rois,
deltas,
means=[0, 0, 0, 0],
stds=[1, 1, 1, 1],
max_shape=None,
wh_ratio_clip=16 / 1000):
"""
Apply deltas to shift/scale base boxes.
Typically the rois are anchor or proposed bounding boxes and the deltas are
network outputs used to shift/scale those boxes.
Args:
rois (Tensor): boxes to be transformed. Has shape (N, 4)
deltas (Tensor): encoded offsets with respect to each roi.
Has shape (N, 4). Note N = num_anchors * W * H when rois is a grid
of anchors. Offset encoding follows [1]_.
means (list): denormalizing means for delta coordinates
stds (list): denormalizing standard deviation for delta coordinates
max_shape (tuple[int, int]): maximum bounds for boxes. specifies (H, W)
wh_ratio_clip (float): maximum aspect ratio for boxes.
Returns:
Tensor: boxes with shape (N, 4), where columns represent
tl_x, tl_y, br_x, br_y.
References:
.. [1] https://arxiv.org/abs/1311.2524
Example:
>>> rois = torch.Tensor([[ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 0., 0., 1., 1.],
>>> [ 5., 5., 5., 5.]])
>>> deltas = torch.Tensor([[ 0., 0., 0., 0.],
>>> [ 1., 1., 1., 1.],
>>> [ 0., 0., 2., -1.],
>>> [ 0.7, -1.9, -0.5, 0.3]])
>>> delta2bbox(rois, deltas, max_shape=(32, 32))
tensor([[0.0000, 0.0000, 1.0000, 1.0000],
[0.2817, 0.2817, 4.7183, 4.7183],
[0.0000, 0.6321, 7.3891, 0.3679],
[5.8967, 2.9251, 5.5033, 3.2749]])
"""
means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
denorm_deltas = deltas * stds + means
dx = denorm_deltas[:, 0::4]
dy = denorm_deltas[:, 1::4]
dw = denorm_deltas[:, 2::4]
dh = denorm_deltas[:, 3::4]
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = dw.clamp(min=-max_ratio, max=max_ratio)
dh = dh.clamp(min=-max_ratio, max=max_ratio)
# Compute center of each roi
px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
# Compute width/height of each roi
pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
# Use exp(network energy) to enlarge/shrink each roi
gw = pw * dw.exp()
gh = ph * dh.exp()
# Use network energy to shift the center of each roi
gx = torch.addcmul(px, 1, pw, dx) # gx = px + pw * dx
gy = torch.addcmul(py, 1, ph, dy) # gy = py + ph * dy
# Convert center-xy/width/height to top-left, bottom-right
x1 = gx - gw * 0.5
y1 = gy - gh * 0.5
x2 = gx + gw * 0.5
y2 = gy + gh * 0.5
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
return bboxes
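# Round-trip sketch (boxes assumed): bbox2delta followed by delta2bbox with
# the default means/stds recovers the ground-truth boxes.
def _demo_delta_roundtrip():
    rois = torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 30.]])
    gts = torch.tensor([[1., 2., 11., 12.], [4., 4., 22., 28.]])
    deltas = bbox2delta(rois, gts)
    assert torch.allclose(delta2bbox(rois, deltas), gts, atol=1e-4)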
def bbox_flip(bboxes, img_shape):
"""Flip bboxes horizontally.
Args:
bboxes(Tensor or ndarray): Shape (..., 4*k)
img_shape(tuple): Image shape.
Returns:
Same type as `bboxes`: Flipped bboxes.
"""
if isinstance(bboxes, torch.Tensor):
assert bboxes.shape[-1] % 4 == 0
flipped = bboxes.clone()
flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4]
flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4]
return flipped
elif isinstance(bboxes, np.ndarray):
return mmcv.bbox_flip(bboxes, img_shape)
def bbox_mapping(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from the original image scale to testing scale"""
new_bboxes = bboxes * scale_factor
if flip:
new_bboxes = bbox_flip(new_bboxes, img_shape)
return new_bboxes
def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from testing scale to original image scale"""
new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
new_bboxes = new_bboxes / scale_factor
return new_bboxes
def bbox2roi(bbox_list):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
"""
rois_list = []
for img_id, bboxes in enumerate(bbox_list):
if bboxes.size(0) > 0:
img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
else:
rois = bboxes.new_zeros((0, 5))
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois
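# Usage sketch (toy batch assumed): the per-image batch index is prepended
# as column 0, and an image with no boxes contributes an empty (0, 5) block.
def _demo_bbox2roi():
    b0 = torch.tensor([[0., 0., 5., 5.]])
    b1 = b0.new_zeros((0, 4))
    rois = bbox2roi([b0, b1])
    assert rois.shape == (1, 5) and rois[0, 0] == 0
    return rois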
def roi2bbox(rois):
bbox_list = []
img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
for img_id in img_ids:
inds = (rois[:, 0] == img_id.item())
bbox = rois[inds, 1:]
bbox_list.append(bbox)
return bbox_list
def bbox2result_coco(bboxes, labels, num_classes):
"""Convert detection results to a list of numpy arrays.
Args:
bboxes (Tensor): shape (n, 5)
labels (Tensor): shape (n, )
num_classes (int): class number, including background class
Returns:
list(ndarray): bbox results of each class
"""
if bboxes.shape[0] == 0:
return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
else:
bboxes = bboxes.cpu().numpy()
labels = labels.cpu().numpy()
return [bboxes[labels == i, :] for i in range(num_classes)]
def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
return torch.stack([x1, y1, x2, y2], -1)
def transform_lidar_to_cam(boxes_lidar):
"""
Only transform format, not exactly in camera coords
:param boxes_lidar: (N, 3 or 7) [x, y, z, w, l, h, ry] in LiDAR coords
:return: boxes_cam: (N, 3 or 7) [x, y, z, h, w, l, ry] in camera coords
"""
# boxes_cam = boxes_lidar.new_tensor(boxes_lidar.data)
boxes_cam = boxes_lidar.clone().detach()
boxes_cam[:, 0] = -boxes_lidar[:, 1]
boxes_cam[:, 1] = -boxes_lidar[:, 2]
boxes_cam[:, 2] = boxes_lidar[:, 0]
if boxes_cam.shape[1] > 3:
boxes_cam[:, [3, 4, 5]] = boxes_lidar[:, [5, 3, 4]]
return boxes_cam
def boxes3d_to_bev_torch(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 2]
half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def boxes3d_to_bev_torch_lidar(boxes3d):
"""
:param boxes3d: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords
:return:
boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
"""
boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
cu, cv = boxes3d[:, 0], boxes3d[:, 1]
half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2
boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_w, cv - half_l
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
from .class_names import (coco_classes, dataset_aliases, get_classes,
imagenet_det_classes, imagenet_vid_classes,
kitti_classes, voc_classes)
from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
DistEvalHook, DistEvalmAPHook, KittiDistEvalmAPHook)
from .kitti_utils import kitti_eval, kitti_eval_coco_style
__all__ = [
'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
'coco_classes', 'dataset_aliases', 'get_classes', 'kitti_classes',
'kitti_eval_coco_style', 'kitti_eval', 'CocoDistEvalmAPHook',
'KittiDistEvalmAPHook', 'CocoDistEvalRecallHook', 'DistEvalHook',
'DistEvalmAPHook'
]