Commit b70ecdb5 authored by wuyuefeng, committed by zhangwenwei

Indoor box structure

parent 9fcfd78f
......@@ -148,7 +148,7 @@ def center_to_corner_box3d(centers,
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relative to the smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
[type]: [description]
......
......@@ -74,7 +74,7 @@ def rotation_3d_in_axis(points, angles, axis=0):
def center_to_corner_box3d(centers,
dims,
angles,
origin=[0.5, 1.0, 0.5],
origin=(0.5, 1.0, 0.5),
axis=1):
"""convert kitti locations, dimensions and angles to corners
......@@ -83,7 +83,7 @@ def center_to_corner_box3d(centers,
dims (float array, shape=[N, 3]): dimensions in kitti label file.
angles (float array, shape=[N]): rotation_y in kitti label file.
origin (list or array or float): origin point relative to the smallest point.
use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
axis (int): rotation axis. 1 for camera and 2 for lidar.
Returns:
[type]: [description]
......
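For context, a minimal sketch of how the two origin conventions are used when expanding boxes to corners. This assumes the function is exposed via the public `box_np_ops` module, as in the rest of mmdet3d:

```python
import numpy as np
from mmdet3d.core.bbox import box_np_ops

# One camera-frame box: center (x, y, z), dims (l, h, w), rotation_y.
centers = np.array([[1.0, 2.0, 10.0]], dtype=np.float32)
dims = np.array([[4.0, 1.5, 1.8]], dtype=np.float32)
angles = np.array([0.3], dtype=np.float32)

# Camera boxes rotate around y (axis=1) with origin (0.5, 1.0, 0.5);
# LiDAR boxes would use origin=(0.5, 0.5, 0) and axis=2 instead.
corners = box_np_ops.center_to_corner_box3d(
    centers, dims, angles, origin=(0.5, 1.0, 0.5), axis=1)
print(corners.shape)  # (1, 8, 3)
```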
......@@ -28,28 +28,28 @@ class PartialBinBasedBBoxCoder(BaseBBoxCoder):
"""Encode ground truth to prediction targets.
Args:
gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): Gt classes.
gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes with shape (n, 7).
gt_labels_3d (Tensor): gt classes.
Returns:
tuple: Targets of center, size and direction.
"""
# generate center target
center_target = gt_bboxes_3d[..., 0:3]
center_target = gt_bboxes_3d.gravity_center
# generate bbox size target
size_class_target = gt_labels_3d
size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
size_res_target = gt_bboxes_3d.dims - gt_bboxes_3d.tensor.new_tensor(
self.mean_sizes)[size_class_target]
# generate dir target
box_num = gt_bboxes_3d.shape[0]
box_num = gt_labels_3d.shape[0]
if self.with_rot:
(dir_class_target,
dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)
else:
dir_class_target = gt_labels_3d.new_zeros(box_num)
dir_res_target = gt_bboxes_3d.new_zeros(box_num)
dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)
return (center_target, size_class_target, size_res_target,
dir_class_target, dir_res_target)
......
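The refactor above swaps raw tensor slicing for box-structure accessors. A small sketch of those accessors, using `DepthInstance3DBoxes`, one of the `BaseInstance3DBoxes` subclasses touched by this commit:

```python
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

# Two bottom-centered (x, y, z, dx, dy, dz, yaw) boxes.
boxes = DepthInstance3DBoxes(
    torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                  [1., 1., 0., 1., 1., 1., 0.5]]))

print(boxes.gravity_center)  # (N, 3): bottom centers lifted by dz / 2
print(boxes.dims)            # (N, 3): box sizes, replaces [..., 3:6]
print(boxes.yaw)             # (N,):   rotations, replaces [..., 6]
```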
......@@ -83,8 +83,6 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
Return:
iou: (M, N), aligned mode is not supported currently
"""
# TODO: check the input dimension meanings,
# this is inconsistent with that in bbox_overlaps_nearest_3d
assert bboxes1.size(-1) == bboxes2.size(-1) == 7
assert coordinate in ['camera', 'lidar']
......
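A usage sketch for `bbox_overlaps_3d` with the asserted (N, 7) layout; the underlying IoU op is CUDA-only, so this assumes a GPU build of mmdet3d:

```python
import torch
from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d

bboxes1 = torch.tensor([[0., 0., 0., 2., 2., 2., 0.]]).cuda()
bboxes2 = torch.tensor([[0., 0., 0., 2., 2., 2., 0.],
                        [5., 5., 0., 1., 1., 1., 0.]]).cuda()
iou = bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='lidar')
print(iou.shape)  # (1, 2); unaligned: every pair is compared
```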
......@@ -12,7 +12,7 @@ class BaseInstance3DBoxes(object):
Note:
The box is bottom centered, i.e. the relative position of origin in
the box is [0.5, 0.5, 0].
the box is (0.5, 0.5, 0).
Args:
tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
......@@ -23,11 +23,11 @@ class BaseInstance3DBoxes(object):
If False, the value of yaw will be set to 0 as minmax boxes.
Default to True.
origin (tuple): The relative position of origin in the box.
Default to [0.5, 0.5, 0]. This will guide the box to be converted to
[0.5, 0.5, 0] mode.
Default to (0.5, 0.5, 0). This will guide the box to be converted to
(0.5, 0.5, 0) mode.
"""
def __init__(self, tensor, box_dim=7, with_yaw=True, origin=[0.5, 0.5, 0]):
def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
if isinstance(tensor, torch.Tensor):
device = tensor.device
else:
......@@ -40,18 +40,21 @@ class BaseInstance3DBoxes(object):
dtype=torch.float32, device=device)
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
if not with_yaw and tensor.shape[-1] == 6:
if tensor.shape[-1] == 6:
# If the dimension of boxes is 6, we expand box_dim by padding
# 0 as a fake yaw and set with_yaw to False.
assert box_dim == 6
fake_rot = tensor.new_zeros(tensor.shape[0], 1)
tensor = torch.cat((tensor, fake_rot), dim=-1)
self.box_dim = box_dim + 1
self.with_yaw = False
else:
self.box_dim = box_dim
self.with_yaw = with_yaw
self.tensor = tensor
if origin != [0.5, 0.5, 0]:
dst = self.tensor.new_tensor([0.5, 0.5, 0])
if origin != (0.5, 0.5, 0):
dst = self.tensor.new_tensor((0.5, 0.5, 0))
src = self.tensor.new_tensor(origin)
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
......@@ -121,7 +124,7 @@ class BaseInstance3DBoxes(object):
The relative positions of the centers in different kinds of
boxes are different, e.g., the relative center of a box is
[0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar.
(0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
It is recommended to use `bottom_center` or `gravity_center`
for clearer usage.
......
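The `origin` handling above recenters boxes on construction via `xyz += dims * (dst - src)`. A small numeric sketch with a gravity-centered input and the default bottom-centered target:

```python
import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# Gravity-centered box at z = 1 with height dz = 2.
box = LiDARInstance3DBoxes(
    torch.tensor([[0., 0., 1., 2., 2., 2., 0.]]), origin=(0.5, 0.5, 0.5))

# dst - src = (0, 0, -0.5), so z drops by dz / 2 to the bottom center.
print(box.tensor[0, 2])  # tensor(0.)
```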
......@@ -22,7 +22,7 @@ class Box3DMode(IntEnum):
| /
left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
Coordinates in camera:
......@@ -49,7 +49,7 @@ class Box3DMode(IntEnum):
| /
0 ------> x right
The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
"""
......
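A conversion sketch between two of the modes described above (depth: x right, y front; LiDAR: x front, y left), assuming `Box3DMode` is exported alongside the box classes:

```python
import torch
from mmdet3d.core.bbox import Box3DMode, DepthInstance3DBoxes

depth_boxes = DepthInstance3DBoxes(
    torch.tensor([[1., 2., 0., 2., 2., 2., 0.]]))

# Depth -> LiDAR swaps the horizontal axes: (x, y) -> (y, -x).
lidar_boxes = depth_boxes.convert_to(Box3DMode.LIDAR)
print(lidar_boxes.tensor[0, :3])  # tensor([ 2., -1.,  0.])
```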
......@@ -20,7 +20,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
v
down y
The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
and the yaw is around the y axis, thus the rotation axis=1.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of z.
......
import numpy as np
import torch
from mmdet3d.ops import points_in_boxes_batch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
......@@ -17,7 +18,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
| /
0 ------> x right (yaw=0)
The relative coordinate of bottom center in a Depth box is [0.5, 0.5, 0],
The relative coordinate of bottom center in a Depth box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the positive direction of x axis, and increases from
the positive direction of x to the positive direction of y.
......@@ -74,7 +75,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
device=dims.device, dtype=dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
# use relative origin [0.5, 0.5, 0]
# use relative origin (0.5, 0.5, 0)
corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
......@@ -201,3 +202,30 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)
def points_in_boxes(self, points):
"""Find points that are in boxes (CUDA)
Args:
points (torch.Tensor): [1, M, 3] or [M, 3], [x, y, z]
in LiDAR coordinate.
Returns:
torch.Tensor: The index of the box each point is in, in shape (M, T).
"""
from .box_3d_mode import Box3DMode
# to lidar
points_lidar = points.clone()
points_lidar = points_lidar[..., [1, 0, 2]]
points_lidar[..., 1] *= -1
if points.dim() == 2:
points_lidar = points_lidar.unsqueeze(0)
else:
assert points.dim() == 3 and points_lidar.shape[0] == 1
boxes_lidar = self.convert_to(Box3DMode.LIDAR).tensor
boxes_lidar = boxes_lidar.to(points.device).unsqueeze(0)
box_idxs_of_pts = points_in_boxes_batch(points_lidar, boxes_lidar)
return box_idxs_of_pts.squeeze(0)
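A usage sketch for the new `points_in_boxes` method; it wraps the CUDA op `points_in_boxes_batch`, so a GPU build is assumed:

```python
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[0., 0., 0., 2., 2., 2., 0.]]))
points = torch.tensor([[0., 0., 1.],          # inside the box
                       [5., 5., 5.]]).cuda()  # outside

# (M, T) indicator: entry [m, t] is 1 if point m falls in box t.
box_idx = boxes.points_in_boxes(points)
```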
......@@ -18,7 +18,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
| /
(yaw=pi) left y <------ 0
The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
and the yaw is around the z axis, thus the rotation axis=2.
The yaw is 0 at the negative direction of y axis, and increases from
the negative direction of y to the positive direction of x.
......
......@@ -3,47 +3,6 @@ import torch
from mmcv.utils import print_log
from terminaltables import AsciiTable
from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import bbox_overlaps_3d
def boxes3d_depth_to_lidar(boxes3d, mid_to_bottom=True):
"""Boxes3d depth to lidar.
Flip X-right, Y-forward, Z-up to X-forward, Y-left, Z-up.
Args:
boxes3d (ndarray): (N, 7) [x, y, z, w, l, h, r] in depth coords.
Return:
boxes3d_lidar (ndarray): (N, 7) [x, y, z, l, h, w, r] in LiDAR coords.
"""
boxes3d_lidar = boxes3d.copy()
boxes3d_lidar[..., [0, 1, 2, 3, 4, 5]] = boxes3d_lidar[...,
[1, 0, 2, 4, 3, 5]]
boxes3d_lidar[..., 1] *= -1
if mid_to_bottom:
boxes3d_lidar[..., 2] -= boxes3d_lidar[..., 5] / 2
return boxes3d_lidar
def get_iou_gpu(bb1, bb2):
"""Get IoU.
Compute IoU of two bounding boxes.
Args:
bb1 (ndarray): [x, y, z, w, l, h, ry] in LiDAR.
bb2 (ndarray): [x, y, z, h, w, l, ry] in LiDAR.
Returns:
ans_iou (tensor): The IoU of the two sets of boxes.
"""
bb1 = torch.from_numpy(bb1).float().cuda()
bb2 = torch.from_numpy(bb2).float().cuda()
iou3d = bbox_overlaps_3d(bb1, bb2, mode='iou', coordinate='lidar')
return iou3d.cpu().numpy()
def average_precision(recalls, precisions, mode='area'):
"""Calculate average precision (for single or multiple scales).
......@@ -61,7 +20,10 @@ def average_precision(recalls, precisions, mode='area'):
if recalls.ndim == 1:
recalls = recalls[np.newaxis, :]
precisions = precisions[np.newaxis, :]
assert recalls.shape == precisions.shape and recalls.ndim == 2
assert recalls.shape == precisions.shape
assert recalls.ndim == 2
num_scales = recalls.shape[0]
ap = np.zeros(num_scales, dtype=np.float32)
if mode == 'area':
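In 'area' mode the AP is the area under the precision-recall curve, VOC-style. A self-contained sketch of that mode for a single scale; the function name here is illustrative, not the module's API:

```python
import numpy as np

def average_precision_area(recalls, precisions):
    """Area under the PR curve for one scale (illustrative sketch)."""
    mrec = np.hstack(([0.], recalls, [1.]))
    mpre = np.hstack(([0.], precisions, [0.]))
    # Make precision monotonically non-increasing from right to left.
    for i in range(len(mpre) - 2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i + 1])
    # Sum precision over the recall steps.
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])
```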
......@@ -103,40 +65,42 @@ def eval_det_cls(pred, gt, iou_thr=None):
float: scalar, average precision.
"""
# construct gt objects
class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}}
# {img_id: {'bbox': box structure, 'det': matched list}}
class_recs = {}
npos = 0
for img_id in gt.keys():
bbox = np.array(gt[img_id])
cur_gt_num = len(gt[img_id])
if cur_gt_num != 0:
gt_cur = torch.zeros([cur_gt_num, 7], dtype=torch.float32)
for i in range(cur_gt_num):
gt_cur[i] = gt[img_id][i].tensor
bbox = gt[img_id][0].new_box(gt_cur)
else:
bbox = gt[img_id]
det = [[False] * len(bbox) for i in iou_thr]
npos += len(bbox)
class_recs[img_id] = {'bbox': bbox, 'det': det}
# pad empty list to all other imgids
for img_id in pred.keys():
if img_id not in gt:
class_recs[img_id] = {'bbox': np.array([]), 'det': []}
# construct dets
image_ids = []
confidence = []
BB = []
ious = []
for img_id in pred.keys():
cur_num = len(pred[img_id])
if cur_num == 0:
continue
BB_cur = np.zeros((cur_num, 7)) # hard code
pred_cur = torch.zeros((cur_num, 7), dtype=torch.float32)
box_idx = 0
for box, score in pred[img_id]:
image_ids.append(img_id)
confidence.append(score)
BB.append(box)
BB_cur[box_idx] = box
pred_cur[box_idx] = box.tensor
box_idx += 1
gt_cur = class_recs[img_id]['bbox'].astype(float)
pred_cur = box.new_box(pred_cur)
gt_cur = class_recs[img_id]['bbox']
if len(gt_cur) > 0:
# calculate iou in each image
iou_cur = get_iou_gpu(BB_cur, gt_cur)
iou_cur = pred_cur.overlaps(pred_cur, gt_cur)
for i in range(cur_num):
ious.append(iou_cur[i])
else:
......@@ -157,12 +121,12 @@ def eval_det_cls(pred, gt, iou_thr=None):
for d in range(nd):
R = class_recs[image_ids[d]]
iou_max = -np.inf
BBGT = R['bbox'].astype(float)
BBGT = R['bbox']
cur_iou = ious[d]
if BBGT.size > 0:
if len(BBGT) > 0:
# compute overlaps
for j in range(BBGT.shape[0]):
for j in range(len(BBGT)):
# iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))
iou = cur_iou[j]
if iou > iou_max:
......@@ -194,61 +158,22 @@ def eval_det_cls(pred, gt, iou_thr=None):
return ret
def eval_map_recall(det_infos, gt_infos, ovthresh=None):
def eval_map_recall(pred, gt, ovthresh=None):
"""Evaluate mAP and recall.
Generic function to compute precision/recall for object detection
for multiple classes.
Args:
det_infos (list[dict]): Information of detection results, the dict
includes the following keys
- labels_3d (Tensor): Labels of boxes.
- boxes_3d (Tensor): 3d bboxes.
- scores_3d (Tensor): Scores of boxes.
gt_infos (list[dict]): information of gt results, the dict
includes the following keys
- labels_3d (Tensor): labels of boxes.
- boxes_3d (Tensor): 3d bboxes.
pred (dict): Information of detection results,
which maps class_id to predictions.
gt (dict): Information of gt results, which maps class_id to gt.
ovthresh (list[float]): iou threshold.
Default: None.
Return:
tuple[dict]: dict results of recall, AP, and precision for all classes.
"""
pred_all = {}
scan_cnt = 0
for det_info in det_infos:
pred_all[scan_cnt] = det_info
scan_cnt += 1
pred = {} # map {classname: pred}
gt = {} # map {classname: gt}
for img_id in pred_all.keys():
for i in range(len(pred_all[img_id]['labels_3d'])):
label = pred_all[img_id]['labels_3d'].numpy()[i]
bbox = pred_all[img_id]['boxes_3d'].numpy()[i]
score = pred_all[img_id]['scores_3d'].numpy()[i]
if label not in pred:
pred[int(label)] = {}
if img_id not in pred[label]:
pred[int(label)][img_id] = []
if label not in gt:
gt[int(label)] = {}
if img_id not in gt[label]:
gt[int(label)][img_id] = []
pred[int(label)][img_id].append((bbox, score))
for img_id in range(len(gt_infos)):
for i in range(len(gt_infos[img_id]['labels_3d'])):
label = gt_infos[img_id]['labels_3d'][i]
bbox = gt_infos[img_id]['boxes_3d'][i]
if label not in gt:
gt[label] = {}
if img_id not in gt[label]:
gt[label][img_id] = []
gt[label][img_id].append(bbox)
ret_values = []
for classname in gt.keys():
......@@ -272,14 +197,24 @@ def eval_map_recall(det_infos, gt_infos, ovthresh=None):
return recall, precision, ap
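The new `pred`/`gt` arguments are nested mappings keyed by class and image. A sketch of the expected structure, assuming the `eval_map_recall` defined above (the IoU computation inside needs a GPU build):

```python
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

box = DepthInstance3DBoxes(torch.tensor([[0., 0., 0., 2., 2., 2., 0.]]))

# pred: {class_id: {img_id: [(box, score), ...]}}
# gt:   {class_id: {img_id: [box, ...]}}
pred = {0: {0: [(box[0], 0.9)]}}
gt = {0: {0: [box[0]]}}
recall, precision, ap = eval_map_recall(pred, gt, ovthresh=[0.25])
```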
def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None):
def indoor_eval(gt_annos,
dt_annos,
metric,
label2cat,
logger=None,
box_type_3d=None,
box_mode_3d=None):
"""Scannet Evaluation.
Evaluate the result of the detection.
Args:
gt_annos (list[dict]): GT annotations.
dt_annos (list[dict]): Detection annotations.
dt_annos (list[dict]): Detection annotations. The dict
includes the following keys:
- labels_3d (Tensor): Labels of boxes.
- boxes_3d (BaseInstance3DBoxes): 3d bboxes in Depth coordinate.
- scores_3d (Tensor): Scores of boxes.
metric (list[float]): AP IoU thresholds.
label2cat (dict): {label: cat}.
logger (logging.Logger | str | None): The way to print the mAP
......@@ -288,24 +223,48 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None):
Return:
dict: Dict of results.
"""
gt_infos = []
for gt_anno in gt_annos:
assert len(dt_annos) == len(gt_annos)
pred = {} # map {class_id: pred}
gt = {} # map {class_id: gt}
for img_id in range(len(dt_annos)):
# parse detected annotations
det_anno = dt_annos[img_id]
for i in range(len(det_anno['labels_3d'])):
label = det_anno['labels_3d'].numpy()[i]
bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i]
score = det_anno['scores_3d'].numpy()[i]
if label not in pred:
pred[int(label)] = {}
if img_id not in pred[label]:
pred[int(label)][img_id] = []
if label not in gt:
gt[int(label)] = {}
if img_id not in gt[label]:
gt[int(label)][img_id] = []
pred[int(label)][img_id].append((bbox, score))
# parse gt annotations
gt_anno = gt_annos[img_id]
if gt_anno['gt_num'] != 0:
# convert to lidar coor for evaluation
bbox_lidar_bottom = boxes3d_depth_to_lidar(
gt_anno['gt_boxes_upright_depth'], mid_to_bottom=True)
if bbox_lidar_bottom.shape[-1] == 6:
bbox_lidar_bottom = np.pad(bbox_lidar_bottom, ((0, 0), (0, 1)),
'constant')
gt_infos.append(
dict(boxes_3d=bbox_lidar_bottom, labels_3d=gt_anno['class']))
gt_boxes = box_type_3d(
gt_anno['gt_boxes_upright_depth'],
box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d)
labels_3d = gt_anno['class']
else:
gt_infos.append(
dict(
boxes_3d=np.array([], dtype=np.float32),
labels_3d=np.array([], dtype=np.int64)))
gt_boxes = box_type_3d(np.array([], dtype=np.float32))
labels_3d = np.array([], dtype=np.int64)
for i in range(len(labels_3d)):
label = labels_3d[i]
bbox = gt_boxes[i]
if label not in gt:
gt[label] = {}
if img_id not in gt[label]:
gt[label][img_id] = []
gt[label][img_id].append(bbox)
rec, prec, ap = eval_map_recall(dt_annos, gt_infos, metric)
rec, prec, ap = eval_map_recall(pred, gt, metric)
ret_dict = dict()
header = ['classes']
table_columns = [[label2cat[label]
......
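A call sketch for the extended signature; `gt_annos`/`dt_annos` follow the docstring above, and the category mapping here is a placeholder:

```python
from mmdet3d.core.bbox import Box3DMode, DepthInstance3DBoxes

ret_dict = indoor_eval(
    gt_annos,    # list[dict] with 'gt_boxes_upright_depth', 'class', 'gt_num'
    dt_annos,    # list[dict] with boxes_3d / labels_3d / scores_3d
    metric=[0.25, 0.5],
    label2cat={0: 'bed', 1: 'chair'},  # placeholder mapping
    box_type_3d=DepthInstance3DBoxes,
    box_mode_3d=Box3DMode.DEPTH)
```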
......@@ -186,7 +186,13 @@ class Custom3DDataset(Dataset):
gt_annos = [info['annos'] for info in self.data_infos]
label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
ret_dict = indoor_eval(
gt_annos, results, iou_thr, label2cat, logger=logger)
gt_annos,
results,
iou_thr,
label2cat,
logger=logger,
box_type_3d=self.box_type_3d,
box_mode_3d=self.box_mode_3d)
return ret_dict
......
......@@ -172,11 +172,11 @@ class NuScenesDataset(Custom3DDataset):
gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
# the nuscenes box center is [0.5, 0.5, 0.5], we keep it
# the same as KITTI [0.5, 0.5, 0]
# the same as KITTI (0.5, 0.5, 0)
gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
origin=[0.5, 0.5, 0.5]).convert_to(self.box_mode_3d)
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d,
......
......@@ -6,7 +6,7 @@ from numba.errors import NumbaPerformanceWarning
from mmdet3d.core.bbox import box_np_ops
warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
warnings.filterwarnings('ignore', category=NumbaPerformanceWarning)
@numba.njit
......@@ -301,7 +301,7 @@ def noise_per_object_v3_(gt_boxes,
grot_uppers[..., np.newaxis],
size=[num_boxes, num_try])
origin = [0.5, 0.5, 0]
origin = (0.5, 0.5, 0)
gt_box_corners = box_np_ops.center_to_corner_box3d(
gt_boxes[:, :3],
gt_boxes[:, 3:6],
......
......@@ -25,21 +25,19 @@ class IndoorFlipData(object):
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
aligned = True if gt_bboxes_3d.shape[1] == 6 else False
results['flip_yz'] = False
results['flip_xz'] = False
if np.random.random() < self.flip_ratio_yz:
# Flipping along the YZ plane
points[:, 0] = -1 * points[:, 0]
gt_bboxes_3d[:, 0] = -1 * gt_bboxes_3d[:, 0]
if not aligned:
gt_bboxes_3d[:, 6] = np.pi - gt_bboxes_3d[:, 6]
gt_bboxes_3d.flip('horizontal')
results['flip_yz'] = True
if aligned and np.random.random() < self.flip_ratio_xz:
if not gt_bboxes_3d.with_yaw and np.random.random(
) < self.flip_ratio_xz:
# Flipping along the XZ plane
points[:, 1] = -1 * points[:, 1]
gt_bboxes_3d[:, 1] = -1 * gt_bboxes_3d[:, 1]
gt_bboxes_3d.flip('vertical')
results['flip_xz'] = True
results['points'] = points
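The flips now go through the box structure instead of raw tensor math. A sketch of the two calls used above; for depth boxes, 'horizontal' mirrors x and 'vertical' mirrors y, with yaw adjusted internally:

```python
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[1., 2., 0., 2., 1., 1., 0.3]]))

boxes.flip('horizontal')  # YZ-plane flip: x -> -x, yaw -> pi - yaw
boxes.flip('vertical')    # XZ-plane flip: y -> -y, yaw -> -yaw
```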
......@@ -154,57 +152,18 @@ class IndoorGlobalRotScale(object):
rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
return rot_mat
def _rotate_aligned_boxes(self, input_boxes, rot_mat):
"""Rotate aligned boxes.
Rotate function for the aligned boxes.
Args:
input_boxes (ndarray): 3D boxes.
rot_mat (ndarray): Rotation matrix.
Returns:
rotated_boxes (ndarray): 3D boxes after rotation.
"""
centers, lengths = input_boxes[:, 0:3], input_boxes[:, 3:6]
new_centers = np.dot(centers, rot_mat.T)
dx, dy = lengths[:, 0] / 2.0, lengths[:, 1] / 2.0
new_x = np.zeros((dx.shape[0], 4))
new_y = np.zeros((dx.shape[0], 4))
for i, corner in enumerate([(-1, -1), (1, -1), (1, 1), (-1, 1)]):
corners = np.zeros((dx.shape[0], 3))
corners[:, 0] = corner[0] * dx
corners[:, 1] = corner[1] * dy
corners = np.dot(corners, rot_mat.T)
new_x[:, i] = corners[:, 0]
new_y[:, i] = corners[:, 1]
new_dx = 2.0 * np.max(new_x, 1)
new_dy = 2.0 * np.max(new_y, 1)
new_lengths = np.stack((new_dx, new_dy, lengths[:, 2]), axis=1)
return np.concatenate([new_centers, new_lengths], axis=1)
def __call__(self, results):
points = results['points']
gt_bboxes_3d = results['gt_bboxes_3d']
aligned = True if gt_bboxes_3d.shape[1] == 6 else False
if self.rot_range is not None:
assert len(self.rot_range) == 2, \
f'Expect length of rot range =2, ' \
f'got {len(self.rot_range)}.'
rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
rot_mat = self._rotz(rot_angle)
points[:, :3] = np.dot(points[:, :3], rot_mat.T)
if aligned:
gt_bboxes_3d = self._rotate_aligned_boxes(
gt_bboxes_3d, rot_mat)
else:
gt_bboxes_3d[:, :3] = np.dot(gt_bboxes_3d[:, :3], rot_mat.T)
gt_bboxes_3d[:, 6] -= rot_angle
if gt_bboxes_3d.tensor.shape[0] != 0:
gt_bboxes_3d.rotate(rot_angle)
points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
results['rot_angle'] = rot_angle
if self.scale_range is not None:
......@@ -216,15 +175,14 @@ class IndoorGlobalRotScale(object):
self.scale_range[1])
points[:, :3] *= scale_ratio
gt_bboxes_3d[:, :3] *= scale_ratio
gt_bboxes_3d[:, 3:6] *= scale_ratio
gt_bboxes_3d.scale(scale_ratio)
if self.shift_height:
points[:, -1] *= scale_ratio
results['scale_ratio'] = scale_ratio
results['points'] = points
results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
results['gt_bboxes_3d'] = gt_bboxes_3d
return results
def __repr__(self):
......
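Rotation and scaling are likewise delegated to the box structure; a small sketch matching the calls in `__call__` above:

```python
import numpy as np
import torch
from mmdet3d.core.bbox import DepthInstance3DBoxes

boxes = DepthInstance3DBoxes(
    torch.tensor([[1., 0., 0., 2., 1., 1., 0.]]))

if boxes.tensor.shape[0] != 0:  # rotate() is skipped for empty boxes
    boxes.rotate(np.pi / 2)     # rotate around z by rot_angle
boxes.scale(2.0)                # scales centers and dims together
```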
......@@ -113,7 +113,7 @@ class ObjectSample(object):
# Trv2c = input_dict['Trv2c']
# P2 = input_dict['P2']
if self.sample_2d:
img = input_dict['img'] # .astype(np.float32)
img = input_dict['img']
gt_bboxes_2d = input_dict['gt_bboxes']
# Assume for now 3D & 2D bboxes are the same
sampled_dict = self.db_sampler.sample_all(
......@@ -148,7 +148,7 @@ class ObjectSample(object):
[gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
input_dict['gt_bboxes'] = gt_bboxes_2d
input_dict['img'] = sampled_dict['img'] # .astype(np.uint8)
input_dict['img'] = sampled_dict['img']
input_dict['gt_bboxes_3d'] = gt_bboxes_3d
input_dict['gt_labels_3d'] = gt_labels_3d
......
......@@ -2,6 +2,7 @@ import os.path as osp
import numpy as np
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
from .custom_3d import Custom3DDataset
......@@ -43,6 +44,14 @@ class ScanNetDataset(Custom3DDataset):
else:
gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
gt_labels_3d = np.zeros((0, ), dtype=np.long)
# to target box structure
gt_bboxes_3d = DepthInstance3DBoxes(
gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1],
with_yaw=False,
origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
pts_instance_mask_path = osp.join(self.data_root,
info['pts_instance_mask_path'])
pts_semantic_mask_path = osp.join(self.data_root,
......
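The ScanNet annotation boxes on disk are gravity-centered and axis-aligned (no yaw); the snippet above normalizes them into the target box structure. A standalone sketch of that conversion:

```python
import numpy as np
from mmdet3d.core.bbox import Box3DMode, DepthInstance3DBoxes

raw = np.array([[0., 0., 1., 2., 2., 2.]], dtype=np.float32)  # (x,y,z,dx,dy,dz)
boxes = DepthInstance3DBoxes(
    raw, box_dim=raw.shape[-1], with_yaw=False,
    origin=(0.5, 0.5, 0.5)).convert_to(Box3DMode.DEPTH)

# A zero yaw column is padded and the center drops to the bottom.
print(boxes.tensor)  # tensor([[0., 0., 0., 2., 2., 2., 0.]])
```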
import numpy as np
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet.datasets import DATASETS
from .custom_3d import Custom3DDataset
......@@ -40,6 +41,10 @@ class SUNRGBDDataset(Custom3DDataset):
gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)
gt_labels_3d = np.zeros((0, ), dtype=np.long)
# to target box structure
gt_bboxes_3d = DepthInstance3DBoxes(
gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
return anns_results
......@@ -20,7 +20,8 @@ class AnchorTrainMixin(object):
Args:
anchor_list (list[list]): Multi level anchors of each image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
gt_bboxes_list (list[BaseInstance3DBoxes]): Ground truth
bboxes of each image.
img_metas (list[dict]): Meta info of each image.
Returns:
......
......@@ -5,14 +5,11 @@ import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmdet3d.core import build_bbox_coder, multi_apply
from mmdet3d.core.bbox.box_torch_ops import boxes3d_to_corners3d_lidar_torch
from mmdet3d.core.bbox.transforms import upright_depth_to_lidar_torch
from mmdet3d.core.post_processing import aligned_3d_nms
from mmdet3d.models.builder import build_loss
from mmdet3d.models.losses import chamfer_distance
from mmdet3d.models.model_utils import VoteModule
from mmdet3d.ops import (PointSAModule, furthest_point_sample,
points_in_boxes_batch)
from mmdet3d.ops import PointSAModule, furthest_point_sample
from mmdet.models import HEADS
......@@ -276,7 +273,7 @@ class VoteHead(nn.Module):
Args:
points (list[Tensor]): Points of each batch.
gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
gt_labels_3d (list[Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each batch.
......@@ -293,8 +290,9 @@ class VoteHead(nn.Module):
gt_num = list()
for index in range(len(gt_labels_3d)):
if len(gt_labels_3d[index]) == 0:
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_zeros(
1, gt_bboxes_3d[index].shape[-1])
fake_box = gt_bboxes_3d[index].tensor.new_zeros(
1, gt_bboxes_3d[index].tensor.shape[-1])
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
gt_num.append(1)
......@@ -359,25 +357,23 @@ class VoteHead(nn.Module):
aggregated_points=None):
assert self.bbox_coder.with_rot or pts_semantic_mask is not None
gt_bboxes_3d = gt_bboxes_3d.to(points.device)
# generate votes target
num_points = points.shape[0]
if self.bbox_coder.with_rot:
points_lidar, gt_bboxes_3d_lidar = upright_depth_to_lidar_torch(
points, gt_bboxes_3d, to_bottom_center=True)
vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
vote_target_masks = points.new_zeros([num_points],
dtype=torch.long)
vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
box_indices_all = points_in_boxes_batch(
points_lidar.unsqueeze(0), gt_bboxes_3d_lidar.unsqueeze(0))[0]
for i in range(gt_bboxes_3d.shape[0]):
box_indices_all = gt_bboxes_3d.points_in_boxes(points)
for i in range(gt_labels_3d.shape[0]):
box_indices = box_indices_all[:, i]
indices = torch.nonzero(box_indices).squeeze(-1)
selected_points = points[indices]
vote_target_masks[indices] = 1
vote_targets_tmp = vote_targets[indices]
votes = gt_bboxes_3d[i][:3].unsqueeze(
votes = gt_bboxes_3d.gravity_center[i].unsqueeze(
0) - selected_points[:, :3]
for j in range(self.gt_per_seed):
......@@ -438,7 +434,7 @@ class VoteHead(nn.Module):
size_class_targets = size_class_targets[assignment]
size_res_targets = size_res_targets[assignment]
one_hot_size_targets = gt_bboxes_3d.new_zeros(
one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(
(proposal_num, self.num_sizes))
one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)
one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(
......@@ -455,38 +451,43 @@ class VoteHead(nn.Module):
dir_class_targets, dir_res_targets, center_targets,
mask_targets.long(), objectness_targets, objectness_masks)
def get_bboxes(self, points, bbox_preds, img_meta, rescale=False):
def get_bboxes(self, points, bbox_preds, input_meta, rescale=False):
# decode boxes
obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
bbox_depth = self.bbox_coder.decode(bbox_preds)
points_lidar, bbox_lidar = upright_depth_to_lidar_torch(
points[..., :3], bbox_depth, to_bottom_center=True)
batch_size = bbox_depth.shape[0]
results = list()
for b in range(batch_size):
bbox_selected, score_selected, labels = self.multiclass_nms_single(
obj_scores[b], sem_scores[b], bbox_lidar[b], points_lidar[b])
results.append((bbox_selected, score_selected, labels))
obj_scores[b], sem_scores[b], bbox_depth[b],
points[b, ..., :3], input_meta[b])
bbox = input_meta[b]['box_type_3d'](
bbox_selected,
box_dim=bbox_selected.shape[-1],
with_yaw=self.bbox_coder.with_rot)
results.append((bbox, score_selected, labels))
return results
def multiclass_nms_single(self, obj_scores, sem_scores, bbox,
points_lidar):
box_indices = points_in_boxes_batch(
points_lidar.unsqueeze(0), bbox.unsqueeze(0))[0]
nonempty_box_mask = box_indices.T.sum(1) > 5
def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
input_meta):
bbox = input_meta['box_type_3d'](
bbox,
box_dim=bbox.shape[-1],
with_yaw=self.bbox_coder.with_rot,
origin=(0.5, 0.5, 0.5))
box_indices = bbox.points_in_boxes(points)
bbox_classes = torch.argmax(sem_scores, -1)
# boxes3d to aligned boxes
corner3d = boxes3d_to_corners3d_lidar_torch(bbox)
corner3d = bbox.corners
minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]
minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
nonempty_box_mask = box_indices.T.sum(1) > 5
bbox_classes = torch.argmax(sem_scores, -1)
nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],
obj_scores[nonempty_box_mask],
bbox_classes[nonempty_box_mask],
......@@ -502,7 +503,7 @@ class VoteHead(nn.Module):
if self.test_cfg.per_class_proposal:
bbox_selected, score_selected, labels = [], [], []
for k in range(sem_scores.shape[-1]):
bbox_selected.append(bbox[selected])
bbox_selected.append(bbox[selected].tensor)
score_selected.append(obj_scores[selected] *
sem_scores[selected][:, k])
labels.append(
......@@ -511,7 +512,7 @@ class VoteHead(nn.Module):
score_selected = torch.cat(score_selected, 0)
labels = torch.cat(labels, 0)
else:
bbox_selected = bbox[selected]
bbox_selected = bbox[selected].tensor
score_selected = obj_scores[selected]
labels = bbox_classes[selected]
......
......@@ -43,7 +43,7 @@ class VoteNet(SingleStageDetector):
Args:
points (list[Tensor]): Points of each batch.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
gt_labels_3d (list[Tensor]): gt class labels of each batch.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each batch.
......@@ -86,7 +86,7 @@ class VoteNet(SingleStageDetector):
Args:
points (list[Tensor]): Points of each sample.
img_meta (list): Image metas.
gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each sample.
gt_labels_3d (list[Tensor]): gt class labels of each sample.
pts_semantic_mask (None | list[Tensor]): point-wise semantic
label of each sample.
......