Commit 36466f83 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 25d39342 f93167c3
@@ -7,7 +7,7 @@ import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox
 from mmdet.datasets import DATASETS
-from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes
 from .custom_3d import Custom3DDataset
@@ -72,8 +72,10 @@ class NuScenesDataset(Custom3DDataset):
                  classes=None,
                  load_interval=1,
                  with_velocity=True,
-                 test_mode=False,
                  modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
                  eval_version='detection_cvpr_2019'):
         self.load_interval = load_interval
         super().__init__(
@@ -82,6 +84,8 @@ class NuScenesDataset(Custom3DDataset):
             pipeline=pipeline,
             classes=classes,
             modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
             test_mode=test_mode)
         self.with_velocity = with_velocity
@@ -168,11 +172,11 @@ class NuScenesDataset(Custom3DDataset):
             gt_bboxes_3d = np.concatenate([gt_bboxes_3d, gt_velocity], axis=-1)
         # the nuscenes box center is [0.5, 0.5, 0.5], we keep it
-        # the same as KITTI [0.5, 0.5, 0]
+        # the same as KITTI (0.5, 0.5, 0)
         gt_bboxes_3d = LiDARInstance3DBoxes(
             gt_bboxes_3d,
             box_dim=gt_bboxes_3d.shape[-1],
-            origin=[0.5, 0.5, 0.5])
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
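
Note: `origin=(0.5, 0.5, 0.5)` declares that the incoming nuScenes centers are gravity centers; `LiDARInstance3DBoxes` then stores bottom centers, i.e. origin (0.5, 0.5, 0), as the comment above says. A minimal numpy sketch of that z shift (illustrative only, not the library code):

    import numpy as np

    # one box as (x, y, z, w, l, h, yaw), z given at the gravity center
    boxes = np.array([[1.0, 2.0, 0.5, 1.6, 3.9, 1.5, 0.0]])
    src_z, dst_z = 0.5, 0.0  # relative z of source/destination origin
    boxes[:, 2] += boxes[:, 5] * (dst_z - src_z)  # shift by half the height
    print(boxes[0, 2])  # -0.25, the bottom-center z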
@@ -352,26 +356,28 @@ class NuScenesDataset(Custom3DDataset):
 def output_to_nusc_box(detection):
-    box3d = detection['boxes_3d'].numpy()
+    box3d = detection['boxes_3d']
     scores = detection['scores_3d'].numpy()
     labels = detection['labels_3d'].numpy()
+    box_gravity_center = box3d.gravity_center.numpy()
+    box_dims = box3d.dims.numpy()
+    box_yaw = box3d.yaw.numpy()
     # TODO: check whether this is necessary
     # with dir_offset & dir_limit in the head
-    box3d[:, 6] = -box3d[:, 6] - np.pi / 2
-    # the trained model is in [0.5, 0.5, 0],
-    # change them back to nuscenes [0.5, 0.5, 0.5]
-    box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
+    box_yaw = -box_yaw - np.pi / 2
     box_list = []
-    for i in range(box3d.shape[0]):
-        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6])
-        velocity = (*box3d[i, 7:9], 0.0)
+    for i in range(len(box3d)):
+        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+        velocity = (*box3d.tensor[i, 7:9], 0.0)
         # velo_val = np.linalg.norm(box3d[i, 7:9])
         # velo_ori = box3d[i, 6]
         # velocity = (
         #     velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
         box = NuScenesBox(
-            box3d[i, :3],
-            box3d[i, 3:6],
+            box_gravity_center[i],
+            box_dims[i],
             quat,
             label=labels[i],
             score=scores[i],
...
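
Note: `output_to_nusc_box` now reads gravity centers, dims and yaw straight off the box structure before building `NuScenesBox` objects. A hedged sketch of one such conversion, with made-up values:

    import numpy as np
    import pyquaternion
    from nuscenes.utils.data_classes import Box as NuScenesBox

    yaw = -0.3 - np.pi / 2  # yaw after the sign flip applied above
    quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=yaw)
    box = NuScenesBox(
        [10.0, 5.0, 1.0],   # gravity center (x, y, z)
        [1.9, 4.5, 1.6],    # dims (w, l, h)
        quat,
        label=0,
        score=0.9,
        velocity=(1.2, -0.4, 0.0))
    print(box.orientation.yaw_pitch_roll[0])  # recovers the encoded yaw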
@@ -6,7 +6,7 @@ from numba.errors import NumbaPerformanceWarning
 from mmdet3d.core.bbox import box_np_ops
-warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
+warnings.filterwarnings('ignore', category=NumbaPerformanceWarning)
 @numba.njit
@@ -301,7 +301,7 @@ def noise_per_object_v3_(gt_boxes,
                 grot_uppers[..., np.newaxis],
                 size=[num_boxes, num_try])
-    origin = [0.5, 0.5, 0]
+    origin = (0.5, 0.5, 0)
     gt_box_corners = box_np_ops.center_to_corner_box3d(
         gt_boxes[:, :3],
         gt_boxes[:, 3:6],
...
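
Note: `center_to_corner_box3d` with `origin=(0.5, 0.5, 0)` treats the stored z as the bottom face of the box. A small numpy sketch of the corner expansion for a single un-rotated box (an approximation of the idea, not the exact kernel):

    import numpy as np

    center = np.array([0.0, 0.0, 0.0])   # bottom center
    dims = np.array([2.0, 4.0, 1.5])     # (w, l, h)
    origin = np.array([0.5, 0.5, 0.0])   # where `center` sits inside the box

    # unit-cube corners, shifted so that `origin` maps to (0, 0, 0)
    unit = np.array([[x, y, z] for x in (0, 1) for y in (0, 1)
                     for z in (0, 1)], dtype=np.float64) - origin
    corners = center + unit * dims       # (8, 3); yaw rotation would follow
    print(corners.min(0), corners.max(0))  # [-1, -2, 0] .. [1, 2, 1.5]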
@@ -73,14 +73,13 @@ class Collect3D(object):
     def __init__(self,
                  keys,
-                 pcd_shape=[1, 1600, 1408],
                  meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                             'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
-                            'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
-                            'sample_idx', 'pcd_scale_factor', 'pcd_rotation')):
+                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
+                            'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
+                            'pcd_scale_factor', 'pcd_rotation')):
         self.keys = keys
         self.meta_keys = meta_keys
-        self.pcd_shape = pcd_shape
     def __call__(self, results):
         data = {}
@@ -88,7 +87,7 @@ class Collect3D(object):
         for key in self.meta_keys:
             if key in results:
                 img_meta[key] = results[key]
-        img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
         data['img_meta'] = DC(img_meta, cpu_only=True)
         for key in self.keys:
             data[key] = results[key]
...
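
Note: with `pcd_shape` gone, `Collect3D` only needs `keys` (and optionally `meta_keys`) in a pipeline config; anything listed in `meta_keys` that exists in `results`, now including 'box_mode_3d' and 'box_type_3d', ends up in `img_meta`. A hedged config-style usage sketch (the key names are typical, not prescribed):

    # final stage of a training pipeline config
    collect = dict(
        type='Collect3D',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])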
@@ -25,21 +25,19 @@ class IndoorFlipData(object):
     def __call__(self, results):
         points = results['points']
         gt_bboxes_3d = results['gt_bboxes_3d']
-        aligned = True if gt_bboxes_3d.shape[1] == 6 else False
         results['flip_yz'] = False
         results['flip_xz'] = False
         if np.random.random() < self.flip_ratio_yz:
             # Flipping along the YZ plane
             points[:, 0] = -1 * points[:, 0]
-            gt_bboxes_3d[:, 0] = -1 * gt_bboxes_3d[:, 0]
-            if not aligned:
-                gt_bboxes_3d[:, 6] = np.pi - gt_bboxes_3d[:, 6]
+            gt_bboxes_3d.flip('horizontal')
             results['flip_yz'] = True
-        if aligned and np.random.random() < self.flip_ratio_xz:
+        if not gt_bboxes_3d.with_yaw and np.random.random(
+        ) < self.flip_ratio_xz:
             # Flipping along the XZ plane
             points[:, 1] = -1 * points[:, 1]
-            gt_bboxes_3d[:, 1] = -1 * gt_bboxes_3d[:, 1]
+            gt_bboxes_3d.flip('vertical')
             results['flip_xz'] = True
         results['points'] = points
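
Note: `flip('horizontal')` replaces the manual tensor surgery removed above: for a yawed box it negates x and mirrors the yaw angle. A minimal numpy sketch of the YZ-plane flip, matching the deleted lines:

    import numpy as np

    box = np.array([1.0, 2.0, 0.0, 2.0, 4.0, 1.5, 0.3])  # (x, y, z, w, l, h, yaw)
    box[0] = -box[0]           # mirror across the YZ plane
    box[6] = np.pi - box[6]    # yaw flips accordingly
    print(box[[0, 6]])         # [-1.0, pi - 0.3]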
@@ -154,57 +152,18 @@ class IndoorGlobalRotScale(object):
         rot_mat = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
         return rot_mat
-    def _rotate_aligned_boxes(self, input_boxes, rot_mat):
-        """Rotate aligned boxes.
-        Rotate function for the aligned boxes.
-        Args:
-            input_boxes (ndarray): 3D boxes.
-            rot_mat (ndarray): Rotation matrix.
-        Returns:
-            rotated_boxes (ndarray): 3D boxes after rotation.
-        """
-        centers, lengths = input_boxes[:, 0:3], input_boxes[:, 3:6]
-        new_centers = np.dot(centers, rot_mat.T)
-        dx, dy = lengths[:, 0] / 2.0, lengths[:, 1] / 2.0
-        new_x = np.zeros((dx.shape[0], 4))
-        new_y = np.zeros((dx.shape[0], 4))
-        for i, corner in enumerate([(-1, -1), (1, -1), (1, 1), (-1, 1)]):
-            corners = np.zeros((dx.shape[0], 3))
-            corners[:, 0] = corner[0] * dx
-            corners[:, 1] = corner[1] * dy
-            corners = np.dot(corners, rot_mat.T)
-            new_x[:, i] = corners[:, 0]
-            new_y[:, i] = corners[:, 1]
-        new_dx = 2.0 * np.max(new_x, 1)
-        new_dy = 2.0 * np.max(new_y, 1)
-        new_lengths = np.stack((new_dx, new_dy, lengths[:, 2]), axis=1)
-        return np.concatenate([new_centers, new_lengths], axis=1)
     def __call__(self, results):
         points = results['points']
         gt_bboxes_3d = results['gt_bboxes_3d']
-        aligned = True if gt_bboxes_3d.shape[1] == 6 else False
         if self.rot_range is not None:
             assert len(self.rot_range) == 2, \
                 f'Expect length of rot range =2, ' \
                 f'got {len(self.rot_range)}.'
             rot_angle = np.random.uniform(self.rot_range[0], self.rot_range[1])
-            rot_mat = self._rotz(rot_angle)
-            points[:, :3] = np.dot(points[:, :3], rot_mat.T)
-            if aligned:
-                gt_bboxes_3d = self._rotate_aligned_boxes(
-                    gt_bboxes_3d, rot_mat)
-            else:
-                gt_bboxes_3d[:, :3] = np.dot(gt_bboxes_3d[:, :3], rot_mat.T)
-                gt_bboxes_3d[:, 6] -= rot_angle
+            if gt_bboxes_3d.tensor.shape[0] != 0:
+                gt_bboxes_3d.rotate(rot_angle)
+            points[:, :3] = np.dot(points[:, :3], self._rotz(rot_angle).T)
             results['rot_angle'] = rot_angle
         if self.scale_range is not None:
@@ -216,15 +175,14 @@ class IndoorGlobalRotScale(object):
                                             self.scale_range[1])
             points[:, :3] *= scale_ratio
-            gt_bboxes_3d[:, :3] *= scale_ratio
-            gt_bboxes_3d[:, 3:6] *= scale_ratio
+            gt_bboxes_3d.scale(scale_ratio)
             if self.shift_height:
                 points[:, -1] *= scale_ratio
             results['scale_ratio'] = scale_ratio
         results['points'] = points
-        results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
+        results['gt_bboxes_3d'] = gt_bboxes_3d
         return results
     def __repr__(self):
...
@@ -113,7 +113,7 @@ class ObjectSample(object):
         # Trv2c = input_dict['Trv2c']
         # P2 = input_dict['P2']
         if self.sample_2d:
-            img = input_dict['img']  # .astype(np.float32)
+            img = input_dict['img']
             gt_bboxes_2d = input_dict['gt_bboxes']
             # Assume for now 3D & 2D bboxes are the same
             sampled_dict = self.db_sampler.sample_all(
@@ -148,7 +148,7 @@ class ObjectSample(object):
                     [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32)
                 input_dict['gt_bboxes'] = gt_bboxes_2d
-                input_dict['img'] = sampled_dict['img']  # .astype(np.uint8)
+                input_dict['img'] = sampled_dict['img']
         input_dict['gt_bboxes_3d'] = gt_bboxes_3d
         input_dict['gt_labels_3d'] = gt_labels_3d
@@ -304,7 +304,11 @@ class ObjectRangeFilter(object):
         gt_labels_3d = input_dict['gt_labels_3d']
         mask = gt_bboxes_3d.in_range_bev(self.bev_range)
         gt_bboxes_3d = gt_bboxes_3d[mask]
-        gt_labels_3d = gt_labels_3d[mask]
+        # mask is a torch tensor but gt_labels_3d is still a numpy array;
+        # using mask to index gt_labels_3d directly causes a bug when
+        # len(gt_labels_3d) == 1, where mask=1 would be interpreted
+        # as gt_labels_3d[1] and raise an out-of-index error
+        gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)]
         # limit rad to [-pi, pi]
         gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
...
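
Note: the pitfall described in the new comment is easy to reproduce. A small demo of the safe conversion (behavior of the unsafe form depends on the numpy/torch versions in use):

    import numpy as np
    import torch

    labels = np.array([7])       # a single ground-truth label
    mask = torch.tensor([True])  # boolean mask from in_range_bev

    # On some numpy/torch combinations, labels[mask] coerces the
    # one-element tensor to the integer 1 and raises IndexError.
    # Converting to a numpy bool array keeps the boolean semantics:
    print(labels[mask.numpy().astype(bool)])  # -> [7]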
@@ -2,6 +2,7 @@ import os.path as osp
 import numpy as np
+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.datasets import DATASETS
 from .custom_3d import Custom3DDataset
@@ -20,10 +21,18 @@ class ScanNetDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
@@ -35,6 +44,14 @@ class ScanNetDataset(Custom3DDataset):
         else:
             gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32)
             gt_labels_3d = np.zeros((0, ), dtype=np.long)
+        # to target box structure
+        gt_bboxes_3d = DepthInstance3DBoxes(
+            gt_bboxes_3d,
+            box_dim=gt_bboxes_3d.shape[-1],
+            with_yaw=False,
+            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         pts_instance_mask_path = osp.join(self.data_root,
                                           info['pts_instance_mask_path'])
         pts_semantic_mask_path = osp.join(self.data_root,
...
 import numpy as np
+from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.datasets import DATASETS
 from .custom_3d import Custom3DDataset
@@ -16,10 +17,18 @@ class SUNRGBDDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
@@ -32,6 +41,10 @@ class SUNRGBDDataset(Custom3DDataset):
             gt_bboxes_3d = np.zeros((0, 7), dtype=np.float32)
             gt_labels_3d = np.zeros((0, ), dtype=np.long)
+        # to target box structure
+        gt_bboxes_3d = DepthInstance3DBoxes(
+            gt_bboxes_3d, origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
         return anns_results
 from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
+from .nostem_regnet import NoStemRegNet
 from .pointnet2_sa_ssg import PointNet2SASSG
 from .second import SECOND
 __all__ = [
-    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'SECOND',
-    'PointNet2SASSG'
+    'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'NoStemRegNet',
+    'SECOND', 'PointNet2SASSG'
 ]
from mmdet.models.backbones import RegNet
from ..builder import BACKBONES


@BACKBONES.register_module()
class NoStemRegNet(RegNet):
    """RegNet backbone without Stem for 3D detection.
    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .
    Args:
        arch (dict): The parameter of RegNets.
            - w0 (int): initial width
            - wa (float): slope of width
            - wm (float): quantization parameter to quantize the width
            - depth (int): depth of the backbone
            - group_w (int): width of group
            - bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck.
        strides (Sequence[int]): Strides of the first block of each stage.
        base_channels (int): Base channels after stem layer.
        in_channels (int): Number of input image channels. Normally 3.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all params fixed). -1 means
            not freezing any parameters.
        norm_cfg (dict): Dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        zero_init_residual (bool): Whether to use zero init for the last norm
            layer in resblocks to let them behave as identity.
    Example:
        >>> from mmdet3d.models import NoStemRegNet
        >>> import torch
        >>> self = NoStemRegNet(
        ...     arch=dict(
        ...         w0=88,
        ...         wa=26.31,
        ...         wm=2.25,
        ...         group_w=48,
        ...         depth=25,
        ...         bot_mul=1.0))
        >>> self.eval()
        >>> inputs = torch.rand(1, 64, 16, 16)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 96, 8, 8)
        (1, 192, 4, 4)
        (1, 432, 2, 2)
        (1, 1008, 1, 1)
    """

    def __init__(self, arch, **kwargs):
        super(NoStemRegNet, self).__init__(arch, **kwargs)

    def _make_stem_layer(self, in_channels, base_channels):
        # override the parent stem with a no-op: inputs are already
        # feature maps, not raw images
        return

    def forward(self, x):
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)
@@ -381,5 +381,5 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
             bboxes[..., 6] = (
                 dir_rot + self.dir_offset +
                 np.pi * dir_scores.to(bboxes.dtype))
+        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
         return bboxes, scores, labels
@@ -150,13 +150,15 @@ class PartA2RPNHead(Anchor3DHead):
         result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                          mlvl_max_scores, mlvl_label_pred,
                                          mlvl_cls_score, mlvl_dir_scores,
-                                         score_thr, cfg.nms_post, cfg)
+                                         score_thr, cfg.nms_post, cfg,
+                                         input_meta)
         return result
     def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
                            mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
-                           mlvl_dir_scores, score_thr, max_num, cfg):
+                           mlvl_dir_scores, score_thr, max_num, cfg,
+                           input_meta):
         bboxes = []
         scores = []
         labels = []
@@ -202,6 +204,8 @@ class PartA2RPNHead(Anchor3DHead):
             labels = labels[inds]
             scores = scores[inds]
             cls_scores = cls_scores[inds]
+            bboxes = input_meta['box_type_3d'](
+                bboxes, box_dim=self.box_code_size)
             return dict(
                 boxes_3d=bboxes,
                 scores_3d=scores,
@@ -210,7 +214,9 @@ class PartA2RPNHead(Anchor3DHead):
             )
         else:
             return dict(
-                boxes_3d=mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                boxes_3d=input_meta['box_type_3d'](
+                    mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                    box_dim=self.box_code_size),
                 scores_3d=mlvl_bboxes.new_zeros([0]),
                 labels_3d=mlvl_bboxes.new_zeros([0]),
                 cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
@@ -20,7 +20,8 @@ class AnchorTrainMixin(object):
         Args:
             anchor_list (list[list]): Multi level anchors of each image.
-            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+            gt_bboxes_list (list[BaseInstance3DBoxes]): Ground truth
+                bboxes of each image.
             img_metas (list[dict]): Meta info of each image.
         Returns:
...
@@ -5,14 +5,11 @@ import torch.nn.functional as F
 from mmcv.cnn import ConvModule
 from mmdet3d.core import build_bbox_coder, multi_apply
-from mmdet3d.core.bbox.box_torch_ops import boxes3d_to_corners3d_lidar_torch
-from mmdet3d.core.bbox.transforms import upright_depth_to_lidar_torch
 from mmdet3d.core.post_processing import aligned_3d_nms
 from mmdet3d.models.builder import build_loss
 from mmdet3d.models.losses import chamfer_distance
 from mmdet3d.models.model_utils import VoteModule
-from mmdet3d.ops import (PointSAModule, furthest_point_sample,
-                         points_in_boxes_batch)
+from mmdet3d.ops import PointSAModule, furthest_point_sample
 from mmdet.models import HEADS
@@ -276,7 +273,7 @@ class VoteHead(nn.Module):
         Args:
             points (list[Tensor]): Points of each batch.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
             gt_labels_3d (list[Tensor]): gt class labels of each batch.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each batch.
@@ -293,8 +290,9 @@ class VoteHead(nn.Module):
         gt_num = list()
         for index in range(len(gt_labels_3d)):
             if len(gt_labels_3d[index]) == 0:
-                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_zeros(
-                    1, gt_bboxes_3d[index].shape[-1])
+                fake_box = gt_bboxes_3d[index].tensor.new_zeros(
+                    1, gt_bboxes_3d[index].tensor.shape[-1])
+                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
                 gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
                 valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
                 gt_num.append(1)
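
Note: the fake-box padding above keeps downstream target assignment from ever seeing an empty tensor. A plain-torch sketch of the same idea, with hypothetical batch data:

    import torch

    # batch with one empty ground-truth sample
    gt_labels = [torch.tensor([2, 5]), torch.tensor([], dtype=torch.long)]
    gt_boxes = [torch.rand(2, 7), torch.zeros(0, 7)]

    for i in range(len(gt_labels)):
        if len(gt_labels[i]) == 0:
            # pad with a single all-zero "fake" box and label
            gt_boxes[i] = gt_boxes[i].new_zeros(1, gt_boxes[i].shape[-1])
            gt_labels[i] = gt_labels[i].new_zeros(1)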
@@ -359,25 +357,23 @@ class VoteHead(nn.Module):
                             aggregated_points=None):
         assert self.bbox_coder.with_rot or pts_semantic_mask is not None
+        gt_bboxes_3d = gt_bboxes_3d.to(points.device)
         # generate votes target
         num_points = points.shape[0]
         if self.bbox_coder.with_rot:
-            points_lidar, gt_bboxes_3d_lidar = upright_depth_to_lidar_torch(
-                points, gt_bboxes_3d, to_bottom_center=True)
             vote_targets = points.new_zeros([num_points, 3 * self.gt_per_seed])
             vote_target_masks = points.new_zeros([num_points],
                                                  dtype=torch.long)
             vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
-            box_indices_all = points_in_boxes_batch(
-                points_lidar.unsqueeze(0), gt_bboxes_3d_lidar.unsqueeze(0))[0]
-            for i in range(gt_bboxes_3d.shape[0]):
+            box_indices_all = gt_bboxes_3d.points_in_boxes(points)
+            for i in range(gt_labels_3d.shape[0]):
                 box_indices = box_indices_all[:, i]
                 indices = torch.nonzero(box_indices).squeeze(-1)
                 selected_points = points[indices]
                 vote_target_masks[indices] = 1
                 vote_targets_tmp = vote_targets[indices]
-                votes = gt_bboxes_3d[i][:3].unsqueeze(
+                votes = gt_bboxes_3d.gravity_center[i].unsqueeze(
                     0) - selected_points[:, :3]
                 for j in range(self.gt_per_seed):
@@ -438,7 +434,7 @@ class VoteHead(nn.Module):
         size_class_targets = size_class_targets[assignment]
         size_res_targets = size_res_targets[assignment]
-        one_hot_size_targets = gt_bboxes_3d.new_zeros(
+        one_hot_size_targets = gt_bboxes_3d.tensor.new_zeros(
             (proposal_num, self.num_sizes))
         one_hot_size_targets.scatter_(1, size_class_targets.unsqueeze(-1), 1)
         one_hot_size_targets = one_hot_size_targets.unsqueeze(-1).repeat(
@@ -455,38 +451,43 @@ class VoteHead(nn.Module):
                 dir_class_targets, dir_res_targets, center_targets,
                 mask_targets.long(), objectness_targets, objectness_masks)
-    def get_bboxes(self, points, bbox_preds, img_meta, rescale=False):
+    def get_bboxes(self, points, bbox_preds, input_meta, rescale=False):
         # decode boxes
         obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
         sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
         bbox_depth = self.bbox_coder.decode(bbox_preds)
-        points_lidar, bbox_lidar = upright_depth_to_lidar_torch(
-            points[..., :3], bbox_depth, to_bottom_center=True)
         batch_size = bbox_depth.shape[0]
         results = list()
         for b in range(batch_size):
             bbox_selected, score_selected, labels = self.multiclass_nms_single(
-                obj_scores[b], sem_scores[b], bbox_lidar[b], points_lidar[b])
-            results.append((bbox_selected, score_selected, labels))
+                obj_scores[b], sem_scores[b], bbox_depth[b],
+                points[b, ..., :3], input_meta[b])
+            bbox = input_meta[b]['box_type_3d'](
+                bbox_selected,
+                box_dim=bbox_selected.shape[-1],
+                with_yaw=self.bbox_coder.with_rot)
+            results.append((bbox, score_selected, labels))
         return results
-    def multiclass_nms_single(self, obj_scores, sem_scores, bbox,
-                              points_lidar):
-        box_indices = points_in_boxes_batch(
-            points_lidar.unsqueeze(0), bbox.unsqueeze(0))[0]
-        nonempty_box_mask = box_indices.T.sum(1) > 5
-        bbox_classes = torch.argmax(sem_scores, -1)
-        # boxes3d to aligned boxes
-        corner3d = boxes3d_to_corners3d_lidar_torch(bbox)
+    def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
+                              input_meta):
+        bbox = input_meta['box_type_3d'](
+            bbox,
+            box_dim=bbox.shape[-1],
+            with_yaw=self.bbox_coder.with_rot,
+            origin=(0.5, 0.5, 0.5))
+        box_indices = bbox.points_in_boxes(points)
+        corner3d = bbox.corners
         minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
         minmax_box3d[:, :3] = torch.min(corner3d, dim=1)[0]
         minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
+        nonempty_box_mask = box_indices.T.sum(1) > 5
+        bbox_classes = torch.argmax(sem_scores, -1)
         nms_selected = aligned_3d_nms(minmax_box3d[nonempty_box_mask],
                                       obj_scores[nonempty_box_mask],
                                       bbox_classes[nonempty_box_mask],
@@ -502,7 +503,7 @@ class VoteHead(nn.Module):
         if self.test_cfg.per_class_proposal:
             bbox_selected, score_selected, labels = [], [], []
             for k in range(sem_scores.shape[-1]):
-                bbox_selected.append(bbox[selected])
+                bbox_selected.append(bbox[selected].tensor)
                 score_selected.append(obj_scores[selected] *
                                       sem_scores[selected][:, k])
                 labels.append(
@@ -511,7 +512,7 @@ class VoteHead(nn.Module):
             score_selected = torch.cat(score_selected, 0)
             labels = torch.cat(labels, 0)
         else:
-            bbox_selected = bbox[selected]
+            bbox_selected = bbox[selected].tensor
             score_selected = obj_scores[selected]
             labels = bbox_classes[selected]
...
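
Note: `aligned_3d_nms` consumes axis-aligned boxes, so the rotated proposals are first reduced to their corner-wise min/max, as in the lines above. A small torch sketch of that reduction with random stand-in corners:

    import torch

    corner3d = torch.rand(16, 8, 3)  # (num_boxes, 8 corners, xyz)
    minmax_box3d = corner3d.new_empty((corner3d.shape[0], 6))
    minmax_box3d[:, :3] = corner3d.min(dim=1)[0]  # per-box min corner
    minmax_box3d[:, 3:] = corner3d.max(dim=1)[0]  # per-box max corner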
@@ -66,21 +66,30 @@ class MVXTwoStageDetector(BaseDetector):
     def init_weights(self, pretrained=None):
         super(MVXTwoStageDetector, self).init_weights(pretrained)
+        if pretrained is None:
+            img_pretrained = None
+            pts_pretrained = None
+        elif isinstance(pretrained, dict):
+            img_pretrained = pretrained.get('img', None)
+            pts_pretrained = pretrained.get('pts', None)
+        else:
+            raise ValueError(
+                f'pretrained should be a dict, got {type(pretrained)}')
         if self.with_img_backbone:
-            self.img_backbone.init_weights(pretrained=pretrained)
+            self.img_backbone.init_weights(pretrained=img_pretrained)
+        if self.with_pts_backbone:
+            self.pts_backbone.init_weights(pretrained=pts_pretrained)
         if self.with_img_neck:
             if isinstance(self.img_neck, nn.Sequential):
                 for m in self.img_neck:
                     m.init_weights()
             else:
                 self.img_neck.init_weights()
-        if self.with_shared_head:
-            self.img_shared_head.init_weights(pretrained=pretrained)
+        if self.with_img_roi_head:
+            self.img_roi_head.init_weights(img_pretrained)
         if self.with_img_rpn:
             self.img_rpn_head.init_weights()
-        if self.with_img_bbox:
-            self.img_bbox_roi_extractor.init_weights()
-            self.img_bbox_head.init_weights()
         if self.with_pts_bbox:
             self.pts_bbox_head.init_weights()
@@ -103,6 +112,10 @@ class MVXTwoStageDetector(BaseDetector):
     def with_img_backbone(self):
         return hasattr(self, 'img_backbone') and self.img_backbone is not None
+    @property
+    def with_pts_backbone(self):
+        return hasattr(self, 'pts_backbone') and self.pts_backbone is not None
     @property
     def with_fusion(self):
         return hasattr(self,
@@ -120,6 +133,10 @@ class MVXTwoStageDetector(BaseDetector):
     def with_img_rpn(self):
         return hasattr(self, 'img_rpn_head') and self.img_rpn_head is not None
+    @property
+    def with_img_roi_head(self):
+        return hasattr(self, 'img_roi_head') and self.img_roi_head is not None
     def extract_img_feat(self, img, img_meta):
         if self.with_img_backbone:
             if img.dim() == 5 and img.size(0) == 1:
...
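
Note: `init_weights` now expects `pretrained` to be `None` or a dict with separate image and point branches. A hedged config-style usage sketch (the checkpoint URL is illustrative):

    # separate checkpoints per modality
    pretrained = dict(
        img='torchvision://resnet50',  # image backbone weights (example)
        pts=None)                      # train the point branch from scratch
    # model.init_weights(pretrained=pretrained)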
@@ -43,7 +43,7 @@ class VoteNet(SingleStageDetector):
         Args:
             points (list[Tensor]): Points of each batch.
             img_meta (list): Image metas.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each batch.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each batch.
             gt_labels_3d (list[Tensor]): gt class labels of each batch.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each batch.
@@ -86,7 +86,7 @@ class VoteNet(SingleStageDetector):
         Args:
             points (list[Tensor]): Points of each sample.
             img_meta (list): Image metas.
-            gt_bboxes_3d (list[Tensor]): gt bboxes of each sample.
+            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes of each sample.
             gt_labels_3d (list[Tensor]): gt class labels of each sample.
             pts_semantic_mask (None | list[Tensor]): point-wise semantic
                 label of each sample.
...
@@ -474,7 +474,9 @@ class PartA2BboxHead(nn.Module):
             selected_scores = cur_cls_score[selected]
             result_list.append(
-                (selected_bboxes, selected_scores, selected_label_preds))
+                (img_meta[batch_id]['box_type_3d'](selected_bboxes,
+                                                   self.bbox_coder.code_size),
+                 selected_scores, selected_label_preds))
         return result_list
     def multi_class_nms(self,
...
@@ -112,7 +112,7 @@ class PartAggregationROIHead(Base3DRoIHead):
         semantic_results = self.semantic_head(feats_dict['seg_features'])
-        rois = bbox3d2roi([res['boxes_3d'] for res in proposal_list])
+        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
         labels_3d = [res['labels_3d'] for res in proposal_list]
         cls_preds = [res['cls_preds'] for res in proposal_list]
         bbox_results = self._bbox_forward(feats_dict['seg_features'],
...
@@ -62,7 +62,7 @@ def points_in_boxes_batch(points, boxes):
         points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
         boxes (torch.Tensor): [B, T, 7],
             num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
-            (x, y, z) is the bottom center
+            (x, y, z) is the bottom center.
     Returns:
         box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0
...
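
Note: for intuition, a point lies inside an upright (yaw-rotated) LiDAR box if, after rotating it into the box frame, it falls within the half-extents. A hedged numpy sketch for a single box (which of w/l pairs with which axis varies by codebase; this is not the CUDA kernel):

    import numpy as np

    def point_in_lidar_box(pt, box):
        """box = (x, y, z, w, l, h, ry); (x, y, z) is the bottom center."""
        cx, cy, cz, w, l, h, ry = box
        # rotate the point into the box's local frame
        dx, dy = pt[0] - cx, pt[1] - cy
        lx = dx * np.cos(-ry) - dy * np.sin(-ry)
        ly = dx * np.sin(-ry) + dy * np.cos(-ry)
        return (abs(lx) <= w / 2 and abs(ly) <= l / 2
                and 0 <= pt[2] - cz <= h)

    print(point_in_lidar_box((0.2, 0.1, 0.5), (0, 0, 0, 2, 4, 1.5, 0.0)))  # True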
@@ -33,7 +33,7 @@ def test_lidar_boxes3d():
         ]],
         dtype=np.float32)
     bottom_center_box = LiDARInstance3DBoxes(
-        gravity_center_box, origin=[0.5, 0.5, 0.5])
+        gravity_center_box, origin=(0.5, 0.5, 0.5))
     expected_tensor = torch.tensor(
         [[
             -5.24223238e+00, 4.00209696e+01, -4.76429619e-01, 2.06200000e+00,
...
@@ -5,58 +5,58 @@ from mmdet3d.core.evaluation.indoor_eval import average_precision, indoor_eval
 def test_indoor_eval():
+    from mmdet3d.core.bbox.structures import DepthInstance3DBoxes, Box3DMode
-    det_infos = [{
-        'labels_3d': torch.Tensor([4, 4, 3, 17, 2]),
-        'boxes_3d': torch.Tensor(
-            [[2.8734498, -0.187645, -0.02600911, 0.6761766, 0.56542563, 0.5953976, 0.],
-             [0.4031701, -3.2346897, 0.07118589, 0.73209894, 0.8711227, 0.5148243, 0.],
-             [-1.274147, -2.351935, 0.07428858, 1.4534658, 2.563081, 0.8587492, 0.],
-             [3.2214177, 0.7899204, 0.03836718, 0.05321002, 1.2607929, 0.1411697, 0.],
-             [-1.6804854, 2.399011, -0.13099639, 0.5608963, 0.5052759, 0.6770297, 0.]]),
-        'scores_3d': torch.Tensor(
-            [0.9980684, 0.9747082, 0.9709939, 0.9482147, 0.84311247])
-    }, {
-        'labels_3d': torch.Tensor([17.0, 17.0, 3.0, 4.0, 17.0]),
-        'boxes_3d': torch.Tensor(
-            [[3.2112048e+00, 5.6918913e-01, -8.6143613e-04, 1.1942449e-01, 1.2988183e+00, 1.9952521e-01, 0.0000000e+00],
-             [3.248133, 0.4324184, 0.20038621, 0.17225507, 1.2736976, 0.32598814, 0.],
-             [-1.2793612, -2.3155289, 0.15598366, 1.2822601, 2.2253945, 0.8361754, 0.],
-             [2.8716104, -0.26416883, -0.04933786, 0.8190681, 0.60294986, 0.5769499, 0.],
-             [-2.2109854, 0.19445783, -0.01614259, 0.40659013, 0.35370222, 0.3290567, 0.]]),
-        'scores_3d': torch.Tensor(
-            [0.9965866, 0.99507546, 0.9916463, 0.9702634, 0.95803124])
-    }]
+    det_infos = [{
+        'labels_3d': torch.tensor([0, 1, 2, 2, 0, 3, 1, 2, 3, 2]),
+        'boxes_3d': DepthInstance3DBoxes(
+            torch.tensor(
+                [[-2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01, 1.6506e+00, 0.0000e+00],
+                 [-3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01, 9.6854e-01, 6.1755e-01, 0.0000e+00],
+                 [-3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01, 4.4708e-01, 6.2538e-01, 0.0000e+00],
+                 [4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01, 2.8679e-01, 1.6060e+00, 0.0000e+00],
+                 [6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00, 4.0162e-01, 1.7303e+00, 0.0000e+00],
+                 [-5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01, 7.5957e-01, 9.6930e-01, 0.0000e+00],
+                 [2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01, 4.1861e-01, 3.7339e-01, 0.0000e+00],
+                 [-1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00, 1.0566e+00, 1.3704e+00, 0.0000e+00],
+                 [8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00, 2.0772e-01, 9.6792e-01, 0.0000e+00],
+                 [2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02, 3.5713e-01, 4.5638e-01, 0.0000e+00]]),
+            origin=(0.5, 0.5, 0)),
+        'scores_3d': torch.tensor(
+            [1.7516e-05, 1.0167e-06, 8.4486e-07, 7.1048e-02, 6.4274e-05,
+             1.5003e-07, 5.8102e-06, 1.9399e-08, 5.3126e-07, 1.8630e-09])
+    }]
     label2cat = {
@@ -64,168 +64,67 @@ def test_indoor_eval():
         1: 'bed',
         2: 'chair',
         3: 'sofa',
-        4: 'table',
-        5: 'door',
-        6: 'window',
-        7: 'bookshelf',
-        8: 'picture',
-        9: 'counter',
-        10: 'desk',
-        11: 'curtain',
-        12: 'refrigerator',
-        13: 'showercurtrain',
-        14: 'toilet',
-        15: 'sink',
-        16: 'bathtub',
-        17: 'garbagebin'
     }
     gt_annos = [{
         'gt_num':
-        12,
+        10,
         'gt_boxes_upright_depth':
-        np.array(
-            [[2.54621506, -0.89397144, 0.54144311, 2.90430856, 1.78370309, 0.93826824],
-             [3.36553669, 0.31014189, 0.38758934, 1.2504847, 0.71281439, 0.3908577],
-             [0.17272574, 2.90289116, 0.27966365, 0.56292468, 0.8512187, 0.4987641],
-             [2.39521956, 1.67557895, 0.40407273, 1.23511314, 0.49469376, 0.62720448],
-             [-2.41815996, -1.69104958, 0.22304082, 0.55816364, 0.48154473, 0.66580439],
-             [-0.18044823, 2.9227581, 0.24480903, 0.36165208, 0.44468427, 0.53103662],
-             [-2.44398379, -2.1610918, 0.23631772, 0.52229881, 0.63388562, 0.66596919],
-             [-2.01452827, -2.9558928, 0.8139953, 1.61732554, 0.60224247, 1.79295814],
-             [-0.61519569, 3.24365234, 1.24335742, 2.11988783, 0.26006722, 1.77748263],
-             [-2.64330673, 0.59929442, 1.59422684, 0.07352924, 0.28620502, 0.35408139],
-             [-0.58128822, 3.23699641, 0.06050609, 1.94151425, 0.16413498, 0.20168215],
-             [0.15343043, 2.24693251, 0.22470728, 0.49632657, 0.47379827, 0.43063563]]),
+        np.array(
+            [[-2.4089e-03, -3.3174e+00, 4.9438e-01, 2.1668e+00, 2.8431e-01, 1.6506e+00, 0.0000e+00],
+             [-3.4269e-01, -2.7565e+00, 2.8144e-02, 6.8554e-01, 9.6854e-01, 6.1755e-01, 0.0000e+00],
+             [-3.8320e+00, -1.0646e+00, 1.7074e-01, 2.4981e-01, 4.4708e-01, 6.2538e-01, 0.0000e+00],
+             [4.1073e-01, 3.3757e+00, 3.4311e-01, 8.0617e-01, 2.8679e-01, 1.6060e+00, 0.0000e+00],
+             [6.1199e-01, -3.1041e+00, 4.1873e-01, 1.2310e+00, 4.0162e-01, 1.7303e+00, 0.0000e+00],
+             [-5.9877e-01, -2.6011e+00, 1.1148e+00, 1.5704e-01, 7.5957e-01, 9.6930e-01, 0.0000e+00],
+             [2.7462e-01, -3.0088e+00, 6.5231e-02, 8.1208e-01, 4.1861e-01, 3.7339e-01, 0.0000e+00],
+             [-1.4704e+00, -2.0024e+00, 2.7479e-01, 1.7888e+00, 1.0566e+00, 1.3704e+00, 0.0000e+00],
+             [8.2727e-02, -3.1160e+00, 2.5690e-01, 1.4054e+00, 2.0772e-01, 9.6792e-01, 0.0000e+00],
+             [2.6896e+00, 1.9881e+00, 1.1566e+00, 9.9885e-02, 3.5713e-01, 4.5638e-01, 0.0000e+00]]),
         'class':
-        np.array([3, 4, 4, 17, 2, 2, 2, 7, 11, 8, 17, 2])
-    }, {
-        'gt_num':
-        12,
-        'gt_boxes_upright_depth':
-        np.array(
-            [[3.48649406, 0.24238291, 0.48358256, 1.34014034, 0.72744983, 0.40819243],
-             [-0.50371504, 3.25293231, 1.25988698, 2.12330937, 0.27563906, 1.80230701],
-             [2.58820581, -0.99452347, 0.57732373, 2.94801593, 1.67463434, 0.88743341],
-             [-1.9116497, -2.88811016, 0.70502496, 1.62386703, 0.60732293, 1.5857985],
-             [-2.55324745, 0.6909315, 1.59045517, 0.07264495, 0.32018459, 0.3506999],
-             [-2.3436017, -2.1659112, 0.254318, 0.5333302, 0.56154585, 0.64904487],
-             [-2.32046795, -1.6880455, 0.26138437, 0.5586133, 0.59743834, 0.6378752],
-             [-0.46495372, 3.22126102, 0.03188983, 1.92557108, 0.15160203, 0.24680007],
-             [0.28087699, 2.88433838, 0.2495866, 0.57001019, 0.85177159, 0.5689255],
-             [-0.05292395, 2.90586925, 0.23064148, 0.39113954, 0.43746281, 0.52981442],
-             [0.25537968, 2.25156307, 0.24932587, 0.48192862, 0.51398182, 0.38040417],
-             [2.60432816, 1.62303996, 0.42025632, 1.23775268, 0.51761389, 0.66034317]]),
-        'class':
-        np.array([4, 11, 3, 7, 8, 2, 2, 17, 4, 2, 2, 17])
+        np.array([0, 1, 2, 0, 0, 3, 1, 3, 3, 2])
     }]
-    ret_value = indoor_eval(gt_annos, det_infos, [0.25, 0.5], label2cat)
-    garbagebin_AP_25 = ret_value['garbagebin_AP_0.25']
-    sofa_AP_25 = ret_value['sofa_AP_0.25']
-    table_AP_25 = ret_value['table_AP_0.25']
-    chair_AP_25 = ret_value['chair_AP_0.25']
-    mAP_25 = ret_value['mAP_0.25']
-    garbagebin_rec_25 = ret_value['garbagebin_rec_0.25']
-    sofa_rec_25 = ret_value['sofa_rec_0.25']
-    table_rec_25 = ret_value['table_rec_0.25']
-    chair_rec_25 = ret_value['chair_rec_0.25']
-    mAR_25 = ret_value['mAR_0.25']
-    sofa_AP_50 = ret_value['sofa_AP_0.50']
-    table_AP_50 = ret_value['table_AP_0.50']
-    chair_AP_50 = ret_value['chair_AP_0.50']
-    mAP_50 = ret_value['mAP_0.50']
-    sofa_rec_50 = ret_value['sofa_rec_0.50']
-    table_rec_50 = ret_value['table_rec_0.50']
-    chair_rec_50 = ret_value['chair_rec_0.50']
-    mAR_50 = ret_value['mAR_0.50']
-    assert garbagebin_AP_25 == 0.25
-    assert sofa_AP_25 == 1.0
-    assert table_AP_25 == 0.75
-    assert chair_AP_25 == 0.125
-    assert abs(mAP_25 - 0.303571) < 0.001
-    assert garbagebin_rec_25 == 0.25
-    assert sofa_rec_25 == 1.0
-    assert table_rec_25 == 0.75
-    assert chair_rec_25 == 0.125
-    assert abs(mAR_25 - 0.303571) < 0.001
-    assert sofa_AP_50 == 0.25
-    assert abs(table_AP_50 - 0.416667) < 0.001
-    assert chair_AP_50 == 0.125
-    assert abs(mAP_50 - 0.113095) < 0.001
-    assert sofa_rec_50 == 0.5
-    assert table_rec_50 == 0.5
-    assert chair_rec_50 == 0.125
-    assert abs(mAR_50 - 0.160714) < 0.001
+    ret_value = indoor_eval(
+        gt_annos,
+        det_infos, [0.25, 0.5],
+        label2cat,
+        box_type_3d=DepthInstance3DBoxes,
+        box_mode_3d=Box3DMode.DEPTH)
+    assert abs(ret_value['cabinet_AP_0.25'] - 0.666667) < 1e-3
+    assert abs(ret_value['bed_AP_0.25'] - 1.0) < 1e-3
+    assert abs(ret_value['chair_AP_0.25'] - 0.5) < 1e-3
+    assert abs(ret_value['mAP_0.25'] - 0.708333) < 1e-3
+    assert abs(ret_value['mAR_0.25'] - 0.833333) < 1e-3
 def test_average_precision():
...
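
Note: `average_precision` computes the usual area under the precision-recall curve. A minimal numpy sketch of the interpolated-area variant (not necessarily the exact implementation under test):

    import numpy as np

    def average_precision(recalls, precisions):
        # append sentinels, take the precision envelope, integrate over recall
        mrec = np.concatenate(([0.0], recalls, [1.0]))
        mpre = np.concatenate(([0.0], precisions, [0.0]))
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = max(mpre[i - 1], mpre[i])
        idx = np.where(mrec[1:] != mrec[:-1])[0]
        return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])

    print(average_precision(np.array([0.5, 1.0]), np.array([1.0, 0.5])))  # 0.75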