"git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "f4531ab73b24af7fbb9a5b114b50c1ce761724d9"
Commit 2d91ef83 authored by zhangwenwei

Encapsulate network output into box structure

parent 26c08c7e
yapf -r -i --style .style.yapf mmdet3d/ configs/ tests/ tools/
isort -rc mmdet3d/ configs/ tests/ tools/
flake8 .
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
    type='MVXFasterRCNNV2',
    pts_voxel_layer=dict(
        max_num_points=64,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
    ),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter',
        in_channels=64,
        output_shape=[400, 400],  # checked from PointCloud3D
    ),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256],
    ),
    pts_neck=dict(
        type='FPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        in_channels=[64, 128, 256],
        out_channels=256,
        start_level=0,
        num_outs=3,
    ),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=256,
        feat_channels=256,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
            scales=[1, 2, 4],
            sizes=[
                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
                [0.5774, 1.7321, 1.],  # 1/sqrt(3)
                [1., 1., 1.],
                [0.4, 0.4, 1],
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
    pts=dict(
        assigner=dict(  # for Car
            type='MaxIoUAssigner',
            iou_calculator=dict(type='BboxOverlapsNearest3D'),
            pos_iou_thr=0.6,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    pts=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.2,
        score_thr=0.05,
        min_bbox_size=0,
        max_num=500
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
    ))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
    use_lidar=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(),
    sample_groups=dict(
        bus=4,
        trailer=4,
        truck=4,
    ))
file_client_args = dict(backend='disk')
# file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
#     }))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.3925, 0.3925],
        scaling_uniform_noise=[0.95, 1.05],
        trans_normal_noise=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
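
The file above is a standard MMDetection-style Python config: everything is plain module-level assignment, so it can be loaded and inspected with mmcv. A minimal sketch, assuming mmcv is installed; the file path is an assumption based on work_dir:

from mmcv import Config

# Hypothetical path; point this at wherever the config above is saved.
cfg = Config.fromfile('configs/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py')
print(cfg.model.type)            # 'MVXFasterRCNNV2'
print(cfg.data.samples_per_gpu)  # 4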
@@ -83,7 +83,9 @@ def bbox3d2result(bboxes, scores, labels):
         dict(Tensor): bbox results in cpu mode
     """
     return dict(
-        boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())
+        boxes_3d=bboxes.to('cpu'),
+        scores_3d=scores.cpu(),
+        labels_3d=labels.cpu())
 
 
 def upright_depth_to_lidar_torch(points=None,
......
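
With this change, boxes_3d stays a box structure end to end, so the device transfer goes through the structure's own .to() rather than a raw tensor's .cpu(). A minimal usage sketch with made-up inputs, assuming bbox3d2result and LiDARInstance3DBoxes are importable from mmdet3d.core as in this codebase:

import torch
from mmdet3d.core import bbox3d2result
from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(torch.rand(5, 7))  # 5 fake 7-dim boxes
scores = torch.rand(5)
labels = torch.randint(0, 10, (5,))

result = bbox3d2result(boxes, scores, labels)
assert result['boxes_3d'].tensor.shape == (5, 7)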
@@ -6,11 +6,39 @@ import numpy as np
 from torch.utils.data import Dataset
 
 from mmdet.datasets import DATASETS
+from ..core.bbox import (Box3DMode, CameraInstance3DBoxes,
+                         DepthInstance3DBoxes, LiDARInstance3DBoxes)
 from .pipelines import Compose
 
 
 @DATASETS.register_module()
 class Custom3DDataset(Dataset):
+    """Customized 3D dataset.
+
+    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
+    dataset.
+
+    Args:
+        data_root (str): Path of dataset root.
+        ann_file (str): Path of annotation file.
+        pipeline (list[dict], optional): Pipeline used for data processing.
+            Defaults to None.
+        classes (tuple[str], optional): Classes used in the dataset.
+            Defaults to None.
+        modality (dict, optional): Modality to specify the sensor data used
+            as input. Defaults to None.
+        box_type_3d (str, optional): Type of 3D box of this dataset.
+            Based on the `box_type_3d`, the dataset will encapsulate the box
+            to its original format then convert them to `box_type_3d`.
+            Defaults to 'LiDAR'. Available options include
+            - 'LiDAR': box in LiDAR coordinates
+            - 'Depth': box in depth coordinates, usually for indoor datasets
+            - 'Camera': box in camera coordinates
+        filter_empty_gt (bool, optional): Whether to filter empty GT.
+            Defaults to True.
+        test_mode (bool, optional): Whether the dataset is in test mode.
+            Defaults to False.
+    """
 
     def __init__(self,
                  data_root,
@@ -18,6 +46,7 @@ class Custom3DDataset(Dataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='LiDAR',
                  filter_empty_gt=True,
                  test_mode=False):
         super().__init__()
@@ -26,6 +55,7 @@ class Custom3DDataset(Dataset):
         self.test_mode = test_mode
         self.modality = modality
         self.filter_empty_gt = filter_empty_gt
+        self.get_box_type(box_type_3d)
 
         self.CLASSES = self.get_classes(classes)
         self.data_infos = self.load_annotations(self.ann_file)
@@ -40,6 +70,21 @@ class Custom3DDataset(Dataset):
     def load_annotations(self, ann_file):
         return mmcv.load(ann_file)
 
+    def get_box_type(self, box_type):
+        box_type_lower = box_type.lower()
+        if box_type_lower == 'lidar':
+            self.box_type_3d = LiDARInstance3DBoxes
+            self.box_mode_3d = Box3DMode.LIDAR
+        elif box_type_lower == 'camera':
+            self.box_type_3d = CameraInstance3DBoxes
+            self.box_mode_3d = Box3DMode.CAM
+        elif box_type_lower == 'depth':
+            self.box_type_3d = DepthInstance3DBoxes
+            self.box_mode_3d = Box3DMode.DEPTH
+        else:
+            raise ValueError('Only "box_type" of "camera", "lidar", "depth"'
+                             f' are supported, got {box_type}')
+
     def get_data_info(self, index):
         info = self.data_infos[index]
         sample_idx = info['point_cloud']['lidar_idx']
@@ -61,6 +106,8 @@ class Custom3DDataset(Dataset):
         results['bbox3d_fields'] = []
         results['pts_mask_fields'] = []
         results['pts_seg_fields'] = []
+        results['box_type_3d'] = self.box_type_3d
+        results['box_mode_3d'] = self.box_mode_3d
 
     def prepare_train_data(self, index):
         input_dict = self.get_data_info(index)
......
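
get_box_type is a plain string-to-class dispatch: each dataset ends up holding both the box class (used to construct boxes) and the coordinate-mode enum (used for conversions). A standalone mirror of that mapping for illustration; the dataset stores the pair on self rather than in a dict, and _BOX_TYPES is a made-up name:

from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
                               DepthInstance3DBoxes, LiDARInstance3DBoxes)

_BOX_TYPES = {
    'lidar': (LiDARInstance3DBoxes, Box3DMode.LIDAR),
    'camera': (CameraInstance3DBoxes, Box3DMode.CAM),
    'depth': (DepthInstance3DBoxes, Box3DMode.DEPTH),
}

# Equivalent of get_box_type('LiDAR'):
box_type_3d, box_mode_3d = _BOX_TYPES['LiDAR'.lower()]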
@@ -9,7 +9,7 @@ import torch
 from mmcv.utils import print_log
 
 from mmdet.datasets import DATASETS
-from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops
+from ..core.bbox import Box3DMode, CameraInstance3DBoxes
 from .custom_3d import Custom3DDataset
 from .utils import remove_dontcare
@@ -27,6 +27,8 @@ class KittiDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__(
             data_root=data_root,
@@ -34,6 +36,8 @@ class KittiDataset(Custom3DDataset):
             pipeline=pipeline,
             classes=classes,
             modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
             test_mode=test_mode)
         self.root_split = os.path.join(self.data_root, split)
@@ -90,7 +94,7 @@ class KittiDataset(Custom3DDataset):
         # convert gt_bboxes_3d to velodyne coordinates
         gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
-            Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
+            self.box_mode_3d, np.linalg.inv(rect @ Trv2c))
         gt_bboxes = annos['bbox']
 
         selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
@@ -395,73 +399,66 @@ class KittiDataset(Custom3DDataset):
     def convert_valid_bboxes(self, box_dict, info):
         # TODO: refactor this function
-        final_box_preds = box_dict['boxes_3d']
-        final_scores = box_dict['scores_3d']
-        final_labels = box_dict['labels_3d']
+        box_preds = box_dict['boxes_3d']
+        scores = box_dict['scores_3d']
+        labels = box_dict['labels_3d']
         sample_idx = info['image']['image_idx']
-        final_box_preds[:, -1] = box_np_ops.limit_period(
-            final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2)
+        # TODO: remove the hack of yaw
+        box_preds.tensor[:, -1] = box_preds.tensor[:, -1] - np.pi
+        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
 
-        if final_box_preds.shape[0] == 0:
+        if len(box_preds) == 0:
             return dict(
-                bbox=final_box_preds.new_zeros([0, 4]).numpy(),
-                box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
-                box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
-                scores=final_box_preds.new_zeros([0]).numpy(),
-                label_preds=final_box_preds.new_zeros([0, 4]).numpy(),
-                sample_idx=sample_idx,
-            )
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
+                sample_idx=sample_idx)
 
         from mmdet3d.core.bbox import box_torch_ops
         rect = info['calib']['R0_rect'].astype(np.float32)
         Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
         P2 = info['calib']['P2'].astype(np.float32)
         img_shape = info['image']['image_shape']
-        rect = final_box_preds.new_tensor(rect)
-        Trv2c = final_box_preds.new_tensor(Trv2c)
-        P2 = final_box_preds.new_tensor(P2)
-        final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
-            final_box_preds, rect, Trv2c)
-        locs = final_box_preds_camera[:, :3]
-        dims = final_box_preds_camera[:, 3:6]
-        angles = final_box_preds_camera[:, 6]
-        camera_box_origin = [0.5, 1.0, 0.5]
-        box_corners = box_torch_ops.center_to_corner_box3d(
-            locs, dims, angles, camera_box_origin, axis=1)
+        P2 = box_preds.tensor.new_tensor(P2)
+
+        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
+
+        box_corners = box_preds_camera.corners
         box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
         # box_corners_in_image: [N, 8, 2]
         minxy = torch.min(box_corners_in_image, dim=1)[0]
         maxxy = torch.max(box_corners_in_image, dim=1)[0]
         box_2d_preds = torch.cat([minxy, maxxy], dim=1)
         # Post-processing
-        # check final_box_preds_camera
-        image_shape = final_box_preds.new_tensor(img_shape)
-        valid_cam_inds = ((final_box_preds_camera[:, 0] < image_shape[1]) &
-                          (final_box_preds_camera[:, 1] < image_shape[0]) &
-                          (final_box_preds_camera[:, 2] > 0) &
-                          (final_box_preds_camera[:, 3] > 0))
-        # check final_box_preds
-        limit_range = final_box_preds.new_tensor(self.pcd_limit_range)
-        valid_pcd_inds = ((final_box_preds[:, :3] > limit_range[:3]) &
-                          (final_box_preds[:, :3] < limit_range[3:]))
+        # check box_preds_camera
+        image_shape = box_preds.tensor.new_tensor(img_shape)
+        valid_cam_inds = ((box_preds_camera.tensor[:, 0] < image_shape[1]) &
+                          (box_preds_camera.tensor[:, 1] < image_shape[0]) &
+                          (box_preds_camera.tensor[:, 2] > 0) &
+                          (box_preds_camera.tensor[:, 3] > 0))
+        # check box_preds
+        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
+        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
+                          (box_preds.center < limit_range[3:]))
         valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
 
         if valid_inds.sum() > 0:
             return dict(
                 bbox=box_2d_preds[valid_inds, :].numpy(),
-                box3d_camera=final_box_preds_camera[valid_inds, :].numpy(),
-                box3d_lidar=final_box_preds[valid_inds, :].numpy(),
-                scores=final_scores[valid_inds].numpy(),
-                label_preds=final_labels[valid_inds].numpy(),
+                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
+                box3d_lidar=box_preds[valid_inds].tensor.numpy(),
+                scores=scores[valid_inds].numpy(),
+                label_preds=labels[valid_inds].numpy(),
                 sample_idx=sample_idx,
             )
         else:
             return dict(
-                bbox=final_box_preds.new_zeros([0, 4]).numpy(),
-                box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(),
-                box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(),
-                scores=final_box_preds.new_zeros([0]).numpy(),
-                label_preds=final_box_preds.new_zeros([0, 4]).numpy(),
+                bbox=np.zeros([0, 4]),
+                box3d_camera=np.zeros([0, 7]),
+                box3d_lidar=np.zeros([0, 7]),
+                scores=np.zeros([0]),
+                label_preds=np.zeros([0, 4]),
                 sample_idx=sample_idx,
             )
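
The 2D boxes above come from projecting the eight corners of each camera-frame box into the image and taking the axis-aligned hull; the min/max reduction is plain tensor arithmetic. A self-contained illustration with fake projected corners, no mmdet3d calls involved:

import torch

corners_2d = torch.rand(4, 8, 2) * 100      # fake projected corners, [N, 8, 2]
minxy = torch.min(corners_2d, dim=1)[0]     # per-box min over the 8 corners
maxxy = torch.max(corners_2d, dim=1)[0]     # per-box max over the 8 corners
bbox_2d = torch.cat([minxy, maxxy], dim=1)  # [N, 4] boxes as (x1, y1, x2, y2)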
@@ -7,7 +7,7 @@ import pyquaternion
 from nuscenes.utils.data_classes import Box as NuScenesBox
 
 from mmdet.datasets import DATASETS
-from ..core.bbox import LiDARInstance3DBoxes, box_np_ops
+from ..core.bbox import LiDARInstance3DBoxes
 from .custom_3d import Custom3DDataset
@@ -72,8 +72,10 @@ class NuScenesDataset(Custom3DDataset):
                  classes=None,
                  load_interval=1,
                  with_velocity=True,
-                 test_mode=False,
                  modality=None,
+                 box_type_3d='LiDAR',
+                 filter_empty_gt=True,
+                 test_mode=False,
                  eval_version='detection_cvpr_2019'):
         self.load_interval = load_interval
         super().__init__(
@@ -82,6 +84,8 @@ class NuScenesDataset(Custom3DDataset):
             pipeline=pipeline,
             classes=classes,
             modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
             test_mode=test_mode)
         self.with_velocity = with_velocity
@@ -172,7 +176,7 @@ class NuScenesDataset(Custom3DDataset):
         gt_bboxes_3d = LiDARInstance3DBoxes(
             gt_bboxes_3d,
             box_dim=gt_bboxes_3d.shape[-1],
-            origin=[0.5, 0.5, 0.5])
+            origin=[0.5, 0.5, 0.5]).convert_to(self.box_mode_3d)
 
         anns_results = dict(
             gt_bboxes_3d=gt_bboxes_3d,
@@ -352,26 +356,28 @@
 def output_to_nusc_box(detection):
-    box3d = detection['boxes_3d'].numpy()
+    box3d = detection['boxes_3d']
     scores = detection['scores_3d'].numpy()
     labels = detection['labels_3d'].numpy()
+    box_gravity_center = box3d.gravity_center.numpy()
+    box_dims = box3d.dims.numpy()
+    box_yaw = box3d.yaw.numpy()
+    # TODO: check whether this is necessary
+    # with dir_offset & dir_limit in the head
-    box3d[:, 6] = -box3d[:, 6] - np.pi / 2
-    # the trained model is in [0.5, 0.5, 0],
-    # change them back to nuscenes [0.5, 0.5, 0.5]
-    box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
+    box_yaw = -box_yaw - np.pi / 2
     box_list = []
-    for i in range(box3d.shape[0]):
-        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6])
-        velocity = (*box3d[i, 7:9], 0.0)
+    for i in range(len(box3d)):
+        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+        velocity = (*box3d.tensor[i, 7:9], 0.0)
         # velo_val = np.linalg.norm(box3d[i, 7:9])
         # velo_ori = box3d[i, 6]
         # velocity = (
         #     velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
         box = NuScenesBox(
-            box3d[i, :3],
-            box3d[i, 3:6],
+            box_gravity_center[i],
+            box_dims[i],
             quat,
             label=labels[i],
             score=scores[i],
......
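
output_to_nusc_box now reads the center, dims, and yaw through the box structure's properties instead of slicing a raw array, then builds one NuScenesBox per detection. A sketch of a single conversion with made-up numbers; NuScenesBox takes center, size, and a pyquaternion orientation, plus optional label/score/velocity:

import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox

yaw = 0.5  # fake yaw, after the -yaw - pi/2 adjustment above
quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=yaw)
box = NuScenesBox(
    [10.0, 2.0, 1.0],  # gravity center (x, y, z), fake values
    [1.9, 4.5, 1.7],   # dims, fake values
    quat,
    label=0,
    score=0.9,
    velocity=(0.1, 0.0, 0.0))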
@@ -73,14 +73,13 @@ class Collect3D(object):
     def __init__(self,
                  keys,
-                 pcd_shape=[1, 1600, 1408],
                  meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                             'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
-                            'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans',
-                            'sample_idx', 'pcd_scale_factor', 'pcd_rotation')):
+                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
+                            'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
+                            'pcd_scale_factor', 'pcd_rotation')):
         self.keys = keys
         self.meta_keys = meta_keys
-        self.pcd_shape = pcd_shape
 
     def __call__(self, results):
         data = {}
@@ -88,7 +87,7 @@ class Collect3D(object):
         for key in self.meta_keys:
             if key in results:
                 img_meta[key] = results[key]
-        img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
         data['img_meta'] = DC(img_meta, cpu_only=True)
         for key in self.keys:
             data[key] = results[key]
......
@@ -20,10 +20,18 @@ class ScanNetDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
 
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
......
@@ -16,10 +16,18 @@ class SUNRGBDDataset(Custom3DDataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 box_type_3d='Depth',
                  filter_empty_gt=True,
                  test_mode=False):
-        super().__init__(data_root, ann_file, pipeline, classes, modality,
-                         filter_empty_gt, test_mode)
+        super().__init__(
+            data_root=data_root,
+            ann_file=ann_file,
+            pipeline=pipeline,
+            classes=classes,
+            modality=modality,
+            box_type_3d=box_type_3d,
+            filter_empty_gt=filter_empty_gt,
+            test_mode=test_mode)
 
     def get_ann_info(self, index):
         # Use index to get the annos, thus the evalhook could also use this api
......
@@ -381,5 +381,5 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
             bboxes[..., 6] = (
                 dir_rot + self.dir_offset +
                 np.pi * dir_scores.to(bboxes.dtype))
+        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
         return bboxes, scores, labels
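
This one-line wrap is the pattern the commit applies across the heads: the raw (N, box_code_size) tensor leaves the head only after being encapsulated in the box class carried in the image meta. A minimal sketch of the same wrapping outside a head; box_dim=9 matches the code_size=9 nuScenes setting in the config above, and the meta dict is faked the same way the updated tests do:

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

input_meta = {'box_type_3d': LiDARInstance3DBoxes}
raw = torch.rand(6, 9)  # fake network output, (N, box_code_size)
boxes = input_meta['box_type_3d'](raw, box_dim=9)
assert boxes.tensor.shape == (6, 9)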
@@ -150,13 +150,15 @@ class PartA2RPNHead(Anchor3DHead):
         result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                          mlvl_max_scores, mlvl_label_pred,
                                          mlvl_cls_score, mlvl_dir_scores,
-                                         score_thr, cfg.nms_post, cfg)
+                                         score_thr, cfg.nms_post, cfg,
+                                         input_meta)
         return result
 
     def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
                            mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
-                           mlvl_dir_scores, score_thr, max_num, cfg):
+                           mlvl_dir_scores, score_thr, max_num, cfg,
+                           input_meta):
         bboxes = []
         scores = []
         labels = []
@@ -202,6 +204,8 @@ class PartA2RPNHead(Anchor3DHead):
             labels = labels[inds]
             scores = scores[inds]
             cls_scores = cls_scores[inds]
+            bboxes = input_meta['box_type_3d'](
+                bboxes, box_dim=self.box_code_size)
             return dict(
                 boxes_3d=bboxes,
                 scores_3d=scores,
@@ -210,7 +214,9 @@ class PartA2RPNHead(Anchor3DHead):
             )
         else:
             return dict(
-                boxes_3d=mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                boxes_3d=input_meta['box_type_3d'](
+                    mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                    box_dim=self.box_code_size),
                 scores_3d=mlvl_bboxes.new_zeros([0]),
                 labels_3d=mlvl_bboxes.new_zeros([0]),
                 cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
@@ -474,7 +474,9 @@ class PartA2BboxHead(nn.Module):
             selected_scores = cur_cls_score[selected]
             result_list.append(
-                (selected_bboxes, selected_scores, selected_label_preds))
+                (img_meta[batch_id]['box_type_3d'](selected_bboxes,
+                                                   self.bbox_coder.code_size),
+                 selected_scores, selected_label_preds))
         return result_list
 
     def multi_class_nms(self,
......
@@ -112,7 +112,7 @@ class PartAggregationROIHead(Base3DRoIHead):
         semantic_results = self.semantic_head(feats_dict['seg_features'])
 
-        rois = bbox3d2roi([res['boxes_3d'] for res in proposal_list])
+        rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
         labels_3d = [res['labels_3d'] for res in proposal_list]
         cls_preds = [res['cls_preds'] for res in proposal_list]
         bbox_results = self._bbox_forward(feats_dict['seg_features'],
......
@@ -4,6 +4,8 @@ from os.path import dirname, exists, join
 import pytest
 import torch
 
+from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
+
 
 def _get_config_directory():
     """ Find the predefined detector config directory """
@@ -129,11 +131,16 @@ def test_anchor3d_head_getboxes():
     feats = list()
     feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
 
+    # fake input_metas
     input_metas = [{
-        'sample_idx': 1234
+        'sample_idx': 1234,
+        'box_type_3d': LiDARInstance3DBoxes,
+        'box_mode_3d': Box3DMode.LIDAR
     }, {
-        'sample_idx': 2345
-    }]  # fake input_metas
+        'sample_idx': 2345,
+        'box_type_3d': LiDARInstance3DBoxes,
+        'box_mode_3d': Box3DMode.LIDAR
+    }]
     (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
 
     # test get_boxes
@@ -155,11 +162,16 @@ def test_parta2_rpnhead_getboxes():
     feats = list()
     feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
 
+    # fake input_metas
     input_metas = [{
-        'sample_idx': 1234
+        'sample_idx': 1234,
+        'box_type_3d': LiDARInstance3DBoxes,
+        'box_mode_3d': Box3DMode.LIDAR
     }, {
-        'sample_idx': 2345
-    }]  # fake input_metas
+        'sample_idx': 2345,
+        'box_type_3d': LiDARInstance3DBoxes,
+        'box_mode_3d': Box3DMode.LIDAR
+    }]
     (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
 
     # test get_boxes
@@ -169,7 +181,7 @@ def test_parta2_rpnhead_getboxes():
     assert result_list[0]['scores_3d'].shape == torch.Size([512])
     assert result_list[0]['labels_3d'].shape == torch.Size([512])
     assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
-    assert result_list[0]['boxes_3d'].shape == torch.Size([512, 7])
+    assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])
 
 
 def test_vote_head():
......
@@ -3,6 +3,7 @@ import torch
 from mmcv import Config
 from torch.nn import BatchNorm1d, ReLU
 
+from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
 from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
 from mmdet3d.models import PartA2BboxHead
 from mmdet3d.ops import make_sparse_convmodule
@@ -336,8 +337,10 @@ def test_get_bboxes():
             use_raw_score=True,
             nms_thr=0.01,
             score_thr=0.1))
+    input_meta = dict(
+        box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)
     result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels,
-                                  class_pred, None, cfg)
+                                  class_pred, [input_meta], cfg)
     selected_bboxes, selected_scores, selected_label_preds = result_list[0]
     expected_selected_bboxes = torch.Tensor(
@@ -347,7 +350,8 @@ def test_get_bboxes():
     expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda()
     expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda()
-    assert torch.allclose(selected_bboxes, expected_selected_bboxes, 1e-3)
+    assert torch.allclose(selected_bboxes.tensor, expected_selected_bboxes,
+                          1e-3)
     assert torch.allclose(selected_scores, expected_selected_scores, 1e-3)
     assert torch.allclose(selected_label_preds, expected_selected_label_preds)
......