Commit 2d91ef83 authored by zhangwenwei
Browse files

Encapsulate network output into box structure

parent 26c08c7e
yapf -r -i --style .style.yapf mmdet3d/ configs/ tests/ tools/
isort -rc mmdet3d/ configs/ tests/ tools/
flake8 .
# Model settings: voxel grid resolution and the detection range.
voxel_size = [0.25, 0.25, 8]  # x/y/z size of one voxel in meters
# Detection range as [x_min, y_min, z_min, x_max, y_max, z_max].
point_cloud_range = [-50, -50, -5, 50, 50, 3]
# The ten nuScenes detection classes, in label order.
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
    'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier',
]
# Model: MVX Faster R-CNN V2 used here with only the point-cloud (pts)
# branch: hard voxelization -> HardVFE voxel encoder -> PointPillars
# scatter -> SECOND backbone -> FPN neck -> single-stage Anchor3DHead.
model = dict(
    type='MVXFasterRCNNV2',
    pts_voxel_layer=dict(
        max_num_points=64,  # max points kept per voxel
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
    ),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter',
        in_channels=64,
        # checked from PointCloud3D; = 100 m range / 0.25 m voxel per axis
        output_shape=[400, 400],
    ),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256],
    ),
    pts_neck=dict(
        type='FPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        in_channels=[64, 128, 256],
        out_channels=256,
        start_level=0,
        num_outs=3,
    ),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=256,
        feat_channels=256,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
            scales=[1, 2, 4],
            sizes=[
                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
                [0.5774, 1.7321, 1.],  # 1/sqrt(3)
                [1., 1., 1.],
                [0.4, 0.4, 1],
            ],
            # Two extra values appended per anchor; with code_size=9 below,
            # these presumably pad the velocity dims — TODO confirm.
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,  # encode angle diff via sin to avoid wrap issues
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# Training-time target assignment for the point-cloud branch.
train_cfg = {
    'pts': {
        # IoU-based anchor/GT matching in nearest-BEV overlap.
        'assigner': {
            'type': 'MaxIoUAssigner',
            'iou_calculator': {'type': 'BboxOverlapsNearest3D'},
            'pos_iou_thr': 0.6,
            'neg_iou_thr': 0.3,
            'min_pos_iou': 0.3,
            'ignore_iof_thr': -1,
        },
        'allowed_border': 0,
        # Per-dimension regression weights; the last two dims are
        # down-weighted to 0.2 (matches code_size=9 in the head).
        'code_weight': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        'pos_weight': -1,
        'debug': False,
    },
}
# Test-time NMS / filtering for the point-cloud branch.
test_cfg = {
    'pts': {
        'use_rotate_nms': True,
        'nms_across_levels': False,
        'nms_pre': 1000,   # keep top-1000 boxes before NMS
        'nms_thr': 0.2,
        'score_thr': 0.05,
        'min_bbox_size': 0,
        'max_num': 500,    # max detections kept per sample
        # soft-nms is also supported for rcnn testing,
        # e.g. nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
    },
}
# Dataset settings.
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
# Image normalization (unused when use_camera is False, kept for parity).
img_norm_cfg = {
    'mean': [103.530, 116.280, 123.675],
    'std': [1.0, 1.0, 1.0],
    'to_rgb': False,
}
# LiDAR-only input: no camera or depth modality.
input_modality = {
    'use_lidar': True,
    'use_depth': False,
    'use_lidar_intensity': True,
    'use_camera': False,
}
# GT-database sampler: paste sampled bus/trailer/truck instances into scenes.
db_sampler = {
    'data_root': data_root,
    'info_path': data_root + 'nuscenes_dbinfos_train.pkl',
    'rate': 1.0,
    'object_rot_range': [0.0, 0.0],
    'prepare': {},
    'sample_groups': {'bus': 4, 'trailer': 4, 'truck': 4},
}
file_client_args = {'backend': 'disk'}
# Alternative ceph/petrel storage backend:
# file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
#     }))
# Training data pipeline, applied to each sample in order.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,  # dims stored per point in the file
        use_dim=5,   # keep all 5 dims — presumably x, y, z, intensity + 1; TODO confirm
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,  # aggregate 10 LiDAR sweeps into one cloud
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    # Global augmentation: random rotation (radians), scaling, translation.
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.3925, 0.3925],
        scaling_uniform_noise=[0.95, 1.05],
        trans_normal_noise=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    # Drop points and GT boxes that fall outside the detection range.
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
# Test/validation pipeline: same loading as training, but no annotations,
# no random augmentation (flip_ratio=0), and only points are collected.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    # flip_ratio=0 keeps the flip transform in the pipeline but disables it.
    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
# Dataloader configuration: 4 samples and 4 workers per GPU.
# NOTE(review): val and test both use the val split annotation file.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# Optimizer: AdamW with weight decay and gradient clipping.
optimizer = {'type': 'AdamW', 'lr': 0.001, 'weight_decay': 0.01}
# max_norm=10 is better for SECOND
optimizer_config = {'grad_clip': {'max_norm': 35, 'norm_type': 2}}
# LR schedule: 1000-iter linear warmup, then step decay at epochs 20 and 23.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 1000,
    'warmup_ratio': 1.0 / 1000,
    'step': [20, 23],
}
momentum_config = None
checkpoint_config = {'interval': 1}  # save a checkpoint every epoch
# yapf:disable
evaluation = {'interval': 24}  # evaluate once, after the final epoch
log_config = {
    'interval': 50,
    'hooks': [
        {'type': 'TextLoggerHook'},
        {'type': 'TensorboardLoggerHook'},
    ],
}
# yapf:enable
# Runtime settings.
total_epochs = 24
dist_params = {'backend': 'nccl'}
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
...@@ -83,7 +83,9 @@ def bbox3d2result(bboxes, scores, labels): ...@@ -83,7 +83,9 @@ def bbox3d2result(bboxes, scores, labels):
dict(Tensor): bbox results in cpu mode dict(Tensor): bbox results in cpu mode
""" """
return dict( return dict(
boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu()) boxes_3d=bboxes.to('cpu'),
scores_3d=scores.cpu(),
labels_3d=labels.cpu())
def upright_depth_to_lidar_torch(points=None, def upright_depth_to_lidar_torch(points=None,
......
...@@ -6,11 +6,39 @@ import numpy as np ...@@ -6,11 +6,39 @@ import numpy as np
from torch.utils.data import Dataset from torch.utils.data import Dataset
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import (Box3DMode, CameraInstance3DBoxes,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from .pipelines import Compose from .pipelines import Compose
@DATASETS.register_module() @DATASETS.register_module()
class Custom3DDataset(Dataset): class Custom3DDataset(Dataset):
"""Customized 3D dataset
This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
dataset.
Args:
data_root (str): Path of dataset root.
ann_file (str): Path of annotation file.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
classes (tuple[str], optional): Classes used in the dataset.
Defaults to None.
modality ([dict], optional): Modality to specify the sensor data used
as input. Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
Defaults to 'LiDAR'. Available options includes
- 'LiDAR': box in LiDAR coordinates
- 'Depth': box in depth coordinates, usually for indoor dataset
- 'Camera': box in camera coordinates
filter_empty_gt (bool, optional): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
Defaults to False.
"""
def __init__(self, def __init__(self,
data_root, data_root,
...@@ -18,6 +46,7 @@ class Custom3DDataset(Dataset): ...@@ -18,6 +46,7 @@ class Custom3DDataset(Dataset):
pipeline=None, pipeline=None,
classes=None, classes=None,
modality=None, modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True, filter_empty_gt=True,
test_mode=False): test_mode=False):
super().__init__() super().__init__()
...@@ -26,6 +55,7 @@ class Custom3DDataset(Dataset): ...@@ -26,6 +55,7 @@ class Custom3DDataset(Dataset):
self.test_mode = test_mode self.test_mode = test_mode
self.modality = modality self.modality = modality
self.filter_empty_gt = filter_empty_gt self.filter_empty_gt = filter_empty_gt
self.get_box_type(box_type_3d)
self.CLASSES = self.get_classes(classes) self.CLASSES = self.get_classes(classes)
self.data_infos = self.load_annotations(self.ann_file) self.data_infos = self.load_annotations(self.ann_file)
...@@ -40,6 +70,21 @@ class Custom3DDataset(Dataset): ...@@ -40,6 +70,21 @@ class Custom3DDataset(Dataset):
def load_annotations(self, ann_file): def load_annotations(self, ann_file):
return mmcv.load(ann_file) return mmcv.load(ann_file)
def get_box_type(self, box_type):
box_type_lower = box_type.lower()
if box_type_lower == 'lidar':
self.box_type_3d = LiDARInstance3DBoxes
self.box_mode_3d = Box3DMode.LIDAR
elif box_type_lower == 'camera':
self.box_type_3d = CameraInstance3DBoxes
self.box_mode_3d = Box3DMode.CAM
elif box_type_lower == 'depth':
self.box_type_3d = DepthInstance3DBoxes
self.box_mode_3d = Box3DMode.DEPTH
else:
raise ValueError('Only "box_type" of "camera", "lidar", "depth"'
f' are supported, got {box_type}')
def get_data_info(self, index): def get_data_info(self, index):
info = self.data_infos[index] info = self.data_infos[index]
sample_idx = info['point_cloud']['lidar_idx'] sample_idx = info['point_cloud']['lidar_idx']
...@@ -61,6 +106,8 @@ class Custom3DDataset(Dataset): ...@@ -61,6 +106,8 @@ class Custom3DDataset(Dataset):
results['bbox3d_fields'] = [] results['bbox3d_fields'] = []
results['pts_mask_fields'] = [] results['pts_mask_fields'] = []
results['pts_seg_fields'] = [] results['pts_seg_fields'] = []
results['box_type_3d'] = self.box_type_3d
results['box_mode_3d'] = self.box_mode_3d
def prepare_train_data(self, index): def prepare_train_data(self, index):
input_dict = self.get_data_info(index) input_dict = self.get_data_info(index)
......
...@@ -9,7 +9,7 @@ import torch ...@@ -9,7 +9,7 @@ import torch
from mmcv.utils import print_log from mmcv.utils import print_log
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import Box3DMode, CameraInstance3DBoxes, box_np_ops from ..core.bbox import Box3DMode, CameraInstance3DBoxes
from .custom_3d import Custom3DDataset from .custom_3d import Custom3DDataset
from .utils import remove_dontcare from .utils import remove_dontcare
...@@ -27,6 +27,8 @@ class KittiDataset(Custom3DDataset): ...@@ -27,6 +27,8 @@ class KittiDataset(Custom3DDataset):
pipeline=None, pipeline=None,
classes=None, classes=None,
modality=None, modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False): test_mode=False):
super().__init__( super().__init__(
data_root=data_root, data_root=data_root,
...@@ -34,6 +36,8 @@ class KittiDataset(Custom3DDataset): ...@@ -34,6 +36,8 @@ class KittiDataset(Custom3DDataset):
pipeline=pipeline, pipeline=pipeline,
classes=classes, classes=classes,
modality=modality, modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode) test_mode=test_mode)
self.root_split = os.path.join(self.data_root, split) self.root_split = os.path.join(self.data_root, split)
...@@ -90,7 +94,7 @@ class KittiDataset(Custom3DDataset): ...@@ -90,7 +94,7 @@ class KittiDataset(Custom3DDataset):
# convert gt_bboxes_3d to velodyne coordinates # convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to( gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c)) self.box_mode_3d, np.linalg.inv(rect @ Trv2c))
gt_bboxes = annos['bbox'] gt_bboxes = annos['bbox']
selected = self.drop_arrays_by_name(gt_names, ['DontCare']) selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
...@@ -395,73 +399,66 @@ class KittiDataset(Custom3DDataset): ...@@ -395,73 +399,66 @@ class KittiDataset(Custom3DDataset):
def convert_valid_bboxes(self, box_dict, info): def convert_valid_bboxes(self, box_dict, info):
# TODO: refactor this function # TODO: refactor this function
final_box_preds = box_dict['boxes_3d'] box_preds = box_dict['boxes_3d']
final_scores = box_dict['scores_3d'] scores = box_dict['scores_3d']
final_labels = box_dict['labels_3d'] labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx'] sample_idx = info['image']['image_idx']
final_box_preds[:, -1] = box_np_ops.limit_period( # TODO: remove the hack of yaw
final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2) box_preds.tensor[:, -1] = box_preds.tensor[:, -1] - np.pi
box_preds.limit_yaw(offset=0.5, period=np.pi * 2)
if final_box_preds.shape[0] == 0: if len(box_preds) == 0:
return dict( return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(), bbox=np.zeros([0, 4]),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(), box3d_camera=np.zeros([0, 7]),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(), box3d_lidar=np.zeros([0, 7]),
scores=final_box_preds.new_zeros([0]).numpy(), scores=np.zeros([0]),
label_preds=final_box_preds.new_zeros([0, 4]).numpy(), label_preds=np.zeros([0, 4]),
sample_idx=sample_idx, sample_idx=sample_idx)
)
from mmdet3d.core.bbox import box_torch_ops from mmdet3d.core.bbox import box_torch_ops
rect = info['calib']['R0_rect'].astype(np.float32) rect = info['calib']['R0_rect'].astype(np.float32)
Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32) Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
P2 = info['calib']['P2'].astype(np.float32) P2 = info['calib']['P2'].astype(np.float32)
img_shape = info['image']['image_shape'] img_shape = info['image']['image_shape']
rect = final_box_preds.new_tensor(rect) P2 = box_preds.tensor.new_tensor(P2)
Trv2c = final_box_preds.new_tensor(Trv2c)
P2 = final_box_preds.new_tensor(P2) box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)
final_box_preds_camera = box_torch_ops.box_lidar_to_camera( box_corners = box_preds_camera.corners
final_box_preds, rect, Trv2c)
locs = final_box_preds_camera[:, :3]
dims = final_box_preds_camera[:, 3:6]
angles = final_box_preds_camera[:, 6]
camera_box_origin = [0.5, 1.0, 0.5]
box_corners = box_torch_ops.center_to_corner_box3d(
locs, dims, angles, camera_box_origin, axis=1)
box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2) box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
# box_corners_in_image: [N, 8, 2] # box_corners_in_image: [N, 8, 2]
minxy = torch.min(box_corners_in_image, dim=1)[0] minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0] maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1) box_2d_preds = torch.cat([minxy, maxxy], dim=1)
# Post-processing # Post-processing
# check final_box_preds_camera # check box_preds_camera
image_shape = final_box_preds.new_tensor(img_shape) image_shape = box_preds.tensor.new_tensor(img_shape)
valid_cam_inds = ((final_box_preds_camera[:, 0] < image_shape[1]) & valid_cam_inds = ((box_preds_camera.tensor[:, 0] < image_shape[1]) &
(final_box_preds_camera[:, 1] < image_shape[0]) & (box_preds_camera.tensor[:, 1] < image_shape[0]) &
(final_box_preds_camera[:, 2] > 0) & (box_preds_camera.tensor[:, 2] > 0) &
(final_box_preds_camera[:, 3] > 0)) (box_preds_camera.tensor[:, 3] > 0))
# check final_box_preds # check box_preds
limit_range = final_box_preds.new_tensor(self.pcd_limit_range) limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
valid_pcd_inds = ((final_box_preds[:, :3] > limit_range[:3]) & valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
(final_box_preds[:, :3] < limit_range[3:])) (box_preds.center < limit_range[3:]))
valid_inds = valid_cam_inds & valid_pcd_inds.all(-1) valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)
if valid_inds.sum() > 0: if valid_inds.sum() > 0:
return dict( return dict(
bbox=box_2d_preds[valid_inds, :].numpy(), bbox=box_2d_preds[valid_inds, :].numpy(),
box3d_camera=final_box_preds_camera[valid_inds, :].numpy(), box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
box3d_lidar=final_box_preds[valid_inds, :].numpy(), box3d_lidar=box_preds[valid_inds].tensor.numpy(),
scores=final_scores[valid_inds].numpy(), scores=scores[valid_inds].numpy(),
label_preds=final_labels[valid_inds].numpy(), label_preds=labels[valid_inds].numpy(),
sample_idx=sample_idx, sample_idx=sample_idx,
) )
else: else:
return dict( return dict(
bbox=final_box_preds.new_zeros([0, 4]).numpy(), bbox=np.zeros([0, 4]),
box3d_camera=final_box_preds.new_zeros([0, 7]).numpy(), box3d_camera=np.zeros([0, 7]),
box3d_lidar=final_box_preds.new_zeros([0, 7]).numpy(), box3d_lidar=np.zeros([0, 7]),
scores=final_box_preds.new_zeros([0]).numpy(), scores=np.zeros([0]),
label_preds=final_box_preds.new_zeros([0, 4]).numpy(), label_preds=np.zeros([0, 4]),
sample_idx=sample_idx, sample_idx=sample_idx,
) )
...@@ -7,7 +7,7 @@ import pyquaternion ...@@ -7,7 +7,7 @@ import pyquaternion
from nuscenes.utils.data_classes import Box as NuScenesBox from nuscenes.utils.data_classes import Box as NuScenesBox
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from ..core.bbox import LiDARInstance3DBoxes, box_np_ops from ..core.bbox import LiDARInstance3DBoxes
from .custom_3d import Custom3DDataset from .custom_3d import Custom3DDataset
...@@ -72,8 +72,10 @@ class NuScenesDataset(Custom3DDataset): ...@@ -72,8 +72,10 @@ class NuScenesDataset(Custom3DDataset):
classes=None, classes=None,
load_interval=1, load_interval=1,
with_velocity=True, with_velocity=True,
test_mode=False,
modality=None, modality=None,
box_type_3d='LiDAR',
filter_empty_gt=True,
test_mode=False,
eval_version='detection_cvpr_2019'): eval_version='detection_cvpr_2019'):
self.load_interval = load_interval self.load_interval = load_interval
super().__init__( super().__init__(
...@@ -82,6 +84,8 @@ class NuScenesDataset(Custom3DDataset): ...@@ -82,6 +84,8 @@ class NuScenesDataset(Custom3DDataset):
pipeline=pipeline, pipeline=pipeline,
classes=classes, classes=classes,
modality=modality, modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode) test_mode=test_mode)
self.with_velocity = with_velocity self.with_velocity = with_velocity
...@@ -172,7 +176,7 @@ class NuScenesDataset(Custom3DDataset): ...@@ -172,7 +176,7 @@ class NuScenesDataset(Custom3DDataset):
gt_bboxes_3d = LiDARInstance3DBoxes( gt_bboxes_3d = LiDARInstance3DBoxes(
gt_bboxes_3d, gt_bboxes_3d,
box_dim=gt_bboxes_3d.shape[-1], box_dim=gt_bboxes_3d.shape[-1],
origin=[0.5, 0.5, 0.5]) origin=[0.5, 0.5, 0.5]).convert_to(self.box_mode_3d)
anns_results = dict( anns_results = dict(
gt_bboxes_3d=gt_bboxes_3d, gt_bboxes_3d=gt_bboxes_3d,
...@@ -352,26 +356,28 @@ class NuScenesDataset(Custom3DDataset): ...@@ -352,26 +356,28 @@ class NuScenesDataset(Custom3DDataset):
def output_to_nusc_box(detection): def output_to_nusc_box(detection):
box3d = detection['boxes_3d'].numpy() box3d = detection['boxes_3d']
scores = detection['scores_3d'].numpy() scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy() labels = detection['labels_3d'].numpy()
box_gravity_center = box3d.gravity_center.numpy()
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()
# TODO: check whether this is necessary # TODO: check whether this is necessary
# with dir_offset & dir_limit in the head # with dir_offset & dir_limit in the head
box3d[:, 6] = -box3d[:, 6] - np.pi / 2 box_yaw = -box_yaw - np.pi / 2
# the trained model is in [0.5, 0.5, 0],
# change them back to nuscenes [0.5, 0.5, 0.5]
box_np_ops.change_box3d_center_(box3d, [0.5, 0.5, 0], [0.5, 0.5, 0.5])
box_list = [] box_list = []
for i in range(box3d.shape[0]): for i in range(len(box3d)):
quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box3d[i, 6]) quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
velocity = (*box3d[i, 7:9], 0.0) velocity = (*box3d.tensor[i, 7:9], 0.0)
# velo_val = np.linalg.norm(box3d[i, 7:9]) # velo_val = np.linalg.norm(box3d[i, 7:9])
# velo_ori = box3d[i, 6] # velo_ori = box3d[i, 6]
# velocity = ( # velocity = (
# velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0) # velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
box = NuScenesBox( box = NuScenesBox(
box3d[i, :3], box_gravity_center[i],
box3d[i, 3:6], box_dims[i],
quat, quat,
label=labels[i], label=labels[i],
score=scores[i], score=scores[i],
......
...@@ -73,14 +73,13 @@ class Collect3D(object): ...@@ -73,14 +73,13 @@ class Collect3D(object):
def __init__(self, def __init__(self,
keys, keys,
pcd_shape=[1, 1600, 1408],
meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img', meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
'pad_shape', 'scale_factor', 'flip', 'pcd_flip', 'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
'img_norm_cfg', 'rect', 'Trv2c', 'P2', 'pcd_trans', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
'sample_idx', 'pcd_scale_factor', 'pcd_rotation')): 'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
'pcd_scale_factor', 'pcd_rotation')):
self.keys = keys self.keys = keys
self.meta_keys = meta_keys self.meta_keys = meta_keys
self.pcd_shape = pcd_shape
def __call__(self, results): def __call__(self, results):
data = {} data = {}
...@@ -88,7 +87,7 @@ class Collect3D(object): ...@@ -88,7 +87,7 @@ class Collect3D(object):
for key in self.meta_keys: for key in self.meta_keys:
if key in results: if key in results:
img_meta[key] = results[key] img_meta[key] = results[key]
img_meta.update(pcd_shape=self.pcd_shape, pcd_pad_shape=self.pcd_shape)
data['img_meta'] = DC(img_meta, cpu_only=True) data['img_meta'] = DC(img_meta, cpu_only=True)
for key in self.keys: for key in self.keys:
data[key] = results[key] data[key] = results[key]
......
...@@ -20,10 +20,18 @@ class ScanNetDataset(Custom3DDataset): ...@@ -20,10 +20,18 @@ class ScanNetDataset(Custom3DDataset):
pipeline=None, pipeline=None,
classes=None, classes=None,
modality=None, modality=None,
box_type_3d='Depth',
filter_empty_gt=True, filter_empty_gt=True,
test_mode=False): test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality, super().__init__(
filter_empty_gt, test_mode) data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode)
def get_ann_info(self, index): def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api # Use index to get the annos, thus the evalhook could also use this api
......
...@@ -16,10 +16,18 @@ class SUNRGBDDataset(Custom3DDataset): ...@@ -16,10 +16,18 @@ class SUNRGBDDataset(Custom3DDataset):
pipeline=None, pipeline=None,
classes=None, classes=None,
modality=None, modality=None,
box_type_3d='Depth',
filter_empty_gt=True, filter_empty_gt=True,
test_mode=False): test_mode=False):
super().__init__(data_root, ann_file, pipeline, classes, modality, super().__init__(
filter_empty_gt, test_mode) data_root=data_root,
ann_file=ann_file,
pipeline=pipeline,
classes=classes,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode)
def get_ann_info(self, index): def get_ann_info(self, index):
# Use index to get the annos, thus the evalhook could also use this api # Use index to get the annos, thus the evalhook could also use this api
......
...@@ -381,5 +381,5 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin): ...@@ -381,5 +381,5 @@ class Anchor3DHead(nn.Module, AnchorTrainMixin):
bboxes[..., 6] = ( bboxes[..., 6] = (
dir_rot + self.dir_offset + dir_rot + self.dir_offset +
np.pi * dir_scores.to(bboxes.dtype)) np.pi * dir_scores.to(bboxes.dtype))
bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
return bboxes, scores, labels return bboxes, scores, labels
...@@ -150,13 +150,15 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -150,13 +150,15 @@ class PartA2RPNHead(Anchor3DHead):
result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms, result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_max_scores, mlvl_label_pred,
mlvl_cls_score, mlvl_dir_scores, mlvl_cls_score, mlvl_dir_scores,
score_thr, cfg.nms_post, cfg) score_thr, cfg.nms_post, cfg,
input_meta)
return result return result
def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms, def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score, mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg): mlvl_dir_scores, score_thr, max_num, cfg,
input_meta):
bboxes = [] bboxes = []
scores = [] scores = []
labels = [] labels = []
...@@ -202,6 +204,8 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -202,6 +204,8 @@ class PartA2RPNHead(Anchor3DHead):
labels = labels[inds] labels = labels[inds]
scores = scores[inds] scores = scores[inds]
cls_scores = cls_scores[inds] cls_scores = cls_scores[inds]
bboxes = input_meta['box_type_3d'](
bboxes, box_dim=self.box_code_size)
return dict( return dict(
boxes_3d=bboxes, boxes_3d=bboxes,
scores_3d=scores, scores_3d=scores,
...@@ -210,7 +214,9 @@ class PartA2RPNHead(Anchor3DHead): ...@@ -210,7 +214,9 @@ class PartA2RPNHead(Anchor3DHead):
) )
else: else:
return dict( return dict(
boxes_3d=mlvl_bboxes.new_zeros([0, self.box_code_size]), boxes_3d=input_meta['box_type_3d'](
mlvl_bboxes.new_zeros([0, self.box_code_size]),
box_dim=self.box_code_size),
scores_3d=mlvl_bboxes.new_zeros([0]), scores_3d=mlvl_bboxes.new_zeros([0]),
labels_3d=mlvl_bboxes.new_zeros([0]), labels_3d=mlvl_bboxes.new_zeros([0]),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]])) cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
...@@ -474,7 +474,9 @@ class PartA2BboxHead(nn.Module): ...@@ -474,7 +474,9 @@ class PartA2BboxHead(nn.Module):
selected_scores = cur_cls_score[selected] selected_scores = cur_cls_score[selected]
result_list.append( result_list.append(
(selected_bboxes, selected_scores, selected_label_preds)) (img_meta[batch_id]['box_type_3d'](selected_bboxes,
self.bbox_coder.code_size),
selected_scores, selected_label_preds))
return result_list return result_list
def multi_class_nms(self, def multi_class_nms(self,
......
...@@ -112,7 +112,7 @@ class PartAggregationROIHead(Base3DRoIHead): ...@@ -112,7 +112,7 @@ class PartAggregationROIHead(Base3DRoIHead):
semantic_results = self.semantic_head(feats_dict['seg_features']) semantic_results = self.semantic_head(feats_dict['seg_features'])
rois = bbox3d2roi([res['boxes_3d'] for res in proposal_list]) rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list] labels_3d = [res['labels_3d'] for res in proposal_list]
cls_preds = [res['cls_preds'] for res in proposal_list] cls_preds = [res['cls_preds'] for res in proposal_list]
bbox_results = self._bbox_forward(feats_dict['seg_features'], bbox_results = self._bbox_forward(feats_dict['seg_features'],
......
...@@ -4,6 +4,8 @@ from os.path import dirname, exists, join ...@@ -4,6 +4,8 @@ from os.path import dirname, exists, join
import pytest import pytest
import torch import torch
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
def _get_config_directory(): def _get_config_directory():
""" Find the predefined detector config directory """ """ Find the predefined detector config directory """
...@@ -129,11 +131,16 @@ def test_anchor3d_head_getboxes(): ...@@ -129,11 +131,16 @@ def test_anchor3d_head_getboxes():
feats = list() feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda()) feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
# fake input_metas
input_metas = [{ input_metas = [{
'sample_idx': 1234 'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, { }, {
'sample_idx': 2345 'sample_idx': 2345,
}] # fake input_metas 'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats) (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes # test get_boxes
...@@ -155,11 +162,16 @@ def test_parta2_rpnhead_getboxes(): ...@@ -155,11 +162,16 @@ def test_parta2_rpnhead_getboxes():
feats = list() feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda()) feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
# fake input_metas
input_metas = [{ input_metas = [{
'sample_idx': 1234 'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, { }, {
'sample_idx': 2345 'sample_idx': 2345,
}] # fake input_metas 'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats) (cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes # test get_boxes
...@@ -169,7 +181,7 @@ def test_parta2_rpnhead_getboxes(): ...@@ -169,7 +181,7 @@ def test_parta2_rpnhead_getboxes():
assert result_list[0]['scores_3d'].shape == torch.Size([512]) assert result_list[0]['scores_3d'].shape == torch.Size([512])
assert result_list[0]['labels_3d'].shape == torch.Size([512]) assert result_list[0]['labels_3d'].shape == torch.Size([512])
assert result_list[0]['cls_preds'].shape == torch.Size([512, 3]) assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
assert result_list[0]['boxes_3d'].shape == torch.Size([512, 7]) assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])
def test_vote_head(): def test_vote_head():
......
...@@ -3,6 +3,7 @@ import torch ...@@ -3,6 +3,7 @@ import torch
from mmcv import Config from mmcv import Config
from torch.nn import BatchNorm1d, ReLU from torch.nn import BatchNorm1d, ReLU
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
from mmdet3d.models import PartA2BboxHead from mmdet3d.models import PartA2BboxHead
from mmdet3d.ops import make_sparse_convmodule from mmdet3d.ops import make_sparse_convmodule
...@@ -336,8 +337,10 @@ def test_get_bboxes(): ...@@ -336,8 +337,10 @@ def test_get_bboxes():
use_raw_score=True, use_raw_score=True,
nms_thr=0.01, nms_thr=0.01,
score_thr=0.1)) score_thr=0.1))
input_meta = dict(
box_type_3d=LiDARInstance3DBoxes, box_mode_3d=Box3DMode.LIDAR)
result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels, result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels,
class_pred, None, cfg) class_pred, [input_meta], cfg)
selected_bboxes, selected_scores, selected_label_preds = result_list[0] selected_bboxes, selected_scores, selected_label_preds = result_list[0]
expected_selected_bboxes = torch.Tensor( expected_selected_bboxes = torch.Tensor(
...@@ -347,7 +350,8 @@ def test_get_bboxes(): ...@@ -347,7 +350,8 @@ def test_get_bboxes():
expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda() expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda()
expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda() expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda()
assert torch.allclose(selected_bboxes, expected_selected_bboxes, 1e-3) assert torch.allclose(selected_bboxes.tensor, expected_selected_bboxes,
1e-3)
assert torch.allclose(selected_scores, expected_selected_scores, 1e-3) assert torch.allclose(selected_scores, expected_selected_scores, 1e-3)
assert torch.allclose(selected_label_preds, expected_selected_label_preds) assert torch.allclose(selected_label_preds, expected_selected_label_preds)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment