Commit 36466f83 authored by liyinhao

Merge branch 'master' into process_raw_data

parents 25d39342 f93167c3
yapf -r -i --style .style.yapf mmdet3d/ configs/ tests/ tools/
isort -rc mmdet3d/ configs/ tests/ tools/
flake8 .
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
max_voxels=(30000, 40000), # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256],
),
pts_neck=dict(
type='FPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
act_cfg=dict(type='ReLU'),
in_channels=[64, 128, 256],
out_channels=256,
start_level=0,
num_outs=3,
),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
scales=[1, 2, 4],
sizes=[
[0.8660, 2.5981, 1.], # 1.5/sqrt(3)
[0.5774, 1.7321, 1.], # 1/sqrt(3)
[1., 1., 1.],
[0.4, 0.4, 1],
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(),
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
))
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64,
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='FPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
act_cfg=dict(type='ReLU'),
in_channels=[64, 128, 256],
out_channels=256,
start_level=0,
num_outs=3),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
scales=[1, 2, 4],
sizes=[
[0.8660, 2.5981, 1.], # 1.5/sqrt(3)
[0.5774, 1.7321, 1.], # 1/sqrt(3)
[1., 1., 1.],
[0.4, 0.4, 1],
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False)
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[28, 34])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=36)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
# Designing Network Design Spaces
## Introduction
We implement RegNetX and RegNetY models as backbones for 3D detection and provide their first results on PointPillars.
The pre-trained models are converted from the [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md) and maintained in [mmcv](https://github.com/open-mmlab/mmcv).
```
@article{radosavovic2020designing,
title={Designing Network Design Spaces},
author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár},
year={2020},
eprint={2003.13678},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
## Usage
To use a RegNet model, there are two steps:
1. Convert the model to a ResNet-style checkpoint supported by MMDetection
2. Modify the backbone and neck in the config accordingly
### Convert model
We already provide models with FLOPs ranging from 800M to 12G in our model zoo.
For more general usage, we also provide the script `regnet2mmdet.py` in the tools directory to convert the keys of models pre-trained by [pycls](https://github.com/facebookresearch/pycls/) to
ResNet-style checkpoints used in MMDetection.
```bash
python -u tools/regnet2mmdet.py ${PRETRAIN_PATH} ${STORE_PATH}
```
This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`.
### Modify config
Users can modify the `depth` of the backbone and the corresponding keys in `arch` in the config according to the entries in the [pycls model zoo](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md).
The parameter `in_channels` in the FPN can be found in Figures 15 & 16 of the paper (`wi` in the legend).
This directory already provides some configs with their performance, using RegNetX from the 800MF to 12GF level.
For other pre-trained models or self-implemented RegNet models, users are responsible for checking these parameters themselves; a sketch of the relevant config fields is given below.
**Note**: Although Figures 15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized and thus inaccurate; using them sometimes produces a backbone that does not match the keys in the pre-trained model.
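As a concrete reference, the RegNetX-400MF configs in this directory override the backbone and neck roughly as follows (a sketch condensed from the full configs below; the neck's `in_channels` must match the backbone's output stage widths):

```python
model = dict(
    pretrained=dict(pts='open-mmlab://regnetx_400mf'),
    pts_backbone=dict(
        type='NoStemRegNet',
        # arch keys follow the pycls model zoo entry for RegNetX-400MF
        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
        out_indices=(1, 2, 3),
        strides=(1, 2, 2, 2),
        base_channels=64),
    pts_neck=dict(
        type='FPN',
        # stage widths (w_i) of RegNetX-400MF, cf. Figures 15 & 16 of the paper
        in_channels=[64, 160, 384],
        out_channels=256,
        num_outs=3))
```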
## Results
### PointPillars
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | mAP |NDS| Download |
| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |
| [SECFPN](../) | 2x ||||||
|[RegNetX-400MF-SECFPN](./hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py)| 2x ||||||
| [FPN](../) | 2x ||||||
|[RegNetX-400MF-FPN](./hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py)| 2x ||||||
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pretrained=dict(pts='open-mmlab://regnetx_400mf'),
pts_voxel_layer=dict(
max_num_points=64,
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
pts_backbone=dict(
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(
type='FPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
act_cfg=dict(type='ReLU'),
in_channels=[64, 160, 384],
out_channels=256,
start_level=0,
num_outs=3),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=256,
feat_channels=256,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
scales=[1, 2, 4],
sizes=[
[0.8660, 2.5981, 1.], # 1.5/sqrt(3)
[0.5774, 1.7321, 1.], # 1/sqrt(3)
[1., 1., 1.],
[0.4, 0.4, 1],
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False)
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pretrained=dict(pts='open-mmlab://regnetx_400mf'),
pts_voxel_layer=dict(
max_num_points=64,
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
pts_backbone=dict(
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 160, 384],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
file_client_args = dict(backend='disk')
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
@@ -9,39 +9,57 @@ It is recommended to symlink the dataset root to `$MMDETECTION/data`.
If your folder structure is different, you may need to change the corresponding paths in config files.
```
mmdetection3d
├── mmdet3d
├── tools
├── configs
├── data
│   ├── nuscenes
│   │   ├── maps
│   │   ├── samples
│   │   ├── sweeps
│   │   ├── v1.0-test
│   │   ├── v1.0-trainval
│   ├── kitti
│   │   ├── ImageSets
│   │   ├── testing
│   │   │   ├── calib
│   │   │   ├── image_2
│   │   │   ├── velodyne
│   │   ├── training
│   │   │   ├── calib
│   │   │   ├── image_2
│   │   │   ├── label_2
│   │   │   ├── velodyne
│   ├── scannet
│   │   ├── meta_data
│   │   ├── scans
│   │   ├── batch_load_scannet_data.py
│   │   ├── load_scannet_data.py
│   │   ├── scannet_utils.py
│   │   ├── README.md
│   ├── sunrgbd
│   │   ├── OFFICIAL_SUNRGBD
│   │   ├── matlab
│   │   ├── sunrgbd_data.py
│   │   ├── sunrgbd_utils.py
│   │   ├── README.md
```

Download nuScenes V1.0 full dataset data [HERE](https://www.nuscenes.org/download). Prepare nuscenes data by running

```bash
python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes
```

Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare kitti data by running

```bash
python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti
```

To prepare scannet data, please see [scannet](../data/scannet/README.md).

To prepare sunrgbd data, please see [sunrgbd](../data/sunrgbd/README.md).

For using custom datasets, please refer to [Tutorials 2: Adding New Dataset](tutorials/new_dataset.md).
@@ -148,7 +148,7 @@ def center_to_corner_box3d(centers,
        dims (float array, shape=[N, 3]): dimensions in kitti label file.
        angles (float array, shape=[N]): rotation_y in kitti label file.
        origin (list or array or float): origin point relate to smallest point.
            use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
        axis (int): rotation axis. 1 for camera and 2 for lidar.

    Returns:
        [type]: [description]
@@ -74,7 +74,7 @@ def rotation_3d_in_axis(points, angles, axis=0):
def center_to_corner_box3d(centers,
                           dims,
                           angles,
                           origin=(0.5, 1.0, 0.5),
                           axis=1):
    """convert kitti locations, dimensions and angles to corners
@@ -83,7 +83,7 @@ def center_to_corner_box3d(centers,
        dims (float array, shape=[N, 3]): dimensions in kitti label file.
        angles (float array, shape=[N]): rotation_y in kitti label file.
        origin (list or array or float): origin point relate to smallest point.
            use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
        axis (int): rotation axis. 1 for camera and 2 for lidar.

    Returns:
        [type]: [description]
@@ -28,28 +28,28 @@ class PartialBinBasedBBoxCoder(BaseBBoxCoder):
        """Encode ground truth to prediction targets.

        Args:
            gt_bboxes_3d (BaseInstance3DBoxes): gt bboxes with shape (n, 7).
            gt_labels_3d (Tensor): gt classes.

        Returns:
            tuple: Targets of center, size and direction.
        """
        # generate center target
        center_target = gt_bboxes_3d.gravity_center

        # generate bbox size target
        size_class_target = gt_labels_3d
        size_res_target = gt_bboxes_3d.dims - gt_bboxes_3d.tensor.new_tensor(
            self.mean_sizes)[size_class_target]

        # generate dir target
        box_num = gt_labels_3d.shape[0]
        if self.with_rot:
            (dir_class_target,
             dir_res_target) = self.angle2class(gt_bboxes_3d.yaw)
        else:
            dir_class_target = gt_labels_3d.new_zeros(box_num)
            dir_res_target = gt_bboxes_3d.tensor.new_zeros(box_num)

        return (center_target, size_class_target, size_res_target,
                dir_class_target, dir_res_target)
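The size residual above is simply `dims - mean_sizes[label]`; a minimal pure-PyTorch sketch of that step (toy mean sizes and labels, not the mmdet3d coder class itself):

```python
import torch

# toy per-class mean sizes (w, l, h); values are illustrative only
mean_sizes = torch.tensor([[1.9, 4.6, 1.7],   # car
                           [0.6, 0.8, 1.7]])  # pedestrian
gt_dims = torch.tensor([[2.0, 4.8, 1.8],
                        [0.7, 0.9, 1.8]])
gt_labels = torch.tensor([0, 1])

# same computation as size_res_target in encode():
# residual between each gt size and the mean size of its class
size_res_target = gt_dims - mean_sizes[gt_labels]
print(size_res_target)
```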
@@ -83,8 +83,6 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
    Return:
        iou: (M, N) not support aligned mode currently
    """
    assert bboxes1.size(-1) == bboxes2.size(-1) == 7
    assert coordinate in ['camera', 'lidar']
@@ -12,7 +12,7 @@ class BaseInstance3DBoxes(object):
    Note:
        The box is bottom centered, i.e. the relative position of origin in
        the box is (0.5, 0.5, 0).

    Args:
        tensor (torch.Tensor | np.ndarray | list): a Nxbox_dim matrix.
@@ -23,11 +23,11 @@ class BaseInstance3DBoxes(object):
            If False, the value of yaw will be set to 0 as minmax boxes.
            Default to True.
        origin (tuple): The relative position of origin in the box.
            Default to (0.5, 0.5, 0). This will guide the box be converted to
            (0.5, 0.5, 0) mode.
    """

    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
        if isinstance(tensor, torch.Tensor):
            device = tensor.device
        else:
@@ -40,18 +40,21 @@ class BaseInstance3DBoxes(object):
                dtype=torch.float32, device=device)
        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()

        if tensor.shape[-1] == 6:
            # If the dimension of boxes is 6, we expand box_dim by padding
            # 0 as a fake yaw and set with_yaw to False.
            assert box_dim == 6
            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
            tensor = torch.cat((tensor, fake_rot), dim=-1)
            self.box_dim = box_dim + 1
            self.with_yaw = False
        else:
            self.box_dim = box_dim
            self.with_yaw = with_yaw
        self.tensor = tensor

        if origin != (0.5, 0.5, 0):
            dst = self.tensor.new_tensor((0.5, 0.5, 0))
            src = self.tensor.new_tensor(origin)
            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
@@ -121,7 +124,7 @@ class BaseInstance3DBoxes(object):
        The relative position of the centers in different kinds of
        boxes are different, e.g., the relative center of a boxes is
        (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
        It is recommended to use `bottom_center` or `gravity_center`
        for more clear usage.
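The `origin` handling above only shifts box centers by `dims * (dst - src)`; a small self-contained check of that re-centering in plain PyTorch (not the mmdet3d class itself), assuming the input boxes are given with their gravity center:

```python
import torch

# one box: x, y, z, w, l, h, yaw, with (x, y, z) at the gravity center,
# i.e. relative origin (0.5, 0.5, 0.5)
boxes = torch.tensor([[0.0, 0.0, 1.0, 2.0, 4.0, 2.0, 0.0]])
src = boxes.new_tensor((0.5, 0.5, 0.5))   # origin of the input boxes
dst = boxes.new_tensor((0.5, 0.5, 0.0))   # target origin: bottom center

# same update as in BaseInstance3DBoxes.__init__
boxes[:, :3] += boxes[:, 3:6] * (dst - src)
print(boxes[0, :3])  # z drops from 1.0 to 0.0, i.e. by half the height
```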
@@ -22,7 +22,7 @@ class Box3DMode(IntEnum):
            | /
    left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.

    Coordinates in camera:
@@ -49,7 +49,7 @@ class Box3DMode(IntEnum):
          | /
          0 ------> x right

    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.
    """
@@ -20,7 +20,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
            v
       down y

    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
    and the yaw is around the y axis, thus the rotation axis=1.
    The yaw is 0 at the positive direction of x axis, and increases from
    the positive direction of x to the positive direction of z.
import numpy as np
import torch

from mmdet3d.ops import points_in_boxes_batch
from .base_box3d import BaseInstance3DBoxes
from .utils import limit_period, rotation_3d_in_axis
@@ -17,7 +18,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
        | /
        0 ------> x right (yaw=0)

    The relative coordinate of bottom center in a Depth box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.
    The yaw is 0 at the positive direction of x axis, and increases from
    the positive direction of x to the positive direction of y.
@@ -74,7 +75,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
            device=dims.device, dtype=dims.dtype)
        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]

        # use relative origin (0.5, 0.5, 0)
        corners_norm = corners_norm - dims.new_tensor([0.5, 0.5, 0])
        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
@@ -201,3 +202,30 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes):
        from .box_3d_mode import Box3DMode
        return Box3DMode.convert(
            box=self, src=Box3DMode.DEPTH, dst=dst, rt_mat=rt_mat)

    def points_in_boxes(self, points):
        """Find points that are in boxes (CUDA)

        Args:
            points (torch.Tensor): [1, M, 3] or [M, 3], [x, y, z]
                in LiDAR coordinate.

        Returns:
            torch.Tensor: The box index of each point in, shape is (B, M, T).
        """
        from .box_3d_mode import Box3DMode

        # to lidar
        points_lidar = points.clone()
        points_lidar = points_lidar[..., [1, 0, 2]]
        points_lidar[..., 1] *= -1
        if points.dim() == 2:
            points_lidar = points_lidar.unsqueeze(0)
        else:
            assert points.dim() == 3 and points_lidar.shape[0] == 1

        boxes_lidar = self.convert_to(Box3DMode.LIDAR).tensor
        boxes_lidar = boxes_lidar.to(points.device).unsqueeze(0)
        box_idxs_of_pts = points_in_boxes_batch(points_lidar, boxes_lidar)
        return box_idxs_of_pts.squeeze(0)
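The depth-to-LiDAR point conversion in `points_in_boxes` above is just an axis swap plus a sign flip; a tiny standalone illustration in plain PyTorch with a toy point:

```python
import torch

# a point in depth coordinates: x right, y forward, z up
points_depth = torch.tensor([[1.0, 2.0, 0.5]])

# same re-ordering as in DepthInstance3DBoxes.points_in_boxes:
# lidar x <- depth y, lidar y <- -depth x, z unchanged
points_lidar = points_depth[..., [1, 0, 2]].clone()
points_lidar[..., 1] *= -1
print(points_lidar)  # lidar point: [2.0, -1.0, 0.5]
```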
@@ -18,7 +18,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
                 | /
    (yaw=pi) left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.
    The yaw is 0 at the negative direction of y axis, and increases from
    the negative direction of y to the positive direction of x.
@@ -83,7 +83,9 @@ def bbox3d2result(bboxes, scores, labels):
        dict(Tensor): bbox results in cpu mode
    """
    return dict(
        boxes_3d=bboxes.to('cpu'),
        scores_3d=scores.cpu(),
        labels_3d=labels.cpu())


def upright_depth_to_lidar_torch(points=None,
@@ -3,47 +3,6 @@ import torch
from mmcv.utils import print_log
from terminaltables import AsciiTable


def average_precision(recalls, precisions, mode='area'):
    """Calculate average precision (for single or multiple scales).
@@ -61,7 +20,10 @@ def average_precision(recalls, precisions, mode='area'):
    if recalls.ndim == 1:
        recalls = recalls[np.newaxis, :]
        precisions = precisions[np.newaxis, :]

    assert recalls.shape == precisions.shape
    assert recalls.ndim == 2

    num_scales = recalls.shape[0]
    ap = np.zeros(num_scales, dtype=np.float32)
    if mode == 'area':
@@ -103,40 +65,42 @@ def eval_det_cls(pred, gt, iou_thr=None):
        float: scalar, average precision.
    """
    # {img_id: {'bbox': box structure, 'det': matched list}}
    class_recs = {}
    npos = 0
    for img_id in gt.keys():
        cur_gt_num = len(gt[img_id])
        if cur_gt_num != 0:
            gt_cur = torch.zeros([cur_gt_num, 7], dtype=torch.float32)
            for i in range(cur_gt_num):
                gt_cur[i] = gt[img_id][i].tensor
            bbox = gt[img_id][0].new_box(gt_cur)
        else:
            bbox = gt[img_id]
        det = [[False] * len(bbox) for i in iou_thr]
        npos += len(bbox)
        class_recs[img_id] = {'bbox': bbox, 'det': det}

    # construct dets
    image_ids = []
    confidence = []
    ious = []
    for img_id in pred.keys():
        cur_num = len(pred[img_id])
        if cur_num == 0:
            continue
        pred_cur = torch.zeros((cur_num, 7), dtype=torch.float32)
        box_idx = 0
        for box, score in pred[img_id]:
            image_ids.append(img_id)
            confidence.append(score)
            pred_cur[box_idx] = box.tensor
            box_idx += 1
        pred_cur = box.new_box(pred_cur)
        gt_cur = class_recs[img_id]['bbox']
        if len(gt_cur) > 0:
            # calculate iou in each image
            iou_cur = pred_cur.overlaps(pred_cur, gt_cur)
            for i in range(cur_num):
                ious.append(iou_cur[i])
        else:
@@ -157,12 +121,12 @@ def eval_det_cls(pred, gt, iou_thr=None):
    for d in range(nd):
        R = class_recs[image_ids[d]]
        iou_max = -np.inf
        BBGT = R['bbox']
        cur_iou = ious[d]

        if len(BBGT) > 0:
            # compute overlaps
            for j in range(len(BBGT)):
                # iou = get_iou_main(get_iou_func, (bb, BBGT[j,...]))
                iou = cur_iou[j]
                if iou > iou_max:
@@ -194,61 +158,22 @@ def eval_det_cls(pred, gt, iou_thr=None):
    return ret


def eval_map_recall(pred, gt, ovthresh=None):
    """Evaluate mAP and recall.

    Generic functions to compute precision/recall for object detection
    for multiple classes.

    Args:
        pred (dict): Information of detection results,
            which maps class_id and predictions.
        gt (dict): information of gt results, which maps class_id and gt.
        ovthresh (list[float]): iou threshold.
            Default: None.

    Return:
        tuple[dict]: dict results of recall, AP, and precision for all classes.
    """

    ret_values = []
    for classname in gt.keys():
@@ -272,14 +197,24 @@ def eval_map_recall(det_infos, gt_infos, ovthresh=None):
    return recall, precision, ap


def indoor_eval(gt_annos,
                dt_annos,
                metric,
                label2cat,
                logger=None,
                box_type_3d=None,
                box_mode_3d=None):
    """Scannet Evaluation.

    Evaluate the result of the detection.

    Args:
        gt_annos (list[dict]): GT annotations.
        dt_annos (list[dict]): Detection annotations. the dict
            includes the following keys
            - labels_3d (Tensor): Labels of boxes.
            - boxes_3d (BaseInstance3DBoxes): 3d bboxes in Depth coordinate.
            - scores_3d (Tensor): Scores of boxes.
        metric (list[float]): AP IoU thresholds.
        label2cat (dict): {label: cat}.
        logger (logging.Logger | str | None): The way to print the mAP
@@ -288,24 +223,48 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None):
    Return:
        dict: Dict of results.
    """
    assert len(dt_annos) == len(gt_annos)
    pred = {}  # map {class_id: pred}
    gt = {}  # map {class_id: gt}
    for img_id in range(len(dt_annos)):
        # parse detected annotations
        det_anno = dt_annos[img_id]
        for i in range(len(det_anno['labels_3d'])):
            label = det_anno['labels_3d'].numpy()[i]
            bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i]
            score = det_anno['scores_3d'].numpy()[i]
            if label not in pred:
                pred[int(label)] = {}
            if img_id not in pred[label]:
                pred[int(label)][img_id] = []
            if label not in gt:
                gt[int(label)] = {}
            if img_id not in gt[label]:
                gt[int(label)][img_id] = []
            pred[int(label)][img_id].append((bbox, score))

        # parse gt annotations
        gt_anno = gt_annos[img_id]
        if gt_anno['gt_num'] != 0:
            gt_boxes = box_type_3d(
                gt_anno['gt_boxes_upright_depth'],
                box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1],
                origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d)
            labels_3d = gt_anno['class']
        else:
            gt_boxes = box_type_3d(np.array([], dtype=np.float32))
            labels_3d = np.array([], dtype=np.int64)

        for i in range(len(labels_3d)):
            label = labels_3d[i]
            bbox = gt_boxes[i]
            if label not in gt:
                gt[label] = {}
            if img_id not in gt[label]:
                gt[label][img_id] = []
            gt[label][img_id].append(bbox)

    rec, prec, ap = eval_map_recall(pred, gt, metric)
    ret_dict = dict()
    header = ['classes']
    table_columns = [[label2cat[label]
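The refactored `indoor_eval` above first groups boxes into a `{class_id: {img_id: [...]}}` mapping before calling `eval_map_recall`; a toy standalone sketch of that grouping step (plain Python, made-up labels and boxes, using `setdefault` as a shorthand for the explicit membership checks in the real code):

```python
# toy flat predictions for one image: (label, box, score)
detections = [(0, 'box_a', 0.9), (1, 'box_b', 0.7), (0, 'box_c', 0.4)]
img_id = 0

pred = {}  # {class_id: {img_id: [(box, score), ...]}}
for label, box, score in detections:
    # create the nested dicts lazily, equivalent to indoor_eval's if-not-in checks
    pred.setdefault(int(label), {}).setdefault(img_id, []).append((box, score))

print(pred)  # {0: {0: [('box_a', 0.9), ('box_c', 0.4)]}, 1: {0: [('box_b', 0.7)]}}
```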
@@ -6,11 +6,39 @@ import numpy as np
from torch.utils.data import Dataset

from mmdet.datasets import DATASETS
from ..core.bbox import (Box3DMode, CameraInstance3DBoxes,
                         DepthInstance3DBoxes, LiDARInstance3DBoxes)
from .pipelines import Compose


@DATASETS.register_module()
class Custom3DDataset(Dataset):
    """Customized 3D dataset

    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
    dataset.

    Args:
        data_root (str): Path of dataset root.
        ann_file (str): Path of annotation file.
        pipeline (list[dict], optional): Pipeline used for data processing.
            Defaults to None.
        classes (tuple[str], optional): Classes used in the dataset.
            Defaults to None.
        modality ([dict], optional): Modality to specify the sensor data used
            as input. Defaults to None.
        box_type_3d (str, optional): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
            Defaults to 'LiDAR'. Available options includes
            - 'LiDAR': box in LiDAR coordinates
            - 'Depth': box in depth coordinates, usually for indoor dataset
            - 'Camera': box in camera coordinates
        filter_empty_gt (bool, optional): Whether to filter empty GT.
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
    """

    def __init__(self,
                 data_root,
@@ -18,6 +46,7 @@ class Custom3DDataset(Dataset):
                 pipeline=None,
                 classes=None,
                 modality=None,
                 box_type_3d='LiDAR',
                 filter_empty_gt=True,
                 test_mode=False):
        super().__init__()
@@ -26,6 +55,7 @@ class Custom3DDataset(Dataset):
        self.test_mode = test_mode
        self.modality = modality
        self.filter_empty_gt = filter_empty_gt
        self.get_box_type(box_type_3d)

        self.CLASSES = self.get_classes(classes)
        self.data_infos = self.load_annotations(self.ann_file)
@@ -40,6 +70,21 @@ class Custom3DDataset(Dataset):
    def load_annotations(self, ann_file):
        return mmcv.load(ann_file)

    def get_box_type(self, box_type):
        box_type_lower = box_type.lower()
        if box_type_lower == 'lidar':
            self.box_type_3d = LiDARInstance3DBoxes
            self.box_mode_3d = Box3DMode.LIDAR
        elif box_type_lower == 'camera':
            self.box_type_3d = CameraInstance3DBoxes
            self.box_mode_3d = Box3DMode.CAM
        elif box_type_lower == 'depth':
            self.box_type_3d = DepthInstance3DBoxes
            self.box_mode_3d = Box3DMode.DEPTH
        else:
            raise ValueError('Only "box_type" of "camera", "lidar", "depth"'
                             f' are supported, got {box_type}')

    def get_data_info(self, index):
        info = self.data_infos[index]
        sample_idx = info['point_cloud']['lidar_idx']
@@ -61,6 +106,8 @@ class Custom3DDataset(Dataset):
        results['bbox3d_fields'] = []
        results['pts_mask_fields'] = []
        results['pts_seg_fields'] = []
        results['box_type_3d'] = self.box_type_3d
        results['box_mode_3d'] = self.box_mode_3d

    def prepare_train_data(self, index):
        input_dict = self.get_data_info(index)
@@ -139,7 +186,13 @@ class Custom3DDataset(Dataset):
        gt_annos = [info['annos'] for info in self.data_infos]
        label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
        ret_dict = indoor_eval(
            gt_annos,
            results,
            iou_thr,
            label2cat,
            logger=logger,
            box_type_3d=self.box_type_3d,
            box_mode_3d=self.box_mode_3d)
        return ret_dict
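With the new `box_type_3d` argument above, a dataset config can pick the box representation per dataset; a minimal hypothetical snippet (the dataset type, paths, and info file name are illustrative placeholders):

```python
# hypothetical dataset entry selecting depth-coordinate boxes,
# e.g. for an indoor dataset such as ScanNet
data = dict(
    train=dict(
        type='ScanNetDataset',
        data_root='data/scannet/',
        ann_file='data/scannet/scannet_infos_train.pkl',
        box_type_3d='Depth',  # -> DepthInstance3DBoxes / Box3DMode.DEPTH
        filter_empty_gt=True,
        test_mode=False))
```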
@@ -9,7 +9,7 @@ import torch
from mmcv.utils import print_log

from mmdet.datasets import DATASETS
from ..core.bbox import Box3DMode, CameraInstance3DBoxes
from .custom_3d import Custom3DDataset
from .utils import remove_dontcare
@@ -27,6 +27,8 @@ class KittiDataset(Custom3DDataset):
                 pipeline=None,
                 classes=None,
                 modality=None,
                 box_type_3d='LiDAR',
                 filter_empty_gt=True,
                 test_mode=False):
        super().__init__(
            data_root=data_root,
@@ -34,6 +36,8 @@ class KittiDataset(Custom3DDataset):
            pipeline=pipeline,
            classes=classes,
            modality=modality,
            box_type_3d=box_type_3d,
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode)

        self.root_split = os.path.join(self.data_root, split)
@@ -90,7 +94,7 @@ class KittiDataset(Custom3DDataset):
        # convert gt_bboxes_3d to velodyne coordinates
        gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
            self.box_mode_3d, np.linalg.inv(rect @ Trv2c))
        gt_bboxes = annos['bbox']

        selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
@@ -395,73 +399,66 @@ class KittiDataset(Custom3DDataset):
    def convert_valid_bboxes(self, box_dict, info):
        # TODO: refactor this function
        box_preds = box_dict['boxes_3d']
        scores = box_dict['scores_3d']
        labels = box_dict['labels_3d']
        sample_idx = info['image']['image_idx']
        # TODO: remove the hack of yaw
        box_preds.tensor[:, -1] = box_preds.tensor[:, -1] - np.pi
        box_preds.limit_yaw(offset=0.5, period=np.pi * 2)

        if len(box_preds) == 0:
            return dict(
                bbox=np.zeros([0, 4]),
                box3d_camera=np.zeros([0, 7]),
                box3d_lidar=np.zeros([0, 7]),
                scores=np.zeros([0]),
                label_preds=np.zeros([0, 4]),
                sample_idx=sample_idx)

        from mmdet3d.core.bbox import box_torch_ops
        rect = info['calib']['R0_rect'].astype(np.float32)
        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
        P2 = info['calib']['P2'].astype(np.float32)
        img_shape = info['image']['image_shape']
        P2 = box_preds.tensor.new_tensor(P2)

        box_preds_camera = box_preds.convert_to(Box3DMode.CAM, rect @ Trv2c)

        box_corners = box_preds_camera.corners
        box_corners_in_image = box_torch_ops.project_to_image(box_corners, P2)
        # box_corners_in_image: [N, 8, 2]
        minxy = torch.min(box_corners_in_image, dim=1)[0]
        maxxy = torch.max(box_corners_in_image, dim=1)[0]
        box_2d_preds = torch.cat([minxy, maxxy], dim=1)
        # Post-processing
        # check box_preds_camera
        image_shape = box_preds.tensor.new_tensor(img_shape)
        valid_cam_inds = ((box_preds_camera.tensor[:, 0] < image_shape[1]) &
                          (box_preds_camera.tensor[:, 1] < image_shape[0]) &
                          (box_preds_camera.tensor[:, 2] > 0) &
                          (box_preds_camera.tensor[:, 3] > 0))
        # check box_preds
        limit_range = box_preds.tensor.new_tensor(self.pcd_limit_range)
        valid_pcd_inds = ((box_preds.center > limit_range[:3]) &
                          (box_preds.center < limit_range[3:]))
        valid_inds = valid_cam_inds & valid_pcd_inds.all(-1)

        if valid_inds.sum() > 0:
            return dict(
                bbox=box_2d_preds[valid_inds, :].numpy(),
                box3d_camera=box_preds_camera[valid_inds].tensor.numpy(),
                box3d_lidar=box_preds[valid_inds].tensor.numpy(),
                scores=scores[valid_inds].numpy(),
                label_preds=labels[valid_inds].numpy(),
                sample_idx=sample_idx,
            )
        else:
            return dict(
                bbox=np.zeros([0, 4]),
                box3d_camera=np.zeros([0, 7]),
                box3d_lidar=np.zeros([0, 7]),
                scores=np.zeros([0]),
                label_preds=np.zeros([0, 4]),
                sample_idx=sample_idx,
            )
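The 2D boxes returned by `convert_valid_bboxes` are just the axis-aligned extent of the projected 3D corners; a minimal pure-PyTorch illustration of that reduction with toy corner values:

```python
import torch

# projected box corners in the image plane: [N, 8, 2] (toy values)
box_corners_in_image = torch.tensor([[[10., 20.], [30., 22.], [28., 40.], [12., 38.],
                                      [11., 21.], [31., 23.], [29., 41.], [13., 39.]]])

# same reduction as in convert_valid_bboxes: axis-aligned 2D box per 3D box
minxy = torch.min(box_corners_in_image, dim=1)[0]
maxxy = torch.max(box_corners_in_image, dim=1)[0]
box_2d_preds = torch.cat([minxy, maxxy], dim=1)
print(box_2d_preds)  # [[10., 20., 31., 41.]]
```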