Unverified Commit 579b0799 authored by Wenwei Zhang, committed by GitHub

Bump to V0.6.0 (#118)



* Add gitlab CI back

* clean isort

* Update gitlab CI version

* Update mmcv install

* fix unit test bug

* waymo

* Use new flake8

* Update mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py, tools/data_converter/waymo_converter.py files

* Add baseline configs for waymo

* fix linting

* yapf reformat

* update waymo results

* Update waymo model zoo and docs

* Bump v0.6.0

* Fix a minor bug when converting waymo data

* Fix cmds in the waymo doc

* Fix setup.cfg to pass isort test

* Fix waymo configs

* Update model zoo link & doc

* update version date

* clean ci
Co-authored-by: wangtai <wangtai@sensetime.com>
Co-authored-by: Tai-Wang <tab_wang@outlook.com>
parent 62ce67c0
import argparse
import glob
import json
import mmcv
import shutil
import subprocess
import torch
from os import path as osp

# build schedule look-up table to automatically find the final model
SCHEDULES_LUT = {
......
...@@ -8,7 +8,7 @@
[![license](https://img.shields.io/github/license/open-mmlab/mmdetection3d.svg)](https://github.com/open-mmlab/mmdetection3d/blob/master/LICENSE)
**News**: We released the codebase v0.6.0.
Documentation: https://mmdetection3d.readthedocs.io/
...@@ -56,7 +56,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
## Changelog
v0.6.0 was released in 20/9/2020.
Please refer to [changelog.md](docs/changelog.md) for details and release history.
## Benchmark and model zoo
...@@ -74,6 +74,7 @@ Results and models are available in the [model zoo](docs/model_zoo.md).
| 3DSSD | ✗ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ |
| Part-A2 | ☐ | ☐ | ☐ | ✗ | ☐ | ✓ | ☐ |
| MVXNet | ☐ | ☐ | ☐ | ✗ | ☐ | ✓ | ☐ |
| CenterPoint | ☐ | ☐ | ☐ | ✗ | ☐ | ✓ | ☐ |
Other features
- [x] [Dynamic Voxelization](configs/carafe/README.md)
......
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car', 'Pedestrian', 'Cyclist']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
points_loader=dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=24)
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if you use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'waymo_dbinfos_train.pkl',
rate=1.0,
prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=class_names,
sample_groups=dict(Car=15),
points_loader=dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
file_client_args=file_client_args))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=6,
use_dim=5,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_train.pkl',
split='training',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
# load one frame every five frames
load_interval=5)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'waymo_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=24)
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.32, 0.32, 6]
model = dict(
type='MVXFasterRCNN',
pts_voxel_layer=dict(
max_num_points=20,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
voxel_size=voxel_size,
max_voxels=(32000, 32000)),
pts_voxel_encoder=dict(
type='HardVFE',
in_channels=5,
feat_channels=[64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[1, 2, 2],
out_channels=[64, 128, 256]),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=3,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
[-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
[-74.88, -74.88, 0, 74.88, 74.88, 0]],
sizes=[
[2.08, 4.73, 1.77], # car
[0.84, 1.81, 1.77], # cyclist
[0.84, 0.91, 1.74] # pedestrian
],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=[
dict( # car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=4096,
nms_thr=0.25,
score_thr=0.1,
min_bbox_size=0,
max_num=500))
...@@ -48,6 +48,6 @@ We follow the below style to name config files. Contributors are advised to foll
|[SECFPN](./centerpoint_0075voxel_second_secfpn_dcn_4x8_cyclic_20e_nus.py)|voxel (0.075)|✓|✗||||||
|[SECFPN](./centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py)|voxel (0.075)|✓|✓||||||
|[SECFPN](./centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py)|pillar (0.2)|✗|✗||||||
|[SECFPN](./centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus.py)|pillar (0.2)|✗|✓|||48.72|59.40||
|[SECFPN](./centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus.py)|pillar (0.2)|✓|✗||||||
|[SECFPN](./centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus.py)|pillar (0.2)|✓|✓||||||
...@@ -16,4 +16,4 @@ We implement H3DNet and provide the result and checkpoints on ScanNet datasets.
### ScanNet
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 |AP@0.5| Download |
| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |
| [MultiBackbone](./h3dnet_scannet-3d-18class.py) | 3x |7.9||66.43|48.01|[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/h3dnet/h3dnet_scannet-3d-18class/h3dnet_scannet-3d-18class_20200830_000136-02e36246.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/h3dnet/h3dnet_scannet-3d-18class/h3dnet_scannet-3d-18class_20200830_000136.log.json) |
...@@ -37,3 +37,21 @@ We implement PointPillars and provide the results and checkpoints on KITTI and n
| :---------: | :-----: | :------: | :------------: | :----: |:----: | :------: |
|[SECFPN](./hv_pointpillars_secfpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||13.4|13.4||
|[FPN](./hv_pointpillars_fpn_sbn-all_4x8_2x_lyft-3d.py)|2x|||14.0|14.2||
### Waymo
| Backbone | Load Interval | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download |
| :-------: | :-----------: |:-----:| :------:| :------: | :------------: | :----: | :-----: | :-----: | :-----: | :------: |
| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car.py)|5|Car|2x|7.76||70.2|69.6|62.6|62.1|[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car_20200901_204315-302fc3e7.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car_20200901_204315.log.json)|
| [SECFPN](./hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py)|5|3 Class|2x|8.12||64.7|57.6|58.4|52.1|[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20200831_204144-d1a706b1.pth) &#124; [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class_20200831_204144.log.json)|
| above @ Car|||2x|8.12||68.5|67.9|60.1|59.6| |
| above @ Pedestrian|||2x|8.12||67.8|50.6|59.6|44.3| |
| above @ Cyclist|||2x|8.12||57.7|54.4|55.5|52.4| |
Note:
- **Metric**: For models trained with 3 classes, the average APH@L2 (mAPH@L2) over all categories is reported and used to rank the model; a short example of this computation follows these notes. For models trained with only 1 class, that class's APH@L2 is reported and used to rank the model.
- **Data Split**: Here we provide several baselines for the waymo dataset, adapted from the original PointPillars implementation. Specifically, we divide the dataset into 5 folds (denoted as D5 in the config names) for efficient experiments. Using the complete dataset can boost the performance a lot, especially for the detection of cyclists and pedestrians, where an improvement of more than 5 mAP or mAPH can be expected. A more complete benchmark with more models and methods is coming soon.
- **Implementation Details**: We basically follow the implementation in the [paper](https://arxiv.org/pdf/1912.04838.pdf) in terms of the network architecture (with a stride of 1 for the first convolutional block). Different settings of voxelization, data augmentation and hyperparameters make these baselines outperform those in the paper by about 7 mAP for cars and 4 mAP for pedestrians with only a subset of the whole dataset.
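The snippet below is a hedged sketch of how the ranking metric described in the notes is computed; the per-class APH@L2 values are copied from the table above.

```python
# mAPH@L2 of the 3-class model is the plain average of the per-class APH@L2
aph_l2 = {'Car': 59.6, 'Pedestrian': 44.3, 'Cyclist': 52.4}
maph_l2 = sum(aph_l2.values()) / len(aph_l2)
print(round(maph_l2, 1))  # 52.1, matching the 3-class row in the table
```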
_base_ = [
'../_base_/models/hv_pointpillars_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-3class.py',
'../_base_/schedules/schedule_2x.py',
'../_base_/default_runtime.py',
]
_base_ = [
'../_base_/models/hv_pointpillars_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-car.py',
'../_base_/schedules/schedule_2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]],
sizes=[[2.08, 4.73, 1.77]],
rotations=[0, 1.57],
reshape_out=True)))
# model training and testing settings
train_cfg = dict(
_delete_=True,
pts=dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False))
...@@ -8,9 +8,8 @@
"""
import csv
import numpy as np
import os
from plyfile import PlyData
......
## Changelog
### v0.6.0 (20/9/2020)
#### Highlights
- Support new methods [H3DNet](https://arxiv.org/abs/2006.05682), [3DSSD](https://arxiv.org/abs/2002.10187), [CenterPoint](https://arxiv.org/abs/2006.11275).
- Support new dataset [Waymo](https://waymo.com/open/) (with PointPillars baselines) and [nuImages](https://www.nuscenes.org/nuimages) (with Mask R-CNN and Cascade Mask R-CNN baselines).
- Support Batch Inference
- Support PyTorch 1.6
- Start to publish the `mmdet3d` package to PyPI since v0.5.0. You can install mmdet3d via `pip install mmdet3d`.
#### Backwards Incompatible Changes
- Support Batch Inference (#95, #103, #116): MMDetection3D v0.6.0 migrates to support batch inference based on MMDetection >= v2.4.0. This change influences all the test APIs in MMDetection3D and downstream codebases.
- Start to use the collect environment function from MMCV (#113): MMDetection3D v0.6.0 migrates to the `collect_env` function in MMCV.
`get_compiler_version` and `get_compiling_cuda_version` compiled in `mmdet3d.ops.utils` are removed. Please import these two functions from `mmcv.ops` instead (see the import example below).
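For example, downstream code that used the removed helpers can switch to the MMCV imports; a minimal sketch:

```python
# the helpers are no longer compiled in mmdet3d.ops.utils; import them from MMCV
from mmcv.ops import get_compiler_version, get_compiling_cuda_version

print(get_compiler_version())
print(get_compiling_cuda_version())
```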
#### Bug Fixes
- Rename CosineAnealing to CosineAnnealing (#57)
- Fix a device inconsistency bug in 3D IoU computation (#69)
- Fix a minor bug in json2csv of lyft dataset (#78)
- Add missed test data for pointnet modules (#85)
- Fix `use_valid_flag` bug in `CustomDataset` (#106)
#### New Features
- Support [nuImages](https://www.nuscenes.org/nuimages) dataset by converting them into coco format and release Mask R-CNN and Cascade Mask R-CNN baseline models (#91, #94)
- Support to publish to PyPI in github-action (#17, #19, #25, #39, #40)
- Support CBGSDataset and make it generally applicable to all the supported datasets (#75, #94)
- Support [H3DNet](https://arxiv.org/abs/2006.05682) and release models on ScanNet dataset (#53, #58, #105)
- Support Fusion Point Sampling used in [3DSSD](https://arxiv.org/abs/2002.10187) (#66)
- Add `BackgroundPointsFilter` to filter background points in data pipeline (#84)
- Support pointnet2 with multi-scale grouping in backbone and refactor pointnets (#82)
- Support dilated ball query used in [3DSSD](https://arxiv.org/abs/2002.10187) (#96)
- Support [3DSSD](https://arxiv.org/abs/2002.10187) and release models on KITTI dataset (#83, #100, #104)
- Support [CenterPoint](https://arxiv.org/abs/2006.11275) and release models on nuScenes dataset (#49, #92)
- Support [Waymo](https://waymo.com/open/) dataset and release PointPillars baseline models (#118)
- Allow `LoadPointsFromMultiSweeps` to pad empty sweeps and select multiple sweeps randomly (#67)
#### Improvements
- Fix all warnings and bugs in PyTorch 1.6.0 (#70, #72)
- Update issue templates (#43)
- Update unit tests (#20, #24, #30)
- Update documentation for using `ply` format point cloud data (#41)
- Use points loader to load point cloud data in ground truth (GT) samplers (#87)
- Unify version file of OpenMMLab projects by using `version.py` (#112)
- Remove unnecessary data preprocessing commands of SUN RGB-D dataset (#110)
### v0.5.0 (9/7/2020)
MMDetection3D is released.
# Getting Started
This page provides basic tutorials about the usage of MMDetection3D.
For installation instructions, please see [install.md](install.md).
## Prepare datasets
...@@ -31,6 +31,14 @@ mmdetection3d
│ │ │ ├── image_2
│ │ │ ├── label_2
│ │ │ ├── velodyne
│ ├── waymo
│ │ ├── waymo_format
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── testing
│ │ │ ├── gt.bin
│ │ ├── kitti_format
│ │ │ ├── ImageSets
│ ├── lyft
│ │ ├── v1.01-train
│ │ │ ├── v1.01-train (train_data)
...@@ -62,12 +70,6 @@ mmdetection3d
```
Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare kitti data by running
```bash
...@@ -82,6 +84,20 @@ wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/sec
python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti
```
Download the Waymo open dataset V1.2 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put the tfrecord files into the corresponding folders in `data/waymo/waymo_format/` and the data split txt files into `data/waymo/kitti_format/ImageSets`. Download the ground truth bin file for the validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. A tip: you can use `gsutil` to download the large-scale dataset from the command line; you can take this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) as an example for more details. Subsequently, prepare waymo data by running
```bash
python tools/create_data.py waymo --root-path ./data/waymo/ --out-dir ./data/waymo/ --workers 128 --extra-tag waymo
```
Note that if your local disk does not have enough space to save the converted data, you can change `--out-dir` to point anywhere else. Just remember to create the folders there in advance and link them back to `data/waymo/kitti_format` after the data conversion, e.g. as in the sketch below.
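A minimal sketch of that workflow, assuming the converted data was written to a hypothetical `/mnt/big_disk/waymo` and that the converter puts its KITTI-format output under `<out-dir>/kitti_format`:

```python
import os

# hypothetical --out-dir on a disk with enough free space
converted = '/mnt/big_disk/waymo/kitti_format'
target = 'data/waymo/kitti_format'
# after the conversion, link everything back into the expected location
for name in os.listdir(converted):
    dst = os.path.join(target, name)
    if not os.path.exists(dst):  # keep the pre-existing ImageSets folder
        os.symlink(os.path.join(converted, name), dst)
```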
Download nuScenes V1.0 full dataset data [HERE]( https://www.nuscenes.org/download). Prepare nuscenes data by running
```bash
python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes
```
Download Lyft 3D detection data [HERE](https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/data). Prepare Lyft data by running
```bash
...@@ -180,6 +196,39 @@ Assume that you have already downloaded the checkpoints to the directory `checkp
The generated results will be under the `./second_kitti_results` directory.
7. Test PointPillars on Lyft with 8 GPUs, generate the pkl files and make a submission to the leaderboard.
```shell
./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} configs/pointpillars/hv_pointpillars_fpn_sbn-2x8_2x_lyft-3d.py \
checkpoints/hv_pointpillars_fpn_sbn-2x8_2x_lyft-3d_latest.pth --out results/pp_lyft/results_challenge.pkl \
--format-only --options 'jsonfile_prefix=results/pp_lyft/results_challenge' \
'csv_path=results/pp_lyft/results_challenge.csv'
```
**Notice**: To generate submissions on Lyft, `csv_path` must be given in the options. After generating the csv file, you can make a submission with kaggle commands given on the [website](https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/submit).
8. Test PointPillars on waymo with 8 GPUs, and evaluate the mAP with waymo metrics.
```shell
./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} configs/pointpillars/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car.py \
checkpoints/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car_latest.pth --out results/waymo-car/results_eval.pkl \
--eval waymo --options 'pklfile_prefix=results/waymo-car/kitti_results' \
'submission_prefix=results/waymo-car/kitti_results'
```
**Notice**: For evaluation on waymo, please follow the [instruction](https://github.com/waymo-research/waymo-open-dataset/blob/master/docs/quick_start.md) to build the binary file `compute_detection_metrics_main` for metrics computation and put it into `mmdet3d/core/evaluation/waymo_utils/`. (Sometimes when using bazel to build `compute_detection_metrics_main`, an error `'round' is not a member of 'std'` may appear; simply remove the `std::` before `round` in that file.) `pklfile_prefix` should be given in the options for the bin file generation. For metrics, `waymo` is the recommended official evaluation prototype. Currently, evaluating with the `kitti` choice is adapted from KITTI, and the results for each difficulty are not exactly the same as defined by KITTI; most objects are currently marked with difficulty 0, which will be fixed in the future. The reasons for its instability include the heavy computation required for evaluation, the lack of occlusion and truncation in the converted data, a different definition of difficulty and a different method of computing average precision.
9. Test PointPillars on waymo with 8 GPUs, generate the bin files and make a submission to the leaderboard.
```shell
./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} configs/pointpillars/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car.py \
checkpoints/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car_latest.pth --out results/waymo-car/results_eval.pkl \
--format-only --options 'pklfile_prefix=results/waymo-car/kitti_results' \
'submission_prefix=results/waymo-car/kitti_results'
```
**Notice**: After generating the bin file, you can simply build the binary file `create_submission` and use it to create a submission file by following the [instruction](https://github.com/waymo-research/waymo-open-dataset/blob/master/docs/quick_start.md). For evaluation on the validation set with the eval server, you can generate a submission in the same way.
### Visualization
To see the SUNRGBD, ScanNet or KITTI points and detection results, you can run the following command
......
...@@ -49,6 +49,7 @@ c. Install [MMCV](https://mmcv.readthedocs.io/en/latest/).
*mmcv-full* is necessary: since MMDetection3D relies on MMDetection, the CUDA ops in *mmcv-full* are required.
The pre-built *mmcv-full* can be installed by running the following (available versions can be found [here](https://mmcv.readthedocs.io/en/latest/#install-with-pip)):
```shell
pip install mmcv-full==latest+torch1.5.0+cu101 -f https://openmmlab.oss-accelerate.aliyuncs.com/mmcv/dist/index.html
```
...@@ -74,11 +75,16 @@ pip install -r requirements/build.txt
pip install -v -e . # or "python setup.py develop"
```
**Important**:
1. The required versions of MMCV and MMDetection for different versions of MMDetection3D are as below. Please install the correct version of MMCV and MMDetection to avoid installation issues.
| MMDetection3D version | MMDetection version | MMCV version |
|:-------------------:|:-------------------:|:-------------------:|
| master | mmdet>=2.4.0 | mmcv-full>=1.1.1, <=1.2|
| 0.6.0 | mmdet>=2.4.0 | mmcv-full>=1.1.1, <=1.2|
| 0.5.0 | 2.3.0 | mmcv-full==1.0.5|
e. Clone the MMDetection3D repository.
...@@ -100,7 +106,7 @@ It is recommended that you run step d each time you pull some updates from githu
> Important: Be sure to remove the `./build` folder if you reinstall mmdet with a different CUDA/PyTorch version.
```shell
pip uninstall mmdet3d
rm -rf ./build
find . -name "*.so" | xargs rm
...@@ -115,7 +121,6 @@ you can install it before installing MMCV.
5. The code cannot be built in a CPU-only environment (where CUDA isn't available) for now.
### A from-scratch setup script
Here is a full script for setting up mmdetection with conda.
......
...@@ -6,7 +6,6 @@
- For fair comparison with other codebases, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` for all 8 GPUs. Note that this value is usually less than what `nvidia-smi` shows.
- We report the inference time as the total time of network forwarding and post-processing, excluding the data loading time. Results are obtained with the script [benchmark.py](https://github.com/open-mmlab/mmdetection/blob/master/tools/benchmark.py) which computes the average time on 2000 images.
## Baselines
### SECOND
...@@ -46,3 +45,7 @@ Please refer to [H3DNet](https://github.com/open-mmlab/mmdetection3d/blob/master
### 3DSSD
Please refer to [3DSSD](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/3dssd) for details.
### CenterPoint
Please refer to [CenterPoint](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) for details.
# A Brief Tutorial for Waymo Dataset
This page provides specific tutorials on the usage of MMDetection3D for the waymo dataset.
## Prepare datasets
As with the general way to prepare datasets, it is recommended to symlink the dataset root to `$MMDETECTION3D/data`.
Because the original waymo data format is based on `tfrecord`, we need to preprocess the raw data for convenient usage in the training and evaluation procedures. Our approach is to convert them into KITTI format.
The folder structure should be organized as follows before our processing.
```
mmdetection3d
├── mmdet3d
├── tools
├── configs
├── data
│ ├── waymo
│ │ ├── waymo_format
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── testing
│ │ │ ├── gt.bin
│ │ ├── kitti_format
│ │ │ ├── ImageSets
```
You can download the Waymo open dataset V1.2 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put the tfrecord files into the corresponding folders in `data/waymo/waymo_format/` and the data split txt files into `data/waymo/kitti_format/ImageSets`. Download the ground truth bin file for the validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. A tip: you can use `gsutil` to download the large-scale dataset from the command line; you can take this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) as an example for more details. Subsequently, prepare waymo data by running
```bash
python tools/create_data.py waymo --root-path ./data/waymo/ --out-dir ./data/waymo/ --workers 128 --extra-tag waymo
```
Note that if your local disk does not have enough space to save the converted data, you can change `--out-dir` to point anywhere else. Just remember to create the folders there in advance and link them back to `data/waymo/kitti_format` after the data conversion.
After the data conversion, the folder structure and info files should be organized as below.
```
mmdetection3d
├── mmdet3d
├── tools
├── configs
├── data
│ ├── waymo
│ │ ├── waymo_format
│ │ │ ├── training
│ │ │ ├── validation
│ │ │ ├── testing
│ │ │ ├── gt.bin
│ │ ├── kitti_format
│ │ │ ├── ImageSets
│ │ │ ├── training
│ │ │ │ ├── calib
│ │ │ │ ├── image_0
│ │ │ │ ├── image_1
│ │ │ │ ├── image_2
│ │ │ │ ├── image_3
│ │ │ │ ├── image_4
│ │ │ │ ├── label_0
│ │ │ │ ├── label_1
│ │ │ │ ├── label_2
│ │ │ │ ├── label_3
│ │ │ │ ├── label_4
│ │ │ │ ├── label_all
│ │ │ │ ├── pose
│ │ │ │ ├── velodyne
│ │ │ ├── testing
│ │ │ │ ├── (the same as training)
│ │ │ ├── waymo_gt_database
│ │ │ ├── waymo_infos_trainval.pkl
│ │ │ ├── waymo_infos_train.pkl
│ │ │ ├── waymo_infos_val.pkl
│ │ │ ├── waymo_infos_test.pkl
│ │ │ ├── waymo_dbinfos_train.pkl
```
Because there are several cameras, we store the corresponding images and the labels that can be projected to each camera, and we also save the pose for further usage of consecutive-frame point clouds. We use a naming scheme `{a}{bbb}{ccc}` for the data of each frame, where `a` is a prefix for the different splits (`0` for training, `1` for validation and `2` for testing), `bbb` is the segment index and `ccc` is the frame index. You can easily locate the required frame according to this naming rule (see the sketch below). As in KITTI, we gather the data for training and validation together and store the indices for the different sets in the ImageSets files.
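As a concrete illustration of this naming rule, the helpers below are hypothetical (not part of the codebase) but mirror the `{a}{bbb}{ccc}` pattern used by the converters.

```python
def frame_name(split_prefix, segment_idx, frame_idx):
    """Compose a frame name, e.g. frame_name(1, 7, 12) -> '1007012'."""
    return f'{split_prefix}{segment_idx:03d}{frame_idx:03d}'


def parse_frame_name(name):
    """Split a frame name back into (split prefix, segment index, frame index)."""
    return int(name[0]), int(name[1:4]), int(name[4:7])


assert frame_name(0, 1, 2) == '0001002'
assert parse_frame_name('1007012') == (1, 7, 12)
```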
## Training
Considering there are many similar frames in the original dataset, we can basically use a subset to train our model in the first place. In our preliminary baselines, we load one frame out of every five frames, and thanks to our hyperparameter settings and data augmentation, we obtain a better result than the performance reported in the original dataset [paper](https://arxiv.org/pdf/1912.04838.pdf). For more details about the configuration and performance, please refer to the README.md in `configs/pointpillars/`; switching to the full training split only requires overriding `load_interval`, as shown in the example config below. A more complete benchmark based on other settings and methods is coming soon.
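For instance, a hypothetical derived config (not part of this commit) could switch from the D5 subset to the full training split as follows; the base config name follows the configs released in this commit.

```python
# hypothetical derived config, for illustration only
_base_ = './hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py'

# load every frame instead of one frame out of every five
data = dict(train=dict(dataset=dict(load_interval=1)))
```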
## Evaluation
For evaluation on waymo, please follow the [instruction](https://github.com/waymo-research/waymo-open-dataset/blob/master/docs/quick_start.md) to build the binary file `compute_detection_metrics_main` for metrics computation and put it into `mmdet3d/core/evaluation/waymo_utils/`. Basically, you can follow the commands below to install bazel and build the file.
```shell
git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od
cd waymo-od
git checkout remotes/origin/master
sudo apt-get install --assume-yes pkg-config zip g++ zlib1g-dev unzip python3 python3-pip
wget https://github.com/bazelbuild/bazel/releases/download/0.28.0/bazel-0.28.0-installer-linux-x86_64.sh
sudo bash bazel-0.28.0-installer-linux-x86_64.sh
sudo apt install build-essential
./configure.sh
bazel clean
bazel build waymo_open_dataset/metrics/tools/compute_detection_metrics_main
cp bazel-bin/waymo_open_dataset/metrics/tools/compute_detection_metrics_main ../mmdetection3d/mmdet3d/core/evaluation/waymo_utils/
```
Then you can evaluate your models on waymo. An example of evaluating PointPillars on waymo with 8 GPUs using waymo metrics is as follows.
```shell
./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} configs/pointpillars/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car.py \
checkpoints/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car_latest.pth --out results/waymo-car/results_eval.pkl \
--eval waymo --options 'pklfile_prefix=results/waymo-car/kitti_results' \
'submission_prefix=results/waymo-car/kitti_results'
```
`pklfile_prefix` should be given in the options if the bin file needs to be generated. For metrics, `waymo` is the recommended official evaluation prototype. Currently, evaluating with the `kitti` choice is adapted from KITTI, and the results for each difficulty are not exactly the same as defined by KITTI; most objects are currently marked with difficulty 0, which will be fixed in the future. The reasons for its instability include the heavy computation required for evaluation, the lack of occlusion and truncation in the converted data, a different definition of difficulty and a different method of computing average precision.
**Notice**:
1. Sometimes when using bazel to build `compute_detection_metrics_main`, an error `'round' is not a member of 'std'` may appear. We just need to remove the `std::` before `round` in that file.
2. Considering that a single evaluation takes quite a long time, we recommend evaluating only once at the end of model training.
3. To use TensorFlow with CUDA 9, it is recommended to compile it from source. Apart from the official tutorials, you can refer to this [link](https://github.com/SmileTM/Tensorflow2.X-GPU-CUDA9.0) for possibly suitable precompiled packages and useful information about compiling it from source.
## Testing and making a submission
An example to test PointPillars on waymo with 8 GPUs, generate the bin files and make a submission to the leaderboard is as follows.
```shell
./tools/slurm_test.sh ${PARTITION} ${JOB_NAME} configs/pointpillars/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car.py \
checkpoints/hv_pointpillars_secfpn_sbn-2x16_2x_waymo-3d-car_latest.pth --out results/waymo-car/results_eval.pkl \
--format-only --options 'pklfile_prefix=results/waymo-car/kitti_results' \
'submission_prefix=results/waymo-car/kitti_results'
```
After generating the bin file, you can simply build the binary file `create_submission` and use it to create a submission file by following the [instruction](https://github.com/waymo-research/waymo-open-dataset/blob/master/docs/quick_start.md). Basically, here are some example commands.
```shell
cd ../waymo-od/
bazel build waymo_open_dataset/metrics/tools/create_submission
cp bazel-bin/waymo_open_dataset/metrics/tools/create_submission ../mmdetection3d/mmdet3d/core/evaluation/waymo_utils/
vim waymo_open_dataset/metrics/tools/submission.txtpb # set the metadata information
cp waymo_open_dataset/metrics/tools/submission.txtpb ../mmdetection3d/mmdet3d/core/evaluation/waymo_utils/
cd ../mmdetection3d
# suppose the result bin is in `results/waymo-car/submission`
mmdet3d/core/evaluation/waymo_utils/create_submission --input_filenames='results/waymo-car/kitti_results_test.bin' --output_filename='results/waymo-car/submission/model' --submission_filename='mmdet3d/core/evaluation/waymo_utils/submission.txtpb'
tar cvf results/waymo-car/submission/my_model.tar results/waymo-car/submission/my_model/
gzip results/waymo-car/submission/my_model.tar
```
For evaluation on the validation set with the eval server, you can generate a submission in the same way. Make sure you change the fields in `submission.txtpb` before running the commands above.
...@@ -591,19 +591,21 @@ def do_eval(gt_annos,
            eval_types=['bbox', 'bev', '3d']):
    # min_overlaps: [num_minoverlap, metric, num_class]
    difficultys = [0, 1, 2]
    mAP_bbox = None
    mAP_aos = None
    if 'bbox' in eval_types:
        ret = eval_class(
            gt_annos,
            dt_annos,
            current_classes,
            difficultys,
            0,
            min_overlaps,
            compute_aos=('aos' in eval_types))
        # ret: [num_class, num_diff, num_minoverlap, num_sample_points]
        mAP_bbox = get_mAP(ret['precision'])
        if 'aos' in eval_types:
            mAP_aos = get_mAP(ret['orientation'])
    mAP_bev = None
    if 'bev' in eval_types:
...@@ -654,7 +656,9 @@ def kitti_eval(gt_annos,
    Returns:
        tuple: String and dict of evaluation results.
    """
    assert len(eval_types) > 0, 'must contain at least one evaluation type'
    if 'aos' in eval_types:
        assert 'bbox' in eval_types, 'must evaluate bbox when evaluating aos'
    overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7,
                             0.5], [0.7, 0.5, 0.5, 0.7, 0.5],
                            [0.7, 0.5, 0.5, 0.7, 0.5]])
...@@ -683,12 +687,19 @@
    result = ''
    # check whether alpha is valid
    compute_aos = False
    pred_alpha = False
    valid_alpha_gt = False
    for anno in dt_annos:
        if anno['alpha'].shape[0] != 0:
            pred_alpha = True
            break
    for anno in gt_annos:
        if anno['alpha'][0] != -10:
            valid_alpha_gt = True
            break
    compute_aos = (pred_alpha and valid_alpha_gt)
    if compute_aos:
        eval_types.append('aos')
    mAPbbox, mAPbev, mAP3d, mAPaos = do_eval(gt_annos, dt_annos,
                                             current_classes, min_overlaps,
......
r"""Adapted from `Waymo to KITTI converter
<https://github.com/caizhongang/waymo_kitti_converter>`_.
"""
import mmcv
import numpy as np
import tensorflow as tf
from glob import glob
from os.path import join
from waymo_open_dataset import dataset_pb2 as open_dataset
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import metrics_pb2
class KITTI2Waymo(object):
"""KITTI predictions to Waymo converter.
This class serves as the converter to change predictions from KITTI to
Waymo format.
Args:
kitti_result_files (list[dict]): Predictions in KITTI format.
waymo_tfrecords_dir (str): Directory to load waymo raw data.
waymo_results_save_dir (str): Directory to save converted predictions
in waymo format (.bin files).
waymo_results_final_path (str): Path to save combined
predictions in waymo format (.bin file), like 'a/b/c.bin'.
prefix (str): Prefix of filename. In general, 0 for training, 1 for
validation and 2 for testing.
        workers (int): Number of parallel processes.
"""
def __init__(self,
kitti_result_files,
waymo_tfrecords_dir,
waymo_results_save_dir,
waymo_results_final_path,
prefix,
workers=64):
self.kitti_result_files = kitti_result_files
self.waymo_tfrecords_dir = waymo_tfrecords_dir
self.waymo_results_save_dir = waymo_results_save_dir
self.waymo_results_final_path = waymo_results_final_path
self.prefix = prefix
self.workers = int(workers)
self.name2idx = {}
for idx, result in enumerate(kitti_result_files):
if len(result['sample_idx']) > 0:
self.name2idx[str(result['sample_idx'][0])] = idx
# turn on eager execution for older tensorflow versions
if int(tf.__version__.split('.')[0]) < 2:
tf.enable_eager_execution()
self.k2w_cls_map = {
'Car': label_pb2.Label.TYPE_VEHICLE,
'Pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
'Sign': label_pb2.Label.TYPE_SIGN,
'Cyclist': label_pb2.Label.TYPE_CYCLIST,
}
self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0],
[-1.0, 0.0, 0.0, 0.0],
[0.0, -1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0]])
self.get_file_names()
self.create_folder()
def get_file_names(self):
"""Get file names of waymo raw data."""
self.waymo_tfrecord_pathnames = sorted(
glob(join(self.waymo_tfrecords_dir, '*.tfrecord')))
print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.')
def create_folder(self):
"""Create folder for data conversion."""
mmcv.mkdir_or_exist(self.waymo_results_save_dir)
def parse_objects(self, kitti_result, T_k2w, context_name,
frame_timestamp_micros):
"""Parse one prediction with several instances in kitti format and
convert them to `Object` proto.
Args:
kitti_result (dict): Predictions in kitti format.
- name (np.ndarray): Class labels of predictions.
- dimensions (np.ndarray): Height, width, length of boxes.
- location (np.ndarray): Bottom center of boxes (x, y, z).
- rotation_y (np.ndarray): Orientation of boxes.
- score (np.ndarray): Scores of predictions.
T_k2w (np.ndarray): Transformation matrix from kitti to waymo.
context_name (str): Context name of the frame.
frame_timestamp_micros (int): Frame timestamp.
Returns:
:obj:`Object`: Predictions in waymo dataset Object proto.
"""
def parse_one_object(instance_idx):
"""Parse one instance in kitti format and convert them to
`Object` proto.
Args:
instance_idx (int): Index of the instance to be converted.
Returns:
:obj:`Object`: Predicted instance in waymo dataset \
Object proto.
"""
cls = kitti_result['name'][instance_idx]
length = round(kitti_result['dimensions'][instance_idx, 0], 4)
height = round(kitti_result['dimensions'][instance_idx, 1], 4)
width = round(kitti_result['dimensions'][instance_idx, 2], 4)
x = round(kitti_result['location'][instance_idx, 0], 4)
y = round(kitti_result['location'][instance_idx, 1], 4)
z = round(kitti_result['location'][instance_idx, 2], 4)
rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
score = round(kitti_result['score'][instance_idx], 4)
# y: downwards; move box origin from bottom center (kitti) to
# true center (waymo)
y -= height / 2
# frame transformation: kitti -> waymo
x, y, z = self.transform(T_k2w, x, y, z)
# different conventions
heading = -(rotation_y + np.pi / 2)
while heading < -np.pi:
heading += 2 * np.pi
while heading > np.pi:
heading -= 2 * np.pi
box = label_pb2.Label.Box()
box.center_x = x
box.center_y = y
box.center_z = z
box.length = length
box.width = width
box.height = height
box.heading = heading
o = metrics_pb2.Object()
o.object.box.CopyFrom(box)
o.object.type = self.k2w_cls_map[cls]
o.score = score
o.context_name = context_name
o.frame_timestamp_micros = frame_timestamp_micros
return o
objects = metrics_pb2.Objects()
for instance_idx in range(len(kitti_result['name'])):
o = parse_one_object(instance_idx)
objects.objects.append(o)
return objects
def convert_one(self, file_idx):
"""Convert action for single file.
Args:
file_idx (int): Index of the file to be converted.
"""
file_pathname = self.waymo_tfrecord_pathnames[file_idx]
file_data = tf.data.TFRecordDataset(file_pathname, compression_type='')
for frame_num, frame_data in enumerate(file_data):
frame = open_dataset.Frame()
frame.ParseFromString(bytearray(frame_data.numpy()))
filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}'
for camera in frame.context.camera_calibrations:
# FRONT = 1, see dataset.proto for details
if camera.name == 1:
T_front_cam_to_vehicle = np.array(
camera.extrinsic.transform).reshape(4, 4)
T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam
context_name = frame.context.name
frame_timestamp_micros = frame.timestamp_micros
if filename in self.name2idx:
kitti_result = \
self.kitti_result_files[self.name2idx[filename]]
objects = self.parse_objects(kitti_result, T_k2w, context_name,
frame_timestamp_micros)
else:
print(filename, 'not found.')
objects = metrics_pb2.Objects()
with open(
join(self.waymo_results_save_dir, f'{filename}.bin'),
'wb') as f:
f.write(objects.SerializeToString())
def convert(self):
"""Convert action."""
print('Start converting ...')
mmcv.track_parallel_progress(self.convert_one, range(len(self)),
self.workers)
print('\nFinished ...')
# combine all files into one .bin
pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
combined = self.combine(pathnames)
with open(self.waymo_results_final_path, 'wb') as f:
f.write(combined.SerializeToString())
def __len__(self):
"""Length of the filename list."""
return len(self.waymo_tfrecord_pathnames)
def transform(self, T, x, y, z):
"""Transform the coordinates with matrix T.
Args:
T (np.ndarray): Transformation matrix.
x(float): Coordinate in x axis.
y(float): Coordinate in y axis.
z(float): Coordinate in z axis.
Returns:
list: Coordinates after transformation.
"""
pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1)
pt_aft = np.matmul(T, pt_bef)
return pt_aft[:3].flatten().tolist()
def combine(self, pathnames):
"""Combine predictions in waymo format for each sample together.
Args:
pathnames (str): Paths to save predictions.
Returns:
:obj:`Objects`: Combined predictions in Objects proto.
"""
combined = metrics_pb2.Objects()
for pathname in pathnames:
objects = metrics_pb2.Objects()
with open(pathname, 'rb') as f:
objects.ParseFromString(f.read())
for o in objects.objects:
combined.objects.append(o)
return combined
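A hedged usage sketch of the converter above; the paths and the prediction pkl file are hypothetical, and `kitti_result_files` is the list of KITTI-format prediction dicts described in the class docstring.

```python
import mmcv

# hypothetical driver code, not part of this module
kitti_result_files = mmcv.load('results/waymo-car/kitti_results.pkl')
converter = KITTI2Waymo(
    kitti_result_files,
    waymo_tfrecords_dir='data/waymo/waymo_format/validation',
    waymo_results_save_dir='results/waymo-car/waymo_bins',
    waymo_results_final_path='results/waymo-car/results.bin',
    prefix='1',  # 1 denotes the validation split
    workers=64)
converter.convert()
```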
...@@ -12,6 +12,7 @@ from .pipelines import (BackgroundPointsFilter, GlobalRotScaleTrans,
                        RandomFlip3D)
from .scannet_dataset import ScanNetDataset
from .sunrgbd_dataset import SUNRGBDDataset
from .waymo_dataset import WaymoDataset
__all__ = [
    'KittiDataset', 'GroupSampler', 'DistributedGroupSampler',
...@@ -21,5 +22,5 @@ __all__ = [
    'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
    'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',
    'LoadAnnotations3D', 'SUNRGBDDataset', 'ScanNetDataset', 'Custom3DDataset',
    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter'
]
...@@ -44,6 +44,8 @@ class KittiDataset(Custom3DDataset):
            Defaults to True.
        test_mode (bool, optional): Whether the dataset is in test mode.
            Defaults to False.
pcd_limit_range (list): The range of point cloud used to filter
invalid predicted boxes. Default: [0, -40, -3, 70.4, 40, 0.0].
""" """
CLASSES = ('car', 'pedestrian', 'cyclist') CLASSES = ('car', 'pedestrian', 'cyclist')
...@@ -57,7 +59,8 @@ class KittiDataset(Custom3DDataset): ...@@ -57,7 +59,8 @@ class KittiDataset(Custom3DDataset):
modality=None, modality=None,
box_type_3d='LiDAR', box_type_3d='LiDAR',
filter_empty_gt=True, filter_empty_gt=True,
test_mode=False): test_mode=False,
pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0]):
        super().__init__(
            data_root=data_root,
            ann_file=ann_file,
...@@ -68,9 +71,10 @@ class KittiDataset(Custom3DDataset):
            filter_empty_gt=filter_empty_gt,
            test_mode=test_mode)
self.split = split
        self.root_split = os.path.join(self.data_root, split)
        assert self.modality is not None
        self.pcd_limit_range = pcd_limit_range
        self.pts_prefix = pts_prefix

    def _get_pts_filename(self, idx):
...@@ -157,7 +161,6 @@ class KittiDataset(Custom3DDataset):
        dims = annos['dimensions']
        rots = annos['rotation_y']
        gt_names = annos['name']
        gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
...@@ -167,7 +170,6 @@ class KittiDataset(Custom3DDataset):
        gt_bboxes = annos['bbox']
        selected = self.drop_arrays_by_name(gt_names, ['DontCare'])
        gt_bboxes = gt_bboxes[selected].astype('float32')
        gt_names = gt_names[selected]
...@@ -177,7 +179,7 @@ class KittiDataset(Custom3DDataset):
                gt_labels.append(self.CLASSES.index(cat))
            else:
                gt_labels.append(-1)
        gt_labels = np.array(gt_labels).astype(np.int64)
        gt_labels_3d = copy.deepcopy(gt_labels)
        anns_results = dict(
...@@ -372,7 +374,8 @@ class KittiDataset(Custom3DDataset):
        Returns:
            list[dict]: A list of dictionaries with the kitti format.
        """
        assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
        if submission_prefix is not None:
            mmcv.mkdir_or_exist(submission_prefix)
...@@ -465,7 +468,7 @@ class KittiDataset(Custom3DDataset):
        if not pklfile_prefix.endswith(('.pkl', '.pickle')):
            out = f'{pklfile_prefix}.pkl'
        mmcv.dump(det_annos, out)
        print(f'Result is saved to {out}.')
        return det_annos
...@@ -487,8 +490,8 @@ class KittiDataset(Custom3DDataset):
        Returns:
            list[dict]: A list of dictionaries in the kitti format
        """
        assert len(net_outputs) == len(self.data_infos), \
'invalid list length of network outputs'
        det_annos = []
        print('\nConverting prediction to KITTI format')
        for i, bboxes_per_sample in enumerate(
......