Commit 27fd5d29 authored by ChaimZhu, committed by GitHub

[Enhance] Support load annotations from ceph (#1325)

* support loading annotations from Ceph in mmdet3d

* update docs about the Ceph backend

* fix comments
parent 8a8da91b
# Tutorial 7: Backends Support
We support several file client backends: disk, Ceph, LMDB, etc. Below is an example of how to modify configs for Ceph-based data loading and saving.
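Under the hood, every component that accepts a `file_client_args` dict builds an `mmcv.FileClient` from it and reads raw bytes through that client. Here is a minimal sketch of that mechanism; it uses the local `disk` backend so it runs without Ceph credentials, and the pkl path is a placeholder:

```python
import io

import mmcv

# the same kind of kwargs dict the configs below pass around;
# switch to backend='petrel' (plus path_mapping) for Ceph
file_client_args = dict(backend='disk')

file_client = mmcv.FileClient(**file_client_args)
pkl_bytes = file_client.get('data/kitti/kitti_infos_train.pkl')  # raw bytes
# parsing a file-like object requires an explicit file format
infos = mmcv.load(io.BytesIO(pkl_bytes), file_format='pkl')
```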
## Load data and annotations from Ceph
We support loading data and generated annotation info files (pkl and json) from Ceph:
```python
# set file client backends as Ceph
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/':
        's3://openmmlab/datasets/detection3d/nuscenes/',  # replace the path with your data path on Ceph
        'data/nuscenes/':
        's3://openmmlab/datasets/detection3d/nuscenes/'  # replace the path with your data path on Ceph
    }))

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    sample_groups=dict(Car=15),
    classes=class_names,
    # set file client for points loader to load training data
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    # set file client for data base sampler to load db info file
    file_client_args=file_client_args)

train_pipeline = [
    # set file client for loading training data
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, file_client_args=file_client_args),
    # set file client for loading training data annotations
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.15707963267, 0.15707963267]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

test_pipeline = [
    # set file client for loading validation/testing data
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]

data = dict(
    # set file client for loading training info files (.pkl)
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(pipeline=train_pipeline, classes=class_names, file_client_args=file_client_args)),
    # set file client for loading validation info files (.pkl)
    val=dict(pipeline=test_pipeline, classes=class_names, file_client_args=file_client_args),
    # set file client for loading testing info files (.pkl)
    test=dict(pipeline=test_pipeline, classes=class_names, file_client_args=file_client_args))
```
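The `path_mapping` above is what lets unmodified dataset configs work on Ceph: the petrel backend rewrites any matching local prefix in a requested path into the corresponding Ceph URI before issuing the request. A simplified re-implementation of that rewriting, for illustration only (not the actual mmcv code; the sample filename is a placeholder):

```python
path_mapping = {
    './data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/',
    'data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/'
}

def map_path(filepath: str, mapping: dict) -> str:
    # mimic the prefix replacement the petrel backend applies
    for local_prefix, ceph_prefix in mapping.items():
        filepath = filepath.replace(local_prefix, ceph_prefix)
    return filepath

print(map_path('./data/nuscenes/samples/LIDAR_TOP/000001.bin', path_mapping))
# -> s3://openmmlab/datasets/detection3d/nuscenes/samples/LIDAR_TOP/000001.bin
```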
## Load pretrained model from Ceph
```python
model = dict(
    pts_backbone=dict(
        _delete_=True,
        type='NoStemRegNet',
        arch='regnetx_1.6gf',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='s3://openmmlab/checkpoints/mmdetection3d/regnetx_1.6gf'),  # replace the path with your pretrained model path on Ceph
    ...
```
## Load checkpoint from Ceph
```python
# replace the path with your checkpoint path on Ceph
load_from = 's3://openmmlab/checkpoints/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230614-77663cd6.pth'
resume_from = None
workflow = [('train', 1)]
```
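For reference, a checkpoint stored on Ceph can also be loaded programmatically: mmcv's `load_checkpoint` resolves `s3://` prefixes through the petrel backend. A minimal sketch with a stand-in module (the checkpoint path is a placeholder; in practice you would pass a built detector):

```python
import torch.nn as nn
from mmcv.runner import load_checkpoint

model = nn.Linear(4, 2)  # stand-in module for illustration only
checkpoint = load_checkpoint(
    model,
    's3://openmmlab/checkpoints/mmdetection3d/example.pth',  # placeholder path
    map_location='cpu',
    strict=False)
```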
## Save checkpoint into Ceph
```python
# checkpoint saving
# replace the path with your checkpoint saving path on Ceph
checkpoint_config = dict(interval=1, max_keep_ckpts=2, out_dir='s3://openmmlab/mmdetection3d')
```
## EvalHook saves the best checkpoint into Ceph
```python
# replace the path with your checkpoint saving path on Ceph
evaluation = dict(interval=1, save_best='bbox', out_dir='s3://openmmlab/mmdetection3d')
```
## Save the training log into Ceph
The training log will be backed up to the specified Ceph path after training.
```python
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', out_dir='s3://openmmlab/mmdetection3d'),
    ])
```
You can also delete the local training log after it is backed up to the specified Ceph path by setting `keep_local=False`.
```python
log_config = dict(
    interval=50,
    hooks=[
        dict(
            type='TextLoggerHook',
            out_dir='s3://openmmlab/mmdetection3d',
            keep_local=False),
    ])
```
@@ -7,3 +7,4 @@
customize_models.md
customize_runtime.md
coord_sys_tutorial.md
backends_support.md
# Tutorial 7: Backends Support
We support several file client backends: disk, Ceph, LMDB, etc. Below is an example of how to modify configs for Ceph-based data loading and saving.
## Load data and annotations from Ceph
We support loading data and generated annotation info files (pkl and json) from Ceph:
```python
# set file client backends as Ceph
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/':
        's3://openmmlab/datasets/detection3d/nuscenes/',  # replace the path with your data path on Ceph
        'data/nuscenes/':
        's3://openmmlab/datasets/detection3d/nuscenes/'  # replace the path with your data path on Ceph
    }))

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    sample_groups=dict(Car=15),
    classes=class_names,
    # set file client for points loader to load training data
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    # set file client for data base sampler to load db info file
    file_client_args=file_client_args)

train_pipeline = [
    # set file client for loading training data
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, file_client_args=file_client_args),
    # set file client for loading training data annotations
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[0.25, 0.25, 0.25],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.15707963267, 0.15707963267]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

test_pipeline = [
    # set file client for loading validation/testing data
    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4, file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]

data = dict(
    # set file client for loading training info files (.pkl)
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(pipeline=train_pipeline, classes=class_names, file_client_args=file_client_args)),
    # set file client for loading validation info files (.pkl)
    val=dict(pipeline=test_pipeline, classes=class_names, file_client_args=file_client_args),
    # set file client for loading testing info files (.pkl)
    test=dict(pipeline=test_pipeline, classes=class_names, file_client_args=file_client_args))
```
## Load pretrained model from Ceph
```python
model = dict(
    pts_backbone=dict(
        _delete_=True,
        type='NoStemRegNet',
        arch='regnetx_1.6gf',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='s3://openmmlab/checkpoints/mmdetection3d/regnetx_1.6gf'),  # replace the path with your pretrained model path on Ceph
    ...
```
## Load checkpoint from Ceph
```python
# replace the path with your checkpoint path on Ceph
load_from = 's3://openmmlab/checkpoints/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230614-77663cd6.pth'
resume_from = None
workflow = [('train', 1)]
```
## Save checkpoint into Ceph
```python
# checkpoint saving
# replace the path with your checkpoint saving path on Ceph
checkpoint_config = dict(interval=1, max_keep_ckpts=2, out_dir='s3://openmmlab/mmdetection3d')
```
## EvalHook saves the best checkpoint into Ceph
```python
# replace the path with your checkpoint saving path on Ceph
evaluation = dict(interval=1, save_best='bbox', out_dir='s3://openmmlab/mmdetection3d')
```
## Save the training log into Ceph
The training log will be backed up to the specified Ceph path after training.
```python
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', out_dir='s3://openmmlab/mmdetection3d'),
    ])
```
You can also delete the local training log after it is backed up to the specified Ceph path by setting `keep_local=False`.
```python
log_config = dict(
    interval=50,
    hooks=[
        dict(
            type='TextLoggerHook',
            out_dir='s3://openmmlab/mmdetection3d',
            keep_local=False),
    ])
```
@@ -7,3 +7,4 @@
customize_models.md
customize_runtime.md
coord_sys_tutorial.md
backends_support.md
@@ -51,7 +51,8 @@ class Custom3DDataset(Dataset):
                  modality=None,
                  box_type_3d='LiDAR',
                  filter_empty_gt=True,
-                 test_mode=False):
+                 test_mode=False,
+                 file_client_args=dict(backend='disk')):
         super().__init__()
         self.data_root = data_root
         self.ann_file = ann_file
@@ -61,13 +62,26 @@ class Custom3DDataset(Dataset):
         self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
         self.CLASSES = self.get_classes(classes)
+        self.file_client = mmcv.FileClient(**file_client_args)
         self.cat2id = {name: i for i, name in enumerate(self.CLASSES)}

+        # load annotations
+        if hasattr(self.file_client, 'get_local_path'):
+            with self.file_client.get_local_path(self.ann_file) as local_path:
+                self.data_infos = self.load_annotations(open(local_path, 'rb'))
+        else:
+            warnings.warn(
+                'The used MMCV version does not have get_local_path. '
+                f'We treat the {self.ann_file} as local paths and it '
+                'might cause errors if the path is not a local path. '
+                'Please use MMCV>= 1.3.16 if you meet errors.')
+            self.data_infos = self.load_annotations(self.ann_file)
+
         # process pipeline
         if pipeline is not None:
             self.pipeline = Compose(pipeline)

-        # set group flag for the sampler
+        # set group flag for the samplers
         if not self.test_mode:
             self._set_group_flag()
@@ -80,7 +94,8 @@ class Custom3DDataset(Dataset):
         Returns:
             list[dict]: List of annotations.
         """
-        return mmcv.load(ann_file)
+        # loading data from a file-like object needs file format
+        return mmcv.load(ann_file, file_format='pkl')

     def get_data_info(self, index):
         """Get data info according to the given index.
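The pattern added above is the heart of this change, so here is a standalone sketch of it: `FileClient.get_local_path` (available since MMCV 1.3.16, per the warning in the diff) downloads a remote file to a temporary local path, which is cleaned up when the context manager exits. The backend and pkl path below are placeholders:

```python
import mmcv

file_client = mmcv.FileClient(backend='disk')  # or backend='petrel' for Ceph
ann_file = 'data/kitti/kitti_infos_train.pkl'  # placeholder path

with file_client.get_local_path(ann_file) as local_path:
    # parsing a file-like object requires an explicit file format
    data_infos = mmcv.load(open(local_path, 'rb'), file_format='pkl')
print(len(data_infos))
```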
@@ -62,13 +62,25 @@ class Custom3DSegDataset(Dataset):
                  modality=None,
                  test_mode=False,
                  ignore_index=None,
-                 scene_idxs=None):
+                 scene_idxs=None,
+                 file_client_args=dict(backend='disk')):
         super().__init__()
         self.data_root = data_root
         self.ann_file = ann_file
         self.test_mode = test_mode
         self.modality = modality
+        self.file_client = mmcv.FileClient(**file_client_args)

+        # load annotations
+        if hasattr(self.file_client, 'get_local_path'):
+            with self.file_client.get_local_path(self.ann_file) as local_path:
+                self.data_infos = self.load_annotations(open(local_path, 'rb'))
+        else:
+            warnings.warn(
+                'The used MMCV version does not have get_local_path. '
+                f'We treat the {self.ann_file} as local paths and it '
+                'might cause errors if the path is not a local path. '
+                'Please use MMCV>= 1.3.16 if you meet errors.')
+            self.data_infos = self.load_annotations(self.ann_file)

         if pipeline is not None:
@@ -94,7 +106,8 @@ class Custom3DSegDataset(Dataset):
         Returns:
             list[dict]: List of annotations.
         """
-        return mmcv.load(ann_file)
+        # loading data from a file-like object needs file format
+        return mmcv.load(ann_file, file_format='pkl')

     def get_data_info(self, index):
         """Get data info according to the given index.
@@ -65,7 +65,8 @@ class KittiDataset(Custom3DDataset):
                  box_type_3d='LiDAR',
                  filter_empty_gt=True,
                  test_mode=False,
-                 pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0]):
+                 pcd_limit_range=[0, -40, -3, 70.4, 40, 0.0],
+                 **kwargs):
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -74,7 +75,8 @@ class KittiDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)
         self.split = split
         self.root_split = os.path.join(self.data_root, split)
@@ -86,7 +86,8 @@ class LyftDataset(Custom3DDataset):
                  modality=None,
                  box_type_3d='LiDAR',
                  filter_empty_gt=True,
-                 test_mode=False):
+                 test_mode=False,
+                 **kwargs):
         self.load_interval = load_interval
         super().__init__(
             data_root=data_root,
@@ -96,7 +97,8 @@ class LyftDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)

         if self.modality is None:
             self.modality = dict(
@@ -116,7 +118,8 @@ class LyftDataset(Custom3DDataset):
         Returns:
             list[dict]: List of annotations sorted by timestamps.
         """
-        data = mmcv.load(ann_file)
+        # loading data from a file-like object needs file format
+        data = mmcv.load(ann_file, file_format='pkl')
         data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
         data_infos = data_infos[::self.load_interval]
         self.metadata = data['metadata']
@@ -125,7 +125,8 @@ class NuScenesDataset(Custom3DDataset):
                  filter_empty_gt=True,
                  test_mode=False,
                  eval_version='detection_cvpr_2019',
-                 use_valid_flag=False):
+                 use_valid_flag=False,
+                 **kwargs):
         self.load_interval = load_interval
         self.use_valid_flag = use_valid_flag
         super().__init__(
@@ -136,7 +137,8 @@ class NuScenesDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)

         self.with_velocity = with_velocity
         self.eval_version = eval_version
@@ -184,7 +186,8 @@ class NuScenesDataset(Custom3DDataset):
         Returns:
             list[dict]: List of annotations sorted by timestamps.
         """
-        data = mmcv.load(ann_file)
+        # loading data from a file-like object needs file format
+        data = mmcv.load(ann_file, file_format='pkl')
         data_infos = list(sorted(data['infos'], key=lambda e: e['timestamp']))
         data_infos = data_infos[::self.load_interval]
         self.metadata = data['metadata']
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
 import os
+import warnings

 import mmcv
 import numpy as np
@@ -104,7 +105,8 @@ class DataBaseSampler(object):
                      type='LoadPointsFromFile',
                      coord_type='LIDAR',
                      load_dim=4,
-                     use_dim=[0, 1, 2, 3])):
+                     use_dim=[0, 1, 2, 3]),
+                 file_client_args=dict(backend='disk')):
         super().__init__()
         self.data_root = data_root
         self.info_path = info_path
@@ -114,7 +116,19 @@ class DataBaseSampler(object):
         self.cat2label = {name: i for i, name in enumerate(classes)}
         self.label2cat = {i: name for i, name in enumerate(classes)}
         self.points_loader = mmcv.build_from_cfg(points_loader, PIPELINES)
+        self.file_client = mmcv.FileClient(**file_client_args)

+        # load data base infos
+        if hasattr(self.file_client, 'get_local_path'):
+            with self.file_client.get_local_path(info_path) as local_path:
+                # loading data from a file-like object needs file format
+                db_infos = mmcv.load(open(local_path, 'rb'), file_format='pkl')
+        else:
+            warnings.warn(
+                'The used MMCV version does not have get_local_path. '
+                f'We treat the {info_path} as local paths and it '
+                'might cause errors if the path is not a local path. '
+                'Please use MMCV>= 1.3.16 if you meet errors.')
+            db_infos = mmcv.load(info_path)

         # filter database infos
@@ -54,7 +54,8 @@ class S3DISDataset(Custom3DDataset):
                  modality=None,
                  box_type_3d='Depth',
                  filter_empty_gt=True,
-                 test_mode=False):
+                 test_mode=False,
+                 **kwargs):
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -63,7 +64,8 @@ class S3DISDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)

     def get_ann_info(self, index):
         """Get annotation info according to the given index.
@@ -205,7 +207,8 @@ class _S3DISSegDataset(Custom3DSegDataset):
                  modality=None,
                  test_mode=False,
                  ignore_index=None,
-                 scene_idxs=None):
+                 scene_idxs=None,
+                 **kwargs):
         super().__init__(
             data_root=data_root,
@@ -216,7 +219,8 @@ class _S3DISSegDataset(Custom3DSegDataset):
             modality=modality,
             test_mode=test_mode,
             ignore_index=ignore_index,
-            scene_idxs=scene_idxs)
+            scene_idxs=scene_idxs,
+            **kwargs)

     def get_ann_info(self, index):
         """Get annotation info according to the given index.
@@ -347,7 +351,8 @@ class S3DISSegDataset(_S3DISSegDataset):
                  modality=None,
                  test_mode=False,
                  ignore_index=None,
-                 scene_idxs=None):
+                 scene_idxs=None,
+                 **kwargs):

         # make sure that ann_files and scene_idxs have same length
         ann_files = self._check_ann_files(ann_files)
@@ -363,7 +368,8 @@ class S3DISSegDataset(_S3DISSegDataset):
             modality=modality,
             test_mode=test_mode,
             ignore_index=ignore_index,
-            scene_idxs=scene_idxs[0])
+            scene_idxs=scene_idxs[0],
+            **kwargs)

         datasets = [
             _S3DISSegDataset(
@@ -58,7 +58,8 @@ class ScanNetDataset(Custom3DDataset):
                  modality=dict(use_camera=False, use_depth=True),
                  box_type_3d='Depth',
                  filter_empty_gt=True,
-                 test_mode=False):
+                 test_mode=False,
+                 **kwargs):
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -67,7 +68,8 @@ class ScanNetDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)
         assert 'use_camera' in self.modality and \
             'use_depth' in self.modality
         assert self.modality['use_camera'] or self.modality['use_depth']
@@ -322,7 +324,8 @@ class ScanNetSegDataset(Custom3DSegDataset):
                  modality=None,
                  test_mode=False,
                  ignore_index=None,
-                 scene_idxs=None):
+                 scene_idxs=None,
+                 **kwargs):
         super().__init__(
             data_root=data_root,
@@ -333,7 +336,8 @@ class ScanNetSegDataset(Custom3DSegDataset):
             modality=modality,
             test_mode=test_mode,
             ignore_index=ignore_index,
-            scene_idxs=scene_idxs)
+            scene_idxs=scene_idxs,
+            **kwargs)

     def get_ann_info(self, index):
         """Get annotation info according to the given index.
@@ -54,7 +54,8 @@ class SUNRGBDDataset(Custom3DDataset):
                  modality=dict(use_camera=True, use_lidar=True),
                  box_type_3d='Depth',
                  filter_empty_gt=True,
-                 test_mode=False):
+                 test_mode=False,
+                 **kwargs):
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -63,7 +64,8 @@ class SUNRGBDDataset(Custom3DDataset):
             modality=modality,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
-            test_mode=test_mode)
+            test_mode=test_mode,
+            **kwargs)
         assert 'use_camera' in self.modality and \
             'use_lidar' in self.modality
         assert self.modality['use_camera'] or self.modality['use_lidar']
@@ -66,7 +66,8 @@ class WaymoDataset(KittiDataset):
                  filter_empty_gt=True,
                  test_mode=False,
                  load_interval=1,
-                 pcd_limit_range=[-85, -85, -5, 85, 85, 5]):
+                 pcd_limit_range=[-85, -85, -5, 85, 85, 5],
+                 **kwargs):
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -78,7 +79,8 @@ class WaymoDataset(KittiDataset):
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
             test_mode=test_mode,
-            pcd_limit_range=pcd_limit_range)
+            pcd_limit_range=pcd_limit_range,
+            **kwargs)

         # to load a subset, just set the load_interval in the dataset config
         self.data_infos = self.data_infos[::load_interval]