Commit 007f2e68 authored by 雍大凯's avatar 雍大凯
Browse files

将子模块转换为普通目录

parent 19472568
BEVFormer @ 66b65f3a
Subproject commit 66b65f3a1f58caf0507cb2a971b9c0e7f842376c
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
ckpts/
data/
ckpts
data
test/
val/
work_dirs/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from nuscenes.can_bus.can_bus_api import NuScenesCanBus\n",
"nusc_can = NuScenesCanBus(dataroot='can_bus')"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"The CAN bus is a vehicle bus over which information such as position, velocity, acceleration, steering, lights, battery and many more are submitted. We recommend you start by reading the [README](https://github.com/nutonomy/nuscenes-devkit/tree/master/python-sdk/nuscenes/can_bus/README.md)\n",
"In BEVFormer, we only use the `pose` fields."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"scene_name = 'scene-0001'\n",
"pose_list = nusc_can.get_messages(scene_name, 'pose')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each value of `pose_list` contains: \n",
"- `orientation`: a Quaternion representation of orientation\n",
"- `pos`: a global postion of ego-car\n",
"- `vel`: the velocity of ego-car\n",
"- `rotation_rate`: rotation rate"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'accel': [0.05252802768009661, 0.9291747528573647, 9.513756543139106],\n",
" 'orientation': [0.7479305678167669, 0.0, 0.0, 0.6637769698666026],\n",
" 'pos': [1010.1436201720262, 610.8882352282457, 0.0],\n",
" 'rotation_rate': [0.040320225059986115,\n",
" -0.002563952235504985,\n",
" 0.28492140769958496],\n",
" 'utime': 1531883530467511,\n",
" 'vel': [4.1688763951334185, 0.0, 0.0]}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pose_list[0] # one example"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['accel', 'orientation', 'pos', 'rotation_rate', 'utime', 'vel'])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pose_list[0].keys()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In [data_converter](https://github.com/zhiqi-li/BEVFormer/blob/master/tools/data_converter/nuscenes_converter.py), we use the following function to obatain the can bus information for each sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def _get_can_bus_info(nusc, nusc_can_bus, sample):\n",
" scene_name = nusc.get('scene', sample['scene_token'])['name']\n",
" sample_timestamp = sample['timestamp']\n",
" try:\n",
" pose_list = nusc_can_bus.get_messages(scene_name, 'pose')\n",
" except:\n",
" return np.zeros(18) # serveral scenes do not have can bus information.\n",
" can_bus = []\n",
" # during each scene, the first timestamp of can_bus may be large than the first sample's timestamp\n",
" last_pose = pose_list[0]\n",
" for i, pose in enumerate(pose_list):\n",
" if pose['utime'] > sample_timestamp:\n",
" break\n",
" last_pose = pose # we obtain the can_bus information which is recorded before the sample recorded.\n",
" \n",
" _ = last_pose.pop('utime') # useless\n",
" pos = last_pose.pop('pos') \n",
" rotation = last_pose.pop('orientation')\n",
" \n",
" # one can_bus record contains 18 numbers\n",
" can_bus.extend(pos) # [0:3] is the position\n",
" can_bus.extend(rotation) # [3:7] is the orientation\n",
" \n",
" for key in last_pose.keys():\n",
" can_bus.extend(pose[key]) # accel: [7, 10], rotation_rate: [10: 13], velocity: [13: 16]\n",
" \n",
" # the last two numbers are reserved for later calculation of rotation angle.\n",
" can_bus.extend([0., 0.])\n",
" \n",
" \n",
" return np.array(can_bus)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In [dataset](https://github.com/zhiqi-li/BEVFormer/blob/master/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py#L174), we reorganize the can_bus."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" # actually, the nuScenes provides the rotation and translation of each sample, which is more accurate than we obtained from can bus. \n",
" rotation = Quaternion(input_dict['ego2global_rotation'])\n",
" translation = input_dict['ego2global_translation']\n",
" \n",
" can_bus = input_dict['can_bus']\n",
" can_bus[:3] = translation # We use the provided translation and rotation to repalce the original translation and rotation in can bus\n",
" can_bus[3:7] = rotation\n",
" \n",
" patch_angle = quaternion_yaw(rotation) / np.pi * 180 # we get the yaw angle of ego car\n",
" can_bus[-2] = patch_angle / 180 * np.pi # this angle is kept unchanged.\n",
" can_bus[-1] = patch_angle # this angle is used to compute the detal of adjacent timestamps."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In [dataset](https://github.com/zhiqi-li/BEVFormer/blob/master/projects/mmdet3d_plugin/datasets/nuscenes_dataset.py#L93), we compute the delta orientation and position of adjacent timestamps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" prev_pos = None\n",
" prev_angle = None\n",
" for i, each in enumerate(queue):\n",
" metas_map[i] = each['img_metas'].data\n",
" if i == 0:\n",
" metas_map[i]['prev_bev'] = False\n",
" prev_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])\n",
" prev_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])\n",
" metas_map[i]['can_bus'][:3] = 0\n",
" metas_map[i]['can_bus'][-1] = 0\n",
" else:\n",
" metas_map[i]['prev_bev'] = True\n",
" tmp_pos = copy.deepcopy(metas_map[i]['can_bus'][:3])\n",
" tmp_angle = copy.deepcopy(metas_map[i]['can_bus'][-1])\n",
" metas_map[i]['can_bus'][:3] -= prev_pos\n",
" metas_map[i]['can_bus'][-1] -= prev_angle\n",
" prev_pos = copy.deepcopy(tmp_pos)\n",
" prev_angle = copy.deepcopy(tmp_angle)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
# Prerequisites
**Please ensure you have prepared the environment and the nuScenes dataset.**
# Train and Test
Train BEVFormer with 8 GPUs
```
./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8
```
Eval BEVFormer with 8 GPUs
```
./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8
```
Note: using 1 GPU to eval can obtain slightly higher performance because continuous video may be truncated with multiple GPUs. By default we report the score evaled with 8 GPUs.
# Using FP16 to train the model.
The above training script can not support FP16 training,
and we provide another script to train BEVFormer with FP16.
```
./tools/fp16/dist_train.sh ./projects/configs/bevformer_fp16/bevformer_tiny_fp16.py 8
```
# Visualization
see [visual.py](../tools/analysis_tools/visual.py)
\ No newline at end of file
# Step-by-step installation instructions
Following https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation
**a. Create a conda virtual environment and activate it.**
```shell
conda create -n open-mmlab python=3.8 -y
conda activate open-mmlab
```
**b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).**
```shell
pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
# Recommended torch>=1.9
```
**c. Install gcc>=5 in conda env (optional).**
```shell
conda install -c omgarcia gcc-6 # gcc-6.2
```
**c. Install mmcv-full.**
```shell
pip install mmcv-full==1.4.0
# pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
```
**d. Install mmdet and mmseg.**
```shell
pip install mmdet==2.14.0
pip install mmsegmentation==0.14.1
```
**e. Install mmdet3d from source code.**
```shell
git clone https://github.com/open-mmlab/mmdetection3d.git
cd mmdetection3d
git checkout v0.17.1 # Other versions may not be compatible.
python setup.py install
```
**f. Install Detectron2 and Timm.**
```shell
pip install einops fvcore seaborn iopath==0.1.9 timm==0.6.13 typing-extensions==4.5.0 pylint ipython==8.12 numpy==1.19.5 matplotlib==3.5.2 numba==0.48.0 pandas==1.4.4 scikit-image==0.19.3 setuptools==59.5.0
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
```
**g. Clone BEVFormer.**
```
git clone https://github.com/fundamentalvision/BEVFormer.git
```
**h. Prepare pretrained models.**
```shell
cd bevformer
mkdir ckpts
cd ckpts & wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth
```
note: this pretrained model is the same model used in [detr3d](https://github.com/WangYueFt/detr3d)
## NuScenes
Download nuScenes V1.0 full dataset data and CAN bus expansion data [HERE](https://www.nuscenes.org/download). Prepare nuscenes data by running
**Download CAN bus expansion**
```
# download 'can_bus.zip'
unzip can_bus.zip
# move can_bus to data dir
```
**Prepare nuScenes data**
*We genetate custom annotation files which are different from mmdet3d's*
```
python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data
```
Using the above code will generate `nuscenes_infos_temporal_{train,val}.pkl`.
**Folder structure**
```
bevformer
├── projects/
├── tools/
├── configs/
├── ckpts/
│ ├── r101_dcn_fcos3d_pretrain.pth
├── data/
│ ├── can_bus/
│ ├── nuscenes/
│ │ ├── maps/
│ │ ├── samples/
│ │ ├── sweeps/
│ │ ├── v1.0-test/
| | ├── v1.0-trainval/
| | ├── nuscenes_infos_temporal_train.pkl
| | ├── nuscenes_infos_temporal_val.pkl
```
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
classes=class_names,
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=1, pipeline=eval_pipeline)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
classes=class_names,
sample_groups=dict(Car=15))
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True,
box_type_3d='LiDAR'))
evaluation = dict(interval=1, pipeline=eval_pipeline)
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-80, -80, -5, 80, 80, 3]
# For Lyft we usually do 9-class detection
class_names = [
'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for Lyft dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/lyft/': 's3://lyft/lyft/',
# 'data/lyft/': 's3://lyft/lyft/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_test.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True))
# For Lyft dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 24. Please change the interval accordingly if you do not
# use a default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
dataset_type = 'CocoDataset'
data_root = 'data/nuimages/'
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='Resize',
img_scale=[(1280, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
img_prefix=data_root,
classes=class_names,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-50, -50, -5, 50, 50, 3]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
# Input modality for nuScenes dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/nuscenes/': 's3://nuscenes/nuscenes/',
# 'data/nuscenes/': 's3://nuscenes/nuscenes/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True,
box_type_3d='LiDAR'))
# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 24. Please change the interval accordingly if you do not
# use a default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
dataset_type = 'CustomNuScenesMonoDataset'
data_root = 'data/nuscenes/'
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
# Input modality for nuScenes dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
use_lidar=False,
use_camera=True,
use_radar=False,
use_map=False,
use_external=False)
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=True,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img']),
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
box_type_3d='Camera'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'))
evaluation = dict(interval=2)
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# For Lyft we usually do 9-class detection
class_names = [
'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for Lyft dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
use_lidar=True,
use_camera=False,
use_radar=False,
use_map=False,
use_external=False)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/lyft/': 's3://lyft/lyft/',
# 'data/lyft/': 's3://lyft/lyft/'
# }))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'lyft_infos_test.pkl',
pipeline=test_pipeline,
classes=class_names,
modality=input_modality,
test_mode=True))
# For Lyft dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 24. Please change the interval accordingly if you do not
# use a default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
# dataset settings
dataset_type = 'S3DISDataset'
data_root = './data/s3dis/'
class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointSample', num_points=40000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
# following ScanNet dataset the rotation range is 5 degrees
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=40000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{i}.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
box_type_3d='Depth') for i in train_area
],
separate_eval=False)),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
# dataset settings
dataset_type = 'S3DISSegDataset'
data_root = './data/s3dis/'
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
num_points = 4096
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='IndoorPatchPointSample',
num_points=num_points,
block_size=1.0,
ignore_index=len(class_names),
use_normalized_coord=True,
enlarge_size=0.2,
min_unique_num=None),
dict(type='NormalizePointsColor', color_mean=None),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='NormalizePointsColor', color_mean=None),
dict(
# a wrapper in order to successfully call test function
# actually we don't perform test-time-aug
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.0,
flip_ratio_bev_vertical=0.0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_mask_3d=False,
with_seg_3d=True),
dict(
type='PointSegClassMapping',
valid_cat_ids=tuple(range(len(class_names))),
max_cat_id=13),
dict(
type='DefaultFormatBundle3D',
with_label=False,
class_names=class_names),
dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
# train on area 1, 2, 3, 4, 6
# test on area 5
train=dict(
type=dataset_type,
data_root=data_root,
ann_files=[
data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area
],
pipeline=train_pipeline,
classes=class_names,
test_mode=False,
ignore_index=len(class_names),
scene_idxs=[
data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'
for i in train_area
]),
val=dict(
type=dataset_type,
data_root=data_root,
ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names),
scene_idxs=data_root +
f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
with_mask_3d=True,
with_seg_3d=True),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='PointSegClassMapping',
valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
36, 39),
max_cat_id=40),
dict(type='PointSample', num_points=40000),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=40000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment