Commit d1aac35d authored by zhangwenwei

Initial commit
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.ipynb
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# cython generated cpp
mmdet3d/ops/nms/src/soft_nms_cpu.cpp
mmdet3d/version.py
data
.vscode
.idea
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
exps/
*~
# Pytorch
*.pth
# demo
*.jpg
*.png
variables:
PYTORCH_IMAGE: registry.sensetime.com/eig-research/pytorch:pytorch1.3.1-cuda10.1-devel
stages:
- linting
- test
before_script:
- echo $PATH
- gcc --version
- nvcc --version
- python --version
- pip --version
- python -c "import torch; print(torch.__version__)"
.linting_template: &linting_template_def
stage: linting
script:
- pip install flake8 yapf isort
- flake8 .
- isort -rc --check-only --diff mmdet3d/ tools/ tests/
- yapf -r -d mmdet3d/ tools/ tests/ configs/
.test_template: &test_template_def
stage: test
script:
- echo "Start building..."
- conda install av -c conda-forge -y
- pip install git+https://github.com/open-mmlab/mmdetection.git@v2.0
- python -c "import mmdet; print(mmdet.__version__)"
- pip install -v -e .[all]
- python -c "import mmdet3d; print(mmdet3d.__version__)"
- echo "Start testing..."
- coverage run --branch --source mmdet3d -m pytest tests/
- coverage report -m
linting:pytorch1.3-cuda10:
image: $PYTORCH_IMAGE
<<: *linting_template_def
test:pytorch1.3-cuda10:
image: $PYTORCH_IMAGE
<<: *test_template_def
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet,mmdet3d
known_third_party = Cython,cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,scipy,shapely,six,skimage,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
repos:
- repo: https://gitlab.com/pycqa/flake8.git
rev: 3.7.9
hooks:
- id: flake8
- repo: https://github.com/asottile/seed-isort-config
rev: v2.1.0
hooks:
- id: seed-isort-config
- repo: https://github.com/timothycrosley/isort
rev: 4.3.21
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.29.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: double-quote-string-fixer
- id: fix-encoding-pragma
args: ["--remove"]
[style]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
dist: bionic # ubuntu 18.04
language: python
python:
- "3.5"
- "3.6"
- "3.7"
env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1
cache: pip
# Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis
before_install:
- INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
- wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
- sudo dpkg -i ${INSTALLER}
- wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
- sudo apt-key add 7fa2af80.pub
- sudo apt update -qq
- sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
- sudo apt clean
- CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
- LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
- PATH=${CUDA_HOME}/bin:${PATH}
install:
- pip install Pillow==6.2.2 # remove this line when torchvision>=0.5
- pip install torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2
- pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
- pip install -r requirements.txt
before_script:
- flake8 .
- isort -rc --check-only --diff mmdet3d/ tools/ tests/
- yapf -r -d --style .style.yapf mmdet3d/ tools/ tests/ configs/
script:
- python setup.py check -m -s
- python setup.py build_ext --inplace
- coverage run --source mmdet3d -m py.test -v --xdoctest-modules tests mmdet3d
after_success:
- coverage report
# MMDetection3D
## Introduction
MMDetection3D is an open-source 3D object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
The master branch works with **PyTorch 1.1** or higher.
### Major features
## License
This project is released under the [Apache 2.0 license](LICENSE).
## Updates
v0.0.1 (07/08/2019)
- The project is initiated.
## Benchmark and model zoo
Supported methods and backbones are shown in the table below.
Results and models are available in the [Model zoo](MODEL_ZOO.md).
## Installation
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
## Get Started
Please see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of MMDetection3D.
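The configs later in this commit are plain Python files in the mmcv style. As a minimal sketch (the config path below is hypothetical), they can be loaded and overridden with mmcv before launching training:

```python
# Minimal sketch: load an mmcv-style config and override a few fields.
# The config path is hypothetical; any config file in this commit
# would work the same way.
from mmcv import Config

cfg = Config.fromfile('configs/pp_secfpn_80e.py')  # hypothetical path
cfg.data.samples_per_gpu = 2             # e.g. shrink the per-GPU batch size
cfg.optimizer.lr = cfg.optimizer.lr / 3  # scale the LR to match
print(cfg.pretty_text)                   # inspect the merged result
```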
## Contributing
We appreciate all contributions to improve MMDetection3D. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for the contributing guideline.
## Acknowledgement
MMDetection3D is an open-source project contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedback.
We hope that the toolbox and benchmark can serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop new detectors.
## Citation
## Contact
This repo is currently maintained by Wenwei Zhang ([@ZwwWayne](http://github.com/ZwwWayne)).
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicMVXFasterRCNNV2',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
pts_voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='DynamicVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
fusion_layer=dict(
type='PointFusion',
img_channels=256,
pts_channels=64,
mid_channels=128,
out_channels=128,
img_levels=[0, 1, 2, 3, 4],
align_corners=False,
activate_out=True,
fuse_out=False),
),
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=128,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
pts_backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
pts_neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
pts_bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
assigner_per_size=True,
anchor_strides=[2],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
assign_per_class=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
), )
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
)
db_sampler = dict(
type='MMDataBaseSampler',
root_path=data_root,
info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
blending_type=['box', 'gaussian', 'poisson'],
depth_consistent=True,
check_2D_collision=True,
collision_thr=[0, 0.3, 0.5, 0.7],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
Car=5,
Pedestrian=10,
Cyclist=10,
),
),
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
),
)
train_pipeline = [
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
multiscale_mode='range',
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'img', 'gt_bboxes_3d', 'gt_bboxes', 'gt_labels',
'gt_labels_3d'
]),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 384),
],
multiscale_mode='value',
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[0, 0],
scaling_uniform_noise=[1, 1]),
dict(type='RandomFlip3D', flip_ratio=0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
test_mode=True))
# Training settings
optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True,
)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth' # noqa
resume_from = None
workflow = [('train', 1)]
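The `diff_rad_by_sin=True` flag in the head above enables SECOND's sine-difference angle encoding: the regression loss is applied to sin(pred - target) via the identity sin(a - b) = sin(a)cos(b) - cos(a)sin(b), so a pi flip costs nothing and the heading is recovered by the direction classifier. A rough numpy sketch of that transformation (the standard formulation, not copied from this repo):

```python
# Rough sketch of the sine-difference encoding behind diff_rad_by_sin=True.
import numpy as np

def add_sin_difference(pred_rot, target_rot):
    # sin(a - b) = sin(a)cos(b) - cos(a)sin(b): regress the two product
    # terms against each other so a pi flip gives zero loss; the separate
    # direction classifier (use_direction_classifier=True) disambiguates
    # the heading.
    new_pred = np.sin(pred_rot) * np.cos(target_rot)
    new_target = np.cos(pred_rot) * np.sin(target_rot)
    return new_pred, new_target
```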
# model settings
voxel_size = [0.16, 0.16, 4]
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
        max_num_points=-1,  # set to -1 for dynamic voxelization
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(-1, -1),  # set to -1 for dynamic voxelization
),
voxel_encoder=dict(
type='DynamicPillarFeatureNet',
num_input_features=4,
num_filters=[64],
with_distance=False,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
),
middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[496, 432],
),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 160
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
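In all of these configs the middle encoder's `output_shape` is determined by `point_cloud_range` and `voxel_size`. A worked check of the grid arithmetic (a standalone sketch, not repo code):

```python
# Worked check of the grid shapes used by the middle encoders above.
def grid_shape(pc_range, voxel_size):
    # voxels along x, y, z: (range_max - range_min) / voxel_size
    return [round((pc_range[i + 3] - pc_range[i]) / voxel_size[i])
            for i in range(3)]

# PointPillars config: range [0, -39.68, -3, 69.12, 39.68, 1], voxels [0.16, 0.16, 4]
print(grid_shape([0, -39.68, -3, 69.12, 39.68, 1], [0.16, 0.16, 4]))
# -> [432, 496, 1]; PointPillarsScatter's output_shape=[496, 432] is (y, x)

# SECOND config: range [0, -40, -3, 70.4, 40, 1], voxels [0.05, 0.05, 0.1]
print(grid_shape([0, -40, -3, 70.4, 40, 1], [0.05, 0.05, 0.1]))
# -> [1408, 1600, 40]; SparseEncoder's output_shape=[41, 1600, 1408] is
# (z, y, x), with one extra z slot (41 = 40 + 1), presumably a
# sparse-convolution padding convention
```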
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
anchor_strides=[2],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
Car=5,
Pedestrian=10,
Cyclist=10,
),
),
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True,
)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
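Setting `max_num_points=-1` and `max_voxels=(-1, -1)` above switches to dynamic voxelization: no points are dropped and no voxel cap is applied; features are instead reduced over however many points land in each voxel (`DynamicVFEV3` takes the mean). A rough numpy sketch of that reduction, assuming mean pooling:

```python
# Rough sketch of dynamic voxelization with mean reduction: every point
# is kept, unlike a hard voxelizer that truncates at max_num_points.
import numpy as np

def dynamic_voxelize_mean(points, pc_range, voxel_size):
    # points: (N, 4) array of x, y, z, intensity in velodyne coordinates
    pc_range = np.asarray(pc_range, dtype=np.float32)
    voxel_size = np.asarray(voxel_size, dtype=np.float32)
    coords = np.floor((points[:, :3] - pc_range[:3]) / voxel_size).astype(np.int64)
    # keep only points inside the configured range
    grid = np.round((pc_range[3:] - pc_range[:3]) / voxel_size).astype(np.int64)
    inside = ((coords >= 0) & (coords < grid)).all(axis=1)
    coords, points = coords[inside], points[inside]
    # group points by voxel coordinate and average their features
    voxels, inverse = np.unique(coords, axis=0, return_inverse=True)
    sums = np.zeros((len(voxels), points.shape[1]), dtype=np.float32)
    np.add.at(sums, inverse, points)
    counts = np.bincount(inverse, minlength=len(voxels)).astype(np.float32)
    return voxels, sums / counts[:, None]
```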
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
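In the cyclic schedule above, `target_ratio=[10, 1e-4]` with `step_ratio_up=0.4` means the learning rate climbs from `lr` to `10 * lr` over the first 40% of training and then anneals to `1e-4 * lr` by the end; the momentum schedule mirrors this with `target_ratio=[0.85 / 0.95, 1]`. A sketch of the endpoint arithmetic, assuming mmcv-style cyclic-hook semantics (the exact annealing curve is omitted):

```python
# Endpoint arithmetic for the cyclic LR/momentum schedules above
# (assumes mmcv-style target_ratio/step_ratio_up semantics).
lr = 0.0018
target_ratio = (10, 1e-4)
step_ratio_up = 0.4
total_epochs = 80

peak_lr = lr * target_ratio[0]            # 0.018, reached at epoch 32
final_lr = lr * target_ratio[1]           # 1.8e-07, reached at epoch 80
up_phase = total_epochs * step_ratio_up   # 32.0 epochs of ramp-up
print(peak_lr, final_lr, up_phase)
```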
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[1 / 3, 0.5, 1.0, 2.0, 3.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'Kitti2DDataset'
data_root = 'data/kitti/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
# Values to be used for image normalization (BGR order)
# Default mean pixel values from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 384),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_train.pkl',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_val.pkl',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_val.pkl',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth' # noqa
resume_from = None
workflow = [('train', 1)]
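The normalization comment in this config (std absorbed into the conv1 weights of Detectron/MSRA Caffe models) is why it keeps BGR order with `std=[1.0, 1.0, 1.0]`, while the LiDAR-only configs carry the torchvision-style RGB statistics. A sketch of what the `Normalize` transform computes under each convention (mirroring mmcv's behavior, not repo code):

```python
# Sketch of the two Normalize conventions that appear in these configs.
import numpy as np

def normalize(img_bgr, mean, std, to_rgb):
    # img_bgr: (H, W, 3) uint8 image as loaded by OpenCV, BGR channel order
    img = img_bgr[..., ::-1] if to_rgb else img_bgr  # optionally flip to RGB
    return (img.astype(np.float32) - mean) / std

# Caffe/Detectron weights: stay BGR, subtract the mean only.
caffe = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
# torchvision-style weights: convert to RGB, use ImageNet mean and std.
tv = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img = np.zeros((384, 1280, 3), dtype=np.uint8)  # dummy KITTI-sized image
out = normalize(img, np.array(caffe['mean']), np.array(caffe['std']), caffe['to_rgb'])
```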
# model settings
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=[0.16, 0.16, 4],
        max_voxels=(12000, 20000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='PillarFeatureNet',
num_input_features=4,
num_filters=[64],
with_distance=False,
        # these two arguments should be consistent with the voxel_layer above
voxel_size=[0.16, 0.16, 4],
point_cloud_range=point_cloud_range,
),
middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[496, 432],
),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 160
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(1200, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DVeloHead',
class_names=class_names,
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795], # car
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365], # truck
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504], # trailer
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111], # bicycle
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072], # pedestrian
[-49.6, -49.6, -1.80984986, 49.6, 49.6,
-1.80984986], # traffic_cone
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], # barrier
],
anchor_strides=[2],
anchor_sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
anchor_custom_values=[0, 0],
anchor_rotations=[0, 1.57],
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_per_img=500,
post_center_limit_range=point_cloud_range,
        # TODO: check whether this range needs to change
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(),
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
),
)
train_pipeline = [
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 720),
],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio=0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=False))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[16, 19])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=20)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
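On nuScenes each box carries two extra velocity components: `anchor_custom_values=[0, 0]` appends vx, vy placeholders to every anchor, which is why `code_weight` in `train_cfg` has nine entries with the last two down-weighted to 0.2. A sketch of the implied box layout (the ordering is assumed by analogy with the 7-dim KITTI boxes, not taken from the repo):

```python
# Sketch of the 9-dim nuScenes box encoding implied by the config above.
kitti_box = ['x', 'y', 'z', 'w', 'l', 'h', 'yaw']  # 7 regression targets
nus_box = kitti_box + ['vx', 'vy']                 # 9 targets with velocities
code_weight = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]  # from train_cfg
assert len(code_weight) == len(nus_box)  # velocity terms weighted at 0.2
```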
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=4,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
fusion_layer=dict(
type='MultiViewPointFusion',
img_channels=2048,
pts_channels=64,
mid_channels=128,
out_channels=128,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
img_levels=[3],
align_corners=False,
activate_out=True,
fuse_out=False),
),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=128,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=128,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DVeloHead',
class_names=class_names,
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(type='AlignedAnchorGeneratorRange', ),
anchor_range=[
[-50, -50, -1.80032795, 50, 50, -1.80032795], # car
[-50, -50, -1.74440365, 50, 50, -1.74440365], # truck
[-50, -50, -1.68526504, 50, 50, -1.68526504], # trailer
[-50, -50, -1.67339111, 50, 50, -1.67339111], # bicycle
[-50, -50, -1.61785072, 50, 50, -1.61785072], # pedestrian
[-50, -50, -1.80984986, 50, 50, -1.80984986], # traffic_cone
[-50, -50, -1.763965, 50, 50, -1.763965], # barrier
],
anchor_strides=[2],
anchor_sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
anchor_custom_values=[0, 0],
anchor_rotations=[0, 1.57],
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_per_img=500,
post_center_limit_range=point_cloud_range,
        # TODO: check whether this needs to be changed
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(),
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
),
)
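# Descriptive note (assuming the standard GT-sampling augmentation from
# SECOND): `sample_groups` sets how many database objects of each class are
# pasted into every training scene, drawn from the infos at `info_path`.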
train_pipeline = [
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 720),
],
multiscale_mode='value',
keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img']),
]
data = dict(
samples_per_gpu=1,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_test.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=False))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[16, 19])
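# Note (describing the standard mmcv LrUpdaterHook behavior): with linear
# warmup, the learning rate starts at warmup_ratio * lr (0.001 / 1000 here)
# and ramps to the base lr over the first 1000 iterations; the step policy
# then decays it by 10x at epochs 16 and 19.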
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=20)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_nus_1x_coco-3x-pre_ap-28.8-4e72d8c7.pth' # noqa
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='RetinaNet',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=10,
in_channels=256,
stacked_convs=4,
feat_channels=256,
octave_base_scale=4,
scales_per_octave=3,
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[8, 16, 32, 64, 128],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order).
# The default mean pixel values come from ImageNet: [103.53, 116.28, 123.675].
# When using pre-trained models from Detectron1 or any MSRA models,
# the std has been absorbed into the conv1 weights, so std needs to be set to 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
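# A sketch of what the Normalize transform computes under this config
# (illustration only, not an extra setting); with std of 1.0 this is plain
# mean subtraction in BGR order:
#   img = (img.astype(np.float32) - mean) / std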
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=(1600, 900),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
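A minimal sketch of how a config file like the ones above is consumed,
assuming the mmcv/mmdet v2.0-style APIs that this repository builds on (see
the CI script); the config path below is hypothetical.
from mmcv import Config
from mmdet.models import build_detector

# Parse the Python config file into a dict-like Config object (hypothetical path).
cfg = Config.fromfile('configs/retinanet_r50_fpn_1x.py')
# Build the detector from the `model` dict and wire in train/test settings.
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)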
## Changelog
### v1.1.0 (24/2/2020)
**Highlights**
- Dataset evaluation is rewritten with a unified API, which is used by both evaluation hooks and test scripts.
- Support new methods: [CARAFE](https://arxiv.org/abs/1905.02188).
**Breaking Changes**
- The new MMDDP inherits from the official DDP, so its `__init__` API is now the same as the official DDP's.
- The `mask_head` field in HTC config files is modified.
- The evaluation and testing script is updated.
- In all transforms, instance masks are stored as a numpy array shaped (n, h, w) instead of a list of (h, w) arrays, where n is the number of instances.
**Bug Fixes**
- Fix IoU assigners when `ignore_iof_thr` > 0 and there are no predicted boxes. (#2135)
- Fix mAP evaluation when there are no ignored boxes. (#2116)
- Fix the empty RoI input for Deformable RoI Pooling. (#2099)
- Fix the dataset settings for multiple workflows. (#2103)
- Fix the warning related to `torch.uint8` in PyTorch 1.4. (#2105)
- Fix the inference demo on devices other than gpu:0. (#2098)
- Fix Dockerfile. (#2097)
- Fix the bug that `pad_val` is unused in Pad transform. (#2093)
- Fix the albumentation transform when there is no ground truth bbox. (#2032)
**Improvements**
- Use torch instead of numpy for random sampling. (#2094)
- Migrate to the new MMDDP implementation in MMCV v0.3. (#2090)
- Add meta information in logs. (#2086)
- Rewrite Soft-NMS with a PyTorch extension and remove Cython as a dependency. (#2056)
- Rewrite dataset evaluation. (#2042, #2087, #2114, #2128)
- Use numpy array for masks in transforms. (#2030)
**New Features**
- Implement "CARAFE: Content-Aware ReAssembly of FEatures". (#1583)
- Add `worker_init_fn()` in data_loader when seed is set. (#2066, #2111)
- Add logging utils. (#2035)
### v1.0.0 (30/1/2020)
This release mainly improves the code quality and adds more docstrings.
**Highlights**
- Documentation is online now: https://mmdetection.readthedocs.io.
- Support new models: [ATSS](https://arxiv.org/abs/1912.02424).
- DCN is now available via the APIs `build_conv_layer` and `ConvModule`, just like a normal conv layer.
- A tool to collect environment information is available for troubleshooting.
**Bug Fixes**
- Fix the incompatibility of the latest numpy and pycocotools. (#2024)
- Fix the case when distributed package is unavailable, e.g., on Windows. (#1985)
- Fix the dimension issue for `refine_bboxes()`. (#1962)
- Fix the typo when `seg_prefix` is a list. (#1906)
- Add segmentation map cropping to RandomCrop. (#1880)
- Fix the return value of `ga_shape_target_single()`. (#1853)
- Fix the loaded shape of empty proposals. (#1819)
- Fix the mask data type when using albumentation. (#1818)
**Improvements**
- Enhance AssignResult and SamplingResult. (#1995)
- Add ability to overwrite existing module in Registry. (#1982)
- Reorganize requirements and make albumentations and imagecorruptions optional. (#1969)
- Check NaN in `SSDHead`. (#1935)
- Encapsulate the DCN in ResNe(X)t into a ConvModule & Conv_layers. (#1894)
- Refactoring for mAP evaluation and support multiprocessing and logging. (#1889)
- Init the root logger before constructing Runner to log more information. (#1865)
- Split `SegResizeFlipPadRescale` into different existing transforms. (#1852)
- Move `init_dist()` to MMCV. (#1851)
- Documentation and docstring improvements. (#1971, #1938, #1869, #1838)
- Fix the color of the same class for mask visualization. (#1834)
- Remove the option `keep_all_stages` in HTC and Cascade R-CNN. (#1806)
**New Features**
- Add two test-time options `crop_mask` and `rle_mask_encode` for mask heads. (#2013)
- Support loading grayscale images as single channel. (#1975)
- Implement "Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection". (#1872)
- Add sphinx generated docs. (#1859, #1864)
- Add GN support for flops computation. (#1850)
- Collect env info for troubleshooting. (#1812)
### v1.0rc1 (13/12/2019)
The RC1 release mainly focuses on improving the user experience and fixing bugs.
**Highlights**
- Support new models: [FoveaBox](https://arxiv.org/abs/1904.03797), [RepPoints](https://arxiv.org/abs/1904.11490) and [FreeAnchor](https://arxiv.org/abs/1909.02466).
- Add a Dockerfile.
- Add a jupyter notebook demo and a webcam demo.
- Setup the code style and CI.
- Add lots of docstrings and unit tests.
- Fix lots of bugs.
**Breaking Changes**
- There was a bug in computing COCO-style mAP w.r.t. different scales (AP_s, AP_m, AP_l), introduced by #621. (#1679)
**Bug Fixes**
- Fix a sampling interval bug in Libra R-CNN. (#1800)
- Fix the learning rate in SSD300 WIDER FACE. (#1781)
- Fix the scaling issue when `keep_ratio=False`. (#1730)
- Fix typos. (#1721, #1492, #1242, #1108, #1107)
- Fix the shuffle argument in `build_dataloader`. (#1693)
- Clip the proposal when computing mask targets. (#1688)
- Fix the "index out of range" bug for samplers in some corner cases. (#1610, #1404)
- Fix the NMS issue on devices other than GPU:0. (#1603)
- Fix SSD Head and GHM Loss on CPU. (#1578)
- Fix the OOM error when there are too many gt bboxes. (#1575)
- Fix the wrong keyword argument `nms_cfg` in HTC. (#1573)
- Process masks and semantic segmentation in Expand and MinIoUCrop transforms. (#1550, #1361)
- Fix a scale bug in the Non-local op. (#1528)
- Fix a bug in transforms when `gt_bboxes_ignore` is None. (#1498)
- Fix a bug when `img_prefix` is None. (#1497)
- Pass the device argument to `grid_anchors` and `valid_flags`. (#1478)
- Fix the data pipeline for test_robustness. (#1476)
- Fix the argument type of deformable pooling. (#1390)
- Fix the coco_eval when there are only two classes. (#1376)
- Fix a bug in Modulated DeformableConv when deformable_group>1. (#1359)
- Fix the mask cropping in RandomCrop. (#1333)
- Fix zero outputs in DeformConv when not running on cuda:0. (#1326)
- Fix the type issue in Expand. (#1288)
- Fix the inference API. (#1255)
- Fix the inplace operation in Expand. (#1249)
- Fix the from-scratch training config. (#1196)
- Fix an inplace add in RoIExtractor which caused an error in PyTorch 1.2. (#1160)
- Fix FCOS when input images have no positive samples. (#1136)
- Fix recursive imports. (#1099)
**Improvements**
- Print the config file and mmdet version in the log. (#1721)
- Lint the code before compiling in Travis CI. (#1715)
- Add a probability argument for the `Expand` transform. (#1651)
- Update the PyTorch and CUDA version in the docker file. (#1615)
- Raise a warning when specifying `--validate` in non-distributed training. (#1624, #1651)
- Beautify the mAP printing. (#1614)
- Add pre-commit hook. (#1536)
- Add the argument `in_channels` to backbones. (#1475)
- Add lots of docstrings and unit tests, thanks to [@Erotemic](https://github.com/Erotemic). (#1603, #1517, #1506, #1505, #1491, #1479, #1477, #1475, #1474)
- Add support for multi-node distributed test when there is no shared storage. (#1399)
- Optimize Dockerfile to reduce the image size. (#1306)
- Update new results of HRNet. (#1284, #1182)
- Add an argument `no_norm_on_lateral` in FPN. (#1240)
- Test the compiling in CI. (#1235)
- Move docs to a separate folder. (#1233)
- Add a jupyter notebook demo. (#1158)
- Support different types of datasets for training. (#1133)
- Use int64_t instead of long in cuda kernels. (#1131)
- Support non-square RoIs for bbox and mask heads. (#1128)
- Manually add type promotion to keep compatibility with PyTorch 1.2. (#1114)
- Allow a validation dataset for computing validation loss. (#1093)
- Use `.scalar_type()` instead of `.type()` to suppress some warnings. (#1070)
**New Features**
- Add an option `--with_ap` to compute the AP for each class. (#1549)
- Implement "FreeAnchor: Learning to Match Anchors for Visual Object Detection". (#1391)
- Support [Albumentations](https://github.com/albumentations-team/albumentations) for augmentations in the data pipeline. (#1354)
- Implement "FoveaBox: Beyond Anchor-based Object Detector". (#1339)
- Support horizontal and vertical flipping. (#1273, #1115)
- Implement "RepPoints: Point Set Representation for Object Detection". (#1265)
- Add test-time augmentation to HTC and Cascade R-CNN. (#1251)
- Add a COCO result analysis tool. (#1228)
- Add Dockerfile. (#1168)
- Add a webcam demo. (#1155, #1150)
- Add FLOPs counter. (#1127)
- Allow arbitrary layer order for ConvModule. (#1078)
### v1.0rc0 (27/07/2019)
- Implement lots of new methods and components (Mixed Precision Training, HTC, Libra R-CNN, Guided Anchoring, Empirical Attention, Mask Scoring R-CNN, Grid R-CNN (Plus), GHM, GCNet, FCOS, HRNet, Weight Standardization, etc.). Thanks to all collaborators!
- Support two additional datasets: WIDER FACE and Cityscapes.
- Refactor the loss APIs to make it more flexible to adopt different losses and related hyper-parameters.
- Speed up multi-gpu testing.
- Integrate all compiling and installing in a single script.
### v0.6.0 (14/04/2019)
- Up to 30% speedup compared to the model zoo.
- Support both PyTorch stable and nightly versions.
- Replace NMS and SigmoidFocalLoss with PyTorch CUDA extensions.
### v0.6rc0 (06/02/2019)
- Migrate to PyTorch 1.0.
### v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
### v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
### v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models.
- Refactoring for Samplers/Assigners and add OHEM.
- Add VOC dataset and evaluation scripts.
### v0.5.4 (27/11/2018)
- Add SingleStageDetector and RetinaNet.
### v0.5.3 (26/11/2018)
- Add Cascade R-CNN and Cascade Mask R-CNN.
- Add support for Soft-NMS in config files.
### v0.5.2 (21/10/2018)
- Add support for custom datasets.
- Add a script to convert PASCAL VOC annotations to the expected format.
### v0.5.1 (20/10/2018)
- Add BBoxAssigner and BBoxSampler; the `train_cfg` field in config files is restructured.
- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at chenkaidev@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq