Commit d1aac35d authored by zhangwenwei

Initial commit
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.ipynb
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# cython generated cpp
mmdet3d/ops/nms/src/soft_nms_cpu.cpp
mmdet3d/version.py
data
.vscode
.idea
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
exps/
*~
# Pytorch
*.pth
# demo
*.jpg
*.png
variables:
PYTORCH_IMAGE: registry.sensetime.com/eig-research/pytorch:pytorch1.3.1-cuda10.1-devel
stages:
- linting
- test
before_script:
- echo $PATH
- gcc --version
- nvcc --version
- python --version
- pip --version
- python -c "import torch; print(torch.__version__)"
.linting_template: &linting_template_def
stage: linting
script:
- pip install flake8 yapf isort
- flake8 .
- isort -rc --check-only --diff mmdet3d/ tools/ tests/
- yapf -r -d mmdet3d/ tools/ tests/ configs/
.test_template: &test_template_def
stage: test
script:
- echo "Start building..."
- conda install av -c conda-forge -y
- pip install git+https://github.com/open-mmlab/mmdetection.git@v2.0
- python -c "import mmdet; print(mmdet.__version__)"
- pip install -v -e .[all]
- python -c "import mmdet3d; print(mmdet3d.__version__)"
- echo "Start testing..."
- coverage run --branch --source mmdet3d -m pytest tests/
- coverage report -m
linting:pytorch1.3-cuda10:
image: $PYTORCH_IMAGE
<<: *linting_template_def
test:pytorch1.3-cuda10:
image: $PYTORCH_IMAGE
<<: *test_template_def
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet,mmdet3d
known_third_party = Cython,cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,scipy,shapely,six,skimage,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
repos:
- repo: https://gitlab.com/pycqa/flake8.git
rev: 3.7.9
hooks:
- id: flake8
- repo: https://github.com/asottile/seed-isort-config
rev: v2.1.0
hooks:
- id: seed-isort-config
- repo: https://github.com/timothycrosley/isort
rev: 4.3.21
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.29.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: double-quote-string-fixer
- id: fix-encoding-pragma
args: ["--remove"]
[style]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
dist: bionic # ubuntu 18.04
language: python
python:
- "3.5"
- "3.6"
- "3.7"
env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1
cache: pip
# Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis
before_install:
- INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
- wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
- sudo dpkg -i ${INSTALLER}
- wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
- sudo apt-key add 7fa2af80.pub
- sudo apt update -qq
- sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
- sudo apt clean
- CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
- LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
- PATH=${CUDA_HOME}/bin:${PATH}
install:
- pip install Pillow==6.2.2 # remove this line when torchvision>=0.5
- pip install torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2
- pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
- pip install -r requirements.txt
before_script:
- flake8 .
- isort -rc --check-only --diff mmdet3d/ tools/ tests/
- yapf -r -d --style .style.yapf mmdet3d/ tools/ tests/ configs/
script:
- python setup.py check -m -s
- python setup.py build_ext --inplace
- coverage run --source mmdet3d -m py.test -v --xdoctest-modules tests mmdet3d
after_success:
- coverage report
# MMDetection3D
## Introduction
MMDetection3D is an open-source 3D object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
The master branch works with **PyTorch 1.1** or higher.
### Major features
## License
This project is released under the [Apache 2.0 license](LICENSE).
## Updates
v0.0.1 (07/08/2019)
- The project is initiated.
## Benchmark and model zoo
Supported methods and backbones are shown in the table below.
Results and models are available in the [Model zoo](MODEL_ZOO.md).
## Installation
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
## Get Started
Please see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of MMDetection3D.
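The configs later in this commit are plain Python files in the mmcv style. As a minimal sketch (the config path below is hypothetical), they can be loaded and overridden with mmcv before launching training:

```python
# Minimal sketch: load an mmcv-style config and override a few fields.
# The config path is hypothetical; any config file in this commit
# would work the same way.
from mmcv import Config

cfg = Config.fromfile('configs/pp_secfpn_80e.py')  # hypothetical path
cfg.data.samples_per_gpu = 2             # e.g. shrink the per-GPU batch size
cfg.optimizer.lr = cfg.optimizer.lr / 3  # scale the LR to match
print(cfg.pretty_text)                   # inspect the merged result
```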
## Contributing
We appreciate all contributions to improve MMDetection3D. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for the contributing guideline.
## Acknowledgement
MMDetection3D is an open-source project contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedback.
We hope that the toolbox and benchmark can serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop new detectors.
## Citation
## Contact
This repo is currently maintained by Wenwei Zhang ([@ZwwWayne](http://github.com/ZwwWayne)).
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicMVXFasterRCNNV2',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
pts_voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='DynamicVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
fusion_layer=dict(
type='PointFusion',
img_channels=256,
pts_channels=64,
mid_channels=128,
out_channels=128,
img_levels=[0, 1, 2, 3, 4],
align_corners=False,
activate_out=True,
fuse_out=False),
),
pts_middle_encoder=dict(
type='SparseEncoder',
in_channels=128,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
pts_backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
pts_neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
pts_bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
assigner_per_size=True,
anchor_strides=[2],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
assign_per_class=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
), )
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
)
db_sampler = dict(
type='MMDataBaseSampler',
root_path=data_root,
info_path=data_root + 'kitti_mm_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
blending_type=['box', 'gaussian', 'poisson'],
depth_consistent=True,
check_2D_collision=True,
collision_thr=[0, 0.3, 0.5, 0.7],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
Car=5,
Pedestrian=10,
Cyclist=10,
),
),
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
),
)
train_pipeline = [
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
multiscale_mode='range',
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'points', 'img', 'gt_bboxes_3d', 'gt_bboxes', 'gt_labels',
'gt_labels_3d'
]),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 384),
],
multiscale_mode='value',
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[0, 0],
scaling_uniform_noise=[1, 1]),
dict(type='RandomFlip3D', flip_ratio=0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
test_mode=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True,
test_mode=True))
# Training settings
optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True,
)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_1x_coco-3-class_44.7_20200205-b1c1533f.pth' # noqa
resume_from = None
workflow = [('train', 1)]
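The `diff_rad_by_sin=True` flag in the head above enables SECOND's sine-difference angle encoding: the regression loss is applied to sin(pred - target) via the identity sin(a - b) = sin(a)cos(b) - cos(a)sin(b), so a pi flip costs nothing and the heading is recovered by the direction classifier. A rough numpy sketch of that transformation (the standard formulation, not copied from this repo):

```python
# Rough sketch of the sine-difference encoding behind diff_rad_by_sin=True.
import numpy as np

def add_sin_difference(pred_rot, target_rot):
    # sin(a - b) = sin(a)cos(b) - cos(a)sin(b): regress the two product
    # terms against each other so a pi flip gives zero loss; the separate
    # direction classifier (use_direction_classifier=True) disambiguates
    # the heading.
    new_pred = np.sin(pred_rot) * np.cos(target_rot)
    new_target = np.cos(pred_rot) * np.sin(target_rot)
    return new_pred, new_target
```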
# model settings
voxel_size = [0.16, 0.16, 4]
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
        max_num_points=-1,  # set to -1 for dynamic voxelization
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(-1, -1),  # set to -1 for dynamic voxelization
),
voxel_encoder=dict(
type='DynamicPillarFeatureNet',
num_input_features=4,
num_filters=[64],
with_distance=False,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
),
middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[496, 432],
),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 160
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
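In all of these configs the middle encoder's `output_shape` is determined by `point_cloud_range` and `voxel_size`. A worked check of the grid arithmetic (a standalone sketch, not repo code):

```python
# Worked check of the grid shapes used by the middle encoders above.
def grid_shape(pc_range, voxel_size):
    # voxels along x, y, z: (range_max - range_min) / voxel_size
    return [round((pc_range[i + 3] - pc_range[i]) / voxel_size[i])
            for i in range(3)]

# PointPillars config: range [0, -39.68, -3, 69.12, 39.68, 1], voxels [0.16, 0.16, 4]
print(grid_shape([0, -39.68, -3, 69.12, 39.68, 1], [0.16, 0.16, 4]))
# -> [432, 496, 1]; PointPillarsScatter's output_shape=[496, 432] is (y, x)

# SECOND config: range [0, -40, -3, 70.4, 40, 1], voxels [0.05, 0.05, 0.1]
print(grid_shape([0, -40, -3, 70.4, 40, 1], [0.05, 0.05, 0.1]))
# -> [1408, 1600, 40]; SparseEncoder's output_shape=[41, 1600, 1408] is
# (z, y, x), with one extra z slot (41 = 40 + 1), presumably a
# sparse-convolution padding convention
```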
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
anchor_strides=[2],
anchor_sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
Car=5,
Pedestrian=10,
Cyclist=10,
),
),
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True,
)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
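Setting `max_num_points=-1` and `max_voxels=(-1, -1)` above switches to dynamic voxelization: no points are dropped and no voxel cap is applied; features are instead reduced over however many points land in each voxel (`DynamicVFEV3` takes the mean). A rough numpy sketch of that reduction, assuming mean pooling:

```python
# Rough sketch of dynamic voxelization with mean reduction: every point
# is kept, unlike a hard voxelizer that truncates at max_num_points.
import numpy as np

def dynamic_voxelize_mean(points, pc_range, voxel_size):
    # points: (N, 4) array of x, y, z, intensity in velodyne coordinates
    pc_range = np.asarray(pc_range, dtype=np.float32)
    voxel_size = np.asarray(voxel_size, dtype=np.float32)
    coords = np.floor((points[:, :3] - pc_range[:3]) / voxel_size).astype(np.int64)
    # keep only points inside the configured range
    grid = np.round((pc_range[3:] - pc_range[:3]) / voxel_size).astype(np.int64)
    inside = ((coords >= 0) & (coords < grid)).all(axis=1)
    coords, points = coords[inside], points[inside]
    # group points by voxel coordinate and average their features
    voxels, inverse = np.unique(coords, axis=0, return_inverse=True)
    sums = np.zeros((len(voxels), points.shape[1]), dtype=np.float32)
    np.add.at(sums, inverse, points)
    counts = np.bincount(inverse, minlength=len(voxels)).astype(np.float32)
    return voxels, sums / counts[:, None]
```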
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='DynamicVoxelNet',
voxel_layer=dict(
max_num_points=-1, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(-1, -1),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='DynamicVFEV3',
num_input_features=4,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
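In the cyclic schedule above, `target_ratio=[10, 1e-4]` with `step_ratio_up=0.4` means the learning rate climbs from `lr` to `10 * lr` over the first 40% of training and then anneals to `1e-4 * lr` by the end; the momentum schedule mirrors this with `target_ratio=[0.85 / 0.95, 1]`. A sketch of the endpoint arithmetic, assuming mmcv-style cyclic-hook semantics (the exact annealing curve is omitted):

```python
# Endpoint arithmetic for the cyclic LR/momentum schedules above
# (assumes mmcv-style target_ratio/step_ratio_up semantics).
lr = 0.0018
target_ratio = (10, 1e-4)
step_ratio_up = 0.4
total_epochs = 80

peak_lr = lr * target_ratio[0]            # 0.018, reached at epoch 32
final_lr = lr * target_ratio[1]           # 1.8e-07, reached at epoch 80
up_phase = total_epochs * step_ratio_up   # 32.0 epochs of ramp-up
print(peak_lr, final_lr, up_phase)
```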
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[1 / 3, 0.5, 1.0, 2.0, 3.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'Kitti2DDataset'
data_root = 'data/kitti/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
# Values to be used for image normalization (BGR order)
# Default mean pixel values from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 384),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_train.pkl',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_val.pkl',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_root=data_root,
class_names=class_names,
ann_file='kitti_infos_val.pkl',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth' # noqa
resume_from = None
workflow = [('train', 1)]
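The normalization comment in this config (std absorbed into the conv1 weights of Detectron/MSRA Caffe models) is why it keeps BGR order with `std=[1.0, 1.0, 1.0]`, while the LiDAR-only configs carry the torchvision-style RGB statistics. A sketch of what the `Normalize` transform computes under each convention (mirroring mmcv's behavior, not repo code):

```python
# Sketch of the two Normalize conventions that appear in these configs.
import numpy as np

def normalize(img_bgr, mean, std, to_rgb):
    # img_bgr: (H, W, 3) uint8 image as loaded by OpenCV, BGR channel order
    img = img_bgr[..., ::-1] if to_rgb else img_bgr  # optionally flip to RGB
    return (img.astype(np.float32) - mean) / std

# Caffe/Detectron weights: stay BGR, subtract the mean only.
caffe = dict(mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
# torchvision-style weights: convert to RGB, use ImageNet mean and std.
tv = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img = np.zeros((384, 1280, 3), dtype=np.uint8)  # dummy KITTI-sized image
out = normalize(img, np.array(caffe['mean']), np.array(caffe['std']), caffe['to_rgb'])
```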
# model settings
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=[0.16, 0.16, 4],
        max_voxels=(12000, 20000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='PillarFeatureNet',
num_input_features=4,
num_filters=[64],
with_distance=False,
        # these two arguments should be consistent with the voxel_layer above
voxel_size=[0.16, 0.16, 4],
point_cloud_range=point_cloud_range,
),
middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[496, 432],
),
backbone=dict(
type='SECOND',
in_channels=64,
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0.25, 0.25, 0.25],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.15707963267, 0.15707963267]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 160
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
output_shape=[41, 1600, 1408], # checked from PointCloud3D
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
anchor_strides=[2],
anchor_sizes=[[1.6, 3.9, 1.56]],
anchor_rotations=[0, 1.57],
diff_rad_by_sin=True,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='PointsRandomFlip', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=[10, 1e-4],
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=[0.85 / 0.95, 1],
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(1200, 720), (1920, 1080)],
multiscale_mode='range',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DVeloHead',
class_names=class_names,
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_range=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795], # car
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365], # truck
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504], # trailer
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111], # bicycle
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072], # pedestrian
[-49.6, -49.6, -1.80984986, 49.6, 49.6,
-1.80984986], # traffic_cone
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965], # barrier
],
anchor_strides=[2],
anchor_sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
anchor_custom_values=[0, 0],
anchor_rotations=[0, 1.57],
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_per_img=500,
post_center_limit_range=point_cloud_range,
        # TODO: check whether this range needs to change
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(),
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
),
)
train_pipeline = [
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 720),
],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio=0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=False))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[16, 19])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=20)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
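On nuScenes each box carries two extra velocity components: `anchor_custom_values=[0, 0]` appends vx, vy placeholders to every anchor, which is why `code_weight` in `train_cfg` has nine entries with the last two down-weighted to 0.2. A sketch of the implied box layout (the ordering is assumed by analogy with the 7-dim KITTI boxes, not taken from the repo):

```python
# Sketch of the 9-dim nuScenes box encoding implied by the config above.
kitti_box = ['x', 'y', 'z', 'w', 'l', 'h', 'yaw']  # 7 regression targets
nus_box = kitti_box + ['vx', 'vy']                 # 9 targets with velocities
code_weight = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]  # from train_cfg
assert len(code_weight) == len(nus_box)  # velocity terms weighted at 0.2
```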
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
img_backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=4,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
fusion_layer=dict(
type='MultiViewPointFusion',
img_channels=2048,
pts_channels=64,
mid_channels=128,
out_channels=128,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01),
img_levels=[3],
align_corners=False,
activate_out=True,
fuse_out=False),
),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=128,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=128,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
num_filters=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
num_upsample_filters=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DVeloHead',
class_names=class_names,
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(type='AlignedAnchorGeneratorRange', ),
anchor_range=[
[-50, -50, -1.80032795, 50, 50, -1.80032795], # car
[-50, -50, -1.74440365, 50, 50, -1.74440365], # truck
[-50, -50, -1.68526504, 50, 50, -1.68526504], # trailer
[-50, -50, -1.67339111, 50, 50, -1.67339111], # bicycle
[-50, -50, -1.61785072, 50, 50, -1.61785072], # pedestrian
[-50, -50, -1.80984986, 50, 50, -1.80984986], # traffic_cone
[-50, -50, -1.763965, 50, 50, -1.763965], # barrier
],
anchor_strides=[2],
anchor_sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
anchor_custom_values=[0, 0],
anchor_rotations=[0, 1.57],
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='ResidualCoder', ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_type='nearest_3d',
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_per_img=500,
post_center_limit_range=point_cloud_range,
        # TODO: check whether this needs to be changed
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_radar=False,
use_map=False,
use_external=False,
use_camera=True,
)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(),
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
),
)
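# Descriptive note (assuming the standard GT-sampling augmentation from
# SECOND): `sample_groups` sets how many database objects of each class are
# pasted into every training scene, drawn from the infos at `info_path`.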
train_pipeline = [
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['img', 'points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='Resize',
img_scale=[
(1280, 720),
],
multiscale_mode='value',
keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img']),
]
data = dict(
samples_per_gpu=1,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'nuscenes_infos_test.pkl',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=False))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[16, 19])
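# Note (describing the standard mmcv LrUpdaterHook behavior): with linear
# warmup, the learning rate starts at warmup_ratio * lr (0.001 / 1000 here)
# and ramps to the base lr over the first 1000 iterations; the step policy
# then decays it by 10x at epochs 16 and 19.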
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=20)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 20
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/pp_secfpn_80e'
load_from = './pretrain_mmdet/mvx_faster_rcnn_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_nus_1x_coco-3x-pre_ap-28.8-4e72d8c7.pth' # noqa
resume_from = None
workflow = [('train', 1)]
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='RetinaNet',
pretrained=('./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
num_outs=5),
bbox_head=dict(
type='RetinaHead',
num_classes=10,
in_channels=256,
stacked_convs=4,
feat_channels=256,
octave_base_scale=4,
scales_per_octave=3,
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[8, 16, 32, 64, 128],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.0, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100)
# dataset settings
dataset_type = 'NuScenes2DDataset'
data_root = 'data/nuscenes/'
# Values to be used for image normalization (BGR order).
# The default mean pixel values come from ImageNet: [103.53, 116.28, 123.675].
# When using pre-trained models from Detectron1 or any MSRA models,
# the std has been absorbed into the conv1 weights, so std needs to be set to 1.
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
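# A sketch of what the Normalize transform computes under this config
# (illustration only, not an extra setting); with std of 1.0 this is plain
# mean subtraction in BGR order:
#   img = (img.astype(np.float32) - mean) / std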
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=(1600, 900),
ratio_range=(0.8, 1.2),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1600, 900),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/retinanet_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
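A minimal sketch of how a config file like the ones above is consumed,
assuming the mmcv/mmdet v2.0-style APIs that this repository builds on (see
the CI script); the config path below is hypothetical.
from mmcv import Config
from mmdet.models import build_detector

# Parse the Python config file into a dict-like Config object (hypothetical path).
cfg = Config.fromfile('configs/retinanet_r50_fpn_1x.py')
# Build the detector from the `model` dict and wire in train/test settings.
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)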
## Changelog
### v1.1.0 (24/2/2020)
**Highlights**
- Dataset evaluation is rewritten with a unified API, which is used by both evaluation hooks and test scripts.
- Support new methods: [CARAFE](https://arxiv.org/abs/1905.02188).
**Breaking Changes**
- The new MMDDP inherits from the official DDP, so its `__init__` API is now the same as the official DDP's.
- The `mask_head` field in HTC config files is modified.
- The evaluation and testing script is updated.
- In all transforms, instance masks are stored as a numpy array shaped (n, h, w) instead of a list of (h, w) arrays, where n is the number of instances.
**Bug Fixes**
- Fix IoU assigners when `ignore_iof_thr` > 0 and there are no predicted boxes. (#2135)
- Fix mAP evaluation when there are no ignored boxes. (#2116)
- Fix the empty RoI input for Deformable RoI Pooling. (#2099)
- Fix the dataset settings for multiple workflows. (#2103)
- Fix the warning related to `torch.uint8` in PyTorch 1.4. (#2105)
- Fix the inference demo on devices other than gpu:0. (#2098)
- Fix Dockerfile. (#2097)
- Fix the bug that `pad_val` is unused in Pad transform. (#2093)
- Fix the albumentation transform when there is no ground truth bbox. (#2032)
**Improvements**
- Use torch instead of numpy for random sampling. (#2094)
- Migrate to the new MMDDP implementation in MMCV v0.3. (#2090)
- Add meta information in logs. (#2086)
- Rewrite Soft-NMS with a PyTorch extension and remove Cython as a dependency. (#2056)
- Rewrite dataset evaluation. (#2042, #2087, #2114, #2128)
- Use numpy array for masks in transforms. (#2030)
**New Features**
- Implement "CARAFE: Content-Aware ReAssembly of FEatures". (#1583)
- Add `worker_init_fn()` in data_loader when seed is set. (#2066, #2111)
- Add logging utils. (#2035)
### v1.0.0 (30/1/2020)
This release mainly improves the code quality and adds more docstrings.
**Highlights**
- Documentation is online now: https://mmdetection.readthedocs.io.
- Support new models: [ATSS](https://arxiv.org/abs/1912.02424).
- DCN is now available via the APIs `build_conv_layer` and `ConvModule`, just like a normal conv layer.
- A tool to collect environment information is available for troubleshooting.
**Bug Fixes**
- Fix the incompatibility of the latest numpy and pycocotools. (#2024)
- Fix the case when distributed package is unavailable, e.g., on Windows. (#1985)
- Fix the dimension issue for `refine_bboxes()`. (#1962)
- Fix the typo when `seg_prefix` is a list. (#1906)
- Add segmentation map cropping to RandomCrop. (#1880)
- Fix the return value of `ga_shape_target_single()`. (#1853)
- Fix the loaded shape of empty proposals. (#1819)
- Fix the mask data type when using albumentation. (#1818)
**Improvements**
- Enhance AssignResult and SamplingResult. (#1995)
- Add ability to overwrite existing module in Registry. (#1982)
- Reorganize requirements and make albumentations and imagecorruptions optional. (#1969)
- Check NaN in `SSDHead`. (#1935)
- Encapsulate the DCN in ResNe(X)t into a ConvModule & Conv_layers. (#1894)
- Refactoring for mAP evaluation and support multiprocessing and logging. (#1889)
- Init the root logger before constructing Runner to log more information. (#1865)
- Split `SegResizeFlipPadRescale` into different existing transforms. (#1852)
- Move `init_dist()` to MMCV. (#1851)
- Documentation and docstring improvements. (#1971, #1938, #1869, #1838)
- Fix the color of the same class for mask visualization. (#1834)
- Remove the option `keep_all_stages` in HTC and Cascade R-CNN. (#1806)
**New Features**
- Add two test-time options `crop_mask` and `rle_mask_encode` for mask heads. (#2013)
- Support loading grayscale images as single channel. (#1975)
- Implement "Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection". (#1872)
- Add sphinx generated docs. (#1859, #1864)
- Add GN support for flops computation. (#1850)
- Collect env info for troubleshooting. (#1812)
### v1.0rc1 (13/12/2019)
The RC1 release mainly focuses on improving the user experience and fixing bugs.
**Highlights**
- Support new models: [FoveaBox](https://arxiv.org/abs/1904.03797), [RepPoints](https://arxiv.org/abs/1904.11490) and [FreeAnchor](https://arxiv.org/abs/1909.02466).
- Add a Dockerfile.
- Add a jupyter notebook demo and a webcam demo.
- Setup the code style and CI.
- Add lots of docstrings and unit tests.
- Fix lots of bugs.
**Breaking Changes**
- There was a bug in computing COCO-style mAP w.r.t. different scales (AP_s, AP_m, AP_l), introduced by #621. (#1679)
**Bug Fixes**
- Fix a sampling interval bug in Libra R-CNN. (#1800)
- Fix the learning rate in SSD300 WIDER FACE. (#1781)
- Fix the scaling issue when `keep_ratio=False`. (#1730)
- Fix typos. (#1721, #1492, #1242, #1108, #1107)
- Fix the shuffle argument in `build_dataloader`. (#1693)
- Clip the proposal when computing mask targets. (#1688)
- Fix the "index out of range" bug for samplers in some corner cases. (#1610, #1404)
- Fix the NMS issue on devices other than GPU:0. (#1603)
- Fix SSD Head and GHM Loss on CPU. (#1578)
- Fix the OOM error when there are too many gt bboxes. (#1575)
- Fix the wrong keyword argument `nms_cfg` in HTC. (#1573)
- Process masks and semantic segmentation in Expand and MinIoUCrop transforms. (#1550, #1361)
- Fix a scale bug in the Non-local op. (#1528)
- Fix a bug in transforms when `gt_bboxes_ignore` is None. (#1498)
- Fix a bug when `img_prefix` is None. (#1497)
- Pass the device argument to `grid_anchors` and `valid_flags`. (#1478)
- Fix the data pipeline for test_robustness. (#1476)
- Fix the argument type of deformable pooling. (#1390)
- Fix the coco_eval when there are only two classes. (#1376)
- Fix a bug in Modulated DeformableConv when deformable_group>1. (#1359)
- Fix the mask cropping in RandomCrop. (#1333)
- Fix zero outputs in DeformConv when not running on cuda:0. (#1326)
- Fix the type issue in Expand. (#1288)
- Fix the inference API. (#1255)
- Fix the inplace operation in Expand. (#1249)
- Fix the from-scratch training config. (#1196)
- Fix an inplace add in RoIExtractor which caused an error in PyTorch 1.2. (#1160)
- Fix FCOS when input images have no positive samples. (#1136)
- Fix recursive imports. (#1099)
**Improvements**
- Print the config file and mmdet version in the log. (#1721)
- Lint the code before compiling in Travis CI. (#1715)
- Add a probability argument for the `Expand` transform. (#1651)
- Update the PyTorch and CUDA version in the docker file. (#1615)
- Raise a warning when specifying `--validate` in non-distributed training. (#1624, #1651)
- Beautify the mAP printing. (#1614)
- Add pre-commit hook. (#1536)
- Add the argument `in_channels` to backbones. (#1475)
- Add lots of docstrings and unit tests, thanks to [@Erotemic](https://github.com/Erotemic). (#1603, #1517, #1506, #1505, #1491, #1479, #1477, #1475, #1474)
- Add support for multi-node distributed test when there is no shared storage. (#1399)
- Optimize Dockerfile to reduce the image size. (#1306)
- Update new results of HRNet. (#1284, #1182)
- Add an argument `no_norm_on_lateral` in FPN. (#1240)
- Test the compiling in CI. (#1235)
- Move docs to a separate folder. (#1233)
- Add a jupyter notebook demo. (#1158)
- Support different types of datasets for training. (#1133)
- Use int64_t instead of long in cuda kernels. (#1131)
- Support non-square RoIs for bbox and mask heads. (#1128)
- Manually add type promotion to keep compatibility with PyTorch 1.2. (#1114)
- Allow a validation dataset for computing validation loss. (#1093)
- Use `.scalar_type()` instead of `.type()` to suppress some warnings. (#1070)
**New Features**
- Add an option `--with_ap` to compute the AP for each class. (#1549)
- Implement "FreeAnchor: Learning to Match Anchors for Visual Object Detection". (#1391)
- Support [Albumentations](https://github.com/albumentations-team/albumentations) for augmentations in the data pipeline. (#1354)
- Implement "FoveaBox: Beyond Anchor-based Object Detector". (#1339)
- Support horizontal and vertical flipping. (#1273, #1115)
- Implement "RepPoints: Point Set Representation for Object Detection". (#1265)
- Add test-time augmentation to HTC and Cascade R-CNN. (#1251)
- Add a COCO result analysis tool. (#1228)
- Add Dockerfile. (#1168)
- Add a webcam demo. (#1155, #1150)
- Add FLOPs counter. (#1127)
- Allow arbitrary layer order for ConvModule. (#1078)
### v1.0rc0 (27/07/2019)
- Implement lots of new methods and components (Mixed Precision Training, HTC, Libra R-CNN, Guided Anchoring, Empirical Attention, Mask Scoring R-CNN, Grid R-CNN (Plus), GHM, GCNet, FCOS, HRNet, Weight Standardization, etc.). Thanks to all collaborators!
- Support two additional datasets: WIDER FACE and Cityscapes.
- Refactor the loss APIs to make it more flexible to adopt different losses and related hyper-parameters.
- Speed up multi-gpu testing.
- Integrate all compiling and installing in a single script.
### v0.6.0 (14/04/2019)
- Up to 30% speedup compared to the model zoo.
- Support both PyTorch stable and nightly versions.
- Replace NMS and SigmoidFocalLoss with PyTorch CUDA extensions.
### v0.6rc0 (06/02/2019)
- Migrate to PyTorch 1.0.
### v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
### v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
### v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models.
- Refactoring for Samplers/Assigners and add OHEM.
- Add VOC dataset and evaluation scripts.
### v0.5.4 (27/11/2018)
- Add SingleStageDetector and RetinaNet.
### v0.5.3 (26/11/2018)
- Add Cascade R-CNN and Cascade Mask R-CNN.
- Add support for Soft-NMS in config files.
### v0.5.2 (21/10/2018)
- Add support for custom datasets.
- Add a script to convert PASCAL VOC annotations to the expected format.
### v0.5.1 (20/10/2018)
- Add BBoxAssigner and BBoxSampler; the `train_cfg` field in config files is restructured.
- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at chenkaidev@gmail.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq