fix_mmdetection

eb1107e4 · raojy · 7aa442d5 · eb1107e4 · eb1107e4 · eb1107e4
Commit eb1107e4 authored Apr 01, 2026 by raojy
20 changed files
--- a/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area4.py
+++ b/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area4.py
+_base_ = './dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py'  # inherit the Area-5 config; only the data split is overridden here
+# data settings: train on Areas 1, 2, 3, 5 and 6; Area 4 is held out for test/val
+train_area = [1, 2, 3, 5, 6]
+test_area = 4
+train_dataloader = dict(
+    batch_size=32,
+    dataset=dict(  # one annotation file and one scene-index file per training area
+        ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
+        scene_idxs=[
+            f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
+        ]))
+test_dataloader = dict(
+    dataset=dict(  # single held-out area, so plain strings instead of lists
+        ann_files=f's3dis_infos_Area_{test_area}.pkl',
+        scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
+val_dataloader = test_dataloader  # validation reuses the test dataloader settings
--- a/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py
+++ b/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py
+_base_ = [  # dataset, model, schedule and runtime settings are inherited from these files
+    '../_base_/datasets/s3dis-seg.py', '../_base_/models/dgcnn.py',
+    '../_base_/schedules/seg-cosine-100e.py', '../_base_/default_runtime.py'
+]
+# model settings: overrides applied on top of the inherited DGCNN model config
+model = dict(
+    backbone=dict(in_channels=9),  # [xyz, rgb, normalized_xyz]
+    decode_head=dict(
+        num_classes=13, ignore_index=13,  # ignore_index equals num_classes (presumably one past the last valid label id -- confirm)
+        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+    test_cfg=dict(
+        num_points=4096,
+        block_size=1.0,
+        sample_rate=0.5,
+        use_normalized_coord=True,
+        batch_size=24))
+default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=2))  # same value as val_interval below
+train_dataloader = dict(batch_size=32)
+train_cfg = dict(val_interval=2)
--- a/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area6.py
+++ b/mmde/configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area6.py
+_base_ = './dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py'  # inherit the Area-5 config; only the data split is overridden here
+# data settings: train on Areas 1-5; Area 6 is held out for test/val
+train_area = [1, 2, 3, 4, 5]
+test_area = 6
+train_dataloader = dict(
+    batch_size=32,
+    dataset=dict(  # one annotation file and one scene-index file per training area
+        ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
+        scene_idxs=[
+            f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
+        ]))
+test_dataloader = dict(
+    dataset=dict(  # single held-out area, so plain strings instead of lists
+        ann_files=f's3dis_infos_Area_{test_area}.pkl',
+        scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
+val_dataloader = test_dataloader  # validation reuses the test dataloader settings
--- a/mmde/configs/dgcnn/metafile.yml
+++ b/mmde/configs/dgcnn/metafile.yml
+Collections:
+  - Name: DGCNN
+    Metadata:
+      Training Techniques:
+        - SGD
+      Training Resources: 4x Titan XP GPUs
+      Architecture:
+        - DGCNN
+    Paper: https://arxiv.org/abs/1801.07829
+    README: configs/dgcnn/README.md
+Models:
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area1.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area1.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area1
+        Metrics:
+          mIoU: 68.33
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area1/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_000734-39658f14.pth
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area2.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area2.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area2
+        Metrics:
+          mIoU: 40.68
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area2/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_144648-aea9ecb6.pth
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area3.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area3.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area3
+        Metrics:
+          mIoU: 69.38
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area3/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210801_154629-2ff50ee0.pth
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area4.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area4.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area4
+        Metrics:
+          mIoU: 50.07
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area4/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_073551-dffab9cd.pth
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area5
+        Metrics:
+          mIoU: 50.59
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area5/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210730_235824-f277e0c5.pth
+  - Name: dgcnn_4xb32-cosine-100e_s3dis-seg_test-area6.py
+    In Collection: DGCNN
+    Config: configs/dgcnn/dgcnn_4xb32-cosine-100e_s3dis-seg_test-area6.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 13.3
+    Results:
+      - Task: 3D Semantic Segmentation
+        Dataset: S3DIS Area6
+        Metrics:
+          mIoU: 77.94
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area6/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_154317-e3511b32.pth
--- a/mmde/configs/dynamic_voxelization/README.md
+++ b/mmde/configs/dynamic_voxelization/README.md
+# Dynamic Voxelization
+> [End-to-End Multi-View Fusion for 3D Object Detection in LiDAR Point Clouds](https://arxiv.org/abs/1910.06528)
+<!-- [ALGORITHM] -->
+## Abstract
+Recent work on 3D object detection advocates point cloud voxelization in birds-eye view, where objects preserve their physical dimensions and are naturally separable. When represented in this view, however, point clouds are sparse and have highly variable point density, which may cause detectors difficulties in detecting distant or small objects (pedestrians, traffic signs, etc.). On the other hand, perspective view provides dense observations, which could allow more favorable feature encoding for such cases. In this paper, we aim to synergize the birds-eye view and the perspective view and propose a novel end-to-end multi-view fusion (MVF) algorithm, which can effectively learn to utilize the complementary information from both. Specifically, we introduce dynamic voxelization, which has four merits compared to existing voxelization methods, i) removing the need of pre-allocating a tensor with fixed size; ii) overcoming the information loss due to stochastic point/voxel dropout; iii) yielding deterministic voxel embeddings and more stable detection outcomes; iv) establishing the bi-directional relationship between points and voxels, which potentially lays a natural foundation for cross-view feature fusion. By employing dynamic voxelization, the proposed feature fusion architecture enables each point to learn to fuse context information from different views. MVF operates on points and can be naturally extended to other approaches using LiDAR point clouds. We evaluate our MVF model extensively on the newly released Waymo Open Dataset and on the KITTI dataset and demonstrate that it significantly improves detection accuracy over the comparable single-view PointPillars baseline.
+<div align=center>
+<img src="https://user-images.githubusercontent.com/30491025/143856017-98b77ecb-7c13-4164-9c1d-e3011a7645e6.png" width="600"/>
+</div>
+## Introduction
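+We implement Dynamic Voxelization proposed in [End-to-End Multi-View Fusion for 3D Object Detection in LiDAR Point Clouds](https://arxiv.org/abs/1910.06528) and provide its results and models on the KITTI dataset.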
+## Results and models
+### KITTI
+|                               Model                                |  Class  |  Lr schd   | Mem (GB) | Inf time (fps) |  mAP  |                                                                                                                                                                                                                 Download                                                                                                                                                                                                                 |
+| :----------------------------------------------------------------: | :-----: | :--------: | :------: | :------------: | :---: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+|       [SECOND](./second_dv_secfpn_8xb6-80e_kitti-3d-car.py)        |   Car   | cyclic 80e |   5.5    |                | 78.83 |                     [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car/dv_second_secfpn_6x8_80e_kitti-3d-car_20200620_235228-ac2c1c0c.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car/dv_second_secfpn_6x8_80e_kitti-3d-car_20200620_235228.log.json)                     |
+|  [SECOND](./second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py)   | 3 Class | cosine 80e |   5.5    |                | 65.27 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class_20210831_054106-e742d163.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class_20210831_054106.log.json) |
+| [PointPillars](./pointpillars_dv_secfpn_8xb6-160e_kitti-3d-car.py) |   Car   | cyclic 160e |   4.7    |                | 77.76 |       [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230844-ee7b75c9.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230844.log.json)       |
+## Citation
+```latex
+@article{zhou2019endtoend,
+    title={End-to-End Multi-View Fusion for 3D Object Detection in LiDAR Point Clouds},
+    author={Yin Zhou and Pei Sun and Yu Zhang and Dragomir Anguelov and Jiyang Gao and Tom Ouyang and James Guo and Jiquan Ngiam and Vijay Vasudevan},
+    year={2019},
+    eprint={1910.06528},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV}
+}
+```
--- a/mmde/configs/dynamic_voxelization/metafile.yml
+++ b/mmde/configs/dynamic_voxelization/metafile.yml
+Collections:
+  - Name: Dynamic Voxelization
+    Metadata:
+      Training Data: KITTI
+      Training Techniques:
+        - AdamW
+      Training Resources: 8x V100 GPUs
+      Architecture:
+        - Dynamic Voxelization
+    Paper:
+      URL: https://arxiv.org/abs/1910.06528
+      Title: 'End-to-End Multi-View Fusion for 3D Object Detection in LiDAR Point Clouds'
+    README: configs/dynamic_voxelization/README.md
+    Code:
+      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/dynamic_voxelnet.py#L11
+      Version: v0.5.0
+Models:
+  - Name: dv_second_secfpn_6x8_80e_kitti-3d-car
+    In Collection: Dynamic Voxelization
+    Config: configs/dynamic_voxelization/second_dv_secfpn_8xb6-80e_kitti-3d-car.py
+    Metadata:
+      Training Memory (GB): 5.5
+    Results:
+      - Task: 3D Object Detection
+        Dataset: KITTI
+        Metrics:
+          mAP: 78.83
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_second_secfpn_6x8_80e_kitti-3d-car/dv_second_secfpn_6x8_80e_kitti-3d-car_20200620_235228-ac2c1c0c.pth
+  - Name: dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class
+    In Collection: Dynamic Voxelization
+    Config: configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+    Metadata:
+      Training Memory (GB): 5.5
+    Results:
+      - Task: 3D Object Detection
+        Dataset: KITTI
+        Metrics:
+          mAP: 65.27
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/dynamic_voxelization/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class_20210831_054106-e742d163.pth
+  - Name: dv_pointpillars_secfpn_6x8_160e_kitti-3d-car
+    In Collection: Dynamic Voxelization
+    Config: configs/dynamic_voxelization/pointpillars_dv_secfpn_8xb6-160e_kitti-3d-car.py
+    Metadata:
+      Training Memory (GB): 4.7
+    Results:
+      - Task: 3D Object Detection
+        Dataset: KITTI
+        Metrics:
+          mAP: 77.76
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/dynamic_voxelization/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20200620_230844-ee7b75c9.pth
--- a/mmde/configs/dynamic_voxelization/pointpillars_dv_secfpn_8xb6-160e_kitti-3d-car.py
+++ b/mmde/configs/dynamic_voxelization/pointpillars_dv_secfpn_8xb6-160e_kitti-3d-car.py
+_base_ = '../pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-car.py'  # start from this PointPillars KITTI car config and switch it to dynamic voxelization
+voxel_size = [0.16, 0.16, 4]  # shared by the voxel layer and the voxel encoder below
+point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]  # shared by the voxel layer and the voxel encoder below
+model = dict(
+    type='DynamicVoxelNet',
+    data_preprocessor=dict(
+        voxel_type='dynamic',
+        voxel_layer=dict(
+            max_num_points=-1,  # NOTE(review): -1 here and in max_voxels presumably means "no limit" -- confirm against the voxelization op
+            point_cloud_range=point_cloud_range,
+            voxel_size=voxel_size,
+            max_voxels=(-1, -1))),
+    voxel_encoder=dict(
+        type='DynamicPillarFeatureNet',
+        in_channels=4,
+        feat_channels=[64],
+        with_distance=False,
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
--- a/mmde/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+++ b/mmde/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+_base_ = [  # model, dataset, schedule and runtime settings are inherited from these files
+    '../_base_/models/second_hv_secfpn_kitti.py',
+    '../_base_/datasets/kitti-3d-3class.py', '../_base_/schedules/cosine.py',
+    '../_base_/default_runtime.py'
+]
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # shared by the voxel layer and the voxel encoder below
+voxel_size = [0.05, 0.05, 0.1]  # shared by the voxel layer and the voxel encoder below
+model = dict(
+    type='DynamicVoxelNet',
+    data_preprocessor=dict(
+        voxel_type='dynamic',
+        voxel_layer=dict(
+            _delete_=True,  # NOTE(review): per mmengine config inheritance this replaces the inherited dict instead of merging into it
+            max_num_points=-1,  # NOTE(review): -1 here and in max_voxels presumably means "no limit" -- confirm against the voxelization op
+            point_cloud_range=point_cloud_range,
+            voxel_size=voxel_size,
+            max_voxels=(-1, -1))),
+    voxel_encoder=dict(
+        _delete_=True,  # same replace-not-merge marker as on voxel_layer above
+        type='DynamicSimpleVFE',
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
--- a/mmde/configs/dynamic_voxelization/second_dv_secfpn_8xb6-80e_kitti-3d-car.py
+++ b/mmde/configs/dynamic_voxelization/second_dv_secfpn_8xb6-80e_kitti-3d-car.py
+_base_ = '../second/second_hv_secfpn_8xb6-80e_kitti-3d-car.py'  # start from this SECOND KITTI car config and switch it to dynamic voxelization
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # shared by the voxel layer and the voxel encoder below
+voxel_size = [0.05, 0.05, 0.1]  # shared by the voxel layer and the voxel encoder below
+model = dict(
+    type='DynamicVoxelNet',
+    data_preprocessor=dict(
+        voxel_type='dynamic',
+        voxel_layer=dict(
+            _delete_=True,  # NOTE(review): per mmengine config inheritance this replaces the inherited dict instead of merging into it
+            max_num_points=-1,  # NOTE(review): -1 here and in max_voxels presumably means "no limit" -- confirm against the voxelization op
+            point_cloud_range=point_cloud_range,
+            voxel_size=voxel_size,
+            max_voxels=(-1, -1))),
+    voxel_encoder=dict(
+        _delete_=True,  # same replace-not-merge marker as on voxel_layer above
+        type='DynamicSimpleVFE',
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range))
--- a/mmde/configs/fcaf3d/README.md
+++ b/mmde/configs/fcaf3d/README.md
+# FCAF3D: Fully Convolutional Anchor-Free 3D Object Detection
+> [FCAF3D: Fully Convolutional Anchor-Free 3D Object Detection](https://arxiv.org/abs/2112.00322)
+<!-- [ALGORITHM] -->
+## Abstract
+Recently, promising applications in robotics and augmented reality have attracted considerable attention to 3D object detection from point clouds. In this paper, we present FCAF3D --- a first-in-class fully convolutional anchor-free indoor 3D object detection method. It is a simple yet effective method that uses a voxel representation of a point cloud and processes voxels with sparse convolutions. FCAF3D can handle large-scale scenes with minimal runtime through a single fully convolutional feed-forward pass. Existing 3D object detection methods make prior assumptions on the geometry of objects, and we argue that it limits their generalization ability. To eliminate prior assumptions, we propose a novel parametrization of oriented bounding boxes that allows obtaining better results in a purely data-driven way. The proposed method achieves state-of-the-art 3D object detection results in terms of mAP@0.5 on ScanNet V2 (+4.5), SUN RGB-D (+3.5), and S3DIS (+20.5) datasets.
+<div align="center">
+<img src="https://user-images.githubusercontent.com/6030962/182842796-98c10576-d39c-4c2b-a15a-a04c9870919c.png" width="800"/>
+</div>
+## Introduction
+We implement FCAF3D and provide the results and checkpoints on the ScanNet, SUN RGB-D and S3DIS datasets.
+## Results and models
+### ScanNet
+|                      Backbone                      | Mem (GB) | Inf time (fps) |   AP@0.25    |    AP@0.5    |                                                                                                                                                          Download                                                                                                                                                           |
+| :------------------------------------------------: | :------: | :------------: | :----------: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| [MinkResNet34](./fcaf3d_2xb8_scannet-3d-18class.py) |   10.5   |      15.7      | 69.7(70.7\*) | 55.2(56.0\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_scannet-3d-18class/fcaf3d_8x2_scannet-3d-18class_20220805_084956.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_scannet-3d-18class/fcaf3d_8x2_scannet-3d-18class_20220805_084956.log.json) |
+### SUN RGB-D
+|                      Backbone                      | Mem (GB) | Inf time (fps) |   AP@0.25    |    AP@0.5    |                                                                                                                                                          Download                                                                                                                                                           |
+| :------------------------------------------------: | :------: | :------------: | :----------: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| [MinkResNet34](./fcaf3d_2xb8_sunrgbd-3d-10class.py) |   6.3    |      17.9      | 63.8(63.8\*) | 47.3(48.2\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.log.json) |
+### S3DIS
+|                     Backbone                     | Mem (GB) | Inf time (fps) |   AP@0.25    |    AP@0.5    |                                                                                                                                                    Download                                                                                                                                                     |
+| :----------------------------------------------: | :------: | :------------: | :----------: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| [MinkResNet34](./fcaf3d_2xb8_s3dis-3d-5class.py) |   23.5   |      10.9      | 67.4(64.9\*) | 45.7(43.8\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.log.json) |
+**Note**
+- We report the results across 5 train runs followed by 5 test runs. * means the results reported in the paper.
+- Inference time is given for a single NVidia RTX 4090 GPU. All models are trained on 2 GPUs.
+## Citation
+```latex
+@inproceedings{rukhovich2022fcaf3d,
+  title={FCAF3D: Fully Convolutional Anchor-Free 3D Object Detection},
+  author={Danila Rukhovich and Anna Vorontsova and Anton Konushin},
+  booktitle={European conference on computer vision},
+  year={2022}
+}
+```
--- a/mmde/configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+++ b/mmde/configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+_base_ = [  # model, runtime and dataset settings are inherited from these files
+    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
+    '../_base_/datasets/s3dis-3d.py'
+]
+model = dict(bbox_head=dict(num_classes=5))  # only the number of classes is overridden in the inherited model
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
+    clip_grad=dict(max_norm=10, norm_type=2))
+# learning rate: MultiStepLR, by epoch over [0, 12], multiplied by gamma=0.1 at milestones 8 and 11
+param_scheduler = dict(
+    type='MultiStepLR',
+    begin=0,
+    end=12,
+    by_epoch=True,
+    milestones=[8, 11],
+    gamma=0.1)
+custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
+# training schedule for 1x: max_epochs=12 and val_interval=12 (val_interval equals max_epochs)
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
--- a/mmde/configs/fcaf3d/fcaf3d_2xb8_scannet-3d-18class.py
+++ b/mmde/configs/fcaf3d/fcaf3d_2xb8_scannet-3d-18class.py
+_base_ = [  # model, runtime and dataset settings are inherited from these files
+    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
+    '../_base_/datasets/scannet-3d.py'
+]
+n_points = 100000  # passed to PointSample in both the train and test pipelines
+backend_args = None  # forwarded to LoadPointsFromFile in both pipelines
+train_pipeline = [  # load -> align -> sample -> augment (flip, rot/scale/trans) -> normalize color -> pack
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,
+        use_dim=[0, 1, 2, 3, 4, 5],  # all 6 loaded dims are used (presumably xyz + rgb, given use_color=True)
+        backend_args=backend_args),
+    dict(type='LoadAnnotations3D'),
+    dict(type='GlobalAlignment', rotation_axis=2),
+    dict(type='PointSample', num_points=n_points),
+    dict(
+        type='RandomFlip3D',
+        sync_2d=False,
+        flip_ratio_bev_horizontal=0.5,
+        flip_ratio_bev_vertical=0.5),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.087266, 0.087266],  # about +/-5 degrees if the range is in radians -- TODO confirm
+        scale_ratio_range=[.9, 1.1],
+        translation_std=[.1, .1, .1],
+        shift_height=False),
+    dict(type='NormalizePointsColor', color_mean=None),
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+test_pipeline = [  # same loading/alignment as training; only 'points' is packed (no ground-truth keys)
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,
+        use_dim=[0, 1, 2, 3, 4, 5],
+        backend_args=backend_args),
+    dict(type='GlobalAlignment', rotation_axis=2),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],  # identity settings: zero rotation range, unit scale, zero translation std
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.5,
+                flip_ratio_bev_vertical=0.5),
+            dict(type='PointSample', num_points=n_points),
+            dict(type='NormalizePointsColor', color_mean=None),
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
+]
+train_dataloader = dict(
+    dataset=dict(
+        type='RepeatDataset',
+        times=10,  # the wrapped dataset is repeated 10 times
+        dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader  # testing reuses the validation dataloader settings
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
+    clip_grad=dict(max_norm=10, norm_type=2))
+# learning rate: MultiStepLR, by epoch over [0, 12], multiplied by gamma=0.1 at milestones 8 and 11
+param_scheduler = dict(
+    type='MultiStepLR',
+    begin=0,
+    end=12,
+    by_epoch=True,
+    milestones=[8, 11],
+    gamma=0.1)
+custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
+# training schedule for 1x: max_epochs=12 and val_interval=12 (val_interval equals max_epochs)
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
--- a/mmde/configs/fcaf3d/fcaf3d_2xb8_sunrgbd-3d-10class.py
+++ b/mmde/configs/fcaf3d/fcaf3d_2xb8_sunrgbd-3d-10class.py
+_base_ = [  # model, runtime and dataset settings are inherited from these files
+    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
+    '../_base_/datasets/sunrgbd-3d.py'
+]
+n_points = 100000  # passed to PointSample in both the train and test pipelines
+backend_args = None  # forwarded to LoadPointsFromFile in both pipelines
+model = dict(  # head overrides: class count, regression output count and box loss type
+    bbox_head=dict(
+        num_classes=10,
+        num_reg_outs=8,
+        bbox_loss=dict(type='RotatedIoU3DLoss')))
+train_pipeline = [  # load -> sample -> augment (horizontal flip, rot/scale/trans) -> pack
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        load_dim=6,
+        use_dim=[0, 1, 2, 3, 4, 5],  # all 6 loaded dims are used
+        backend_args=backend_args),
+    dict(type='LoadAnnotations3D'),
+    dict(type='PointSample', num_points=n_points),
+    dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5),  # only a horizontal flip ratio is set for training
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.523599, 0.523599],  # about +/-30 degrees if the range is in radians -- TODO confirm
+        scale_ratio_range=[0.85, 1.15],
+        translation_std=[.1, .1, .1],
+        shift_height=False),
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+]
+test_pipeline = [  # same loading as training; only 'points' is packed (no ground-truth keys)
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        load_dim=6,
+        use_dim=[0, 1, 2, 3, 4, 5],
+        backend_args=backend_args),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],  # identity settings: zero rotation range, unit scale, zero translation std
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(
+                type='RandomFlip3D',
+                sync_2d=False,
+                flip_ratio_bev_horizontal=0.5,
+                flip_ratio_bev_vertical=0.5),
+            dict(type='PointSample', num_points=n_points)
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
+]
+train_dataloader = dict(
+    batch_size=8,
+    dataset=dict(
+        type='RepeatDataset',
+        times=3,  # the wrapped dataset is repeated 3 times
+        dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader  # testing reuses the validation dataloader settings
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
+    clip_grad=dict(max_norm=10, norm_type=2))
+# learning rate: MultiStepLR, by epoch over [0, 12], multiplied by gamma=0.1 at milestones 8 and 11
+param_scheduler = dict(
+    type='MultiStepLR',
+    begin=0,
+    end=12,
+    by_epoch=True,
+    milestones=[8, 11],
+    gamma=0.1)
+custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
+# training schedule for 1x: max_epochs=12 and val_interval=12 (val_interval equals max_epochs)
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
--- a/mmde/configs/fcaf3d/metafile.yml
+++ b/mmde/configs/fcaf3d/metafile.yml
+Collections:
+  - Name: FCAF3D
+    Metadata:
+      Training Techniques:
+        - AdamW
+      Training Resources: 2x V100 GPUs
+      Architecture:
+        - MinkResNet
+    Paper:
+      URL: https://arxiv.org/abs/2112.00322
+      Title: 'FCAF3D: Fully Convolutional Anchor-Free 3D Object Detection'
+    README: configs/fcaf3d/README.md
+    Code:
+      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/mink_single_stage.py#L15
+      Version: v1.0.0rc4
+Models:
+  - Name: fcaf3d_2xb8_scannet-3d-18class
+    In Collection: FCAF3D
+    Config: configs/fcaf3d/fcaf3d_2xb8_scannet-3d-18class.py
+    Metadata:
+      Training Data: ScanNet
+      Training Memory (GB): 10.7
+    Results:
+      - Task: 3D Object Detection
+        Dataset: ScanNet
+        Metrics:
+          AP@0.25: 69.7
+          AP@0.5: 55.2
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_scannet-3d-18class/fcaf3d_8x2_scannet-3d-18class_20220805_084956.pth
+  - Name: fcaf3d_2xb8_sunrgbd-3d-10class
+    In Collection: FCAF3D
+    Config: configs/fcaf3d/fcaf3d_2xb8_sunrgbd-3d-10class.py
+    Metadata:
+      Training Data: SUNRGBD
+      Training Memory (GB): 6.5
+    Results:
+      - Task: 3D Object Detection
+        Dataset: SUNRGBD
+        Metrics:
+          AP@0.25: 63.76
+          AP@0.5: 47.31
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth
+  - Name: fcaf3d_2xb8_s3dis-3d-5class
+    In Collection: FCAF3D
+    Config: configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 23.5
+    Results:
+      - Task: 3D Object Detection
+        Dataset: S3DIS
+        Metrics:
+          AP@0.25: 67.36
+          AP@0.5: 45.74
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth
--- a/mmde/configs/fcos3d/README.md
+++ b/mmde/configs/fcos3d/README.md
+# FCOS3D: Fully Convolutional One-Stage Monocular 3D Object Detection
+> [FCOS3D: Fully Convolutional One-Stage Monocular 3D Object Detection](https://arxiv.org/abs/2104.10956)
+<!-- [ALGORITHM] -->
+## Abstract
+Monocular 3D object detection is an important task for autonomous driving considering its advantage of low cost. It is much more challenging than conventional 2D cases due to its inherent ill-posed property, which is mainly reflected in the lack of depth information. Recent progress on 2D detection offers opportunities to better solving this problem. However, it is non-trivial to make a general adapted 2D detector work in this 3D task. In this paper, we study this problem with a practice built on a fully convolutional single-stage detector and propose a general framework FCOS3D. Specifically, we first transform the commonly defined 7-DoF 3D targets to the image domain and decouple them as 2D and 3D attributes. Then the objects are distributed to different feature levels with consideration of their 2D scales and assigned only according to the projected 3D-center for the training procedure. Furthermore, the center-ness is redefined with a 2D Gaussian distribution based on the 3D-center to fit the 3D target formulation. All of these make this framework simple yet effective, getting rid of any 2D detection or 2D-3D correspondence priors. Our solution achieves 1st place out of all the vision-only methods in the nuScenes 3D detection challenge of NeurIPS 2020.
+<div align=center>
+<img src="https://user-images.githubusercontent.com/30491025/143856739-93b7c4ff-e116-4824-8cc3-8cf1a433a84c.png" width="800"/>
+</div>
+## Introduction
+FCOS3D is a general anchor-free, one-stage monocular 3D object detector adapted from the original 2D version FCOS.
+It serves as a baseline built on top of mmdetection and mmdetection3d for 3D detection based on monocular vision.
+Currently we first support the benchmark on the large-scale nuScenes dataset, which achieved 1st place out of all the vision-only methods in the [nuScenes 3D detection challenge](https://www.nuscenes.org/object-detection?externalData=all&mapData=all&modalities=Camera) of NeurIPS 2020.
+![demo image](../../resources/browse_dataset_mono.png)
+## Usage
+### Data Preparation
+After supporting FCOS3D and monocular 3D object detection in v0.13.0, the coco-style 2D json info files will include related annotations by default
+(see [here](https://github.com/open-mmlab/mmdetection3d/blob/master/tools/dataset_converters/nuscenes_converter.py#L333) if you would like to change the parameter).
+So you can just follow the data preparation steps given in the documentation, then all the needed infos are ready together.
+### Training and Inference
+Training and inference for a monocular 3D object detector work the same way as for other detectors in mmdetection and mmdetection3d. You can basically follow the [documentation](https://mmdetection3d.readthedocs.io/en/latest/1_exist_data_model.html#train-predefined-models-on-standard-datasets) and change the `config`, `work_dirs`, etc. accordingly.
+### Test time augmentation
+We implement test time augmentation for the dense outputs of detection heads, which is more effective than merging predicted boxes at the end.
+You can turn it on by setting `flip=True` in the `test_pipeline`.
+### Training with finetune
+Because the scale and measurements of depth are different from those of other regression targets, we first train the model with depth weight equal to 0.2 for a more stable training procedure. For a stronger detector with better performance, please finetune the model with depth weight changed to 1.0 as shown in the [config](./fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune.py). Note that the path of `load_from` needs to be changed to yours accordingly.
+### Visualizing prediction results
+We also provide visualization functions to show the monocular 3D detection results. Simply follow the [documentation](https://mmdetection3d.readthedocs.io/en/latest/1_exist_data_model.html#test-existing-models-on-standard-datasets) and use the `single-gpu testing` command. You only need to add the `--show` flag and specify `--show-dir` to store the visualization results.
+## Results and models
+### NuScenes
+|                                        Backbone                                         | Lr schd | Mem (GB) | Inf time (fps) | mAP  | NDS  |                                                                                                                                                                                                                             Download                                                                                                                                                                                                                             |
+| :-------------------------------------------------------------------------------------: | :-----: | :------: | :------------: | :--: | :--: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+|      [ResNet101 w/ DCN](./fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py)      |   1x    |   8.69   |                | 29.8 | 37.7 |                   [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813.log.json)                   |
+| [above w/ finetune](./fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune.py) |   1x    |   8.69   |                | 32.1 | 39.5 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210717_095645-8d806dc2.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210717_095645.log.json) |
+|                                      above w/ tta                                       |   1x    |   8.69   |                | 33.1 | 40.3 |                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+## Citation
+```latex
+@inproceedings{wang2021fcos3d,
+	title={{FCOS3D: Fully} Convolutional One-Stage Monocular 3D Object Detection},
+	author={Wang, Tai and Zhu, Xinge and Pang, Jiangmiao and Lin, Dahua},
+	booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
+	year={2021}
+}
+# For the original 2D version
+@inproceedings{tian2019fcos,
+  title     =  {{FCOS: Fully} Convolutional One-Stage Object Detection},
+  author    =  {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
+  booktitle =  {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
+  year      =  {2019}
+}
+```
--- a/mmde/configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+++ b/mmde/configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+_base_ = [  # inherited dataset / model / schedule / runtime configs; the settings below are layered on top of them
+    '../_base_/datasets/nus-mono3d.py', '../_base_/models/fcos3d.py',
+    '../_base_/schedules/mmdet-schedule-1x.py', '../_base_/default_runtime.py'
+]
+# model settings: data preprocessor and backbone DCN options set on top of the base FCOS3D model
+model = dict(
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[103.530, 116.280, 123.675],  # NOTE(review): looks like Caffe-style BGR channel means (bgr_to_rgb=False) — confirm
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
+    backbone=dict(
+        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
+        stage_with_dcn=(False, False, True, True)))  # DCN flagged only for the last two of the four stages
+backend_args = None  # passed to both image-loading transforms below
+train_pipeline = [  # transforms used by train_dataloader below
+    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+    dict(
+        type='LoadAnnotations3D',  # requests 2D boxes/labels, attribute labels, 3D boxes/labels and box depths
+        with_bbox=True,
+        with_label=True,
+        with_attr_label=True,
+        with_bbox_3d=True,
+        with_label_3d=True,
+        with_bbox_depth=True),
+    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
+    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+    dict(
+        type='Pack3DDetInputs',
+        keys=[
+            'img', 'gt_bboxes', 'gt_bboxes_labels', 'attr_labels',
+            'gt_bboxes_3d', 'gt_labels_3d', 'centers_2d', 'depths'
+        ]),
+]
+test_pipeline = [  # shared by test_dataloader and val_dataloader below; packs only 'img'
+    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
+    dict(type='mmdet.Resize', scale_factor=1.0),
+    dict(type='Pack3DDetInputs', keys=['img'])
+]
+train_dataloader = dict(  # batch_size=2 matches the `8xb2` in the file name (presumably 8 GPUs x 2 samples — confirm)
+    batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+# optimizer: lr 0.002, bias-specific lr/decay multipliers, gradient clipping (max_norm=35, norm_type=2)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.002),
+    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2))
+# learning rate: iteration-based LinearLR over steps 0-500, then epoch-based MultiStepLR (gamma=0.1 at milestones 8 and 11)
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.0 / 3,
+        by_epoch=False,
+        begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
--- a/mmde/configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune.py
+++ b/mmde/configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune.py
+_base_ = './fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py'  # builds on the non-finetune FCOS3D config
+# model settings: regression code weights used for the finetune stage
+model = dict(
+    train_cfg=dict(
+        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05]))  # NOTE(review): the README says finetuning raises the depth weight to 1.0; which entry is depth is not visible here — confirm
+# optimizer: lr 0.001 for the finetune stage
+optim_wrapper = dict(optimizer=dict(lr=0.001))
+load_from = 'work_dirs/fcos3d_nus/latest.pth'  # placeholder checkpoint path; change it to your own first-stage checkpoint
--- a/mmde/configs/fcos3d/metafile.yml
+++ b/mmde/configs/fcos3d/metafile.yml
+Collections:
+  - Name: FCOS3D
+    Metadata:
+      Training Data: NuScenes
+      Training Techniques:
+        - SGD
+      Training Resources: 8x GeForce RTX 2080 Ti
+      Architecture:
+        - FCOSMono3DHead
+    Paper:
+      URL: https://arxiv.org/abs/2104.10956
+      Title: 'FCOS3D: Fully Convolutional One-Stage Monocular 3D Object Detection'
+    README: configs/fcos3d/README.md
+    Code:
+      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/fcos_mono3d.py#L7
+      Version: v0.13.0
+Models:
+  - Name: fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d
+    In Collection: FCOS3D
+    Config: configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+    Metadata:
+      Training Memory (GB): 8.7
+    Results:
+      - Task: 3D Object Detection
+        Dataset: NuScenes
+        Metrics:
+          mAP: 29.9
+          NDS: 37.3
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_20210715_235813-4bed5239.pth
+  - Name: fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune
+    In Collection: FCOS3D
+    Config: configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d_finetune.py
+    Metadata:
+      Training Memory (GB): 8.7
+    Results:
+      - Task: 3D Object Detection
+        Dataset: NuScenes
+        Metrics:
+          mAP: 32.1
+          NDS: 39.3
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210717_095645-8d806dc2.pth
--- a/mmde/configs/free_anchor/README.md
+++ b/mmde/configs/free_anchor/README.md
+# FreeAnchor for 3D Object Detection
+> [FreeAnchor: Learning to Match Anchors for Visual Object Detection](https://arxiv.org/abs/1909.02466)
+<!-- [ALGORITHM] -->
+## Abstract
+Modern CNN-based object detectors assign anchors for ground-truth objects under the restriction of object-anchor Intersection-over-Unit (IoU). In this study, we propose a learning-to-match approach to break IoU restriction, allowing objects to match anchors in a flexible manner. Our approach, referred to as FreeAnchor, updates hand-crafted anchor assignment to "free" anchor matching by formulating detector training as a maximum likelihood estimation (MLE) procedure. FreeAnchor targets at learning features which best explain a class of objects in terms of both classification and localization. FreeAnchor is implemented by optimizing detection customized likelihood and can be fused with CNN-based detectors in a plug-and-play manner. Experiments on COCO demonstrate that FreeAnchor consistently outperforms the counterparts with significant margins.
+<div align=center>
+<img src="https://user-images.githubusercontent.com/36950400/143866685-e3ac08bb-cd0c-4ada-ba8a-18e03cccdd0f.png" width="600"/>
+</div>
+## Introduction
+We implement FreeAnchor in 3D detection systems and provide their first results with PointPillars on nuScenes dataset.
+With the implemented `FreeAnchor3DHead`, a PointPillar detector with a big backbone (e.g., RegNet-3.2GF) achieves top performance
+on the nuScenes benchmark.
+## Usage
+### Modify config
+As in the [baseline config](pointpillars_hv_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py), we only need to replace the head of an existing one-stage detector to use FreeAnchor head.
+Since the config inherits from a common detector head, `_delete_=True` is necessary to avoid conflicts.
+The hyperparameters are specifically tuned according to the original paper.
+```python
+_base_ = [
+    '../_base_/models/pointpillars_hv_fpn_lyft.py',
+    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule-2x.py',
+    '../_base_/default_runtime.py'
+]
+model = dict(
+    pts_bbox_head=dict(
+        _delete_=True,
+        type='FreeAnchor3DHead',
+        num_classes=10,
+        in_channels=256,
+        feat_channels=256,
+        use_direction_classifier=True,
+        pre_anchor_topk=25,
+        bbox_thr=0.5,
+        gamma=2.0,
+        alpha=0.5,
+        anchor_generator=dict(
+            type='AlignedAnchor3DRangeGenerator',
+            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
+            scales=[1, 2, 4],
+            sizes=[
+                [2.5981, 0.8660, 1.],  # 1.5 / sqrt(3)
+                [1.7321, 0.5774, 1.],  # 1 / sqrt(3)
+                [1., 1., 1.],
+                [0.4, 0.4, 1],
+            ],
+            custom_values=[0, 0],
+            rotations=[0, 1.57],
+            reshape_out=True),
+        assigner_per_size=False,
+        diff_rad_by_sin=True,
+        dir_offset=-0.7854,  # -pi / 4
+        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.0),
+        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.8),
+        loss_dir=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+    # model training and testing settings
+    train_cfg = dict(
+        pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.25, 0.25])))
+```
+## Results and models
+### PointPillars
+|                                                    Backbone                                                     | FreeAnchor | Lr schd | Mem (GB) | Inf time (fps) |  mAP  |  NDS  |                                                                                                                                                                                                                                                                    Download                                                                                                                                                                                                                                                                    |
+| :-------------------------------------------------------------------------------------------------------------: | :--------: | :-----: | :------: | :------------: | :---: | :---: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+|                      [FPN](../pointpillars/pointpillars_hv_fpn_sbn-all_8xb4-2x_nus-3d.py)                       |     ✗      |   2x    |   17.1   |                | 40.0  | 53.3  |                                                                        [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405.log.json)                                                                        |
+|                     [FPN](./pointpillars_hv_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py)                     |     ✓      |   2x    |   16.3   |                | 43.82 | 54.86 |                                                 [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210816_163441-ae0897e7.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210816_163441.log.json)                                                 |
+|            [RegNetX-400MF-FPN](../regnet/pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d.py)            |     ✗      |   2x    |   17.3   |                | 44.8  | 56.4  |                                                    [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239.log.json)                                                    |
+|       [RegNetX-400MF-FPN](./pointpillars_hv_regnet-400mf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py)        |     ✓      |   2x    |   17.6   |                | 48.3  | 58.65 |                       [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_213939-a2dd3fff.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_213939.log.json)                       |
+|       [RegNetX-1.6GF-FPN](./pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py)        |     ✓      |   2x    |   24.3   |                | 52.04 | 61.49 |                       [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210828_025608-bfbd506e.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210828_025608.log.json)                       |
+| [RegNetX-1.6GF-FPN](./pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py)\* |     ✓      |   3x    |   24.4   |                | 52.69 | 62.45 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210827_184909-14d2dbd1.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210827_184909.log.json) |
+|       [RegNetX-3.2GF-FPN](./pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py)        |     ✓      |   2x    |   29.4   |                | 52.4  | 61.94 |                       [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_181237-e385c35a.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_181237.log.json)                       |
+| [RegNetX-3.2GF-FPN](./pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py)\* |     ✓      |   3x    |   29.2   |                | 54.23 | 63.41 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210828_030816-06708918.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210828_030816.log.json) |
+**Note**: Models marked with `*` are trained using stronger augmentation with vertical flip under bird-eye-view, global translation, and a larger range of global rotation.
+## Citation
+```latex
+@inproceedings{zhang2019freeanchor,
+  title   =  {{FreeAnchor}: Learning to Match Anchors for Visual Object Detection},
+  author  =  {Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang},
+  booktitle =  {Neural Information Processing Systems},
+  year    =  {2019}
+}
+```
--- a/mmde/configs/free_anchor/metafile.yml
+++ b/mmde/configs/free_anchor/metafile.yml
+Collections:
+  - Name: FreeAnchor
+    Metadata:
+      Training Data: nuScenes
+      Training Techniques:
+        - AdamW
+      Training Resources: 8x V100 GPUs
+      Architecture:
+        - Hard Voxelization
+        - Free Anchor
+    Paper:
+      URL: https://arxiv.org/abs/1909.02466
+      Title: 'FreeAnchor: Learning to Match Anchors for Visual Object Detection'
+    README: configs/free_anchor/README.md
+    Code:
+      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/dense_heads/free_anchor3d_head.py#L13
+      Version: v0.5.0
+Models:
+  - Name: pointpillars_hv_fpn_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 17.1
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 40.0
+          NDS: 53.3
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth
+  - Name: pointpillars_hv_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 16.3
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 43.82
+          NDS: 54.86
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210816_163441-ae0897e7.pth
+  - Name: pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/regnet/pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 17.3
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 44.8
+          NDS: 56.4
+    Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth
+  - Name: pointpillars_hv_regnet-400mf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_regnet-400mf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 17.6
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 48.3
+          NDS: 58.65
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_213939-a2dd3fff.pth
+  - Name: pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 24.3
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 52.04
+          NDS: 61.49
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210828_025608-bfbd506e.pth
+  - Name: pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 24.4
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 52.69
+          NDS: 62.45
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210827_184909-14d2dbd1.pth
+  - Name: pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-2x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 29.4
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 52.4
+          NDS: 61.94
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_4x8_2x_nus-3d_20210827_181237-e385c35a.pth
+  - Name: pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d
+    In Collection: FreeAnchor
+    Config: configs/free_anchor/pointpillars_hv_regnet-3.2gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+    Metadata:
+      Training Memory (GB): 29.2
+    Results:
+      - Task: 3D Object Detection
+        Dataset: nuScenes
+        Metrics:
+          mAP: 54.23
+          NDS: 63.41
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/free_anchor/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d/hv_pointpillars_regnet-3.2gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d_20210828_030816-06708918.pth