Merge branch 'master' of github.com:open-mmlab/mmdetection into dcn_cpp_extension

db14d74b · Kai Chen · c1e0884f · b7aa30c2 · db14d74b · db14d74b
Commit db14d74b authored Jan 25, 2019 by Kai Chen
20 changed files
--- a/MODEL_ZOO.md
+++ b/MODEL_ZOO.md
@@ -168,6 +168,22 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
 - Inference time is reported for batch size = 1 and batch size = 8.
 - The speed difference between VOC and COCO is caused by model parameters and nms.

+### Group Normalization (GN)
+
+| Backbone      | model      | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
+|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
+| R-50-FPN (d)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.9   | 36.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
+| R-50-FPN (d)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.2   | 36.5    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
+| R-101-FPN (d) | Mask R-CNN | 2x      | 9.9      | 0.970               | 4.8            | 41.6   | 37.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
+| R-101-FPN (d) | Mask R-CNN | 3x      | 9.9      | 0.970               | 4.8            | 41.7   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
+| R-50-FPN (c)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.7   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
+| R-50-FPN (c)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.1   | 36.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |
+
+**Notes:**
+- (d) means the pretrained model was converted from Detectron, and (c) means the pretrained model was trained and contributed by [@thangvubk](https://github.com/thangvubk).
+- The `3x` schedule is epoch [28, 34, 36]: the learning rate decays at epochs 28 and 34, and training ends at epoch 36.
+- The memory is measured with `torch.cuda.max_memory_allocated()` instead of `torch.cuda.max_memory_cached()`. We will update the memory usage of other models in the future.
+

 ## Comparison with Detectron


--- a/README.md
+++ b/README.md
@@ -36,6 +36,10 @@ This project is released under the [Apache 2.0 license](LICENSE).

 ## Updates

+v0.5.6 (17/01/2019)
+- Add support for Group Normalization.
+- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
+
 v0.5.5 (22/12/2018)
 - Add SSD for COCO and PASCAL VOC.
 - Add ResNeXt backbones and detection models.
@@ -73,6 +77,12 @@ Results and models are available in the [Model zoo](MODEL_ZOO.md).
 | SSD                | ✗        | ✗        | ✗        | ✓        |
 | RetinaNet          | ✓        | ✓        | ☐        | ✗        |

+Other features
+- [x] Group Normalization
+- [x] OHEM
+- [x] Soft-NMS
+
+
 ## Installation

 Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.

--- a/configs/mask_rcnn_r101_fpn_gn_2x.py
+++ b/configs/mask_rcnn_r101_fpn_gn_2x.py
+# model settings
+normalize = dict(type='GN', num_groups=32, frozen=False)
+
+model = dict(
+    type='MaskRCNN',
+    pretrained='open-mmlab://detectron/resnet101_gn',
+    backbone=dict(
+        type='ResNet',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch',
+        normalize=normalize),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5,
+        normalize=normalize),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='ConvFCBBoxHead',
+        num_shared_convs=4,
+        num_shared_fcs=1,
+        in_channels=256,
+        conv_out_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False,
+        normalize=normalize),
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=4,
+        in_channels=256,
+        conv_out_channels=256,
+        num_classes=81,
+        normalize=normalize))
+
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        mask_size=28,
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
--- a/configs/mask_rcnn_r50_fpn_gn_2x.py
+++ b/configs/mask_rcnn_r50_fpn_gn_2x.py
 # model settings
-normalize = dict(
-    type='GN',
-    num_groups=32,
-    frozen=False)
+normalize = dict(type='GN', num_groups=32, frozen=False)

 model = dict(
    type='MaskRCNN',
-    pretrained='open-mmlab://contrib/resnet50_gn',
+    pretrained='open-mmlab://detectron/resnet50_gn',
    backbone=dict(
        type='ResNet',
        depth=50,
@@ -114,7 +111,7 @@ test_cfg = dict(
 dataset_type = 'CocoDataset'
 data_root = 'data/coco/'
 img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
 data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,

--- a/configs/mask_rcnn_r50_fpn_gn_contrib_2x.py
+++ b/configs/mask_rcnn_r50_fpn_gn_contrib_2x.py
+# model settings
+normalize = dict(type='GN', num_groups=32, frozen=False)
+
+model = dict(
+    type='MaskRCNN',
+    pretrained='open-mmlab://contrib/resnet50_gn',
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=1,
+        style='pytorch',
+        normalize=normalize),
+    neck=dict(
+        type='FPN',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        num_outs=5,
+        normalize=normalize),
+    rpn_head=dict(
+        type='RPNHead',
+        in_channels=256,
+        feat_channels=256,
+        anchor_scales=[8],
+        anchor_ratios=[0.5, 1.0, 2.0],
+        anchor_strides=[4, 8, 16, 32, 64],
+        target_means=[.0, .0, .0, .0],
+        target_stds=[1.0, 1.0, 1.0, 1.0],
+        use_sigmoid_cls=True),
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    bbox_head=dict(
+        type='ConvFCBBoxHead',
+        num_shared_convs=4,
+        num_shared_fcs=1,
+        in_channels=256,
+        conv_out_channels=256,
+        fc_out_channels=1024,
+        roi_feat_size=7,
+        num_classes=81,
+        target_means=[0., 0., 0., 0.],
+        target_stds=[0.1, 0.1, 0.2, 0.2],
+        reg_class_agnostic=False,
+        normalize=normalize),
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
+        out_channels=256,
+        featmap_strides=[4, 8, 16, 32]),
+    mask_head=dict(
+        type='FCNMaskHead',
+        num_convs=4,
+        in_channels=256,
+        conv_out_channels=256,
+        num_classes=81,
+        normalize=normalize))
+
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.7,
+            neg_iou_thr=0.3,
+            min_pos_iou=0.3,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=256,
+            pos_fraction=0.5,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=False),
+        allowed_border=0,
+        pos_weight=-1,
+        smoothl1_beta=1 / 9.0,
+        debug=False),
+    rcnn=dict(
+        assigner=dict(
+            type='MaxIoUAssigner',
+            pos_iou_thr=0.5,
+            neg_iou_thr=0.5,
+            min_pos_iou=0.5,
+            ignore_iof_thr=-1),
+        sampler=dict(
+            type='RandomSampler',
+            num=512,
+            pos_fraction=0.25,
+            neg_pos_ub=-1,
+            add_gt_as_proposals=True),
+        mask_size=28,
+        pos_weight=-1,
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05,
+        nms=dict(type='nms', iou_thr=0.5),
+        max_per_img=100,
+        mask_thr_binary=0.5))
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
+# optimizer
+optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    step=[16, 22])
+checkpoint_config = dict(interval=1)
+# yapf:disable
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook')
+    ])
+# yapf:enable
+# runtime settings
+total_epochs = 24
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
--- a/mmdet/core/bbox/assigners/max_iou_assigner.py
+++ b/mmdet/core/bbox/assigners/max_iou_assigner.py
@@ -69,7 +69,7 @@ class MaxIoUAssigner(BaseAssigner):
        if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
            raise ValueError('No gt or bboxes')
        bboxes = bboxes[:, :4]
-        overlaps = bbox_overlaps(bboxes, gt_bboxes)
+        overlaps = bbox_overlaps(gt_bboxes, bboxes)

        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
                gt_bboxes_ignore.numel() > 0):
@@ -88,8 +88,8 @@ class MaxIoUAssigner(BaseAssigner):
        """Assign w.r.t. the overlaps of bboxes with gts.

        Args:
-            overlaps (Tensor): Overlaps between n bboxes and k gt_bboxes,
-                shape(n, k).
+            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
+                shape(k, n).
            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).

        Returns:
@@ -98,19 +98,18 @@ class MaxIoUAssigner(BaseAssigner):
        if overlaps.numel() == 0:
            raise ValueError('No gt or proposals')

-        num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
+        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # 1. assign -1 by default
        assigned_gt_inds = overlaps.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

-        assert overlaps.size() == (num_bboxes, num_gts)
        # for each anchor, which gt best overlaps with it
        # for each anchor, the max iou of all gts
-        max_overlaps, argmax_overlaps = overlaps.max(dim=1)
+        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # for each gt, which anchor best overlaps with it
        # for each gt, the max iou of all proposals
-        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)
+        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negative: below
        if isinstance(self.neg_iou_thr, float):
@@ -129,7 +128,7 @@ class MaxIoUAssigner(BaseAssigner):
        for i in range(num_gts):
            if gt_max_overlaps[i] >= self.min_pos_iou:
                if self.gt_max_assign_all:
-                    max_iou_inds = overlaps[:, i] == gt_max_overlaps[i]
+                    max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                    assigned_gt_inds[max_iou_inds] = i + 1
                else:
                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

--- a/mmdet/core/bbox/geometry.py
+++ b/mmdet/core/bbox/geometry.py
@@ -16,7 +16,7 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
            foreground).

    Returns:
-        ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1)
+        ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
    """

    assert mode in ['iou', 'iof']

--- a/mmdet/models/__init__.py
+++ b/mmdet/models/__init__.py
-from .detectors import (BaseDetector, TwoStageDetector, RPN, FastRCNN,
-                        FasterRCNN, MaskRCNN)
-from .builder import (build_neck, build_anchor_head, build_roi_extractor,
-                      build_bbox_head, build_mask_head, build_detector)
+from .backbones import *  # noqa: F401,F403
+from .necks import *  # noqa: F401,F403
+from .roi_extractors import *  # noqa: F401,F403
+from .anchor_heads import *  # noqa: F401,F403
+from .bbox_heads import *  # noqa: F401,F403
+from .mask_heads import *  # noqa: F401,F403
+from .detectors import *  # noqa: F401,F403
+from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
+from .builder import (build_backbone, build_neck, build_roi_extractor,
+                      build_head, build_detector)

 __all__ = [
-    'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN',
-    'MaskRCNN', 'build_backbone', 'build_neck', 'build_anchor_head',
-    'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
+    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS',
+    'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head',
    'build_detector'
 ]
--- a/mmdet/models/anchor_heads/anchor_head.py
+++ b/mmdet/models/anchor_heads/anchor_head.py
@@ -3,14 +3,16 @@ from __future__ import division
 import numpy as np
 import torch
 import torch.nn as nn
+from mmcv.cnn import normal_init

 from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox,
                        multi_apply, weighted_cross_entropy, weighted_smoothl1,
                        weighted_binary_cross_entropy,
                        weighted_sigmoid_focal_loss, multiclass_nms)
-from ..utils import normal_init
+from ..registry import HEADS


+@HEADS.register_module
 class AnchorHead(nn.Module):
    """Anchor-based head (RPN, RetinaNet, SSD, etc.).


--- a/mmdet/models/anchor_heads/retina_head.py
+++ b/mmdet/models/anchor_heads/retina_head.py
@@ -3,9 +3,11 @@ import torch.nn as nn
 from mmcv.cnn import normal_init

 from .anchor_head import AnchorHead
+from ..registry import HEADS
 from ..utils import bias_init_with_prob


+@HEADS.register_module
 class RetinaHead(AnchorHead):

    def __init__(self,

--- a/mmdet/models/anchor_heads/rpn_head.py
+++ b/mmdet/models/anchor_heads/rpn_head.py
@@ -6,8 +6,10 @@ from mmcv.cnn import normal_init
 from mmdet.core import delta2bbox
 from mmdet.ops import nms
 from .anchor_head import AnchorHead
+from ..registry import HEADS


+@HEADS.register_module
 class RPNHead(AnchorHead):

    def __init__(self, in_channels, **kwargs):

--- a/mmdet/models/anchor_heads/ssd_head.py
+++ b/mmdet/models/anchor_heads/ssd_head.py
@@ -7,8 +7,10 @@ from mmcv.cnn import xavier_init
 from mmdet.core import (AnchorGenerator, anchor_target, weighted_smoothl1,
                        multi_apply)
 from .anchor_head import AnchorHead
+from ..registry import HEADS


+@HEADS.register_module
 class SSDHead(AnchorHead):

    def __init__(self,
@@ -144,7 +146,7 @@ class SSDHead(AnchorHead):
            self.target_stds,
            cfg,
            gt_labels_list=gt_labels,
-            cls_out_channels=self.cls_out_channels,
+            label_channels=1,
            sampling=False,
            unmap_outputs=False)
        if cls_reg_targets is None:

--- a/mmdet/models/backbones/resnet.py
+++ b/mmdet/models/backbones/resnet.py
@@ -10,6 +10,8 @@ from mmdet.ops import DeformConv, ModulatedDeformConv
 from ..registry import BACKBONES
 from ..utils import build_norm_layer

+from ..registry import BACKBONES
+

 def conv3x3(in_planes, out_planes, stride=1, dilation=1):
    "3x3 convolution with padding"

--- a/mmdet/models/backbones/resnext.py
+++ b/mmdet/models/backbones/resnext.py
@@ -4,6 +4,7 @@ import torch.nn as nn

 from .resnet import ResNet
 from .resnet import Bottleneck as _Bottleneck
+from ..registry import BACKBONES
 from ..utils import build_norm_layer


@@ -106,6 +107,7 @@ def make_res_layer(block,
    return nn.Sequential(*layers)


+@BACKBONES.register_module
 class ResNeXt(ResNet):
    """ResNeXt backbone.


--- a/mmdet/models/backbones/ssd_vgg.py
+++ b/mmdet/models/backbones/ssd_vgg.py
@@ -6,8 +6,10 @@ import torch.nn.functional as F
 from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init,
                      normal_init)
 from mmcv.runner import load_checkpoint
+from ..registry import BACKBONES


+@BACKBONES.register_module
 class SSDVGG(VGG):
    extra_setting = {
        300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),

--- a/mmdet/models/bbox_heads/bbox_head.py
+++ b/mmdet/models/bbox_heads/bbox_head.py
@@ -4,8 +4,10 @@ import torch.nn.functional as F

 from mmdet.core import (delta2bbox, multiclass_nms, bbox_target,
                        weighted_cross_entropy, weighted_smoothl1, accuracy)
+from ..registry import HEADS


+@HEADS.register_module
 class BBoxHead(nn.Module):
    """Simplest RoI head, with only two fc layers for classification and
    regression respectively"""
@@ -78,8 +80,14 @@ class BBoxHead(nn.Module):
            target_stds=self.target_stds)
        return cls_reg_targets

-    def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
-             bbox_weights, reduce=True):
+    def loss(self,
+             cls_score,
+             bbox_pred,
+             labels,
+             label_weights,
+             bbox_targets,
+             bbox_weights,
+             reduce=True):
        losses = dict()
        if cls_score is not None:
            losses['loss_cls'] = weighted_cross_entropy(

--- a/mmdet/models/bbox_heads/convfc_bbox_head.py
+++ b/mmdet/models/bbox_heads/convfc_bbox_head.py
 import torch.nn as nn

 from .bbox_head import BBoxHead
+from ..registry import HEADS
 from ..utils import ConvModule


+@HEADS.register_module
 class ConvFCBBoxHead(BBoxHead):
    """More general bbox head, with shared conv and fc layers and two optional
    separated branches.
@@ -165,6 +167,7 @@ class ConvFCBBoxHead(BBoxHead):
        return cls_score, bbox_pred


+@HEADS.register_module
 class SharedFCBBoxHead(ConvFCBBoxHead):

    def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):

--- a/mmdet/models/builder.py
+++ b/mmdet/models/builder.py
-from mmcv.runner import obj_from_dict
+import mmcv
 from torch import nn

-from . import (backbones, necks, roi_extractors, anchor_heads, bbox_heads,
-               mask_heads)
-
-
-def _build_module(cfg, parrent=None, default_args=None):
-    return cfg if isinstance(cfg, nn.Module) else obj_from_dict(
-        cfg, parrent, default_args)
-
-
-def build(cfg, parrent=None, default_args=None):
+from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
+
+
+def _build_module(cfg, registry, default_args):
+    assert isinstance(cfg, dict) and 'type' in cfg
+    assert isinstance(default_args, dict) or default_args is None
+    args = cfg.copy()
+    obj_type = args.pop('type')
+    if mmcv.is_str(obj_type):
+        if obj_type not in registry.module_dict:
+            raise KeyError('{} is not in the {} registry'.format(
+                obj_type, registry.name))
+        obj_type = registry.module_dict[obj_type]
+    elif not isinstance(obj_type, type):
+        raise TypeError('type must be a str or valid type, but got {}'.format(
+            type(obj_type)))
+    if default_args is not None:
+        for name, value in default_args.items():
+            args.setdefault(name, value)
+    return obj_type(**args)
+
+
+def build(cfg, registry, default_args=None):
    if isinstance(cfg, list):
-        modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg]
+        modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg]
        return nn.Sequential(*modules)
    else:
-        return _build_module(cfg, parrent, default_args)
+        return _build_module(cfg, registry, default_args)


 def build_backbone(cfg):
-    return build(cfg, backbones)
+    return build(cfg, BACKBONES)


 def build_neck(cfg):
-    return build(cfg, necks)
-
-
-def build_anchor_head(cfg):
-    return build(cfg, anchor_heads)
+    return build(cfg, NECKS)


 def build_roi_extractor(cfg):
-    return build(cfg, roi_extractors)
-
-
-def build_bbox_head(cfg):
-    return build(cfg, bbox_heads)
+    return build(cfg, ROI_EXTRACTORS)


-def build_mask_head(cfg):
-    return build(cfg, mask_heads)
+def build_head(cfg):
+    return build(cfg, HEADS)


 def build_detector(cfg, train_cfg=None, test_cfg=None):
-    from . import detectors
-    return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
+    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
--- a/mmdet/models/detectors/cascade_rcnn.py
+++ b/mmdet/models/detectors/cascade_rcnn.py
@@ -6,10 +6,12 @@ import torch.nn as nn
 from .base import BaseDetector
 from .test_mixins import RPNTestMixin
 from .. import builder
+from ..registry import DETECTORS
 from mmdet.core import (assign_and_sample, bbox2roi, bbox2result, multi_apply,
                        merge_aug_masks)


+@DETECTORS.register_module
 class CascadeRCNN(BaseDetector, RPNTestMixin):

    def __init__(self,
@@ -37,7 +39,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
            raise NotImplementedError

        if rpn_head is not None:
-            self.rpn_head = builder.build_anchor_head(rpn_head)
+            self.rpn_head = builder.build_head(rpn_head)

        if bbox_head is not None:
            self.bbox_roi_extractor = nn.ModuleList()
@@ -52,7 +54,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
            for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
                self.bbox_roi_extractor.append(
                    builder.build_roi_extractor(roi_extractor))
-                self.bbox_head.append(builder.build_bbox_head(head))
+                self.bbox_head.append(builder.build_head(head))

        if mask_head is not None:
            self.mask_roi_extractor = nn.ModuleList()
@@ -67,7 +69,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
            for roi_extractor, head in zip(mask_roi_extractor, mask_head):
                self.mask_roi_extractor.append(
                    builder.build_roi_extractor(roi_extractor))
-                self.mask_head.append(builder.build_mask_head(head))
+                self.mask_head.append(builder.build_head(head))

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

--- a/mmdet/models/detectors/fast_rcnn.py
+++ b/mmdet/models/detectors/fast_rcnn.py
 from .two_stage import TwoStageDetector
+from ..registry import DETECTORS


+@DETECTORS.register_module
 class FastRCNN(TwoStageDetector):

    def __init__(self,