Commit db14d74b authored by Kai Chen's avatar Kai Chen
Browse files

Merge branch 'master' of github.com:open-mmlab/mmdetection into dcn_cpp_extension

parents c1e0884f b7aa30c2
...@@ -168,6 +168,22 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m ...@@ -168,6 +168,22 @@ We released RPN, Faster R-CNN and Mask R-CNN models in the first version. More m
- Inference time is reported for batch size = 1 and batch size = 8. - Inference time is reported for batch size = 1 and batch size = 8.
- The speed difference between VOC and COCO is caused by model parameters and nms. - The speed difference between VOC and COCO is caused by model parameters and nms.
### Group Normalization (GN)
| Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
|:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
| R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.9 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
| R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.2 | 36.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.6 | 37.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
| R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.7 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
| R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
| R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |
**Notes:**
- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk).
- The `3x` schedule is epoch [28, 34, 36], i.e., the learning rate is decayed at epoch 28 and 34, and training runs for 36 epochs in total.
- The memory is measured with `torch.cuda.max_memory_allocated()` instead of `torch.cuda.max_memory_cached()`. We will update the memory usage of other models in the future.
## Comparison with Detectron ## Comparison with Detectron
......
...@@ -36,6 +36,10 @@ This project is released under the [Apache 2.0 license](LICENSE). ...@@ -36,6 +36,10 @@ This project is released under the [Apache 2.0 license](LICENSE).
## Updates ## Updates
v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
v0.5.5 (22/12/2018) v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC. - Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models. - Add ResNeXt backbones and detection models.
...@@ -73,6 +77,12 @@ Results and models are available in the [Model zoo](MODEL_ZOO.md). ...@@ -73,6 +77,12 @@ Results and models are available in the [Model zoo](MODEL_ZOO.md).
| SSD | ✗ | ✗ | ✗ | ✓ | | SSD | ✗ | ✗ | ✗ | ✓ |
| RetinaNet | ✓ | ✓ | ☐ | ✗ | | RetinaNet | ✓ | ✓ | ☐ | ✗ |
Other features
- [x] Group Normalization
- [x] OHEM
- [x] Soft-NMS
## Installation ## Installation
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation. Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
......
# model settings
# Shared normalization config: Group Normalization with 32 groups;
# frozen=False keeps the GN affine parameters trainable. The same dict is
# passed as `normalize` to the backbone, neck and heads below.
normalize = dict(type='GN', num_groups=32, frozen=False)
model = dict(
    type='MaskRCNN',
    # GN-pretrained ResNet-101 weights converted from Detectron
    pretrained='open-mmlab://detectron/resnet101_gn',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # export features of all four stages to the neck
        frozen_stages=1,  # early stage(s) kept frozen during training
        style='pytorch',
        normalize=normalize),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],  # channel counts of the four ResNet stages
        out_channels=256,
        num_outs=5,  # 4 backbone levels + 1 extra level
        normalize=normalize),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],  # one scale per FPN level
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN output level
        target_means=[.0, .0, .0, .0],  # bbox delta normalization (mean/std)
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),  # binary sigmoid objectness instead of softmax
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),  # RoIs mapped onto the first 4 FPN levels
    bbox_head=dict(
        # 4conv1fc head (instead of the default 2fc) so that GN layers can
        # be inserted in the conv branch
        type='ConvFCBBoxHead',
        num_shared_convs=4,
        num_shared_fcs=1,
        in_channels=256,
        conv_out_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # 80 COCO classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,  # one bbox regressor per class
        normalize=normalize),
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81,
        normalize=normalize))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        # anchors with IoU >= pos_iou_thr to a gt box become positives,
        # IoU < neg_iou_thr become negatives; the range in between is ignored
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables matching against ignore regions
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,  # no upper bound on the negative:positive ratio
            add_gt_as_proposals=False),
        allowed_border=0,  # discard anchors crossing the image border
        pos_weight=-1,  # -1 keeps the default loss weight for positives
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the RCNN loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),  # add gt boxes to the proposal set
        mask_size=28,  # side length of the mask training targets
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,  # NMS is applied per FPN level
        nms_pre=2000,  # top-scoring proposals kept before NMS
        nms_post=2000,  # proposals kept after NMS
        max_num=2000,  # proposals forwarded to the RCNN stage
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,  # drop detections scoring below this threshold
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,  # keep at most 100 detections per image
        mask_thr_binary=0.5))  # threshold for binarizing predicted masks
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Caffe/Detectron-style preprocessing: BGR order (to_rgb=False), mean
# subtraction only (unit std) — matches the Detectron-converted weights.
img_norm_cfg = dict(
    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
data = dict(
    imgs_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # (max long side, max short side) for resizing
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad images so both sides are divisible by 32 (FPN)
        flip_ratio=0.5,  # random horizontal flip for augmentation
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flipping at evaluation time
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,  # test mode loads images only, no targets
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.02 presumably assumes 8 GPUs x 2 imgs/gpu (linear
# scaling rule) — scale it when training with a different total batch size.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))  # L2 gradient clipping
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',  # linear lr warmup over the first 500 iterations
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr * 1/3
    step=[16, 22])  # decay lr at epochs 16 and 22 (2x schedule)
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24  # 2x schedule (24 epochs)
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r101_fpn_gn_2x'
load_from = None  # optional checkpoint to initialize from
resume_from = None  # optional checkpoint to resume training from
workflow = [('train', 1)]  # train only, 1 epoch per cycle
# model settings # model settings
normalize = dict( normalize = dict(type='GN', num_groups=32, frozen=False)
type='GN',
num_groups=32,
frozen=False)
model = dict( model = dict(
type='MaskRCNN', type='MaskRCNN',
pretrained='open-mmlab://contrib/resnet50_gn', pretrained='open-mmlab://detectron/resnet50_gn',
backbone=dict( backbone=dict(
type='ResNet', type='ResNet',
depth=50, depth=50,
...@@ -114,7 +111,7 @@ test_cfg = dict( ...@@ -114,7 +111,7 @@ test_cfg = dict(
dataset_type = 'CocoDataset' dataset_type = 'CocoDataset'
data_root = 'data/coco/' data_root = 'data/coco/'
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
data = dict( data = dict(
imgs_per_gpu=2, imgs_per_gpu=2,
workers_per_gpu=2, workers_per_gpu=2,
......
# model settings
# Shared normalization config: Group Normalization with 32 groups;
# frozen=False keeps the GN affine parameters trainable. The same dict is
# passed as `normalize` to the backbone, neck and heads below.
normalize = dict(type='GN', num_groups=32, frozen=False)
model = dict(
    type='MaskRCNN',
    # community-contributed GN-pretrained ResNet-50 weights
    pretrained='open-mmlab://contrib/resnet50_gn',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # export features of all four stages to the neck
        frozen_stages=1,  # early stage(s) kept frozen during training
        style='pytorch',
        normalize=normalize),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],  # channel counts of the four ResNet stages
        out_channels=256,
        num_outs=5,  # 4 backbone levels + 1 extra level
        normalize=normalize),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_scales=[8],  # one scale per FPN level
        anchor_ratios=[0.5, 1.0, 2.0],
        anchor_strides=[4, 8, 16, 32, 64],  # one stride per FPN output level
        target_means=[.0, .0, .0, .0],  # bbox delta normalization (mean/std)
        target_stds=[1.0, 1.0, 1.0, 1.0],
        use_sigmoid_cls=True),  # binary sigmoid objectness instead of softmax
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),  # RoIs mapped onto the first 4 FPN levels
    bbox_head=dict(
        # 4conv1fc head (instead of the default 2fc) so that GN layers can
        # be inserted in the conv branch
        type='ConvFCBBoxHead',
        num_shared_convs=4,
        num_shared_fcs=1,
        in_channels=256,
        conv_out_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,  # 80 COCO classes + background
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
        reg_class_agnostic=False,  # one bbox regressor per class
        normalize=normalize),
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
        num_classes=81,
        normalize=normalize))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        # anchors with IoU >= pos_iou_thr to a gt box become positives,
        # IoU < neg_iou_thr become negatives; the range in between is ignored
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),  # -1 disables matching against ignore regions
        sampler=dict(
            type='RandomSampler',
            num=256,  # anchors sampled per image for the RPN loss
            pos_fraction=0.5,
            neg_pos_ub=-1,  # no upper bound on the negative:positive ratio
            add_gt_as_proposals=False),
        allowed_border=0,  # discard anchors crossing the image border
        pos_weight=-1,  # -1 keeps the default loss weight for positives
        smoothl1_beta=1 / 9.0,
        debug=False),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,  # RoIs sampled per image for the RCNN loss
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),  # add gt boxes to the proposal set
        mask_size=28,  # side length of the mask training targets
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,  # NMS is applied per FPN level
        nms_pre=2000,  # top-scoring proposals kept before NMS
        nms_post=2000,  # proposals kept after NMS
        max_num=2000,  # proposals forwarded to the RCNN stage
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05,  # drop detections scoring below this threshold
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,  # keep at most 100 detections per image
        mask_thr_binary=0.5))  # threshold for binarizing predicted masks
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# torchvision-style preprocessing: RGB order (to_rgb=True) with ImageNet
# mean/std — matches the contributed (PyTorch-pretrained) GN weights.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
    imgs_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        img_scale=(1333, 800),  # (max long side, max short side) for resizing
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,  # pad images so both sides are divisible by 32 (FPN)
        flip_ratio=0.5,  # random horizontal flip for augmentation
        with_mask=True,
        with_crowd=True,
        with_label=True),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,  # no flipping at evaluation time
        with_mask=True,
        with_crowd=True,
        with_label=True),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        img_scale=(1333, 800),
        img_norm_cfg=img_norm_cfg,
        size_divisor=32,
        flip_ratio=0,
        with_mask=False,  # test mode loads images only, no targets
        with_label=False,
        test_mode=True))
# optimizer
# NOTE(review): lr=0.02 presumably assumes 8 GPUs x 2 imgs/gpu (linear
# scaling rule) — scale it when training with a different total batch size.
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))  # L2 gradient clipping
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',  # linear lr warmup over the first 500 iterations
    warmup_iters=500,
    warmup_ratio=1.0 / 3,  # warmup starts at lr * 1/3
    step=[16, 22])  # decay lr at epochs 16 and 22 (2x schedule)
checkpoint_config = dict(interval=1)  # save a checkpoint every epoch
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24  # 2x schedule (24 epochs)
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_gn_contrib_2x'
load_from = None  # optional checkpoint to initialize from
resume_from = None  # optional checkpoint to resume training from
workflow = [('train', 1)]  # train only, 1 epoch per cycle
...@@ -69,7 +69,7 @@ class MaxIoUAssigner(BaseAssigner): ...@@ -69,7 +69,7 @@ class MaxIoUAssigner(BaseAssigner):
if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0: if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
raise ValueError('No gt or bboxes') raise ValueError('No gt or bboxes')
bboxes = bboxes[:, :4] bboxes = bboxes[:, :4]
overlaps = bbox_overlaps(bboxes, gt_bboxes) overlaps = bbox_overlaps(gt_bboxes, bboxes)
if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and ( if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
gt_bboxes_ignore.numel() > 0): gt_bboxes_ignore.numel() > 0):
...@@ -88,8 +88,8 @@ class MaxIoUAssigner(BaseAssigner): ...@@ -88,8 +88,8 @@ class MaxIoUAssigner(BaseAssigner):
"""Assign w.r.t. the overlaps of bboxes with gts. """Assign w.r.t. the overlaps of bboxes with gts.
Args: Args:
overlaps (Tensor): Overlaps between n bboxes and k gt_bboxes, overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,
shape(n, k). shape(k, n).
gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ). gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
Returns: Returns:
...@@ -98,19 +98,18 @@ class MaxIoUAssigner(BaseAssigner): ...@@ -98,19 +98,18 @@ class MaxIoUAssigner(BaseAssigner):
if overlaps.numel() == 0: if overlaps.numel() == 0:
raise ValueError('No gt or proposals') raise ValueError('No gt or proposals')
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default # 1. assign -1 by default
assigned_gt_inds = overlaps.new_full( assigned_gt_inds = overlaps.new_full(
(num_bboxes, ), -1, dtype=torch.long) (num_bboxes, ), -1, dtype=torch.long)
assert overlaps.size() == (num_bboxes, num_gts)
# for each anchor, which gt best overlaps with it # for each anchor, which gt best overlaps with it
# for each anchor, the max iou of all gts # for each anchor, the max iou of all gts
max_overlaps, argmax_overlaps = overlaps.max(dim=1) max_overlaps, argmax_overlaps = overlaps.max(dim=0)
# for each gt, which anchor best overlaps with it # for each gt, which anchor best overlaps with it
# for each gt, the max iou of all proposals # for each gt, the max iou of all proposals
gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0) gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)
# 2. assign negative: below # 2. assign negative: below
if isinstance(self.neg_iou_thr, float): if isinstance(self.neg_iou_thr, float):
...@@ -129,7 +128,7 @@ class MaxIoUAssigner(BaseAssigner): ...@@ -129,7 +128,7 @@ class MaxIoUAssigner(BaseAssigner):
for i in range(num_gts): for i in range(num_gts):
if gt_max_overlaps[i] >= self.min_pos_iou: if gt_max_overlaps[i] >= self.min_pos_iou:
if self.gt_max_assign_all: if self.gt_max_assign_all:
max_iou_inds = overlaps[:, i] == gt_max_overlaps[i] max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
assigned_gt_inds[max_iou_inds] = i + 1 assigned_gt_inds[max_iou_inds] = i + 1
else: else:
assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1
......
...@@ -16,7 +16,7 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): ...@@ -16,7 +16,7 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
foreground). foreground).
Returns: Returns:
ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1) ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
""" """
assert mode in ['iou', 'iof'] assert mode in ['iou', 'iof']
......
from .detectors import (BaseDetector, TwoStageDetector, RPN, FastRCNN, from .backbones import * # noqa: F401,F403
FasterRCNN, MaskRCNN) from .necks import * # noqa: F401,F403
from .builder import (build_neck, build_anchor_head, build_roi_extractor, from .roi_extractors import * # noqa: F401,F403
build_bbox_head, build_mask_head, build_detector) from .anchor_heads import * # noqa: F401,F403
from .bbox_heads import * # noqa: F401,F403
from .mask_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403
from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
from .builder import (build_backbone, build_neck, build_roi_extractor,
build_head, build_detector)
__all__ = [ __all__ = [
'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN', 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS',
'MaskRCNN', 'build_backbone', 'build_neck', 'build_anchor_head', 'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head',
'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
'build_detector' 'build_detector'
] ]
...@@ -3,14 +3,16 @@ from __future__ import division ...@@ -3,14 +3,16 @@ from __future__ import division
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
from mmcv.cnn import normal_init
from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox, from mmdet.core import (AnchorGenerator, anchor_target, delta2bbox,
multi_apply, weighted_cross_entropy, weighted_smoothl1, multi_apply, weighted_cross_entropy, weighted_smoothl1,
weighted_binary_cross_entropy, weighted_binary_cross_entropy,
weighted_sigmoid_focal_loss, multiclass_nms) weighted_sigmoid_focal_loss, multiclass_nms)
from ..utils import normal_init from ..registry import HEADS
@HEADS.register_module
class AnchorHead(nn.Module): class AnchorHead(nn.Module):
"""Anchor-based head (RPN, RetinaNet, SSD, etc.). """Anchor-based head (RPN, RetinaNet, SSD, etc.).
......
...@@ -3,9 +3,11 @@ import torch.nn as nn ...@@ -3,9 +3,11 @@ import torch.nn as nn
from mmcv.cnn import normal_init from mmcv.cnn import normal_init
from .anchor_head import AnchorHead from .anchor_head import AnchorHead
from ..registry import HEADS
from ..utils import bias_init_with_prob from ..utils import bias_init_with_prob
@HEADS.register_module
class RetinaHead(AnchorHead): class RetinaHead(AnchorHead):
def __init__(self, def __init__(self,
......
...@@ -6,8 +6,10 @@ from mmcv.cnn import normal_init ...@@ -6,8 +6,10 @@ from mmcv.cnn import normal_init
from mmdet.core import delta2bbox from mmdet.core import delta2bbox
from mmdet.ops import nms from mmdet.ops import nms
from .anchor_head import AnchorHead from .anchor_head import AnchorHead
from ..registry import HEADS
@HEADS.register_module
class RPNHead(AnchorHead): class RPNHead(AnchorHead):
def __init__(self, in_channels, **kwargs): def __init__(self, in_channels, **kwargs):
......
...@@ -7,8 +7,10 @@ from mmcv.cnn import xavier_init ...@@ -7,8 +7,10 @@ from mmcv.cnn import xavier_init
from mmdet.core import (AnchorGenerator, anchor_target, weighted_smoothl1, from mmdet.core import (AnchorGenerator, anchor_target, weighted_smoothl1,
multi_apply) multi_apply)
from .anchor_head import AnchorHead from .anchor_head import AnchorHead
from ..registry import HEADS
@HEADS.register_module
class SSDHead(AnchorHead): class SSDHead(AnchorHead):
def __init__(self, def __init__(self,
...@@ -144,7 +146,7 @@ class SSDHead(AnchorHead): ...@@ -144,7 +146,7 @@ class SSDHead(AnchorHead):
self.target_stds, self.target_stds,
cfg, cfg,
gt_labels_list=gt_labels, gt_labels_list=gt_labels,
cls_out_channels=self.cls_out_channels, label_channels=1,
sampling=False, sampling=False,
unmap_outputs=False) unmap_outputs=False)
if cls_reg_targets is None: if cls_reg_targets is None:
......
...@@ -10,6 +10,8 @@ from mmdet.ops import DeformConv, ModulatedDeformConv ...@@ -10,6 +10,8 @@ from mmdet.ops import DeformConv, ModulatedDeformConv
from ..registry import BACKBONES from ..registry import BACKBONES
from ..utils import build_norm_layer from ..utils import build_norm_layer
from ..registry import BACKBONES
def conv3x3(in_planes, out_planes, stride=1, dilation=1): def conv3x3(in_planes, out_planes, stride=1, dilation=1):
"3x3 convolution with padding" "3x3 convolution with padding"
......
...@@ -4,6 +4,7 @@ import torch.nn as nn ...@@ -4,6 +4,7 @@ import torch.nn as nn
from .resnet import ResNet from .resnet import ResNet
from .resnet import Bottleneck as _Bottleneck from .resnet import Bottleneck as _Bottleneck
from ..registry import BACKBONES
from ..utils import build_norm_layer from ..utils import build_norm_layer
...@@ -106,6 +107,7 @@ def make_res_layer(block, ...@@ -106,6 +107,7 @@ def make_res_layer(block,
return nn.Sequential(*layers) return nn.Sequential(*layers)
@BACKBONES.register_module
class ResNeXt(ResNet): class ResNeXt(ResNet):
"""ResNeXt backbone. """ResNeXt backbone.
......
...@@ -6,8 +6,10 @@ import torch.nn.functional as F ...@@ -6,8 +6,10 @@ import torch.nn.functional as F
from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init, from mmcv.cnn import (VGG, xavier_init, constant_init, kaiming_init,
normal_init) normal_init)
from mmcv.runner import load_checkpoint from mmcv.runner import load_checkpoint
from ..registry import BACKBONES
@BACKBONES.register_module
class SSDVGG(VGG): class SSDVGG(VGG):
extra_setting = { extra_setting = {
300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256), 300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),
......
...@@ -4,8 +4,10 @@ import torch.nn.functional as F ...@@ -4,8 +4,10 @@ import torch.nn.functional as F
from mmdet.core import (delta2bbox, multiclass_nms, bbox_target, from mmdet.core import (delta2bbox, multiclass_nms, bbox_target,
weighted_cross_entropy, weighted_smoothl1, accuracy) weighted_cross_entropy, weighted_smoothl1, accuracy)
from ..registry import HEADS
@HEADS.register_module
class BBoxHead(nn.Module): class BBoxHead(nn.Module):
"""Simplest RoI head, with only two fc layers for classification and """Simplest RoI head, with only two fc layers for classification and
regression respectively""" regression respectively"""
...@@ -78,8 +80,14 @@ class BBoxHead(nn.Module): ...@@ -78,8 +80,14 @@ class BBoxHead(nn.Module):
target_stds=self.target_stds) target_stds=self.target_stds)
return cls_reg_targets return cls_reg_targets
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets, def loss(self,
bbox_weights, reduce=True): cls_score,
bbox_pred,
labels,
label_weights,
bbox_targets,
bbox_weights,
reduce=True):
losses = dict() losses = dict()
if cls_score is not None: if cls_score is not None:
losses['loss_cls'] = weighted_cross_entropy( losses['loss_cls'] = weighted_cross_entropy(
......
import torch.nn as nn import torch.nn as nn
from .bbox_head import BBoxHead from .bbox_head import BBoxHead
from ..registry import HEADS
from ..utils import ConvModule from ..utils import ConvModule
@HEADS.register_module
class ConvFCBBoxHead(BBoxHead): class ConvFCBBoxHead(BBoxHead):
"""More general bbox head, with shared conv and fc layers and two optional """More general bbox head, with shared conv and fc layers and two optional
separated branches. separated branches.
...@@ -165,6 +167,7 @@ class ConvFCBBoxHead(BBoxHead): ...@@ -165,6 +167,7 @@ class ConvFCBBoxHead(BBoxHead):
return cls_score, bbox_pred return cls_score, bbox_pred
@HEADS.register_module
class SharedFCBBoxHead(ConvFCBBoxHead): class SharedFCBBoxHead(ConvFCBBoxHead):
def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs): def __init__(self, num_fcs=2, fc_out_channels=1024, *args, **kwargs):
......
from mmcv.runner import obj_from_dict import mmcv
from torch import nn from torch import nn
from . import (backbones, necks, roi_extractors, anchor_heads, bbox_heads, from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS
mask_heads)
def _build_module(cfg, registry, default_args):
def _build_module(cfg, parrent=None, default_args=None): assert isinstance(cfg, dict) and 'type' in cfg
return cfg if isinstance(cfg, nn.Module) else obj_from_dict( assert isinstance(default_args, dict) or default_args is None
cfg, parrent, default_args) args = cfg.copy()
obj_type = args.pop('type')
if mmcv.is_str(obj_type):
def build(cfg, parrent=None, default_args=None): if obj_type not in registry.module_dict:
raise KeyError('{} is not in the {} registry'.format(
obj_type, registry.name))
obj_type = registry.module_dict[obj_type]
elif not isinstance(obj_type, type):
raise TypeError('type must be a str or valid type, but got {}'.format(
type(obj_type)))
if default_args is not None:
for name, value in default_args.items():
args.setdefault(name, value)
return obj_type(**args)
def build(cfg, registry, default_args=None):
if isinstance(cfg, list): if isinstance(cfg, list):
modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg] modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg]
return nn.Sequential(*modules) return nn.Sequential(*modules)
else: else:
return _build_module(cfg, parrent, default_args) return _build_module(cfg, registry, default_args)
def build_backbone(cfg): def build_backbone(cfg):
return build(cfg, backbones) return build(cfg, BACKBONES)
def build_neck(cfg): def build_neck(cfg):
return build(cfg, necks) return build(cfg, NECKS)
def build_anchor_head(cfg):
return build(cfg, anchor_heads)
def build_roi_extractor(cfg): def build_roi_extractor(cfg):
return build(cfg, roi_extractors) return build(cfg, ROI_EXTRACTORS)
def build_bbox_head(cfg):
return build(cfg, bbox_heads)
def build_mask_head(cfg): def build_head(cfg):
return build(cfg, mask_heads) return build(cfg, HEADS)
def build_detector(cfg, train_cfg=None, test_cfg=None): def build_detector(cfg, train_cfg=None, test_cfg=None):
from . import detectors return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
...@@ -6,10 +6,12 @@ import torch.nn as nn ...@@ -6,10 +6,12 @@ import torch.nn as nn
from .base import BaseDetector from .base import BaseDetector
from .test_mixins import RPNTestMixin from .test_mixins import RPNTestMixin
from .. import builder from .. import builder
from ..registry import DETECTORS
from mmdet.core import (assign_and_sample, bbox2roi, bbox2result, multi_apply, from mmdet.core import (assign_and_sample, bbox2roi, bbox2result, multi_apply,
merge_aug_masks) merge_aug_masks)
@DETECTORS.register_module
class CascadeRCNN(BaseDetector, RPNTestMixin): class CascadeRCNN(BaseDetector, RPNTestMixin):
def __init__(self, def __init__(self,
...@@ -37,7 +39,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): ...@@ -37,7 +39,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
raise NotImplementedError raise NotImplementedError
if rpn_head is not None: if rpn_head is not None:
self.rpn_head = builder.build_anchor_head(rpn_head) self.rpn_head = builder.build_head(rpn_head)
if bbox_head is not None: if bbox_head is not None:
self.bbox_roi_extractor = nn.ModuleList() self.bbox_roi_extractor = nn.ModuleList()
...@@ -52,7 +54,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): ...@@ -52,7 +54,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
for roi_extractor, head in zip(bbox_roi_extractor, bbox_head): for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
self.bbox_roi_extractor.append( self.bbox_roi_extractor.append(
builder.build_roi_extractor(roi_extractor)) builder.build_roi_extractor(roi_extractor))
self.bbox_head.append(builder.build_bbox_head(head)) self.bbox_head.append(builder.build_head(head))
if mask_head is not None: if mask_head is not None:
self.mask_roi_extractor = nn.ModuleList() self.mask_roi_extractor = nn.ModuleList()
...@@ -67,7 +69,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin): ...@@ -67,7 +69,7 @@ class CascadeRCNN(BaseDetector, RPNTestMixin):
for roi_extractor, head in zip(mask_roi_extractor, mask_head): for roi_extractor, head in zip(mask_roi_extractor, mask_head):
self.mask_roi_extractor.append( self.mask_roi_extractor.append(
builder.build_roi_extractor(roi_extractor)) builder.build_roi_extractor(roi_extractor))
self.mask_head.append(builder.build_mask_head(head)) self.mask_head.append(builder.build_head(head))
self.train_cfg = train_cfg self.train_cfg = train_cfg
self.test_cfg = test_cfg self.test_cfg = test_cfg
......
from .two_stage import TwoStageDetector from .two_stage import TwoStageDetector
from ..registry import DETECTORS
@DETECTORS.register_module
class FastRCNN(TwoStageDetector): class FastRCNN(TwoStageDetector):
def __init__(self, def __init__(self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment