Merge branch 'dev' into single-stage

e23ee232 · Kai Chen · 153a4e90 · a6adf8f0 · e23ee232 · e23ee232
Commit e23ee232 authored Oct 10, 2018 by Kai Chen
16 changed files
--- a/.travis.yml
+++ b/.travis.yml
+dist: trusty
+language: python
+install:
+  - pip install flake8
+python:
+  - "2.7"
+  - "3.5"
+  - "3.6"
+script:
+  - flake8
\ No newline at end of file
--- a/configs/faster_rcnn_r50_fpn_1x.py
+++ b/configs/faster_rcnn_r50_fpn_1x.py
@@ -3,7 +3,7 @@ model = dict(
    type='FasterRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
-        type='resnet',
+        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),

--- a/configs/mask_rcnn_r50_fpn_1x.py
+++ b/configs/mask_rcnn_r50_fpn_1x.py
@@ -3,7 +3,7 @@ model = dict(
    type='MaskRCNN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
-        type='resnet',
+        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),

--- a/configs/rpn_r50_fpn_1x.py
+++ b/configs/rpn_r50_fpn_1x.py
@@ -3,7 +3,7 @@ model = dict(
    type='RPN',
    pretrained='modelzoo://resnet50',
    backbone=dict(
-        type='resnet',
+        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),

--- a/mmdet/core/evaluation/eval_hooks.py
+++ b/mmdet/core/evaluation/eval_hooks.py
@@ -55,6 +55,10 @@ class DistEvalHook(Hook):
                shutil.rmtree(self.lock_dir)
            mmcv.mkdir_or_exist(self.lock_dir)
+    def after_run(self, runner):
+        if runner.rank == 0:
+            shutil.rmtree(self.lock_dir)
    def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return
@@ -70,7 +74,7 @@ class DistEvalHook(Hook):
            # compute output
            with torch.no_grad():
                result = runner.model(
-                    **data_gpu, return_loss=False, rescale=True)
+                    return_loss=False, rescale=True, **data_gpu)
            results[idx] = result
            batch_size = runner.world_size

--- a/mmdet/datasets/coco.py
+++ b/mmdet/datasets/coco.py
@@ -53,8 +53,14 @@ class CocoDataset(Dataset):
        # color channel order and normalize configs
        self.img_norm_cfg = img_norm_cfg
        # proposals
-        self.proposals = mmcv.load(
+        # TODO: revise _filter_imgs to be more flexible
-            proposal_file) if proposal_file is not None else None
+        if proposal_file is not None:
+            self.proposals = mmcv.load(proposal_file)
+            ori_ids = self.coco.getImgIds()
+            sorted_idx = [ori_ids.index(id) for id in self.img_ids]
+            self.proposals = [self.proposals[idx] for idx in sorted_idx]
+        else:
+            self.proposals = None
        self.num_max_proposals = num_max_proposals
        # flip ratio
        self.flip_ratio = flip_ratio
@@ -271,7 +277,8 @@ class CocoDataset(Dataset):
                scale_factor=scale_factor,
                flip=flip)
            if proposal is not None:
-                _proposal = self.bbox_transform(proposal, scale_factor, flip)
+                _proposal = self.bbox_transform(proposal, img_shape,
+                                                scale_factor, flip)
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None

--- a/mmdet/models/__init__.py
+++ b/mmdet/models/__init__.py
-from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN
+from .detectors import (BaseDetector, TwoStageDetector, RPN, FastRCNN,
+                        FasterRCNN, MaskRCNN)
 from .builder import (build_neck, build_rpn_head, build_roi_extractor,
                      build_bbox_head, build_mask_head, build_detector)
 __all__ = [
-    'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone',
+    'BaseDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN',
-    'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head',
+    'MaskRCNN', 'build_backbone', 'build_neck', 'build_rpn_head',
-    'build_mask_head', 'build_detector'
+    'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
+    'build_detector'
 ]
--- a/mmdet/models/backbones/__init__.py
+++ b/mmdet/models/backbones/__init__.py
-from .resnet import resnet
+from .resnet import ResNet
-__all__ = ['resnet']
+__all__ = ['ResNet']
--- a/mmdet/models/backbones/resnet.py
+++ b/mmdet/models/backbones/resnet.py
 import logging
-import math
 import torch.nn as nn
 import torch.utils.checkpoint as cp
+from mmcv.cnn import constant_init, kaiming_init
 from mmcv.runner import load_checkpoint
@@ -27,7 +28,8 @@ class BasicBlock(nn.Module):
                 stride=1,
                 dilation=1,
                 downsample=None,
-                 style='pytorch'):
+                 style='pytorch',
+                 with_cp=False):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride, dilation)
        self.bn1 = nn.BatchNorm2d(planes)
@@ -37,6 +39,7 @@ class BasicBlock(nn.Module):
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation
+        assert not with_cp
    def forward(self, x):
        residual = x
@@ -69,7 +72,6 @@ class Bottleneck(nn.Module):
                 style='pytorch',
                 with_cp=False):
        """Bottleneck block.
        If style is "pytorch", the stride-two layer is the 3x3 conv layer,
        if it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
@@ -174,64 +176,73 @@ def make_res_layer(block,
    return nn.Sequential(*layers)
-class ResHead(nn.Module):
+class ResNet(nn.Module):
+    """ResNet backbone.
-    def __init__(self,
-                 block,
-                 num_blocks,
-                 stride=2,
-                 dilation=1,
-                 style='pytorch'):
-        self.layer4 = make_res_layer(
-            block,
-            1024,
-            512,
-            num_blocks,
-            stride=stride,
-            dilation=dilation,
-            style=style)
-    def forward(self, x):
-        return self.layer4(x)
+    Args:
+        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
+        num_stages (int): Resnet stages, normally 4.
+        strides (Sequence[int]): Strides of the first block of each stage.
+        dilations (Sequence[int]): Dilation of each stage.
+        out_indices (Sequence[int]): Output from which stages.
+        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+            layer is the 3x3 conv layer, otherwise the stride-two layer is
+            the first 1x1 conv layer.
+        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+            not freezing any parameters.
+        bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
+            running stats (mean and var).
+        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed.
+    """
-class ResNet(nn.Module):
+    arch_settings = {
+        18: (BasicBlock, (2, 2, 2, 2)),
+        34: (BasicBlock, (3, 4, 6, 3)),
+        50: (Bottleneck, (3, 4, 6, 3)),
+        101: (Bottleneck, (3, 4, 23, 3)),
+        152: (Bottleneck, (3, 8, 36, 3))
+    }
    def __init__(self,
-                 block,
+                 depth,
-                 layers,
+                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
-                 frozen_stages=-1,
                 style='pytorch',
-                 sync_bn=False,
+                 frozen_stages=-1,
-                 with_cp=False,
+                 bn_eval=True,
-                 strict_frozen=False):
+                 bn_frozen=False,
+                 with_cp=False):
        super(ResNet, self).__init__()
-        if not len(layers) == len(strides) == len(dilations):
+        if depth not in self.arch_settings:
-            raise ValueError(
+            raise KeyError('invalid depth {} for resnet'.format(depth))
-                'The number of layers, strides and dilations must be equal, '
+        assert num_stages >= 1 and num_stages <= 4
-                'but found have {} layers, {} strides and {} dilations'.format(
+        block, stage_blocks = self.arch_settings[depth]
-                    len(layers), len(strides), len(dilations)))
+        stage_blocks = stage_blocks[:num_stages]
-        assert max(out_indices) < len(layers)
+        assert len(strides) == len(dilations) == num_stages
+        assert max(out_indices) < num_stages
        self.out_indices = out_indices
-        self.frozen_stages = frozen_stages
        self.style = style
-        self.sync_bn = sync_bn
+        self.frozen_stages = frozen_stages
+        self.bn_eval = bn_eval
+        self.bn_frozen = bn_frozen
+        self.with_cp = with_cp
        self.inplanes = 64
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        self.res_layers = []
-        for i, num_blocks in enumerate(layers):
+        self.res_layers = []
+        for i, num_blocks in enumerate(stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
-            layer_name = 'layer{}'.format(i + 1)
            planes = 64 * 2**i
            res_layer = make_res_layer(
                block,
@@ -243,12 +254,11 @@ class ResNet(nn.Module):
                style=self.style,
                with_cp=with_cp)
            self.inplanes = planes * block.expansion
+            layer_name = 'layer{}'.format(i + 1)
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)
-        self.feat_dim = block.expansion * 64 * 2**(len(layers) - 1)
-        self.with_cp = with_cp
-        self.strict_frozen = strict_frozen
+        self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1)
    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
@@ -257,11 +267,9 @@ class ResNet(nn.Module):
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
-                    n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                    kaiming_init(m)
-                    nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
                elif isinstance(m, nn.BatchNorm2d):
-                    nn.init.constant_(m.weight, 1)
+                    constant_init(m, 1)
-                    nn.init.constant_(m.bias, 0)
        else:
            raise TypeError('pretrained must be a str or None')
@@ -283,11 +291,11 @@ class ResNet(nn.Module):
    def train(self, mode=True):
        super(ResNet, self).train(mode)
-        if not self.sync_bn:
+        if self.bn_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
-                    if self.strict_frozen:
+                    if self.bn_frozen:
                        for params in m.parameters():
                            params.requires_grad = False
        if mode and self.frozen_stages >= 0:
@@ -303,39 +311,3 @@ class ResNet(nn.Module):
                mod.eval()
                for param in mod.parameters():
                    param.requires_grad = False
-resnet_cfg = {
-    18: (BasicBlock, (2, 2, 2, 2)),
-    34: (BasicBlock, (3, 4, 6, 3)),
-    50: (Bottleneck, (3, 4, 6, 3)),
-    101: (Bottleneck, (3, 4, 23, 3)),
-    152: (Bottleneck, (3, 8, 36, 3))
-}
-def resnet(depth,
-           num_stages=4,
-           strides=(1, 2, 2, 2),
-           dilations=(1, 1, 1, 1),
-           out_indices=(2, ),
-           frozen_stages=-1,
-           style='pytorch',
-           sync_bn=False,
-           with_cp=False,
-           strict_frozen=False):
-    """Constructs a ResNet model.
-    Args:
-        depth (int): depth of resnet, from {18, 34, 50, 101, 152}
-        num_stages (int): num of resnet stages, normally 4
-        strides (list): strides of the first block of each stage
-        dilations (list): dilation of each stage
-        out_indices (list): output from which stages
-    """
-    if depth not in resnet_cfg:
-        raise KeyError('invalid depth {} for resnet'.format(depth))
-    block, layers = resnet_cfg[depth]
-    model = ResNet(block, layers[:num_stages], strides, dilations, out_indices,
-                   frozen_stages, style, sync_bn, with_cp, strict_frozen)
-    return model
--- a/mmdet/models/detectors/__init__.py
+++ b/mmdet/models/detectors/__init__.py
 from .base import BaseDetector
+from .single_stage import SingleStageDetector
+from .two_stage import TwoStageDetector
 from .rpn import RPN
+from .fast_rcnn import FastRCNN
 from .faster_rcnn import FasterRCNN
 from .mask_rcnn import MaskRCNN
 from .retina_net import RetinaNet
-__all__ = ['BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'RetinaNet']
+__all__ = [
+    'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
+    'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'RetinaNet'
+]
--- a/mmdet/models/detectors/base.py
+++ b/mmdet/models/detectors/base.py
@@ -3,7 +3,6 @@ from abc import ABCMeta, abstractmethod
 import mmcv
 import numpy as np
-import torch
 import torch.nn as nn
 from mmdet.core import tensor2imgs, get_classes
@@ -34,11 +33,9 @@ class BaseDetector(nn.Module):
        pass
    def extract_feats(self, imgs):
-        if isinstance(imgs, torch.Tensor):
+        assert isinstance(imgs, list)
-            return self.extract_feat(imgs)
+        for img in imgs:
-        elif isinstance(imgs, list):
+            yield self.extract_feat(img)
-            for img in imgs:
-                yield self.extract_feat(img)
    @abstractmethod
    def forward_train(self, imgs, img_metas, **kwargs):

--- a/mmdet/models/detectors/fast_rcnn.py
+++ b/mmdet/models/detectors/fast_rcnn.py
+from .two_stage import TwoStageDetector
+class FastRCNN(TwoStageDetector):
+    def __init__(self,
+                 backbone,
+                 neck,
+                 bbox_roi_extractor,
+                 bbox_head,
+                 train_cfg,
+                 test_cfg,
+                 mask_roi_extractor=None,
+                 mask_head=None,
+                 pretrained=None):
+        super(FastRCNN, self).__init__(
+            backbone=backbone,
+            neck=neck,
+            bbox_roi_extractor=bbox_roi_extractor,
+            bbox_head=bbox_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            mask_roi_extractor=mask_roi_extractor,
+            mask_head=mask_head,
+            pretrained=pretrained)
+    def forward_test(self, imgs, img_metas, proposals, **kwargs):
+        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
+            if not isinstance(var, list):
+                raise TypeError('{} must be a list, but got {}'.format(
+                    name, type(var)))
+        num_augs = len(imgs)
+        if num_augs != len(img_metas):
+            raise ValueError(
+                'num of augmentations ({}) != num of image meta ({})'.format(
+                    len(imgs), len(img_metas)))
+        # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
+        imgs_per_gpu = imgs[0].size(0)
+        assert imgs_per_gpu == 1
+        if num_augs == 1:
+            return self.simple_test(imgs[0], img_metas[0], proposals[0],
+                                    **kwargs)
+        else:
+            return self.aug_test(imgs, img_metas, proposals, **kwargs)
--- a/mmdet/models/detectors/test_mixins.py
+++ b/mmdet/models/detectors/test_mixins.py
@@ -135,6 +135,11 @@ class MaskTestMixin(object):
            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head.get_seg_masks(
-                merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn,
+                merged_masks,
-                ori_shape)
+                det_bboxes,
+                det_labels,
+                self.test_cfg.rcnn,
+                ori_shape,
+                scale_factor=1.0,
+                rescale=False)
        return segm_result
--- a/mmdet/models/detectors/two_stage.py
+++ b/mmdet/models/detectors/two_stage.py
@@ -146,7 +146,8 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
        x = self.extract_feat(img)
        proposal_list = self.simple_test_rpn(
-            x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
+            x, img_meta,
+            self.test_cfg.rpn) if proposals is None else proposals
        det_bboxes, det_labels = self.simple_test_bboxes(
            x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)

--- a/setup.py
+++ b/setup.py
@@ -106,6 +106,7 @@ if __name__ == '__main__':
        setup_requires=['pytest-runner'],
        tests_require=['pytest'],
        install_requires=[
-            'numpy', 'matplotlib', 'six', 'terminaltables', 'pycocotools'
+            'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables',
+            'pycocotools'
        ],
        zip_safe=False)
--- a/tools/test.py
+++ b/tools/test.py
@@ -3,11 +3,11 @@ import argparse
 import torch
 import mmcv
 from mmcv.runner import load_checkpoint, parallel_test, obj_from_dict
-from mmcv.parallel import scatter, MMDataParallel
+from mmcv.parallel import scatter, collate, MMDataParallel
 from mmdet import datasets
 from mmdet.core import results2json, coco_eval
-from mmdet.datasets import collate, build_dataloader
+from mmdet.datasets import build_dataloader
 from mmdet.models import build_detector, detectors
@@ -17,7 +17,7 @@ def single_test(model, data_loader, show=False):
    prog_bar = mmcv.ProgressBar(len(data_loader.dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
-            result = model(**data, return_loss=False, rescale=not show)
+            result = model(return_loss=False, rescale=not show, **data)
        results.append(result)
        if show:
@@ -32,7 +32,7 @@ def single_test(model, data_loader, show=False):
 def _data_func(data, device_id):
    data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
-    return dict(**data, return_loss=False, rescale=True)
+    return dict(return_loss=False, rescale=True, **data)
 def parse_args():