Commit 2017c81e authored by Kai Chen

Merge branch 'master' into pytorch-1.0

parents c4408812 6594f862
import math
import torch.nn as nn
from .resnet import ResNet
from .resnet import Bottleneck as _Bottleneck
class Bottleneck(_Bottleneck):

    def __init__(self, *args, groups=1, base_width=4, **kwargs):
        """Bottleneck block for ResNeXt.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer,
        if it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__(*args, **kwargs)

        if groups == 1:
            width = self.planes
        else:
            width = math.floor(self.planes * (base_width / 64)) * groups

        self.conv1 = nn.Conv2d(
            self.inplanes,
            width,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(
            width,
            width,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=self.dilation,
            dilation=self.dilation,
            groups=groups,
            bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(
            width, self.planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.planes * self.expansion)
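A quick sanity check of the width formula above (illustrative note, not part of the diff): for the common ResNeXt 32x4d setting in the first stage, planes=64, groups=32 and base_width=4, so conv2 becomes a 3x3 grouped convolution over 128 channels.

# Illustrative only: reproduces the width computation for a 32x4d block.
import math

planes, groups, base_width = 64, 32, 4  # first-stage 32x4d setting
width = math.floor(planes * (base_width / 64)) * groups
print(width)  # 128 channels, split into 32 groups of 4 in conv2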
def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   groups=1,
                   base_width=4,
                   style='pytorch',
                   with_cp=False):
    downsample = None
    if stride != 1 or inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            nn.Conv2d(
                inplanes,
                planes * block.expansion,
                kernel_size=1,
                stride=stride,
                bias=False),
            nn.BatchNorm2d(planes * block.expansion),
        )

    layers = []
    layers.append(
        block(
            inplanes,
            planes,
            stride=stride,
            dilation=dilation,
            downsample=downsample,
            groups=groups,
            base_width=base_width,
            style=style,
            with_cp=with_cp))
    inplanes = planes * block.expansion
    for i in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                stride=1,
                dilation=dilation,
                groups=groups,
                base_width=base_width,
                style=style,
                with_cp=with_cp))

    return nn.Sequential(*layers)
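A minimal usage sketch of make_res_layer (my addition; it assumes this new file is importable as mmdet.models.backbones.resnext, its location in the repo): build the first ResNeXt 32x4d stage and check the output channels.

# Hedged sketch: the import path is an assumption, not taken from this diff.
import torch
from mmdet.models.backbones.resnext import Bottleneck, make_res_layer

layer = make_res_layer(
    Bottleneck, inplanes=64, planes=64, blocks=3, groups=32, base_width=4)
out = layer(torch.randn(1, 64, 56, 56))
print(out.shape)  # torch.Size([1, 256, 56, 56]); block expansion is 4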
class ResNeXt(ResNet):
    """ResNeXt backbone.

    Args:
        depth (int): Depth of resnext, from {50, 101, 152}.
        num_stages (int): Resnext stages, normally 4.
        groups (int): Number of groups of resnext.
        base_width (int): Base width of resnext.
        strides (Sequence[int]): Strides of the first block of each stage.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters.
        bn_eval (bool): Whether to set BN layers to eval mode, namely, freeze
            running stats (mean and var).
        bn_frozen (bool): Whether to freeze weight and bias of BN layers.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
    """

    arch_settings = {
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self, groups=1, base_width=4, **kwargs):
        super(ResNeXt, self).__init__(**kwargs)
        self.groups = groups
        self.base_width = base_width

        self.inplanes = 64
        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            stride = self.strides[i]
            dilation = self.dilations[i]
            planes = 64 * 2**i
            res_layer = make_res_layer(
                self.block,
                self.inplanes,
                planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                groups=self.groups,
                base_width=self.base_width,
                style=self.style,
                with_cp=self.with_cp)
            self.inplanes = planes * self.block.expansion
            layer_name = 'layer{}'.format(i + 1)
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)
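A hedged end-to-end sketch (mine, not from the commit): constructing a ResNeXt-101 32x4d backbone and running a dummy forward pass. The import path is again an assumption based on where the file lives in the repo.

# Hedged sketch: dummy forward through ResNeXt-101 32x4d.
import torch
from mmdet.models.backbones.resnext import ResNeXt

model = ResNeXt(depth=101, groups=32, base_width=4, out_indices=(0, 1, 2, 3))
model.eval()
with torch.no_grad():
    feats = model(torch.randn(1, 3, 224, 224))
print([f.shape for f in feats])  # per-stage channels: 256, 512, 1024, 2048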
@@ -79,11 +79,11 @@ class BBoxHead(nn.Module):
         return cls_reg_targets

     def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
-             bbox_weights):
+             bbox_weights, reduce=True):
         losses = dict()
         if cls_score is not None:
             losses['loss_cls'] = weighted_cross_entropy(
-                cls_score, labels, label_weights)
+                cls_score, labels, label_weights, reduce=reduce)
             losses['acc'] = accuracy(cls_score, labels)
         if bbox_pred is not None:
             losses['loss_reg'] = weighted_smoothl1(
...
@@ -99,10 +99,11 @@ class BaseDetector(nn.Module):
         if isinstance(dataset, str):
             class_names = get_classes(dataset)
-        elif isinstance(dataset, list):
+        elif isinstance(dataset, (list, tuple)) or dataset is None:
             class_names = dataset
         else:
-            raise TypeError('dataset must be a valid dataset name or a list'
-                            ' of class names, not {}'.format(type(dataset)))
+            raise TypeError(
+                'dataset must be a valid dataset name or a sequence'
+                ' of class names, not {}'.format(type(dataset)))

         for img, img_meta in zip(imgs, img_metas):
...
@@ -4,7 +4,7 @@ import torch.nn as nn
 from .base import BaseDetector
 from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
 from .. import builder
-from mmdet.core import (assign_and_sample, bbox2roi, bbox2result, multi_apply)
+from mmdet.core import bbox2roi, bbox2result, build_assigner, build_sampler


 class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
@@ -102,13 +102,22 @@ class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
         # assign gts and sample proposals
         if self.with_bbox or self.with_mask:
-            assign_results, sampling_results = multi_apply(
-                assign_and_sample,
-                proposal_list,
-                gt_bboxes,
-                gt_bboxes_ignore,
-                gt_labels,
-                cfg=self.train_cfg.rcnn)
+            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
+            bbox_sampler = build_sampler(
+                self.train_cfg.rcnn.sampler, context=self)
+            num_imgs = img.size(0)
+            sampling_results = []
+            for i in range(num_imgs):
+                assign_result = bbox_assigner.assign(
+                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],
+                    gt_labels[i])
+                sampling_result = bbox_sampler.sample(
+                    assign_result,
+                    proposal_list[i],
+                    gt_bboxes[i],
+                    gt_labels[i],
+                    feats=[lvl_feat[i][None] for lvl_feat in x])
+                sampling_results.append(sampling_result)

         # bbox head forward and loss
         if self.with_bbox:
...
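The refactored block above reads the assigner and sampler settings from self.train_cfg.rcnn. For context, a hedged sketch of the corresponding config fragment; the field values are typical of the Faster R-CNN configs of this period, not copied from this commit.

# Illustrative config fragment (representative values, not from this diff).
train_cfg = dict(
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))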
@@ -97,6 +97,10 @@ class FCNMaskHead(nn.Module):
     def loss(self, mask_pred, mask_targets, labels):
         loss = dict()
-        loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
+        if self.class_agnostic:
+            loss_mask = mask_cross_entropy(mask_pred, mask_targets,
+                                           torch.zeros_like(labels))
+        else:
+            loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
         loss['loss_mask'] = loss_mask
         return loss
...
@@ -14,15 +14,16 @@ from mmdet.models import build_detector, detectors
 def single_test(model, data_loader, show=False):
     model.eval()
     results = []
-    prog_bar = mmcv.ProgressBar(len(data_loader.dataset))
+    dataset = data_loader.dataset
+    prog_bar = mmcv.ProgressBar(len(dataset))
     for i, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, rescale=not show, **data)
         results.append(result)

         if show:
-            model.module.show_result(data, result,
-                                     data_loader.dataset.img_norm_cfg)
+            model.module.show_result(data, result, dataset.img_norm_cfg,
+                                     dataset.CLASSES)

         batch_size = data['img'][0].size(0)
         for _ in range(batch_size):
...
@@ -14,6 +14,8 @@ def parse_args():
     parser = argparse.ArgumentParser(description='Train a detector')
     parser.add_argument('config', help='train config file path')
     parser.add_argument('--work_dir', help='the dir to save logs and models')
+    parser.add_argument(
+        '--resume_from', help='the checkpoint file to resume from')
     parser.add_argument(
         '--validate',
         action='store_true',
@@ -43,6 +45,8 @@ def main():
     # update configs according to CLI args
     if args.work_dir is not None:
         cfg.work_dir = args.work_dir
+    if args.resume_from is not None:
+        cfg.resume_from = args.resume_from
     cfg.gpus = args.gpus
     if cfg.checkpoint_config is not None:
         # save mmdet version in checkpoints as meta data
...
@@ -67,6 +71,7 @@ def main():
     model = build_detector(
         cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
     train_dataset = get_dataset(cfg.data.train)
     train_detector(
         model,
...
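Usage note for the new flag (hypothetical paths, shown only for illustration): resuming an interrupted run would look like `python tools/train.py configs/faster_rcnn_r50_fpn_1x.py --resume_from work_dirs/faster_rcnn_r50_fpn_1x/latest.pth`; when the flag is omitted, cfg.resume_from keeps whatever the config file specifies.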
from argparse import ArgumentParser
import mmcv
import numpy as np
from mmdet import datasets
from mmdet.core import eval_map
def voc_eval(result_file, dataset, iou_thr=0.5):
    det_results = mmcv.load(result_file)
    gt_bboxes = []
    gt_labels = []
    gt_ignore = []
    for i in range(len(dataset)):
        ann = dataset.get_ann_info(i)
        bboxes = ann['bboxes']
        labels = ann['labels']
        if 'bboxes_ignore' in ann:
            ignore = np.concatenate([
                np.zeros(bboxes.shape[0], dtype=np.bool),
                np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)
            ])
            gt_ignore.append(ignore)
            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
            labels = np.concatenate([labels, ann['labels_ignore']])
        gt_bboxes.append(bboxes)
        gt_labels.append(labels)
    if not gt_ignore:
        # no ignore regions were collected, so pass None to eval_map
        gt_ignore = None
    if hasattr(dataset, 'year') and dataset.year == 2007:
        dataset_name = 'voc07'
    else:
        dataset_name = dataset.CLASSES
    eval_map(
        det_results,
        gt_bboxes,
        gt_labels,
        gt_ignore=gt_ignore,
        scale_ranges=None,
        iou_thr=iou_thr,
        dataset=dataset_name,
        print_summary=True)
def main():
    parser = ArgumentParser(description='VOC Evaluation')
    parser.add_argument('result', help='result file path')
    parser.add_argument('config', help='config file path')
    parser.add_argument(
        '--iou-thr',
        type=float,
        default=0.5,
        help='IoU threshold for evaluation')
    args = parser.parse_args()
    cfg = mmcv.Config.fromfile(args.config)
    test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
    voc_eval(args.result, test_dataset, args.iou_thr)


if __name__ == '__main__':
    main()
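Usage sketch (the file paths are placeholders, not taken from this commit): after producing a result pickle with tools/test.py, running `python tools/voc_eval.py results.pkl <voc_config.py> --iou-thr 0.5` loads the detections, collects the VOC ground truths from the test dataset, and prints per-class AP and mAP via eval_map.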