Commit 45af4242 authored by Kai Chen's avatar Kai Chen
Browse files

Merge branch 'dev' into single-stage

parents e8d16bf2 5686a375
from .version import __version__, short_version from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
from .rpn_ops import * from .anchor import * # noqa: F401, F403
from .bbox_ops import * from .bbox_ops import * # noqa: F401, F403
from .mask_ops import * from .mask_ops import * # noqa: F401, F403
from .targets import * from .targets import * # noqa: F401, F403
from .losses import * from .losses import * # noqa: F401, F403
from .eval import * from .eval import * # noqa: F401, F403
from .parallel import * from .parallel import * # noqa: F401, F403
from .post_processing import * from .post_processing import * # noqa: F401, F403
from .utils import * from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target
__all__ = ['AnchorGenerator', 'anchor_target']
from .geometry import bbox_overlaps from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps, from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
bbox_sampling, sample_positives, sample_negatives) bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
sample_bboxes)
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target from .bbox_target import bbox_target
__all__ = [ __all__ = [
'bbox_overlaps', 'random_choice', 'bbox_assign', 'bbox_overlaps', 'random_choice', 'bbox_assign',
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'sample_positives', 'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
'sample_negatives', 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
'bbox_target' 'bbox2result', 'bbox_target'
] ]
...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -78,27 +78,32 @@ def bbox_assign_wrt_overlaps(overlaps,
pos_iou_thr=0.5, pos_iou_thr=0.5,
neg_iou_thr=0.5, neg_iou_thr=0.5,
min_pos_iou=.0): min_pos_iou=.0):
"""Assign a corresponding gt bbox or background to each proposal/anchor """Assign a corresponding gt bbox or background to each proposal/anchor.
This function assign a gt bbox to every proposal, each proposals will be
This method assign a gt bbox to every proposal, each proposals will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt. negative sample, positive number is the index (1-based) of assigned gt.
The assignment is done in following steps, the order matters: The assignment is done in following steps, the order matters:
1. assign every anchor to -1 1. assign every anchor to -1
2. assign proposals whose iou with all gts < neg_iou_thr to 0 2. assign proposals whose iou with all gts < neg_iou_thr to 0
3. for each anchor, if the iou with its nearest gt >= pos_iou_thr, 3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
assign it to that bbox assign it to that bbox
4. for each gt bbox, assign its nearest proposals(may be more than one) 4. for each gt bbox, assign its nearest proposals(may be more than one)
to itself to itself
Args: Args:
overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k) overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
gt_labels(Tensor, optional): shape (k, ) shape(n, k).
pos_iou_thr(float): iou threshold for positive bboxes gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
neg_iou_thr(float or tuple): iou threshold for negative bboxes pos_iou_thr (float): IoU threshold for positive bboxes.
min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox, neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
for RPN, it is usually set as 0, for Fast R-CNN, min_pos_iou (float): Minimum IoU for a bbox to be considered as a
it is usually set as pos_iou_thr positive bbox. This argument only affects the 4th step.
Returns: Returns:
tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, ) tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
max_overlaps), shape (n, )
""" """
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1) num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default # 1. assign -1 by default
...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps, ...@@ -144,8 +149,9 @@ def bbox_assign_wrt_overlaps(overlaps,
return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
"""Balance sampling for positive bboxes/anchors """Balance sampling for positive bboxes/anchors.
1. calculate average positive num for each gt: num_per_gt 1. calculate average positive num for each gt: num_per_gt
2. sample at most num_per_gt positives for each gt 2. sample at most num_per_gt positives for each gt
3. random sampling from rest anchors if not enough fg 3. random sampling from rest anchors if not enough fg
...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True): ...@@ -186,15 +192,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
return sampled_inds return sampled_inds
def sample_negatives(assigned_gt_inds, def bbox_sampling_neg(assigned_gt_inds,
num_expected, num_expected,
max_overlaps=None, max_overlaps=None,
balance_thr=0, balance_thr=0,
hard_fraction=0.5): hard_fraction=0.5):
"""Balance sampling for negative bboxes/anchors """Balance sampling for negative bboxes/anchors.
negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
and easy(iou < balance_thr), around equal number of bg are sampled Negative samples are split into 2 set: hard (balance_thr <= iou <
from each set. neg_iou_thr) and easy(iou < balance_thr). The sampling ratio is controlled
by `hard_fraction`.
""" """
neg_inds = torch.nonzero(assigned_gt_inds == 0) neg_inds = torch.nonzero(assigned_gt_inds == 0)
if neg_inds.numel() != 0: if neg_inds.numel() != 0:
...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds, ...@@ -247,17 +254,87 @@ def bbox_sampling(assigned_gt_inds,
max_overlaps=None, max_overlaps=None,
neg_balance_thr=0, neg_balance_thr=0,
neg_hard_fraction=0.5): neg_hard_fraction=0.5):
"""Sample positive and negative bboxes given assigned results.
Args:
assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
num_expected (int): Expected total samples (pos and neg).
pos_fraction (float): Positive sample fraction.
neg_pos_ub (float): Negative/Positive upper bound.
pos_balance_sampling(bool): Whether to sample positive samples around
each gt bbox evenly.
max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
Used for negative balance sampling only.
neg_balance_thr (float, optional): IoU threshold for simple/hard
negative balance sampling.
neg_hard_fraction (float, optional): Fraction of hard negative samples
for negative balance sampling.
Returns:
tuple[Tensor]: positive bbox indices, negative bbox indices.
"""
num_expected_pos = int(num_expected * pos_fraction) num_expected_pos = int(num_expected * pos_fraction)
pos_inds = sample_positives(assigned_gt_inds, num_expected_pos, pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
pos_balance_sampling) pos_balance_sampling)
# We found that sampled indices have duplicated items occasionally.
# (may be a bug of PyTorch)
pos_inds = pos_inds.unique() pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel() num_sampled_pos = pos_inds.numel()
num_neg_max = int( num_neg_max = int(
neg_pos_ub * neg_pos_ub *
num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub) num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos) num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg, neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
max_overlaps, neg_balance_thr, max_overlaps, neg_balance_thr,
neg_hard_fraction) neg_hard_fraction)
neg_inds = neg_inds.unique() neg_inds = neg_inds.unique()
return pos_inds, neg_inds return pos_inds, neg_inds
def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Sample positive and negative bboxes.

    This is a simple implementation of bbox sampling given candidates and
    ground truth bboxes, which includes 3 steps.

    1. Assign gt to each bbox.
    2. Add gt bboxes to the sampling pool (optional).
    3. Perform positive and negative sampling.

    Args:
        bboxes (Tensor): Boxes to be sampled from.
        gt_bboxes (Tensor): Ground truth bboxes.
        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
            `crowd` bboxes are considered as ignored.
        gt_labels (Tensor): Class labels of ground truth bboxes.
        cfg (dict): Sampling configs.

    Returns:
        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
            pos_gt_bboxes, pos_gt_labels
    """
    # keep only the 4 coordinate columns; proposals may carry extra columns
    # (e.g. a score) — presumably (x1, y1, x2, y2), TODO confirm with caller
    bboxes = bboxes[:, :4]
    assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
                    cfg.crowd_thr)
    if cfg.add_gt_as_proposals:
        # prepend gt bboxes so they are part of the sampling pool
        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
        # each gt is assigned to itself; assigned indices are 1-based
        # (0 means negative, -1 means ignore)
        gt_assign_self = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
        assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
        assigned_labels = torch.cat([gt_labels, assigned_labels])
        # NOTE(review): `max_overlaps` is NOT extended here, so its indices
        # are shifted by len(gt_labels) relative to `assigned_gt_inds` when
        # negative balance sampling uses it inside bbox_sampling — verify.
    pos_inds, neg_inds = bbox_sampling(
        assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
        cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
    pos_bboxes = bboxes[pos_inds]
    neg_bboxes = bboxes[neg_inds]
    # convert 1-based assigned gt indices to 0-based for indexing gt_bboxes
    pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
    pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
    pos_gt_labels = assigned_labels[pos_inds]
    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
            pos_gt_labels)
import torch import torch
import numpy as np import numpy as np
import mmcv
from .segms import polys_to_mask_wrt_box
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
def mask_target(pos_proposals_list,
pos_assigned_gt_inds_list,
gt_polys_list,
img_meta,
cfg): cfg):
cfg_list = [cfg for _ in range(len(pos_proposals_list))] cfg_list = [cfg for _ in range(len(pos_proposals_list))]
mask_targets = map(mask_target_single, pos_proposals_list, mask_targets = map(mask_target_single, pos_proposals_list,
pos_assigned_gt_inds_list, gt_polys_list, img_meta, pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
cfg_list) mask_targets = torch.cat(list(mask_targets))
mask_targets = torch.cat(tuple(mask_targets), dim=0)
return mask_targets return mask_targets
def mask_target_single(pos_proposals, def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
pos_assigned_gt_inds,
gt_polys,
img_meta,
cfg):
mask_size = cfg.mask_size mask_size = cfg.mask_size
num_pos = pos_proposals.size(0) num_pos = pos_proposals.size(0)
mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size)) mask_targets = []
if num_pos > 0: if num_pos > 0:
pos_proposals = pos_proposals.cpu().numpy() proposals_np = pos_proposals.cpu().numpy()
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
scale_factor = img_meta['scale_factor']
for i in range(num_pos): for i in range(num_pos):
bbox = pos_proposals[i, :] / scale_factor gt_mask = gt_masks[pos_assigned_gt_inds[i]]
polys = gt_polys[pos_assigned_gt_inds[i]] bbox = proposals_np[i, :].astype(np.int32)
mask = polys_to_mask_wrt_box(polys, bbox, mask_size) x1, y1, x2, y2 = bbox
mask = np.array(mask > 0, dtype=np.float32) w = np.maximum(x2 - x1 + 1, 1)
mask_targets[i, ...] = torch.from_numpy(mask).to( h = np.maximum(y2 - y1 + 1, 1)
mask_targets.device) # mask is uint8 both before and after resizing
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
(mask_size, mask_size))
mask_targets.append(target)
mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
pos_proposals.device)
else:
mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
return mask_targets return mask_targets
# flake8: noqa
# This file is copied from Detectron. # This file is copied from Detectron.
# Copyright (c) 2017-present, Facebook, Inc. # Copyright (c) 2017-present, Facebook, Inc.
......
from .anchor_generator import *
from .anchor_target import *
from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook, from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook,
DistSamplerSeedHook) DistSamplerSeedHook)
from .hooks import EmptyCacheHook
from .misc import tensor2imgs, unmap, multi_apply from .misc import tensor2imgs, unmap, multi_apply
__all__ = [ __all__ = [
'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook', 'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook',
'EmptyCacheHook', 'tensor2imgs', 'unmap', 'multi_apply' 'tensor2imgs', 'unmap', 'multi_apply'
] ]
...@@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs): ...@@ -38,7 +38,8 @@ def _init_dist_slurm(backend, **kwargs):
raise NotImplementedError raise NotImplementedError
# modified from https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9 # modified from
# https://github.com/NVIDIA/apex/blob/master/apex/parallel/distributed.py#L9
def all_reduce_coalesced(tensors): def all_reduce_coalesced(tensors):
buckets = OrderedDict() buckets = OrderedDict()
for tensor in tensors: for tensor in tensors:
......
import torch
from mmcv.runner import Hook
class EmptyCacheHook(Hook):
    """Hook that releases the CUDA caching allocator's unused memory.

    ``torch.cuda.empty_cache()`` returns cached blocks to the driver so
    other processes can use them; it does not free tensors that are still
    referenced. The hook runs both before and after every epoch.
    """

    def before_epoch(self, runner):
        # `runner` is unused; the hook only clears the CUDA cache.
        torch.cuda.empty_cache()

    def after_epoch(self, runner):
        torch.cuda.empty_cache()
from .coco import CocoDataset from .coco import CocoDataset
from .loader import (collate, GroupSampler, DistributedGroupSampler,
build_dataloader)
from .utils import DataContainer, to_tensor, random_scale, show_ann
__all__ = ['CocoDataset'] __all__ = [
'CocoDataset', 'collate', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'DataContainer', 'to_tensor', 'random_scale',
'show_ann'
]
...@@ -5,71 +5,12 @@ import numpy as np ...@@ -5,71 +5,12 @@ import numpy as np
from pycocotools.coco import COCO from pycocotools.coco import COCO
from torch.utils.data import Dataset from torch.utils.data import Dataset
from .transforms import (ImageTransform, BboxTransform, PolyMaskTransform, from .transforms import (ImageTransform, BboxTransform, MaskTransform,
Numpy2Tensor) Numpy2Tensor)
from .utils import to_tensor, show_ann, random_scale from .utils import to_tensor, show_ann, random_scale
from .utils import DataContainer as DC from .utils import DataContainer as DC
def parse_ann_info(ann_info, cat2label, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image (COCO style).
        cat2label (dict): The mapping from category ids to labels.
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict with keys ``bboxes``, ``labels``, ``bboxes_ignore``
            and, if `with_mask` is True, ``mask_polys`` and ``poly_lens``.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # each mask consists of one or several polys, each poly is a list of float
    if with_mask:
        gt_mask_polys = []
        gt_poly_lens = []
    # plain iteration: the enumerate index was never used
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # skip degenerate boxes (empty area or less than 1 px wide/tall)
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        # COCO bboxes are [x, y, w, h]; convert to inclusive [x1, y1, x2, y2]
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            # crowd boxes are kept separately and treated as ignore regions
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(cat2label[ann['category_id']])
            if with_mask:
                # Note polys are not resized
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    # use a fresh name instead of shadowing the loop variable `ann`
    parsed = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        parsed['mask_polys'] = gt_mask_polys
        parsed['poly_lens'] = gt_poly_lens
    return parsed
class CocoDataset(Dataset): class CocoDataset(Dataset):
def __init__(self, def __init__(self,
...@@ -138,7 +79,7 @@ class CocoDataset(Dataset): ...@@ -138,7 +79,7 @@ class CocoDataset(Dataset):
self.img_transform = ImageTransform( self.img_transform = ImageTransform(
size_divisor=self.size_divisor, **self.img_norm_cfg) size_divisor=self.size_divisor, **self.img_norm_cfg)
self.bbox_transform = BboxTransform() self.bbox_transform = BboxTransform()
self.mask_transform = PolyMaskTransform() self.mask_transform = MaskTransform()
self.numpy2tensor = Numpy2Tensor() self.numpy2tensor = Numpy2Tensor()
def __len__(self): def __len__(self):
...@@ -162,6 +103,70 @@ class CocoDataset(Dataset): ...@@ -162,6 +103,70 @@ class CocoDataset(Dataset):
ann_info = self.coco.loadAnns(ann_ids) ann_info = self.coco.loadAnns(ann_ids)
return ann_info return ann_info
def _parse_ann_info(self, ann_info, with_mask=True):
    """Parse bbox and mask annotation.

    Args:
        ann_info (list[dict]): Annotation info of an image (COCO style).
        with_mask (bool): Whether to parse mask annotations.

    Returns:
        dict: A dict containing the following keys: bboxes, bboxes_ignore,
            labels and, if `with_mask` is True, masks, mask_polys, poly_lens.
    """
    gt_bboxes = []
    gt_labels = []
    gt_bboxes_ignore = []
    # Two formats are provided.
    # 1. mask: a binary map of the same size of the image.
    # 2. polys: each mask consists of one or several polys, each poly is a
    #    list of float.
    if with_mask:
        gt_masks = []
        gt_mask_polys = []
        gt_poly_lens = []
    # plain iteration: the enumerate index was never used
    for ann in ann_info:
        if ann.get('ignore', False):
            continue
        x1, y1, w, h = ann['bbox']
        # skip degenerate boxes (empty area or less than 1 px wide/tall)
        if ann['area'] <= 0 or w < 1 or h < 1:
            continue
        # COCO bboxes are [x, y, w, h]; convert to inclusive [x1, y1, x2, y2]
        bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
        if ann['iscrowd']:
            # crowd boxes are kept separately and treated as ignore regions
            gt_bboxes_ignore.append(bbox)
        else:
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann['category_id']])
            if with_mask:
                gt_masks.append(self.coco.annToMask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
    if gt_bboxes:
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
        gt_labels = np.array(gt_labels, dtype=np.int64)
    else:
        gt_bboxes = np.zeros((0, 4), dtype=np.float32)
        gt_labels = np.array([], dtype=np.int64)
    if gt_bboxes_ignore:
        gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
    else:
        gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
    # use a fresh name instead of shadowing the loop variable `ann`
    parsed = dict(
        bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
    if with_mask:
        parsed['masks'] = gt_masks
        # poly format is not used in the current implementation
        parsed['mask_polys'] = gt_mask_polys
        parsed['poly_lens'] = gt_poly_lens
    return parsed
def _set_group_flag(self): def _set_group_flag(self):
"""Set flag according to image aspect ratio. """Set flag according to image aspect ratio.
...@@ -200,7 +205,7 @@ class CocoDataset(Dataset): ...@@ -200,7 +205,7 @@ class CocoDataset(Dataset):
idx = self._rand_another(idx) idx = self._rand_another(idx)
continue continue
ann = parse_ann_info(ann_info, self.cat2label, self.with_mask) ann = self._parse_ann_info(ann_info, self.with_mask)
gt_bboxes = ann['bboxes'] gt_bboxes = ann['bboxes']
gt_labels = ann['labels'] gt_labels = ann['labels']
gt_bboxes_ignore = ann['bboxes_ignore'] gt_bboxes_ignore = ann['bboxes_ignore']
...@@ -223,10 +228,8 @@ class CocoDataset(Dataset): ...@@ -223,10 +228,8 @@ class CocoDataset(Dataset):
scale_factor, flip) scale_factor, flip)
if self.with_mask: if self.with_mask:
gt_mask_polys, gt_poly_lens, num_polys_per_mask = \ gt_masks = self.mask_transform(ann['masks'], pad_shape,
self.mask_transform( scale_factor, flip)
ann['mask_polys'], ann['poly_lens'],
img_info['height'], img_info['width'], flip)
ori_shape = (img_info['height'], img_info['width'], 3) ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict( img_meta = dict(
...@@ -247,10 +250,7 @@ class CocoDataset(Dataset): ...@@ -247,10 +250,7 @@ class CocoDataset(Dataset):
if self.with_crowd: if self.with_crowd:
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore)) data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask: if self.with_mask:
data['gt_masks'] = dict( data['gt_masks'] = DC(gt_masks, cpu_only=True)
polys=DC(gt_mask_polys, cpu_only=True),
poly_lens=DC(gt_poly_lens, cpu_only=True),
polys_per_mask=DC(num_polys_per_mask, cpu_only=True))
return data return data
def prepare_test_img(self, idx): def prepare_test_img(self, idx):
......
...@@ -10,7 +10,8 @@ __all__ = [ ...@@ -10,7 +10,8 @@ __all__ = [
class ImageTransform(object): class ImageTransform(object):
"""Preprocess an image """Preprocess an image.
1. rescale the image to expected size 1. rescale the image to expected size
2. normalize the image 2. normalize the image
3. flip the image (if needed) 3. flip the image (if needed)
...@@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape): ...@@ -59,7 +60,8 @@ def bbox_flip(bboxes, img_shape):
class BboxTransform(object): class BboxTransform(object):
"""Preprocess gt bboxes """Preprocess gt bboxes.
1. rescale bboxes according to image size 1. rescale bboxes according to image size
2. flip bboxes (if needed) 2. flip bboxes (if needed)
3. pad the first dimension to `max_num_gts` 3. pad the first dimension to `max_num_gts`
...@@ -84,17 +86,12 @@ class BboxTransform(object): ...@@ -84,17 +86,12 @@ class BboxTransform(object):
class PolyMaskTransform(object): class PolyMaskTransform(object):
"""Preprocess polygons."""
def __init__(self): def __init__(self):
pass pass
def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False): def __call__(self, gt_mask_polys, gt_poly_lens, img_h, img_w, flip=False):
"""
Args:
gt_mask_polys(list): a list of masks, each mask is a list of polys,
each poly is a list of numbers
gt_poly_lens(list): a list of int, indicating the size of each poly
"""
if flip: if flip:
gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w) gt_mask_polys = segms.flip_segms(gt_mask_polys, img_h, img_w)
num_polys_per_mask = np.array( num_polys_per_mask = np.array(
...@@ -108,6 +105,28 @@ class PolyMaskTransform(object): ...@@ -108,6 +105,28 @@ class PolyMaskTransform(object):
return gt_mask_polys, gt_poly_lens, num_polys_per_mask return gt_mask_polys, gt_poly_lens, num_polys_per_mask
class MaskTransform(object):
    """Preprocess instance masks.

    1. rescale each mask to the expected scale (nearest-neighbor)
    2. flip the masks horizontally (if needed)
    3. pad the masks to ``pad_shape`` and stack into one array
    """

    def __call__(self, masks, pad_shape, scale_factor, flip=False):
        rescaled = []
        for mask in masks:
            rescaled.append(
                mmcv.imrescale(mask, scale_factor, interpolation='nearest'))
        if flip:
            # horizontal flip along the width axis
            rescaled = [m[:, ::-1] for m in rescaled]
        stacked = np.stack(
            [mmcv.impad(m, pad_shape[:2], pad_val=0) for m in rescaled],
            axis=0)
        return stacked
class Numpy2Tensor(object): class Numpy2Tensor(object):
def __init__(self): def __init__(self):
......
from .data_container import DataContainer from .data_container import DataContainer
from .misc import * from .misc import to_tensor, random_scale, show_ann
__all__ = ['DataContainer', 'to_tensor', 'random_scale', 'show_ann']
from .detectors import * from .detectors import BaseDetector, RPN, FasterRCNN, MaskRCNN
from .builder import * from .builder import (build_neck, build_rpn_head, build_roi_extractor,
build_bbox_head, build_mask_head, build_detector)
__all__ = [
'BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN', 'build_backbone',
'build_neck', 'build_rpn_head', 'build_roi_extractor', 'build_bbox_head',
'build_mask_head', 'build_detector'
]
from .resnet import resnet from .resnet import resnet
__all__ = ['resnet']
...@@ -43,17 +43,21 @@ class ConvFCRoIHead(BBoxHead): ...@@ -43,17 +43,21 @@ class ConvFCRoIHead(BBoxHead):
self.fc_out_channels = fc_out_channels self.fc_out_channels = fc_out_channels
# add shared convs and fcs # add shared convs and fcs
self.shared_convs, self.shared_fcs, last_layer_dim = self._add_conv_fc_branch( self.shared_convs, self.shared_fcs, last_layer_dim = \
self.num_shared_convs, self.num_shared_fcs, self.in_channels, True) self._add_conv_fc_branch(
self.num_shared_convs, self.num_shared_fcs, self.in_channels,
True)
self.shared_out_channels = last_layer_dim self.shared_out_channels = last_layer_dim
# add cls specific branch # add cls specific branch
self.cls_convs, self.cls_fcs, self.cls_last_dim = self._add_conv_fc_branch( self.cls_convs, self.cls_fcs, self.cls_last_dim = \
self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels) self._add_conv_fc_branch(
self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)
# add reg specific branch # add reg specific branch
self.reg_convs, self.reg_fcs, self.reg_last_dim = self._add_conv_fc_branch( self.reg_convs, self.reg_fcs, self.reg_last_dim = \
self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels) self._add_conv_fc_branch(
self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)
if self.num_shared_fcs == 0 and not self.with_avg_pool: if self.num_shared_fcs == 0 and not self.with_avg_pool:
if self.num_cls_fcs == 0: if self.num_cls_fcs == 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment