"examples/vscode:/vscode.git/clone" did not exist on "37aa99c51081c1cfddd9c91dcc1ca1c79196d55a"
Unverified commit 7d343fd2, authored by Kai Chen, committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
parents 0e0b9246 630687f4
.gitignore:
@@ -104,4 +104,6 @@ venv.bak/
 .mypy_cache/
 # cython generated cpp
-mmdet/ops/nms/*.cpp
\ No newline at end of file
+mmdet/ops/nms/*.cpp
+mmdet/version.py
+data
TODO.md (new file):
### MMCV
- [ ] Implement the 'get' attribute of 'Config'
- [ ] Config bug: None values are converted to '{}' by addict
- [ ] The default logger should only log on GPU 0
- [ ] Unit tests: mmcv and mmcv.torchpack
### MMDetection
#### Basic
- [ ] Implement a training function without distributed support
- [ ] Verify nccl/nccl2/gloo
- [ ] Replace the ugly pattern of stuffing params into 'args' to pass them through the global flow
- [ ] Replace 'print' with 'logger'
#### Testing
- [ ] Implement distributed testing
- [ ] Implement single-GPU testing
#### Refactor
- [ ] Reconsider parameter names
- [ ] Refactor functions in 'core'
- [ ] Merge single-scale test & aug test into one function, and remove similar redundancy elsewhere
#### New features
- [ ] Plug loss params into Config
- [ ] Multi-head communication
Faster R-CNN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='FasterRCNN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -40,28 +39,23 @@ model = dict(
         target_means=[0., 0., 0., 0.],
         target_stds=[0.1, 0.1, 0.2, 0.2],
         reg_class_agnostic=False))
-meta_params = dict(
-    rpn_train_cfg = dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
         debug=False),
-    rpn_test_cfg = dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg = dict(
+    rcnn=dict(
         pos_iou_thr=0.5,
         neg_iou_thr=0.5,
         crowd_thr=1.1,
@@ -71,55 +65,84 @@ meta_params = dict(
         pos_balance_sampling=False,
         neg_pos_ub=512,
         neg_balance_thr=0,
-        min_pos_iou=1.1,
         pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)
-)
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53],
-    std=[58.395, 57.12, 57.375],
-    to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
         ann_file=data_root + 'annotations/instances_train2017.json',
         img_prefix=data_root + 'train2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
         size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
-lr_policy = dict(
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_frcnn_1x'
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
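These configs are plain Python modules. A minimal sketch of how one is typically loaded and inspected, assuming mmcv's Config utility and an illustrative file path:

    from mmcv import Config

    cfg = Config.fromfile('faster_rcnn_r50_fpn_1x.py')  # path is illustrative
    print(cfg.model.backbone.depth)  # 50
    print(cfg.data.imgs_per_gpu)     # 2
    print(cfg.optimizer.lr)          # 0.02

Attribute-style access like cfg.model.backbone.depth is what the addict-related TODO item above refers to.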
Mask R-CNN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='MaskRCNN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -40,8 +39,8 @@ model = dict(
         target_means=[0., 0., 0., 0.],
         target_stds=[0.1, 0.1, 0.2, 0.2],
         reg_class_agnostic=False),
-    mask_block=dict(
-        type='SingleLevelRoI',
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -51,28 +50,23 @@ model = dict(
         in_channels=256,
         conv_out_channels=256,
         num_classes=81))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
         debug=False),
-    rpn_test_cfg=dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg=dict(
+    rcnn=dict(
         mask_size=28,
         pos_iou_thr=0.5,
         neg_iou_thr=0.5,
@@ -83,54 +77,85 @@ meta_params = dict(
         pos_balance_sampling=False,
         neg_pos_ub=512,
         neg_balance_thr=0,
-        min_pos_iou=1.1,
         pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg=dict(
-        score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    with_mask=True,
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
-lr_policy = dict(
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_mask_rcnn_1x'
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
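The optimizer_config introduced here nests the clipping parameters under grad_clip instead of a grad_clip=True flag. A hedged sketch of how such a dict plausibly maps onto PyTorch's clipping utility inside the training hook (the hook wiring itself is an assumption):

    import torch
    from torch import nn

    model = nn.Linear(4, 2)
    model(torch.randn(8, 4)).sum().backward()
    grad_clip = dict(max_norm=35, norm_type=2)   # values from the config above
    nn.utils.clip_grad_norm_(model.parameters(), **grad_clip)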
RPN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='RPN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,28 +18,30 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
-        debug=False),
-    rpn_test_cfg=dict(
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
         nms_across_levels=False,
         nms_pre=2000,
         nms_post=2000,
@@ -47,49 +49,70 @@ meta_params = dict(
         nms_thr=0.7,
         min_bbox_size=0))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    test_mode=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
-# learning policy
-lr_policy = dict(
+# runner configs
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_1x'
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/rpn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
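warmup_ratio=1.0 / 3 with warmup='linear' means training starts at a third of the base LR and ramps up linearly over warmup_iters. A small sketch of the arithmetic (this mirrors the common linear-warmup convention; the exact in-library expression is an assumption):

    def warmup_lr(base_lr, cur_iter, warmup_iters=500, warmup_ratio=1.0 / 3):
        k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
        return base_lr * (1 - k)

    warmup_lr(0.02, 0)    # ~0.00667 (base_lr / 3) at the first iteration
    warmup_lr(0.02, 500)  # 0.02 once warmup finishes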
mmdet/__init__.py:
-from .version import __version__
+from .version import __version__, short_version
+
+__all__ = ['__version__', 'short_version']
mmdet/core/__init__.py:
-from .train_engine import *
-from .test_engine import *
-from .rpn_ops import *
-from .bbox_ops import *
-from .mask_ops import *
-from .losses import *
-from .eval import *
-from .post_processing import *
-from .utils import *
+from .anchor import *  # noqa: F401, F403
+from .bbox import *  # noqa: F401, F403
+from .mask import *  # noqa: F401, F403
+from .loss import *  # noqa: F401, F403
+from .evaluation import *  # noqa: F401, F403
+from .post_processing import *  # noqa: F401, F403
+from .utils import *  # noqa: F401, F403
mmdet/core/anchor/__init__.py (new file):
+from .anchor_generator import AnchorGenerator
+from .anchor_target import anchor_target
+
+__all__ = ['AnchorGenerator', 'anchor_target']

mmdet/core/anchor/anchor_generator.py:
@@ -50,15 +50,18 @@ class AnchorGenerator(object):
         return yy, xx

     def grid_anchors(self, featmap_size, stride=16, device='cuda'):
+        base_anchors = self.base_anchors.to(device)
+
         feat_h, feat_w = featmap_size
         shift_x = torch.arange(0, feat_w, device=device) * stride
         shift_y = torch.arange(0, feat_h, device=device) * stride
         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
         shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
+        shifts = shifts.type_as(base_anchors)
         # first feat_w elements correspond to the first row of shifts
         # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
         # shifted anchors (K, A, 4), reshape to (K*A, 4)
-        base_anchors = self.base_anchors.to(device)
         all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
         all_anchors = all_anchors.view(-1, 4)
         # first A rows correspond to A anchors of (0, 0) in feature map,
...
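The comments above describe a broadcasting trick: base anchors of shape (1, A, 4) plus shifts of shape (K, 1, 4) broadcast to (K, A, 4), then flatten to (K*A, 4). A self-contained numeric check with toy values (the meshgrid is inlined here since the class uses a private helper):

    import torch

    base_anchors = torch.tensor([[-8., -8., 8., 8.],
                                 [-16., -16., 16., 16.]])  # A = 2 anchors
    stride, feat_h, feat_w = 16, 2, 3                      # K = 6 locations
    shift_x = torch.arange(0, feat_w).float() * stride
    shift_y = torch.arange(0, feat_h).float() * stride
    shift_xx = shift_x.repeat(feat_h)                             # row-major
    shift_yy = shift_y.view(-1, 1).repeat(1, feat_w).view(-1)
    shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
    all_anchors = (base_anchors[None, :, :] + shifts[:, None, :]).view(-1, 4)
    assert all_anchors.shape == (12, 4)  # K * A = 6 * 2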
mmdet/core/anchor/anchor_target.py:
 import torch
-import numpy as np

-from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling)
+from ..bbox import bbox_assign, bbox2delta, bbox_sampling
+from ..utils import multi_apply


-def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list,
-                  img_shapes, target_means, target_stds, cfg):
-    """Compute anchor regression and classification targets
+def anchor_target(anchor_list, valid_flag_list, gt_bboxes_list, img_metas,
+                  target_means, target_stds, cfg):
+    """Compute regression and classification targets for anchors.

     Args:
-        anchor_list(list): anchors of each feature map level
-        featuremap_sizes(list): feature map sizes
-        gt_bboxes_list(list): ground truth bbox of images in a mini-batch
-        img_shapes(list): shape of each image in a mini-batch
-        cfg(dict): configs
+        anchor_list (list[list]): Multi level anchors of each image.
+        valid_flag_list (list[list]): Multi level valid flags of each image.
+        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+        img_metas (list[dict]): Meta info of each image.
+        target_means (Iterable): Mean value of regression targets.
+        target_stds (Iterable): Std value of regression targets.
+        cfg (dict): RPN train configs.

     Returns:
         tuple
     """
-    if len(featmap_sizes) == len(anchor_list):
-        all_anchors = torch.cat(anchor_list, 0)
-        anchor_nums = [anchors.size(0) for anchors in anchor_list]
-        use_isomerism_anchors = False
-    elif len(img_shapes) == len(anchor_list):
-        # using different anchors for different images
-        all_anchors_list = [
-            torch.cat(anchor_list[img_id], 0)
-            for img_id in range(len(img_shapes))
-        ]
-        anchor_nums = [anchors.size(0) for anchors in anchor_list[0]]
-        use_isomerism_anchors = True
-    else:
-        raise ValueError('length of anchor_list should be equal to number of '
-                         'feature lvls or number of images in a batch')
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    num_total_sampled = 0
-    for img_id in range(len(img_shapes)):
-        if isinstance(valid_flag_list[img_id], list):
-            valid_flags = torch.cat(valid_flag_list[img_id], 0)
-        else:
-            valid_flags = valid_flag_list[img_id]
-        if use_isomerism_anchors:
-            all_anchors = all_anchors_list[img_id]
-        inside_flags = anchor_inside_flags(all_anchors, valid_flags,
-                                           img_shapes[img_id][:2],
-                                           cfg.allowed_border)
-        if not inside_flags.any():
-            return None
-        gt_bboxes = gt_bboxes_list[img_id]
-        anchor_targets = anchor_target_single(all_anchors, inside_flags,
-                                              gt_bboxes, target_means,
-                                              target_stds, cfg)
-        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
-         neg_inds) = anchor_targets
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
-        num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1)
-    all_labels = torch.stack(all_labels, 0)
-    all_label_weights = torch.stack(all_label_weights, 0)
-    all_bbox_targets = torch.stack(all_bbox_targets, 0)
-    all_bbox_weights = torch.stack(all_bbox_weights, 0)
-    # split into different feature levels
-    labels_list = []
-    label_weights_list = []
-    bbox_targets_list = []
-    bbox_weights_list = []
-    start = 0
-    for anchor_num in anchor_nums:
-        end = start + anchor_num
-        labels_list.append(all_labels[:, start:end].squeeze(0))
-        label_weights_list.append(all_label_weights[:, start:end].squeeze(0))
-        bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0))
-        bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0))
-        start = end
-    return (labels_list, label_weights_list, bbox_targets_list,
-            bbox_weights_list, num_total_sampled)
+    num_imgs = len(img_metas)
+    assert len(anchor_list) == len(valid_flag_list) == num_imgs
+
+    # anchor number of multi levels
+    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
+    # concat all level anchors and flags to a single tensor
+    for i in range(num_imgs):
+        assert len(anchor_list[i]) == len(valid_flag_list[i])
+        anchor_list[i] = torch.cat(anchor_list[i])
+        valid_flag_list[i] = torch.cat(valid_flag_list[i])
+
+    # compute targets for each image
+    means_replicas = [target_means for _ in range(num_imgs)]
+    stds_replicas = [target_stds for _ in range(num_imgs)]
+    cfg_replicas = [cfg for _ in range(num_imgs)]
+    (all_labels, all_label_weights, all_bbox_targets,
+     all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
+         anchor_target_single, anchor_list, valid_flag_list, gt_bboxes_list,
+         img_metas, means_replicas, stds_replicas, cfg_replicas)
+    # no valid anchors
+    if any([labels is None for labels in all_labels]):
+        return None
+    # sampled anchors of all images
+    num_total_samples = sum([
+        max(pos_inds.numel() + neg_inds.numel(), 1)
+        for pos_inds, neg_inds in zip(pos_inds_list, neg_inds_list)
+    ])
+    # split targets to a list w.r.t. multiple levels
+    labels_list = images_to_levels(all_labels, num_level_anchors)
+    label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
+    bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
+    bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
+    return (labels_list, label_weights_list, bbox_targets_list,
+            bbox_weights_list, num_total_samples)
+
+
+def images_to_levels(target, num_level_anchors):
+    """Convert targets by image to targets by feature level.
+
+    [target_img0, target_img1] -> [target_level0, target_level1, ...]
+    """
+    target = torch.stack(target, 0)
+    level_targets = []
     start = 0
+    for n in num_level_anchors:
+        end = start + n
+        level_targets.append(target[:, start:end].squeeze(0))
+        start = end
+    return level_targets


-def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
-                         target_stds, cfg):
-    num_total_anchors = all_anchors.size(0)
-    anchors = all_anchors[inside_flags, :]
+def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta,
+                         target_means, target_stds, cfg):
+    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
+                                       img_meta['img_shape'][:2],
+                                       cfg.allowed_border)
+    if not inside_flags.any():
+        return (None, ) * 6
+    # assign gt and sample anchors
+    anchors = flat_anchors[inside_flags, :]
     assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign(
         anchors,
         gt_bboxes,
@@ -99,14 +94,13 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     bbox_targets = torch.zeros_like(anchors)
     bbox_weights = torch.zeros_like(anchors)
     labels = torch.zeros_like(assigned_gt_inds)
-    label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float)
+    label_weights = torch.zeros_like(assigned_gt_inds, dtype=anchors.dtype)

     if len(pos_inds) > 0:
-        pos_inds = unique(pos_inds)
         pos_anchors = anchors[pos_inds, :]
         pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :]
-        pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_anchors, pos_gt_bbox, target_means,
+                                      target_stds)
         bbox_targets[pos_inds, :] = pos_bbox_targets
         bbox_weights[pos_inds, :] = 1.0
         labels[pos_inds] = 1
@@ -115,10 +109,10 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     else:
         label_weights[pos_inds] = cfg.pos_weight
     if len(neg_inds) > 0:
-        neg_inds = unique(neg_inds)
         label_weights[neg_inds] = 1.0
     # map up to original set of anchors
+    num_total_anchors = flat_anchors.size(0)
     labels = unmap(labels, num_total_anchors, inside_flags)
     label_weights = unmap(label_weights, num_total_anchors, inside_flags)
     bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
@@ -127,24 +121,20 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
             neg_inds)


-def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0):
-    img_h, img_w = img_shape.float()
+def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
+                        allowed_border=0):
+    img_h, img_w = img_shape[:2]
     if allowed_border >= 0:
         inside_flags = valid_flags & \
-            (all_anchors[:, 0] >= -allowed_border) & \
-            (all_anchors[:, 1] >= -allowed_border) & \
-            (all_anchors[:, 2] < img_w + allowed_border) & \
-            (all_anchors[:, 3] < img_h + allowed_border)
+            (flat_anchors[:, 0] >= -allowed_border) & \
+            (flat_anchors[:, 1] >= -allowed_border) & \
+            (flat_anchors[:, 2] < img_w + allowed_border) & \
+            (flat_anchors[:, 3] < img_h + allowed_border)
     else:
         inside_flags = valid_flags
     return inside_flags


-def unique(tensor):
-    if tensor.is_cuda:
-        u_tensor = np.unique(tensor.cpu().numpy())
-        return tensor.new_tensor(u_tensor)
-    else:
-        return torch.unique(tensor)
-
-
 def unmap(data, count, inds, fill=0):
     """ Unmap a subset of item (data) back to the original set of items (of
...
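images_to_levels inverts the per-image concatenation: per-image flat target vectors are stacked along a batch dimension and re-split by level, so each FPN level can compute its loss. A toy illustration using the same slicing logic:

    import torch

    num_level_anchors = [4, 2]  # two levels with 4 and 2 anchors per image
    all_labels = [torch.tensor([1, 0, 0, 1, 0, 1]),   # image 0
                  torch.tensor([0, 0, 1, 0, 1, 1])]   # image 1
    target = torch.stack(all_labels, 0)               # shape (num_imgs, 6)
    level_targets, start = [], 0
    for n in num_level_anchors:
        level_targets.append(target[:, start:start + n])
        start += n
    # level_targets[0].shape == (2, 4), level_targets[1].shape == (2, 2)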
mmdet/core/bbox/__init__.py (new file):
+from .geometry import bbox_overlaps
+from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
+                       bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
+                       sample_bboxes)
+from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
+                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
+from .bbox_target import bbox_target
+
+__all__ = [
+    'bbox_overlaps', 'random_choice', 'bbox_assign',
+    'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
+    'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
+    'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
+    'bbox2result', 'bbox_target'
+]
mmdet/core/bbox/bbox_target.py:
-import mmcv
 import torch

-from .geometry import bbox_overlaps
-from .transforms import bbox_transform, bbox_transform_inv
+from .transforms import bbox2delta
+from ..utils import multi_apply


 def bbox_target(pos_proposals_list,
@@ -13,33 +12,23 @@ def bbox_target(pos_proposals_list,
                 reg_num_classes=1,
                 target_means=[.0, .0, .0, .0],
                 target_stds=[1.0, 1.0, 1.0, 1.0],
-                return_list=False):
-    img_per_gpu = len(pos_proposals_list)
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    for img_id in range(img_per_gpu):
-        pos_proposals = pos_proposals_list[img_id]
-        neg_proposals = neg_proposals_list[img_id]
-        pos_gt_bboxes = pos_gt_bboxes_list[img_id]
-        pos_gt_labels = pos_gt_labels_list[img_id]
-        debug_img = debug_imgs[img_id] if cfg.debug else None
-        labels, label_weights, bbox_targets, bbox_weights = proposal_target_single(
-            pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
-            reg_num_classes, cfg, target_means, target_stds)
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
+                concat=True):
+    labels, label_weights, bbox_targets, bbox_weights = multi_apply(
+        proposal_target_single,
+        pos_proposals_list,
+        neg_proposals_list,
+        pos_gt_bboxes_list,
+        pos_gt_labels_list,
+        cfg=cfg,
+        reg_num_classes=reg_num_classes,
+        target_means=target_means,
+        target_stds=target_stds)

-    if return_list:
-        return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights
-
-    labels = torch.cat(all_labels, 0)
-    label_weights = torch.cat(all_label_weights, 0)
-    bbox_targets = torch.cat(all_bbox_targets, 0)
-    bbox_weights = torch.cat(all_bbox_weights, 0)
+    if concat:
+        labels = torch.cat(labels, 0)
+        label_weights = torch.cat(label_weights, 0)
+        bbox_targets = torch.cat(bbox_targets, 0)
+        bbox_weights = torch.cat(bbox_weights, 0)
     return labels, label_weights, bbox_targets, bbox_weights

@@ -47,8 +36,8 @@ def proposal_target_single(pos_proposals,
                            neg_proposals,
                            pos_gt_bboxes,
                            pos_gt_labels,
-                           reg_num_classes,
                            cfg,
+                           reg_num_classes=1,
                            target_means=[.0, .0, .0, .0],
                            target_stds=[1.0, 1.0, 1.0, 1.0]):
     num_pos = pos_proposals.size(0)
@@ -62,8 +51,8 @@ def proposal_target_single(pos_proposals,
         labels[:num_pos] = pos_gt_labels
         pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
         label_weights[:num_pos] = pos_weight
-        pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_proposals, pos_gt_bboxes,
+                                      target_means, target_stds)
         bbox_targets[:num_pos, :] = pos_bbox_targets
         bbox_weights[:num_pos, :] = 1
     if num_neg > 0:
...
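Both refactored target functions lean on a multi_apply helper from ..utils to replace the hand-rolled per-image loops. A minimal sketch of what such a helper might look like, assuming it is essentially a map plus a transpose of the result tuples:

    from functools import partial

    def multi_apply(func, *args, **kwargs):
        # apply func across per-image argument lists, then turn the list of
        # per-image result tuples into a tuple of per-field lists
        pfunc = partial(func, **kwargs) if kwargs else func
        map_results = map(pfunc, *args)
        return tuple(map(list, zip(*map_results)))

    def add_and_mul(a, b, scale=1):
        return a + b, a * b * scale

    sums, prods = multi_apply(add_and_mul, [1, 2], [3, 4], scale=10)
    # sums == [4, 6], prods == [30, 80]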
mmdet/core/bbox/sampling.py:
@@ -5,6 +5,11 @@ from .geometry import bbox_overlaps


 def random_choice(gallery, num):
+    """Random select some elements from the gallery.
+
+    It seems that Pytorch's implementation is slower than numpy so we use
+    numpy to randperm the indices.
+    """
     assert len(gallery) >= num
     if isinstance(gallery, list):
         gallery = np.array(gallery)
@@ -12,38 +17,42 @@ def random_choice(gallery, num):
     np.random.shuffle(cands)
     rand_inds = cands[:num]
     if not isinstance(gallery, np.ndarray):
-        rand_inds = torch.from_numpy(rand_inds).long()
-        if gallery.is_cuda:
-            rand_inds = rand_inds.cuda(gallery.get_device())
+        rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
     return gallery[rand_inds]


 def bbox_assign(proposals,
                 gt_bboxes,
-                gt_crowd_bboxes=None,
+                gt_bboxes_ignore=None,
                 gt_labels=None,
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.5,
                 min_pos_iou=.0,
                 crowd_thr=-1):
-    """Assign a corresponding gt bbox or background to each proposal/anchor
-    This function assign a gt bbox to every proposal, each proposals will be
-    assigned with -1, 0, or a positive number. -1 means don't care, 0 means
-    negative sample, positive number is the index (1-based) of assigned gt.
-    If gt_crowd_bboxes is not None, proposals which have iof(intersection
-    over foreground) with crowd bboxes over crowd_thr will be ignored
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    Each proposal will be assigned with `-1`, `0`, or a positive integer.
+
+    - -1: don't care
+    - 0: negative sample, no assigned gt
+    - positive integer: positive sample, index (1-based) of assigned gt
+
+    If `gt_bboxes_ignore` is specified, bboxes which have iof (intersection
+    over foreground) with `gt_bboxes_ignore` above `crowd_thr` will be
+    ignored.

     Args:
-        proposals(Tensor): proposals or RPN anchors, shape (n, 4)
-        gt_bboxes(Tensor): shape (k, 4)
-        gt_crowd_bboxes(Tensor): shape(m, 4)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a
-            positive bbox, for RPN it is usually set as 0, for Fast R-CNN
-            it is usually set as pos_iou_thr
-        crowd_thr: ignore proposals which have iof with crowd bboxes over
-            crowd_thr
+        proposals (Tensor): Proposals or RPN anchors, shape (n, 4).
+        gt_bboxes (Tensor): Ground truth bboxes, shape (k, 4).
+        gt_bboxes_ignore (Tensor, optional): shape(m, 4).
+        gt_labels (Tensor, optional): shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
+            positive bbox. For RPN, it is usually set as 0.3, for Fast R-CNN,
+            it is usually set as pos_iou_thr.
+        crowd_thr (float): IoF threshold for ignoring bboxes. Negative value
+            for not ignoring any bboxes.

     Returns:
         tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
     """
@@ -54,45 +63,50 @@ def bbox_assign(proposals,
         raise ValueError('No gt bbox or proposals')

     # ignore proposals according to crowd bboxes
-    if (crowd_thr > 0) and (gt_crowd_bboxes is
-                            not None) and (gt_crowd_bboxes.numel() > 0):
-        crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof')
+    if (crowd_thr > 0) and (gt_bboxes_ignore is
+                            not None) and (gt_bboxes_ignore.numel() > 0):
+        crowd_overlaps = bbox_overlaps(proposals, gt_bboxes_ignore, mode='iof')
         crowd_max_overlaps, _ = crowd_overlaps.max(dim=1)
         crowd_bboxes_inds = torch.nonzero(
             crowd_max_overlaps > crowd_thr).long()
         if crowd_bboxes_inds.numel() > 0:
             overlaps[crowd_bboxes_inds, :] = -1

-    return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr,
-                                    neg_iou_thr, min_pos_iou)
+    return bbox_assign_wrt_overlaps(overlaps, gt_labels, pos_iou_thr,
+                                    neg_iou_thr, min_pos_iou)


-def bbox_assign_via_overlaps(overlaps,
+def bbox_assign_wrt_overlaps(overlaps,
                              gt_labels=None,
                              pos_iou_thr=0.5,
                              neg_iou_thr=0.5,
                              min_pos_iou=.0):
-    """Assign a corresponding gt bbox or background to each proposal/anchor
-    This function assign a gt bbox to every proposal, each proposals will be
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    This method assigns a gt bbox to every proposal; each proposal will be
     assigned with -1, 0, or a positive number. -1 means don't care, 0 means
     negative sample, positive number is the index (1-based) of assigned gt.
     The assignment is done in following steps, the order matters:

     1. assign every anchor to -1
     2. assign proposals whose iou with all gts < neg_iou_thr to 0
     3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
        assign it to that bbox
     4. for each gt bbox, assign its nearest proposals (may be more than one)
        to itself

     Args:
-        overlaps(Tensor): overlaps between n proposals and k gt_bboxes,
-            shape(n, k)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a
-            positive bbox, for RPN it is usually set as 0, for Fast R-CNN
-            it is usually set as pos_iou_thr
+        overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
+            shape(n, k).
+        gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
+            positive bbox. This argument only affects the 4th step.

     Returns:
-        tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
+        tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
+            max_overlaps), shape (n, )
     """
     num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
     # 1. assign -1 by default
@@ -138,8 +152,9 @@ def bbox_assign_via_overlaps(overlaps,
     return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps


-def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
-    """Balance sampling for positive bboxes/anchors
+def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
+    """Balance sampling for positive bboxes/anchors.
+
     1. calculate average positive num for each gt: num_per_gt
     2. sample at most num_per_gt positives for each gt
     3. random sampling from rest anchors if not enough fg
@@ -180,15 +195,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
     return sampled_inds


-def sample_negatives(assigned_gt_inds,
-                     num_expected,
-                     max_overlaps=None,
-                     balance_thr=0,
-                     hard_fraction=0.5):
-    """Balance sampling for negative bboxes/anchors
-    negative samples are split into 2 sets: hard(balance_thr <= iou <
-    neg_iou_thr) and easy(iou < balance_thr), around equal number of bg are
-    sampled from each set.
+def bbox_sampling_neg(assigned_gt_inds,
+                      num_expected,
+                      max_overlaps=None,
+                      balance_thr=0,
+                      hard_fraction=0.5):
+    """Balance sampling for negative bboxes/anchors.
+
+    Negative samples are split into 2 sets: hard (balance_thr <= iou <
+    neg_iou_thr) and easy (iou < balance_thr). The sampling ratio is
+    controlled by `hard_fraction`.
     """
     neg_inds = torch.nonzero(assigned_gt_inds == 0)
     if neg_inds.numel() != 0:
@@ -241,55 +257,87 @@ def bbox_sampling(assigned_gt_inds,
                   max_overlaps=None,
                   neg_balance_thr=0,
                   neg_hard_fraction=0.5):
+    """Sample positive and negative bboxes given assigned results.
+
+    Args:
+        assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
+        num_expected (int): Expected total samples (pos and neg).
+        pos_fraction (float): Positive sample fraction.
+        neg_pos_ub (float): Negative/Positive upper bound.
+        pos_balance_sampling (bool): Whether to sample positive samples
+            around each gt bbox evenly.
+        max_overlaps (Tensor, optional): For each bbox, the max IoU of all
+            gts. Used for negative balance sampling only.
+        neg_balance_thr (float, optional): IoU threshold for simple/hard
+            negative balance sampling.
+        neg_hard_fraction (float, optional): Fraction of hard negative
+            samples for negative balance sampling.
+
+    Returns:
+        tuple[Tensor]: positive bbox indices, negative bbox indices.
+    """
     num_expected_pos = int(num_expected * pos_fraction)
-    pos_inds = sample_positives(assigned_gt_inds, num_expected_pos,
-                                pos_balance_sampling)
+    pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
+                                 pos_balance_sampling)
+    # We found that sampled indices have duplicated items occasionally.
+    # (may be a bug of PyTorch)
+    pos_inds = pos_inds.unique()
     num_sampled_pos = pos_inds.numel()
     num_neg_max = int(
         neg_pos_ub *
         num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
     num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
-    neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg,
-                                max_overlaps, neg_balance_thr,
-                                neg_hard_fraction)
+    neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
+                                 max_overlaps, neg_balance_thr,
+                                 neg_hard_fraction)
+    neg_inds = neg_inds.unique()
     return pos_inds, neg_inds


-def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list,
-                     gt_labels_list, cfg):
-    cfg_list = [cfg for _ in range(len(proposals_list))]
-    results = map(sample_proposals_single, proposals_list, gt_bboxes_list,
-                  gt_crowds_list, gt_labels_list, cfg_list)
-    # list of tuple to tuple of list
-    return tuple(map(list, zip(*results)))
-
-
-def sample_proposals_single(proposals,
-                            gt_bboxes,
-                            gt_crowds,
-                            gt_labels,
-                            cfg):
-    proposals = proposals[:, :4]
+def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
+    """Sample positive and negative bboxes.
+
+    This is a simple implementation of bbox sampling given candidates and
+    ground truth bboxes, which includes 3 steps.
+
+    1. Assign gt to each bbox.
+    2. Add gt bboxes to the sampling pool (optional).
+    3. Perform positive and negative sampling.
+
+    Args:
+        bboxes (Tensor): Boxes to be sampled from.
+        gt_bboxes (Tensor): Ground truth bboxes.
+        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
+            `crowd` bboxes are considered as ignored.
+        gt_labels (Tensor): Class labels of ground truth bboxes.
+        cfg (dict): Sampling configs.
+
+    Returns:
+        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
+            pos_gt_bboxes, pos_gt_labels
+    """
+    bboxes = bboxes[:, :4]
     assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
-        bbox_assign(
-            proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr,
-            cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr)
+        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
+                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
+                    cfg.crowd_thr)
     if cfg.add_gt_as_proposals:
-        proposals = torch.cat([gt_bboxes, proposals], dim=0)
+        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
         gt_assign_self = torch.arange(
-            1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device)
+            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
         assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
         assigned_labels = torch.cat([gt_labels, assigned_labels])

     pos_inds, neg_inds = bbox_sampling(
         assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
         cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
-    pos_proposals = proposals[pos_inds]
-    neg_proposals = proposals[neg_inds]
+
+    pos_bboxes = bboxes[pos_inds]
+    neg_bboxes = bboxes[neg_inds]
     pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
     pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
     pos_gt_labels = assigned_labels[pos_inds]
-    return (pos_inds, neg_inds, pos_proposals, neg_proposals,
-            pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels)
+    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
+            pos_gt_labels)
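Worked numbers for the sampling budget in bbox_sampling, using the RPN values from the configs above (anchor_batch_size=256, pos_fraction=0.5, neg_pos_ub=256); the number of positives actually found is an illustrative assumption:

    num_expected, pos_fraction, neg_pos_ub = 256, 0.5, 256
    num_expected_pos = int(num_expected * pos_fraction)   # at most 128 positives
    num_sampled_pos = 20                                  # suppose only 20 exist
    num_neg_max = (int(neg_pos_ub * num_sampled_pos)
                   if num_sampled_pos > 0 else int(neg_pos_ub))      # 5120
    num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)  # 236

So when positives are scarce, negatives fill the rest of the 256-sample batch, capped by the negative/positive upper bound.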
mmdet/core/bbox/transforms.py:
@@ -3,7 +3,7 @@ import numpy as np
 import torch


-def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
+def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
     assert proposals.size() == gt.size()

     proposals = proposals.float()
@@ -31,12 +31,12 @@ def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
     return deltas


-def bbox_transform_inv(rois,
-                       deltas,
-                       means=[0, 0, 0, 0],
-                       stds=[1, 1, 1, 1],
-                       max_shape=None,
-                       wh_ratio_clip=16 / 1000):
+def delta2bbox(rois,
+               deltas,
+               means=[0, 0, 0, 0],
+               stds=[1, 1, 1, 1],
+               max_shape=None,
+               wh_ratio_clip=16 / 1000):
     means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
     stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
     denorm_deltas = deltas * stds + means
@@ -69,10 +69,14 @@ def bbox_transform_inv(rois,


 def bbox_flip(bboxes, img_shape):
-    """Flip bboxes horizontally
+    """Flip bboxes horizontally.
+
     Args:
-        bboxes(Tensor): shape (..., 4*k)
-        img_shape(Tensor): image shape
+        bboxes(Tensor or ndarray): Shape (..., 4*k)
+        img_shape(tuple): Image shape.
+
+    Returns:
+        Same type as `bboxes`: Flipped bboxes.
     """
     if isinstance(bboxes, torch.Tensor):
         assert bboxes.shape[-1] % 4 == 0
@@ -84,25 +88,28 @@ def bbox_flip(bboxes, img_shape):
     return mmcv.bbox_flip(bboxes, img_shape)


-def bbox_mapping(bboxes, img_shape, flip):
+def bbox_mapping(bboxes, img_shape, scale_factor, flip):
     """Map bboxes from the original image scale to testing scale"""
-    new_bboxes = bboxes * img_shape[-1]
+    new_bboxes = bboxes * scale_factor
     if flip:
         new_bboxes = bbox_flip(new_bboxes, img_shape)
     return new_bboxes


-def bbox_mapping_back(bboxes, img_shape, flip):
+def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
     """Map bboxes from testing scale to original image scale"""
     new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
-    new_bboxes = new_bboxes / img_shape[-1]
+    new_bboxes = new_bboxes / scale_factor
     return new_bboxes


 def bbox2roi(bbox_list):
     """Convert a list of bboxes to roi format.

     Args:
-        bbox_list (Tensor): a list of bboxes corresponding to a list of images
+        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
+            of images.

     Returns:
         Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
     """
@@ -129,11 +136,13 @@ def roi2bbox(rois):


 def bbox2result(bboxes, labels, num_classes):
-    """Convert detection results to a list of numpy arrays
+    """Convert detection results to a list of numpy arrays.
+
     Args:
         bboxes (Tensor): shape (n, 5)
         labels (Tensor): shape (n, )
         num_classes (int): class number, including background class

     Returns:
         list(ndarray): bbox results of each class
     """
...
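bbox2delta/delta2bbox implement the standard R-CNN box parameterization: offsets of the gt center relative to the proposal, normalized by the proposal size, plus log size ratios. A hedged worked example (the exact +1 pixel conventions inside the real functions may differ):

    import math

    def centers(box):                      # box is (x1, y1, x2, y2)
        x1, y1, x2, y2 = box
        return (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1

    px, py, pw, ph = centers((0., 0., 100., 100.))    # proposal
    gx, gy, gw, gh = centers((10., 10., 110., 110.))  # ground truth
    dx, dy = (gx - px) / pw, (gy - py) / ph           # (0.1, 0.1)
    dw, dh = math.log(gw / pw), math.log(gh / ph)     # (0.0, 0.0)
    # delta2bbox inverts this: centers shift by dx*pw, dy*ph; sizes scale
    # by exp(dw), exp(dh); deltas are (de)normalized by target_means and
    # target_stds, and the decoded boxes are clipped to max_shape.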
mmdet/core/bbox_ops/__init__.py (removed; superseded by mmdet/core/bbox/__init__.py above):
-from .geometry import bbox_overlaps
-from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
-                       bbox_sampling, sample_positives, sample_negatives,
-                       sample_proposals)
-from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
-                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox,
-                         bbox2result)
-from .bbox_target import bbox_target
-
-__all__ = [
-    'bbox_overlaps', 'random_choice', 'bbox_assign',
-    'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
-    'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
-    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
-    'bbox_target', 'sample_proposals'
-]
mmdet/core/evaluation/__init__.py:
 from .class_names import (voc_classes, imagenet_det_classes,
                           imagenet_vid_classes, coco_classes, dataset_aliases,
                           get_classes)
+from .coco_utils import coco_eval, fast_eval_recall, results2json
+from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook,
+                         CocoDistEvalmAPHook)
 from .mean_ap import average_precision, eval_map, print_map_summary
 from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
                      plot_iou_recall)

 __all__ = [
     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
-    'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision',
+    'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
+    'fast_eval_recall', 'results2json', 'DistEvalHook',
+    'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
     'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
     'plot_num_recall', 'plot_iou_recall'
 ]
mmdet/core/evaluation/class_names.py:
@@ -95,7 +95,7 @@ def get_classes(dataset):
     if mmcv.is_str(dataset):
         if dataset in alias2name:
-            labels = eval(alias2name[dataset] + '_labels()')
+            labels = eval(alias2name[dataset] + '_classes()')
         else:
             raise ValueError('Unrecognized dataset: {}'.format(dataset))
     else:
...
mmdet/core/evaluation/coco_utils.py (new file):
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .recall import eval_recalls
def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
for res_type in result_types:
assert res_type in [
'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
]
if mmcv.is_str(coco):
coco = COCO(coco)
assert isinstance(coco, COCO)
if res_type == 'proposal_fast':
ar = fast_eval_recall(result_file, coco, max_dets)
for i, num in enumerate(max_dets):
print('AR@{}\t= {:.4f}'.format(num, ar[i]))
return
assert result_file.endswith('.json')
coco_dets = coco.loadRes(result_file)
img_ids = coco.getImgIds()
for res_type in result_types:
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(coco, coco_dets, iou_type)
cocoEval.params.imgIds = img_ids
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
def fast_eval_recall(results,
coco,
max_dets,
iou_thrs=np.arange(0.5, 0.96, 0.05)):
if mmcv.is_str(results):
assert results.endswith('.pkl')
results = mmcv.load(results)
elif not isinstance(results, list):
raise TypeError(
'results must be a list of numpy arrays or a filename, not {}'.
format(type(results)))
gt_bboxes = []
img_ids = coco.getImgIds()
for i in range(len(img_ids)):
ann_ids = coco.getAnnIds(imgIds=img_ids[i])
ann_info = coco.loadAnns(ann_ids)
if len(ann_info) == 0:
gt_bboxes.append(np.zeros((0, 4)))
continue
bboxes = []
for ann in ann_info:
if ann.get('ignore', False) or ann['iscrowd']:
continue
x1, y1, w, h = ann['bbox']
bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
bboxes = np.array(bboxes, dtype=np.float32)
if bboxes.shape[0] == 0:
bboxes = np.zeros((0, 4))
gt_bboxes.append(bboxes)
recalls = eval_recalls(
gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
ar = recalls.mean(axis=1)
return ar
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def results2json(dataset, results, out_file):
if isinstance(results[0], list):
json_results = det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file)
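A hedged usage sketch tying these helpers together; the dataset object and the pickled results are assumed to come from a prior test run:

    import mmcv

    results = mmcv.load('results.pkl')        # list of per-image detections
    results2json(dataset, results, 'results.json')
    # coco may be a COCO instance or an annotation file path (see coco_eval)
    coco_eval('results.json', ['bbox'],
              'data/coco/annotations/instances_val2017.json')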